summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPatrick McCarty <patrick.mccarty@linux.intel.com>2013-04-12 10:40:25 -0700
committerPatrick McCarty <patrick.mccarty@linux.intel.com>2013-04-12 10:40:25 -0700
commit793eb3eb123664017100dfb1a5407f0da639a339 (patch)
tree2a6511443d115418007151f95f3a0fa8f5300974
downloadctags-793eb3eb123664017100dfb1a5407f0da639a339.tar.gz
ctags-793eb3eb123664017100dfb1a5407f0da639a339.tar.bz2
ctags-793eb3eb123664017100dfb1a5407f0da639a339.zip
Imported Upstream version 5.8upstream/5.8upstream
-rw-r--r--.indent.pro31
-rw-r--r--COPYING340
-rw-r--r--EXTENDING.html386
-rw-r--r--FAQ371
-rw-r--r--INSTALL215
-rw-r--r--INSTALL.oth73
-rw-r--r--MAINTAINERS88
-rw-r--r--Makefile.in222
-rw-r--r--NEWS871
-rw-r--r--README73
-rw-r--r--ant.c42
-rw-r--r--argproc.c505
-rw-r--r--args.c274
-rw-r--r--args.h63
-rw-r--r--asm.c387
-rw-r--r--asp.c328
-rw-r--r--awk.c81
-rw-r--r--basic.c203
-rw-r--r--beta.c321
-rw-r--r--c.c2932
-rw-r--r--cobol.c50
-rw-r--r--config.h.in277
-rwxr-xr-xconfigure7704
-rw-r--r--configure.ac532
-rw-r--r--ctags.11186
-rw-r--r--ctags.h28
-rw-r--r--ctags.html2087
-rw-r--r--ctags.spec40
-rw-r--r--debug.c113
-rw-r--r--debug.h70
-rw-r--r--descrip.mms68
-rw-r--r--dosbatch.c42
-rw-r--r--e_amiga.h24
-rw-r--r--e_djgpp.h47
-rw-r--r--e_mac.h143
-rw-r--r--e_msoft.h76
-rw-r--r--e_os2.h37
-rw-r--r--e_qdos.h34
-rw-r--r--e_riscos.h58
-rw-r--r--e_vms.h31
-rw-r--r--eiffel.c1352
-rw-r--r--entry.c847
-rw-r--r--entry.h103
-rw-r--r--erlang.c189
-rw-r--r--flex.c2243
-rw-r--r--fortran.c2197
-rw-r--r--general.h127
-rw-r--r--get.c669
-rw-r--r--get.h50
-rw-r--r--gnu_regex/.svn/all-wcprops47
-rw-r--r--gnu_regex/.svn/entries112
-rw-r--r--gnu_regex/.svn/text-base/README.txt.svn-base5
-rw-r--r--gnu_regex/.svn/text-base/regcomp.c.svn-base3818
-rw-r--r--gnu_regex/.svn/text-base/regex.c.svn-base74
-rw-r--r--gnu_regex/.svn/text-base/regex.h.svn-base575
-rw-r--r--gnu_regex/.svn/text-base/regex_internal.c.svn-base1713
-rw-r--r--gnu_regex/.svn/text-base/regex_internal.h.svn-base773
-rw-r--r--gnu_regex/.svn/text-base/regexec.c.svn-base4338
-rw-r--r--gnu_regex/README.txt5
-rw-r--r--gnu_regex/regcomp.c3818
-rw-r--r--gnu_regex/regex.c74
-rw-r--r--gnu_regex/regex.h575
-rw-r--r--gnu_regex/regex_internal.c1713
-rw-r--r--gnu_regex/regex_internal.h773
-rw-r--r--gnu_regex/regexec.c4338
-rw-r--r--html.c49
-rw-r--r--jscript.c1572
-rw-r--r--keyword.c258
-rw-r--r--keyword.h34
-rw-r--r--lisp.c139
-rw-r--r--lregex.c704
-rw-r--r--lua.c133
-rw-r--r--mac.c273
-rw-r--r--magic.diff21
-rw-r--r--main.c579
-rw-r--r--main.h32
-rw-r--r--maintainer.mak476
-rw-r--r--make.c217
-rw-r--r--matlab.c44
-rw-r--r--mk_bc3.mak46
-rw-r--r--mk_bc5.mak49
-rw-r--r--mk_djg.mak18
-rw-r--r--mk_manx.mak65
-rw-r--r--mk_mingw.mak31
-rw-r--r--mk_mpw.mak130
-rw-r--r--mk_mvc.mak40
-rw-r--r--mk_os2.mak104
-rw-r--r--mk_qdos.mak100
-rw-r--r--mk_sas.mak63
-rwxr-xr-xmkinstalldirs40
-rw-r--r--ocaml.c1842
-rw-r--r--options.c1829
-rw-r--r--options.h154
-rw-r--r--parse.c677
-rw-r--r--parse.h129
-rw-r--r--parsers.h63
-rw-r--r--pascal.c267
-rw-r--r--perl.c382
-rw-r--r--php.c237
-rw-r--r--python.c771
-rw-r--r--qdos.c106
-rw-r--r--read.c564
-rw-r--r--read.h116
-rw-r--r--readtags.c959
-rw-r--r--readtags.h252
-rw-r--r--rexx.c39
-rw-r--r--routines.c891
-rw-r--r--routines.h134
-rw-r--r--ruby.c408
-rw-r--r--scheme.c111
-rw-r--r--sh.c115
-rw-r--r--slang.c41
-rw-r--r--sml.c212
-rw-r--r--sort.c230
-rw-r--r--sort.h32
-rw-r--r--source.mak122
-rw-r--r--sql.c2112
-rw-r--r--strlist.c281
-rw-r--r--strlist.h54
-rw-r--r--tcl.c116
-rw-r--r--tex.c524
-rw-r--r--verilog.c340
-rw-r--r--vhdl.c835
-rw-r--r--vim.c636
-rw-r--r--vstring.c232
-rw-r--r--vstring.h85
-rw-r--r--yacc.c40
127 files changed, 72136 insertions, 0 deletions
diff --git a/.indent.pro b/.indent.pro
new file mode 100644
index 0000000..284f665
--- /dev/null
+++ b/.indent.pro
@@ -0,0 +1,31 @@
+--blank-before-sizeof
+--blank-lines-after-procedures
+--brace-indent0
+--braces-after-if-line
+--braces-on-struct-decl-line
+--break-after-boolean-operator
+--case-brace-indentation0
+--case-indentation0
+--comment-indentation0
+--continuation-indentation4
+--cuddle-do-while
+--declaration-comment-column0
+--declaration-indentation0
+--dont-break-function-decl-args
+--dont-break-procedure-type
+--dont-line-up-parentheses
+--honour-newlines
+--indent-level4
+--line-length80
+--paren-indentation4
+--preprocessor-indentation1
+--no-blank-lines-after-commas
+--space-after-cast
+--space-after-for
+--space-after-if
+--space-after-procedure-calls
+--space-after-while
+--space-special-semicolon
+--start-left-side-of-comments
+--struct-brace-indentation4
+--tab-size4
diff --git a/COPYING b/COPYING
new file mode 100644
index 0000000..60549be
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,340 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) 19yy <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) 19yy name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Library General
+Public License instead of this License.
diff --git a/EXTENDING.html b/EXTENDING.html
new file mode 100644
index 0000000..7a7b865
--- /dev/null
+++ b/EXTENDING.html
@@ -0,0 +1,386 @@
+<!-- $Id: EXTENDING.html 198 2002-09-04 01:17:32Z darren $ -->
+<html>
+<head>
+<title>Exuberant Ctags: Adding support for a new language</title>
+</head>
+<body>
+
+<h1>How to Add Support for a New Language to Exuberant Ctags</h1>
+
+<p>
+<b>Exuberant Ctags</b> has been designed to make it very easy to add your own
+custom language parser. As an exercise, let us assume that I want to add
+support for my new language, <em>Swine</em>, the successor to Perl (i.e. Perl
+before Swine &lt;wince&gt;). This language consists of simple definitions of
+labels in the form "<code>def my_label</code>". Let us now examine the various
+ways to do this.
+</p>
+
+<h2>Operational background</h2>
+
+<p>
+As ctags considers each file name, it tries to determine the language of the
+file by applying the following three tests in order: if the file extension has
+been mapped to a language, if the file name matches a shell pattern mapped to
+a language, and finally if the file is executable and its first line specifies
+an interpreter using the Unix-style "#!" specification (if supported on the
+platform). If a language was identified, the file is opened and then the
+appropriate language parser is called to operate on the currently open file.
+The parser parses through the file and whenever it finds some interesting
+token, calls a function to define a tag entry.
+</p>
+
+<h2>Creating a user-defined language</h2>
+
+<p>
+The quickest and easiest way to do this is by defining a new language using
+the program options. In order to have Swine support available every time I
+start ctags, I will place the following lines into the file
+<code>$HOME/.ctags</code>, which is read in every time ctags starts:
+
+<code>
+<pre>
+ --langdef=swine
+ --langmap=swine:.swn
+ --regex-swine=/^def[ \t]*([a-zA-Z0-9_]+)/\1/d,definition/
+</pre>
+</code>
+The first line defines the new language, the second maps a file extension to
+it, and the third defines a regular expression to identify a language
+definition and generate a tag file entry for it.
+</p>
+
+<h2>Integrating a new language parser</h2>
+
+<p>
+Now suppose that I want to truly integrate compiled-in support for Swine into
+ctags. First, I create a new module, <code>swine.c</code>, and add one
+externally visible function to it, <code>extern parserDefinition
+*SwineParser(void)</code>, and add its name to the table in
+<code>parsers.h</code>. The job of this parser definition function is to
+create an instance of the <code>parserDefinition</code> structure (using
+<code>parserNew()</code>) and populate it with information defining how files
+of this language are recognized, what kinds of tags it can locate, and the
+function used to invoke the parser on the currently open file.
+</p>
+
+<p>
+The structure <code>parserDefinition</code> allows assignment of the following
+fields:
+
+<code>
+<pre>
+ const char *name; /* name of language */
+ kindOption *kinds; /* tag kinds handled by parser */
+ unsigned int kindCount; /* size of `kinds' list */
+ const char *const *extensions; /* list of default extensions */
+ const char *const *patterns; /* list of default file name patterns */
+ parserInitialize initialize; /* initialization routine, if needed */
+ simpleParser parser; /* simple parser (common case) */
+ rescanParser parser2; /* rescanning parser (unusual case) */
+ boolean regex; /* is this a regex parser? */
+</pre>
+</code>
+</p>
+
+<p>
+The <code>name</code> field must be set to a non-empty string. Also, unless
+<code>regex</code> is set true (see below), either <code>parser</code> or
+<code>parser2</code> must be set to point to a parsing routine which will
+generate the tag entries. All other fields are optional.
+
+<p>
+Now all that is left is to implement the parser. In order to do its job, the
+parser should read the file stream using one of the two I/O interfaces:
+either the character-oriented <code>fileGetc()</code>, or the line-oriented
+<code>fileReadLine()</code>. When using <code>fileGetc()</code>, the parser
+can put back a character using <code>fileUngetc()</code>. How our Swine parser
+actually parses the contents of the file is entirely up to the writer of the
+parser--it can be as crude or elegant as desired. You will note a variety of
+examples from the most complex (c.c) to the simplest (make.c).
+</p>
+
+<p>
+When the Swine parser identifies an interesting token for which it wants to
+add a tag to the tag file, it should create a <code>tagEntryInfo</code>
+structure and initialize it by calling <code>initTagEntry()</code>, which
+initializes defaults and fills information about the current line number and
+the file position of the beginning of the line. After filling in information
+defining the current entry (and possibly overriding the file position or other
+defaults), the parser passes this structure to <code>makeTagEntry()</code>.
+</p>
+
+<p>
+Instead of writing a character-oriented parser, it may be possible to specify
+regular expressions which define the tags. In this case, instead of defining a
+parsing function, <code>SwineParser()</code> sets <code>regex</code> to true,
+and points <code>initialize</code> to a function which calls
+<code>addTagRegex()</code> to install the regular expressions which define its
+tags. The regular expressions thus installed are compared against each line
+of the input file and generate a specified tag when matched. It is usually
+much easier to write a regex-based parser, although they can be slower (one
+parser example was 4 times slower). Whether the speed difference matters to
+you depends upon how much code you have to parse. It is probably a good
+strategy to implement a regex-based parser first, and if it is too slow for
+you, then invest the time and effort to write a character-based parser.
+</p>
+
+<p>
+A regex-based parser is inherently line-oriented (i.e. the entire tag must be
+recognizable from looking at a single line) and context-insensitive (i.e. the
+generation of the tag is entirely based upon when the regular expression
+matches a single line). However, a regex-based callback mechanism is also
+available, installed via the function <code>addCallbackRegex()</code>. This
+allows a specified function to be invoked whenever a specific regular
+expression is matched. This allows a character-oriented parser to operate
+based upon context of what happened on a previous line (e.g. the start or end
+of a multi-line comment). Note that regex callbacks are called just before the
+first character of that line is read via either <code>fileGetc()</code> or
+<code>fileReadLine()</code>. The effect of this is that before either of
+these routines return, a callback routine may be invoked because the line
+matched a regex callback. A callback function to be installed is defined by
+these types:
+
+<code>
+<pre>
+ typedef void (*regexCallback) (const char *line, const regexMatch *matches, unsigned int count);
+
+ typedef struct {
+ size_t start; /* character index in line where match starts */
+ size_t length; /* length of match */
+ } regexMatch;
+</pre>
+</code>
+</p>
+
+<p>
+The callback function is passed the line matching the regular expression and
+an array of <code>count</code> structures defining the subexpression matches
+of the regular expression, starting from \0 (the entire line).
+</p>
+
+<p>
+Lastly, be sure to add the name of the file containing your parser (e.g.
+swine.c) to the macro <code>SOURCES</code> in the file <code>source.mak</code>
+and an entry for the object file to the macro <code>OBJECTS</code> in the same
+file, so that your new module will be compiled into the program.
+</p>
+
+<p>
+This is all there is to it. All other details are specific to the parser and
+how it wants to do its job. There are some support functions which can take
+care of some commonly needed parsing tasks, such as keyword table lookups (see
+keyword.c), which you can make use of if desired (examples of its use can be
+found in c.c, eiffel.c, and fortran.c). Almost everything is already taken care
+of automatically for you by the infrastructure. Writing the actual parsing
+algorithm is the hardest part, but is not constrained by any need to conform
+to anything in ctags other than that mentioned above.
+</p>
+
+<p>
+There are several different approaches used in the parsers inside <b>Exuberant
+Ctags</b> and you can browse through these as examples of how to go about
+creating your own.
+</p>
+
+<h2>Examples</h2>
+
+<p>
+Below you will find several example parsers demonstrating most of the
+facilities available. These include three alternative implementations
+of a Swine parser, which generate tags for lines beginning with
+"<CODE>def</CODE>" followed by some name.
+</p>
+
+<code>
+<pre>
+/***************************************************************************
+ * swine.c
+ * Character-based parser for Swine definitions
+ **************************************************************************/
+/* INCLUDE FILES */
+#include "general.h" /* always include first */
+
+#include &lt;string.h&gt; /* to declare strxxx() functions */
+#include &lt;ctype.h&gt; /* to define isxxx() macros */
+
+#include "parse.h" /* always include */
+#include "read.h" /* to define fileReadLine() */
+
+/* DATA DEFINITIONS */
+typedef enum eSwineKinds {
+ K_DEFINE
+} swineKind;
+
+static kindOption SwineKinds [] = {
+ { TRUE, 'd', "definition", "pig definition" }
+};
+
+/* FUNCTION DEFINITIONS */
+
+static void findSwineTags (void)
+{
+ vString *name = vStringNew ();
+ const unsigned char *line;
+
+ while ((line = fileReadLine ()) != NULL)
+ {
+ /* Look for a line beginning with "def" followed by name */
+ if (strncmp ((const char*) line, "def", (size_t) 3) == 0 &amp;&amp;
+ isspace ((int) line [3]))
+ {
+ const unsigned char *cp = line + 4;
+ while (isspace ((int) *cp))
+ ++cp;
+ while (isalnum ((int) *cp) || *cp == '_')
+ {
+ vStringPut (name, (int) *cp);
+ ++cp;
+ }
+ vStringTerminate (name);
+ makeSimpleTag (name, SwineKinds, K_DEFINE);
+ vStringClear (name);
+ }
+ }
+ vStringDelete (name);
+}
+
+/* Create parser definition structure */
+extern parserDefinition* SwineParser (void)
+{
+ static const char *const extensions [] = { "swn", NULL };
+ parserDefinition* def = parserNew ("Swine");
+ def-&gt;kinds = SwineKinds;
+ def-&gt;kindCount = KIND_COUNT (SwineKinds);
+ def-&gt;extensions = extensions;
+ def-&gt;parser = findSwineTags;
+ return def;
+}
+</pre>
+</code>
+
+<p>
+<pre>
+<code>
+/***************************************************************************
+ * swine.c
+ * Regex-based parser for Swine
+ **************************************************************************/
+/* INCLUDE FILES */
+#include "general.h" /* always include first */
+#include "parse.h" /* always include */
+
+/* FUNCTION DEFINITIONS */
+
+static void installSwineRegex (const langType language)
+{
+ addTagRegex (language, "^def[ \t]*([a-zA-Z0-9_]+)", "\\1", "d,definition", NULL);
+}
+
+/* Create parser definition structure */
+extern parserDefinition* SwineParser (void)
+{
+ static const char *const extensions [] = { "swn", NULL };
+ parserDefinition* def = parserNew ("Swine");
+ def-&gt;extensions = extensions;
+ def-&gt;initialize = installSwineRegex;
+ def-&gt;regex = TRUE;
+ return def;
+}
+</code>
+</pre>
+
+<p>
+<pre>
+/***************************************************************************
+ * swine.c
+ * Regex callback-based parser for Swine definitions
+ **************************************************************************/
+/* INCLUDE FILES */
+#include "general.h" /* always include first */
+
+#include "parse.h" /* always include */
+#include "read.h" /* to define fileReadLine() */
+
+/* DATA DEFINITIONS */
+typedef enum eSwineKinds {
+ K_DEFINE
+} swineKind;
+
+static kindOption SwineKinds [] = {
+ { TRUE, 'd', "definition", "pig definition" }
+};
+
+/* FUNCTION DEFINITIONS */
+
+static void definition (const char *const line, const regexMatch *const matches,
+ const unsigned int count)
+{
+ if (count &gt; 1) /* should always be true per regex */
+ {
+ vString *const name = vStringNew ();
+ vStringNCopyS (name, line + matches [1].start, matches [1].length);
+ makeSimpleTag (name, SwineKinds, K_DEFINE);
+ vStringDelete (name);
+ }
+}
+
+static void findSwineTags (void)
+{
+ while (fileReadLine () != NULL)
+ ; /* don't need to do anything here since callback is sufficient */
+}
+
+static void installSwine (const langType language)
+{
+ addCallbackRegex (language, "^def[ \t]+([a-zA-Z0-9_]+)", NULL, definition);
+}
+
+/* Create parser definition structure */
+extern parserDefinition* SwineParser (void)
+{
+ static const char *const extensions [] = { "swn", NULL };
+ parserDefinition* def = parserNew ("Swine");
+ def-&gt;kinds = SwineKinds;
+ def-&gt;kindCount = KIND_COUNT (SwineKinds);
+ def-&gt;extensions = extensions;
+ def-&gt;parser = findSwineTags;
+ def-&gt;initialize = installSwine;
+ return def;
+}
+</pre>
+
+<p>
+<pre>
+/***************************************************************************
+ * make.c
+ * Regex-based parser for makefile macros
+ **************************************************************************/
+/* INCLUDE FILES */
+#include "general.h" /* always include first */
+#include "parse.h" /* always include */
+
+/* FUNCTION DEFINITIONS */
+
+static void installMakefileRegex (const langType language)
+{
+ addTagRegex (language, "(^|[ \t])([A-Z0-9_]+)[ \t]*:?=", "\\2", "m,macro", "i");
+}
+
+/* Create parser definition structure */
+extern parserDefinition* MakefileParser (void)
+{
+ static const char *const patterns [] = { "[Mm]akefile", NULL };
+ static const char *const extensions [] = { "mak", NULL };
+ parserDefinition* const def = parserNew ("Makefile");
+ def-&gt;patterns = patterns;
+ def-&gt;extensions = extensions;
+ def-&gt;initialize = installMakefileRegex;
+ def-&gt;regex = TRUE;
+ return def;
+}
+</pre>
+
+</body>
+</html>
diff --git a/FAQ b/FAQ
new file mode 100644
index 0000000..b62d4ec
--- /dev/null
+++ b/FAQ
@@ -0,0 +1,371 @@
+Frequently Asked Questions
+==========================
+
+ * 1. Why do you call it "Exuberant Ctags"?
+ * 2. Why doesn't my editor work with these tag files?
+ * 3. What are these strange bits of text beginning with ;"?
+ * 4. Why doesn't XEmacs' Speedbar module work with Exuberant Ctags?
+ * 5. Why doesn't Xemacs correctly locate the tag in the source file?
+ * 6. Why doesn't NEdit correctly locate the tag in the source file?
+ * 7. Why can't I jump to "class::member"?
+ * 8. How can I avoid having to specify my favorite option every time?
+ * 9. Why do I end up on the wrong line when I jump to a tag?
+ * 10. How do I jump to the tag I want instead of the wrong one by the
+ same name?
+ * 11. What is "Vim"?
+ * 12. How can I locate all references to a specific function or variable?
+ * 13. Why does appending tags to a tag file take so long?
+ * 14. How do I get regex support for Win32?
+ * 15. How should I set up tag files for a multi-level directory hierarchy?
+
+ ----------------------------------------------------------------------
+1. Why do you call it "Exuberant Ctags"?
+
+Because one of the meanings of the word "exuberant" is:
+
+ exuberant : produced in extreme abundance : PLENTIFUL syn see PROFUSE
+
+Compare the tag file produced by Exuberant Ctags with that produced by any
+other ctags and you will see how appropriate the name is.
+
+ ----------------------------------------------------------------------
+2. Why doesn't my editor work with these tag files?
+
+3. What are these strange bits of text beginning with ;" which follow
+ many of the lines in the tag file?
+
+These are "extension flags". They are added in order to provide extra
+information about the tag that may be utilized by the editor in order to
+more intelligently handle tags. They are appended to the EX command part of
+the tag line in a manner that provides backwards compatibility with existing
+implementations of the Vi editor. The semicolon is an EX command separator
+and the double quote begins an EX comment. Thus, the extension flags appear
+as an EX comment and should be ignored by the editor when it processes the
+EX command.
+
+Some non-vi editors, however, implement only the bare minimum of EX commands
+in order to process the search command or line number in the third field of
+the tag file. If you encounter this problem, use the option "--format=1" to
+generate a tag file without these extensions (remember that you can set the
+CTAGS environment variable to any default arguments you wish to supply). Then
+ask the supplier of your editor to implement handling of this feature of EX
+commands.
+
+ ----------------------------------------------------------------------
+4. Why doesn't XEmacs' Speedbar module work with Exuberant Ctags?
+
+The default command line switches used by XEmacs for "etags" are not
+compatible with Exuberant Ctags options. By default, Exuberant Ctags installs
+a symbolic link, "etags", pointing to the ctags executable. When Exuberant
+Ctags is started with the name "etags", it produces Emacs-style tag files by
+default.
+
+To fix this, add the following lines to your .emacs file, replacing the path
+to "etags" with the path where the symbolic link was installed.
+
+(autoload 'speedbar "speedbar")
+(setq speedbar-fetch-etags-command "/usr/local/bin/etags"
+ speedbar-fetch-etags-arguments '("-f" "-"))
+
+ ----------------------------------------------------------------------
+5. Why doesn't Xemacs correctly locate the tag in the source file?
+
+This has been observed with version 20.3. It seems that when Xemacs searches
+for a tag, it searches using the tag name instead of the search string located
+in the TAGS file. This is a bug in Xemacs and does not occur in the GNU
+version of Emacs.
+
+ ----------------------------------------------------------------------
+6. Why doesn't NEdit correctly locate the tag in the source file?
+
+Versions of NEdit prior to 5.1 did not support the extended tag file format
+generated by Exuberant Ctags by default. Either upgrade to version 5.1 or
+specify the option "--format=1" when running ctags to output the old tag file
+format.
+
+ ----------------------------------------------------------------------
+7. Why can't I jump to "class::member"?
+
+Because, by default, ctags only generates tags for the separate identifiers
+found in the source files. If you specify the --extra=+q option, then
+ctags will also generate a second, class-qualified tag for each class member
+(data and function/method) in the form class::member for C++, and in the form
+class.method for Eiffel and Java.
+
+ ----------------------------------------------------------------------
+8. How can I avoid having to specify my favorite option every time?
+
+Either by setting the environment variable CTAGS to your custom
+options, or putting them into a .ctags file in your home directory.
+
+ ----------------------------------------------------------------------
+9. Why do I end up on the wrong line when I jump to a tag?
+
+By default, ctags encodes the line number in the file where macro (#define)
+tags are found. This was done to remain compatible with the original UNIX
+version of ctags. If you change the file containing the tag without
+rebuilding the tag file, the location of tag in the tag file may no longer
+match the current location.
+
+In order to avoid this problem, you can specify the option "--excmd=p",
+which causes ctags to use a search pattern to locate macro tags. I have
+never uncovered the reason why the original UNIX ctags used line numbers
+exclusively for macro tags, but have so far resisted changing the default
+behaviour of Exuberant Ctags to behave differently.
+
+ ----------------------------------------------------------------------
+10. How do I jump to the tag I want instead of the wrong one by the
+ same name?
+
+A tag file is simply a list of tag names and where to find them. If there
+are duplicate entries, you often end up going to the wrong one because the
+tag file is sorted and your editor locates the first one in the tag file.
+
+Standard Vi provides no facilities to alter this behavior. However, Vim
+has some nice features to minimize this problem, primarily by examining all
+matches and choosing the best one under the circumstances. Vim also provides
+commands which allow for selection of the desired matching tag.
+
+ ----------------------------------------------------------------------
+11. What is "Vim"?
+
+Vim is a vi-compatible editor available as source and compilable for any
+platform. Yeah, I know the first reaction is to shy away from this. But you
+will never regret getting it, and you will become greatly attached to its
+features, which you can learn gradually. I would be willing to say that it
+is the best vi-clone available within 4 light-years of Alpha Centauri. It
+works (nearly) exactly like standard vi, but provides some incredibly useful
+extensions (some of which I have participated in designing with the author).
+Most Linux distributions have adopted Vim as their standard vi.
+
+ ----------------------------------------------------------------------
+12. How can I locate all references to a specific function or variable?
+
+There are several packages already available which provide this capability.
+Namely, these are: GLOBAL source code tag system, GNU id-utils, cscope,
+and cflow. As of this writing, they can be found in the following locations:
+
+GLOBAL: http://www.gnu.org/software/global
+id-utils: http://www.gnu.org/software/idutils/idutils.html
+cscope: http://cscope.sourceforge.net
+cflow: ftp://www.ibiblio.org/pub/Linux/devel/lang/c
+
+ ----------------------------------------------------------------------
+13. Why does appending tags to a tag file take so long?
+
+Sometimes, in an attempt to build a global tag file for all source files in
+a large source tree of many directories, someone will make an attempt to run
+ctags in append (-a) mode on every directory in the hierarchy. Each time
+ctags is invoked, its default behavior is to sort the tag file once the tags
+for that execution have been added. As the cumulative tag file grows, the sort
+time increases arithmetically.
+
+The best way to avoid this problem (and the most efficient) is to make
+use of the --recurse (or -R) option of ctags by executing the following
+command in the root of the directory hierarchy (thus running ctags only once):
+
+ ctags -R
+
+If you really insist on running ctags separately on each directory, you can
+avoid the sort pass each time by specifying the option "--sort=no". Once the
+tag file is completely built, use the sort command to manually sort the
+final tag file, or let the final invocation of ctags sort the file.
+
+ ----------------------------------------------------------------------
+14. How do I get regex support for Win32?
+
+You need to download the GNU regex package for Win32 from the following
+location:
+
+ http://people.delphiforums.com/gjc/gnu_regex.html
+
+Then point the makefile macro, REGEX_DIR, found in mk_mvc.mak and mk_bc5.mak,
+to the directory created by extracting this archive.
+
+ ----------------------------------------------------------------------
+15. How should I set up tag files for a multi-level directory hierarchy?
+
+There are a few ways of approaching this:
+
+1. A local tag file in each directory containing only the tags for source
+ files in that directory.
+
+2. One single big, global tag file present in the root directory of your
+ hierarchy, containing all tags present in all source files in the
+ hierarchy.
+
+3. A local tag file in each directory containing only the tags for source
+ files in that directory, in addition to one single global tag file
+ present in the root directory of your hierarchy, containing all
+ non-static tags present in all source files in the hierarchy.
+
+4. A local tag file in each directory of the hierarchy, each one
+ containing all tags present in source files in that directory and all
+ non-static tags in every directory below it (note that this implies
+ also having one big tag file in the root directory of the hierarchy).
+
+Each of these approaches has its own set of advantages and disadvantages,
+depending upon your particular conditions. Which approach is deemed best
+depends upon the following factors:
+
+A. The ability of your editor to use multiple tag files.
+
+ If your editor cannot make use of multiple tag files (original vi
+ implementations could not), then one large tag file is the only way to
+ go if you ever desire to jump to tags located in other directories. If
+ you never need to jump to tags in another directory (i.e. the source
+ in each directory is entirely self-contained), then a local tag file
+ in each directory will fit your needs.
+
+B. The time it takes for your editor to look up a tag in the tag file.
+
+ The significance of this factor depends upon the size of your source
+ tree and on whether the source files are located on a local or remote
+ file system. For source and tag files located on a local file system,
+ looking up a tag is not as big a hit as one might first imagine, since
+ vi implementations typically perform a binary search on a sorted tag
+ file. This may or may not be true for the editor you use. For files
+ located on a remote file system, reading a large file is an expensive
+ operation.
+
+C. Whether or not you expect the source code to change and the time it
+ takes to rebuild a tag file to account for changes to the source code.
+
+ While Exuberant Ctags is particularly fast in scanning source code
+ (around 1-2 MB/sec), a large project may still result in objectionable
+ delays if one wishes to keep their tag file(s) up to date on a
+ frequent basis, or if the files are located on a remote file system.
+
+D. The presence of duplicate tags in the source code and the ability to
+ handle them.
+
+ The impact of this factor is influenced by the following three issues:
+
+ 1. How common are duplicate tags in your project?
+
+ 2. Does your editor provide any facilities for dealing with duplicate
+ tags?
+
+ While standard vi does not, many modern vi implementations, such
+ as Vim, have good facilities for selecting the desired match from
+ the list of duplicates. If your editor does not support duplicate
+ tags, then it will typically send you to only one of them, whether
+ or not that is the one you wanted (and not even notifying you that
+ there are other potential matches).
+
+ 3. What is the significance of duplicate tags?
+
+ For example, if you have two tags of the same name from entirely
+ isolated software components, jumping first to the match found
+ in component B while working in component A may be entirely
+ misleading, distracting or inconvenient (to keep having to choose
+ which one if your editor provides you with a list of matches).
+ However, if you have two tags of the same name for parallel builds
+ (say two initialization routines for different hosts), you may
+ always want to specify which one you want.
+
+Of the approaches listed above, I tend to favor Approach 3. My editor of
+choice is Vim, which provides a rich set of features for handling multiple
+tag files, which partly influences my choice. If you are working with
+source files on a remote file system, then I would recommend either
+Approach 3 or Approach 4, depending upon the hit when reading the global
+tag file.
+
+The advantages of Approach 3 are many (assuming that your editor has
+the ability to support both multiple tag files and duplicate tags). All
+lookups of tags located in the current directory are fast and the local
+tag file can be quickly and easily regenerated in one second or less
+(I have even mapped a keystroke to do this easily). A lookup of a
+(necessarily non-static) tag found in another directory fails a lookup in
+the local tag file, but is found in the global tag file, which satisfies
+all cross-directory lookups. The global tag file can be automatically
+regenerated periodically with a cron job (and perhaps the local tag files
+also).
+
+Now I give an example of how you would implement Approach 3. Means of
+implementing the other approaches can be performed in a similar manner.
+
+Here is a visual representation of an example directory hierarchy:
+
+project
+ `-----misccomp
+ | `...
+ `-----sysint
+ `-----client
+ | `-----hdrs
+ | `-----lib
+ | `-----src
+ | `-----test
+ `-----common
+ | `-----hdrs
+ | `-----lib
+ | `-----src
+ | `-----test
+ `-----server
+ `-----hdrs
+ `-----lib
+ `-----src
+ `-----test
+
+Here is a recommended solution (conceptually) to build the tag files:
+
+1. Within each of the leaf nodes (i.e. hdrs, lib, src, test) build a tag
+ file using "ctags *.[ch]". This can easily be done for the whole
+ hierarchy by making a shell script, call it "dirtags", containing the
+ following lines:
+
+ #!/bin/sh
+ cd $1
+ ctags *
+
+ Now execute the following command:
+
+ find * -type d -exec dirtags {} \;
+
+ These tag files are trivial (and extremely quick) to rebuild while
+ making changes within a directory. The following Vim key mapping is
+ quite useful to rebuild the tag file in the directory of the current
+ source file:
+
+ :nmap ,t :!(cd %:p:h;ctags *.[ch])&<CR><CR>
+
+2. Build the global tag file:
+
+ cd ~/project
+ ctags --file-scope=no -R
+
+ thus constructing a tag file containing only non-static tags for all
+ source files in all descendent directories.
+
+3. Configure your editor to read the local tag file first, then consult
+ the global tag file when not found in the local tag file. In Vim,
+ this is done as follows:
+
+ :set tags=./tags,tags,~/project/tags
+
+If you wish to implement Approach 4, you would need to replace the
+"dirtags" script of step 1 with the following:
+
+ #!/bin/sh
+ cd $1
+ ctags *
+ # Now append the non-static tags from descendent directories
+ find * -type d -prune -print | ctags -aR --file-scope=no -L-
+
+And replace the configuration of step 3 with this:
+
+ :set tags=./tags,./../tags,./../../tags,./../../../tags,tags
+
+As a caveat, it should be noted that step 2 builds a global tag file whose
+file names will be relative to the directory in which the global tag file
+is being built. This takes advantage of the Vim 'tagrelative' option,
+which causes the path to be interpreted as relative to the location of the
+tag file instead of the current directory. For standard vi, which always
+interprets the paths as relative to the current directory, we need to
+build the global tag file with absolute path names. This can be
+accomplished by replacing step 2 with the following:
+
+ cd ~/project
+ ctags --file-scope=no -R `pwd`
+
+--
diff --git a/INSTALL b/INSTALL
new file mode 100644
index 0000000..9f059c2
--- /dev/null
+++ b/INSTALL
@@ -0,0 +1,215 @@
+Custom Installation
+===================
+
+These installation instructions are for Unix or Unix-like platforms (or at
+least, those platforms which are able to run a Bourne shell script). If you
+are attempting to install Exuberant Ctags on some other platform, see the file
+INSTALL.oth.
+
+If you are not familiar with using the configure scripts generated by GNU
+autoconf, read the "Basic Installation" section below; then return here.
+The configure script in this package supports the following custom options:
+
+ --disable-etags By default, "make install" will install one
+ binary, "ctags", one man page, "ctags.1", and
+ create links to these two files by the names
+ "etags" and "etags.1". If you do not want to
+ install the "etags" links, use this option.
+
+ --disable-extended-format Ctags now appends "extension flags" to the
+ end of each tag entry in a manner which is
+ backwards compatible with original Vi implementations
+ (they are placed into an EX comment). This
+ can be disabled via use of the ctags --format
+ option. This configure option changes the
+ default behavior of ctags to disable use of
+ these extension flags (i.e. use the original
+ tag file format).
+
+ --disable-external-sort Use this option to force use of an internal
+ sort algorithm. On UNIX-like systems, ctags
+ uses the sort utility of the operating system
+ by default because it is more memory efficient.
+
+ --enable-custom-config=FILE Defines a custom option configuration file to
+ establish site-wide defaults. Ctags will read
+ the following files at startup for options:
+ /etc/ctags.conf, /usr/local/etc/ctags.conf,
+ $HOME/.ctags, and .ctags. If you need a
+ different file, set this option to the full
+ path name of the file you want to be read, and
+ it will be read immediately before reading
+ $HOME/.ctags.
+
+ --enable-macro-patterns By default, line numbers are used in the tag
+ file for #define objects, in order to remain
+ compatible with the original UNIX ctags. This
+ option will make the default use patterns.
+
+ --enable-maintainer-mode Creates a special GNU-specific version of the
+ makefile which is used to maintain Exuberant
+ Ctags.
+
+ --enable-tmpdir=DIR When the library function mkstemp() is
+ available, this option allows specifying the
+ default directory to use for temporary files
+ generated by ctags. This default can be
+ changed at run time by setting the environment
+ variable TMPDIR.
+
+If you wish to change the name of the installed files, edit the makefile
+produced by the configure script ("Makefile") before performing the "make
+install" step. There are two lines at the top of the file where the names of
+the installed files may be customized.
+
+
+Basic Installation
+==================
+
+ These are generic installation instructions.
+
+ The `configure' shell script attempts to guess correct values for
+various system-dependent variables used during compilation. It uses
+those values to create a `Makefile' in each directory of the package.
+It may also create one or more `.h' files containing system-dependent
+definitions. Finally, it creates a shell script `config.status' that
+you can run in the future to recreate the current configuration, a file
+`config.cache' that saves the results of its tests to speed up
+reconfiguring, and a file `config.log' containing compiler output
+(useful mainly for debugging `configure').
+
+ If you need to do unusual things to compile the package, please try
+to figure out how `configure' could check whether to do them, and mail
+diffs or instructions to the address given in the `README' so they can
+be considered for the next release. If at some point `config.cache'
+contains results you don't want to keep, you may remove or edit it.
+
+ The file `configure.ac' is used to create `configure' by a program
+called `autoconf'. You only need `configure.ac' if you want to change
+it or regenerate `configure' using a newer version of `autoconf'.
+
+The simplest way to compile this package is:
+
+ 1. `cd' to the directory containing the package's source code and type
+ `./configure' to configure the package for your system. If you're
+ using `csh' on an old version of System V, you might need to type
+ `sh ./configure' instead to prevent `csh' from trying to execute
+ `configure' itself.
+
+ Running `configure' takes a while. While running, it prints some
+ messages telling which features it is checking for.
+
+ 2. Type `make' to compile the package.
+
+ 3. Optionally, type `make check' to run any self-tests that come with
+ the package.
+
+ 4. Type `make install' to install the programs and any data files and
+ documentation.
+
+ 5. You can remove the program binaries and object files from the
+ source code directory by typing `make clean'. To also remove the
+ files that `configure' created (so you can compile the package for
+ a different kind of computer), type `make distclean'.
+
+Compilers and Options
+=====================
+
+ Some systems require unusual options for compilation or linking that
+the `configure' script does not know about. You can give `configure'
+initial values for variables by setting them in the environment. Using
+a Bourne-compatible shell, you can do that on the command line like
+this:
+ CC=c89 CFLAGS=-O2 LIBS=-lposix ./configure
+
+Or on systems that have the `env' program, you can do it like this:
+ env CPPFLAGS=-I/usr/local/include LDFLAGS=-s ./configure
+
+Compiling For Multiple Architectures
+====================================
+
+ You can compile the package for more than one kind of computer at the
+same time, by placing the object files for each architecture in their
+own directory. To do this, you must use a version of `make' that
+supports the `VPATH' variable, such as GNU `make'. `cd' to the
+directory where you want the object files and executables to go and run
+the `configure' script. `configure' automatically checks for the
+source code in the directory that `configure' is in and in `..'.
+
+ If you have to use a `make' that does not support the `VPATH'
+variable, you have to compile the package for one architecture at a time
+in the source code directory. After you have installed the package for
+one architecture, use `make distclean' before reconfiguring for another
+architecture.
+
+Installation Names
+==================
+
+ By default, `make install' will install the package's files in
+`/usr/local/bin', `/usr/local/man', etc. You can specify an
+installation prefix other than `/usr/local' by giving `configure' the
+option `--prefix=PATH'.
+
+ You can specify separate installation prefixes for
+architecture-specific files and architecture-independent files. If you
+give `configure' the option `--exec-prefix=PATH', the package will use
+PATH as the prefix for installing programs and libraries.
+Documentation and other data files will still use the regular prefix.
+
+ In addition, if you use an unusual directory layout you can give
+options like `--bindir=PATH' to specify different values for particular
+kinds of files. Run `configure --help' for a list of the directories
+you can set and what kinds of files go in them.
+
+Optional Features
+=================
+
+ Some packages pay attention to `--enable-FEATURE' options to
+`configure', where FEATURE indicates an optional part of the package.
+They may also pay attention to `--with-PACKAGE' options, where PACKAGE
+is something like `gnu-as' or `x' (for the X Window System). The
+`README' should mention any `--enable-' and `--with-' options that the
+package recognizes.
+
+Sharing Defaults
+================
+
+ If you want to set default values for `configure' scripts to share,
+you can create a site shell script called `config.site' that gives
+default values for variables like `CC', `cache_file', and `prefix'.
+`configure' looks for `PREFIX/share/config.site' if it exists, then
+`PREFIX/etc/config.site' if it exists. Or, you can set the
+`CONFIG_SITE' environment variable to the location of the site script.
+A warning: not all `configure' scripts look for a site script.
+
+Operation Controls
+==================
+
+ `configure' recognizes the following options to control how it
+operates.
+
+`--cache-file=FILE'
+ Use and save the results of the tests in FILE instead of
+ `./config.cache'. Set FILE to `/dev/null' to disable caching, for
+ debugging `configure'.
+
+`--help'
+ Print a summary of the options to `configure', and exit.
+
+`--quiet'
+`--silent'
+`-q'
+ Do not print messages saying which checks are being made. To
+ suppress all normal output, redirect it to `/dev/null' (any error
+ messages will still be shown).
+
+`--srcdir=DIR'
+ Look for the package's source code in directory DIR. Usually
+ `configure' can determine that directory automatically.
+
+`--version'
+ Print the version of Autoconf used to generate the `configure'
+ script, and exit.
+
+`configure' also accepts some other, not widely useful, options.
+
diff --git a/INSTALL.oth b/INSTALL.oth
new file mode 100644
index 0000000..8577c7e
--- /dev/null
+++ b/INSTALL.oth
@@ -0,0 +1,73 @@
+If you are attempting to install Exuberant Ctags on a Unix-like platform
+(one that can at least run a Bourne shell script) see the file INSTALL.
+
+Installation Notes
+==================
+
+For non-Unix platforms, simple makefiles are provided:
+
+ descrip.mms For VMS using either DEC C or VAX C
+ mk_bc3.mak For MSDOS using Borland C/C++ 3.x
+ mk_bc5.mak For Win32 using Borland C++ 5.5
+ mk_djg.mak For MSDOS using DJGPP Gnu GCC (better to follow Unix install)
+ mk_manx.mak For Amiga using Aztec/Manx C 5.0
+ mk_mingw.mak For Win32 using MinGW
+ mk_mpw.mak For Macintosh using MPW
+ mk_mvc.mak For Win32 using Microsoft Visual C++
+ mk_os2.mak For OS/2 using GCC (EMX)
+ mk_qdos.mak For QDOS using C68
+ mk_riscos.mak For RISC OS using the GCC SDK <http://hard-mofo.dsvr.net>
+ mk_sas.mak For Amiga using SAS/C
+
+
+Special Notes
+=============
+
+DJGPP:
+------
+
+ It is better to follow the standard Unix install on DJGPP, but this requires
+ that you use BASH and requires a fairly complete installation of DJGPP
+ packages. You can use mk_djg.mak if you can't run configure.
+
+Macintosh with MPW:
+-------------------
+
+ To build Ctags on MPW you will have to have the Metrowerks compilers
+ or you will have to edit the makefiles yourself.
+
+ Since the makefile for MPW is called mk_mpw.mak you will have to give
+ the command: `Make -f mk_mpw.mak CTags` and then select the output
+ and execute it. Alternatively you could rename this makefile to
+ CTags.make and issue the command `Build CTags`.
+
+ If the build process goes wrong make sure the line endings for the
+ makefile are set to MacOS (CR instead of LF).
+
+ This version of Ctags only accepts and emits unix style paths. This was
+ done since it was easier to implement (few changes in main source files)
+ and since I ported Ctags to MPW to use with Pepper which also works with
+ unix style paths internally. Besides, since we're now moving to MacOS X,
+ Mac programmers are getting used to unix style paths anyway.
+
+ Example, suppose you have a project using PowerPlant which is located in
+ 'HD20:tools:PowerPlant:' and you want a tags file for all powerplant
+ sources and your project's sources. Go to the root directory of your
+ project and type:
+
+ CTags -R . '/HD20/tools/PowerPlant/'
+
+ The '.' in this command means current directory. You will also have to
+ put quotes around the full unix style path since / has a special meaning
+ in MPW.
+
+RISC OS:
+--------
+
+ Regex support on RISC OS is available when compiled to use the RISC OS port
+ of the GNU regex library, which can be obtained from:
+
+ <http://www.sbellon.de/software.html>
+
+ Using 'Set RexEx$Path <Obey$Dir>.' in the regex-0/12 directory will ensure
+ it can be found.
diff --git a/MAINTAINERS b/MAINTAINERS
new file mode 100644
index 0000000..d46b50b
--- /dev/null
+++ b/MAINTAINERS
@@ -0,0 +1,88 @@
+The following individuals are registered as developers for the maintenance of
+Exuberant Ctags. They are listed by their SourceForge username and full name.
+To send email to any one of them, send it to <username@users.sourceforge.net>.
+
+Ctags SourceForge Full
+Parser username Name
+---------- ----------- -----
+Ant dfishburn David Fishburn
+AWK jkoshy Joseph Koshy
+Basic elias Elias Pschernig
+C# elliotth Elliott Hughes
+DosBatch dfishburn David Fishburn
+Flex dfishburn David Fishburn
+Java elliotth Elliott Hughes
+JavaScript dfishburn David Fishburn
+MATLAB dfishburn David Fishburn
+OCaml vberthoux Vincent Berthoux
+Perl perlguy0 Dmitri Tikhonov
+PHP jafl John Lindal
+Python elias Elias Pschernig
+Ruby elliotth Elliott Hughes
+SML jkoshy Joseph Koshy
+SQL dfishburn David Fishburn
+TeX dfishburn David Fishburn
+Vim dfishburn David Fishburn
+All else dhiebert Darren Hiebert
+
+How To Build & Test Like A Maintainer
+=====================================
+
+Prerequisites
+-------------
+
+ Debian/Ubuntu:
+
+ sudo apt-get install build-essential subversion autoconf
+
+ Mac OS:
+
+ Install the Xcode developer tools, available here:
+ http://developer.apple.com/tools/download/
+
+ RedHat:
+
+ up2date --nosig subversion autoheader autoconf
+
+ Windows:
+
+ Install Cygwin plus its Subversion and GNU Make packages.
+
+Building
+--------
+
+ First time:
+
+ svn co https://ctags.svn.sourceforge.net/svnroot/ctags/trunk ctags
+ # Miss off the "/trunk" above if you want access to old releases or the
+ # web site.
+ cd ctags
+ autoheader
+ autoconf
+ ./configure --enable-maintainer-mode
+ make -j
+
+ Later:
+
+ cd ctags
+ svn update
+ make -j
+
+Testing
+-------
+
+ First time:
+
+ cd ctags
+ cp /usr/bin/ctags ctags.ref
+ # Edit "testing.mak" so CTAGS_TEST = ./dctags
+ # Edit "testing.mak" so CTAGS_REF = ./ctags.ref
+ make test
+
+ Later:
+
+ make test
+
+ Whenever you're happy with the results and update/commit:
+
+ cp ./dctags ./ctags.ref
diff --git a/Makefile.in b/Makefile.in
new file mode 100644
index 0000000..c24764e
--- /dev/null
+++ b/Makefile.in
@@ -0,0 +1,222 @@
+# $Id: Makefile.in 709 2009-07-04 05:29:28Z dhiebert $
+#
+# Makefile for UNIX-like platforms.
+#
+
+# These are the names of the installed programs, in case you wish to change
+# them.
+#
+CTAGS_PROG = ctags
+ETAGS_PROG = etags
+
+# Set this to the path to your shell (must run Bourne shell commands).
+#
+SHELL = /bin/sh
+
+# GNU Autoconf variables. These are set by the "configure" script when it
+# runs.
+#
+exec_prefix = @exec_prefix@
+datarootdir = @datarootdir@
+prefix = @prefix@
+bindir = @bindir@
+srcdir = @srcdir@
+libdir = @libdir@
+incdir = @includedir@
+mandir = @mandir@
+SLINK = @LN_S@
+STRIP = @STRIP@
+CC = @CC@
+DEFS = @DEFS@
+CFLAGS = @CFLAGS@
+LDFLAGS = @LDFLAGS@
+LIBS = @LIBS@
+EXEEXT = @EXEEXT@
+OBJEXT = @OBJEXT@
+
+# If you cannot run the "configure" script to set the variables above, then
+# uncomment the defines below and customize them for your environment. If
+# your system does not support symbolic (soft) links, then remove the -s
+# from SLINK.
+#
+#srcdir = .
+#bindir = /usr/local/bin
+#mandir = /usr/local/man
+#SLINK = ln -s
+#STRIP = strip
+#CC = cc
+#DEFS = -DHAVE_CONFIG_H
+#CFLAGS = -O
+#LDFLAGS=
+
+include $(srcdir)/source.mak
+
+#
+#--- You should not need to modify anything below this line. ---#
+#
+
+.SUFFIXES:
+.SUFFIXES: .c .$(OBJEXT)
+
+VPATH = $(srcdir)
+
+INSTALL = cp
+INSTALL_PROG = $(INSTALL)
+INSTALL_DATA = $(INSTALL)
+
+READ_LIB = readtags.$(OBJEXT)
+READ_INC = readtags.h
+
+MANPAGE = ctags.1
+
+AUTO_GEN = configure config.h.in
+CONFIG_GEN = config.cache config.log config.status config.run config.h Makefile
+
+#
+# names for installed man pages
+#
+manext = 1
+man1dir = $(mandir)/man1
+CMAN = $(CTAGS_PROG).$(manext)
+EMAN = $(ETAGS_PROG).$(manext)
+
+#
+# destinations for installed files
+#
+CTAGS_EXEC = $(CTAGS_PROG)$(EXEEXT)
+ETAGS_EXEC = $(ETAGS_PROG)$(EXEEXT)
+DEST_CTAGS = $(bindir)/$(CTAGS_EXEC)
+DEST_ETAGS = $(bindir)/$(ETAGS_EXEC)
+DEST_READ_LIB = $(libdir)/$(READ_LIB)
+DEST_READ_INC = $(incdir)/$(READ_INC)
+DEST_CMAN = $(man1dir)/$(CMAN)
+DEST_EMAN = $(man1dir)/$(EMAN)
+
+#
+# primary rules
+#
+all: $(CTAGS_EXEC) $(READ_LIB)
+
+$(CTAGS_EXEC): $(OBJECTS)
+ $(CC) $(LDFLAGS) -o $@ $(OBJECTS) $(LIBS)
+
+dctags$(EXEEXT): debug.c $(SOURCES) $(HEADERS)
+ $(CC) -I. -I$(srcdir) $(DEFS) -DDEBUG -g $(LDFLAGS) -o $@ debug.c $(SOURCES)
+
+readtags$(EXEEXT): readtags.c readtags.h
+ $(CC) -DREADTAGS_MAIN -I. -I$(srcdir) $(DEFS) $(CFLAGS) $(LDFLAGS) -o $@ readtags.c
+
+ETYPEREF_OBJS = etyperef.o keyword.o routines.o strlist.o vstring.o
+etyperef$(EXEEXT): $(ETYPEREF_OBJS)
+ $(CC) $(LDFLAGS) -o $@ $(ETYPEREF_OBJS)
+
+etyperef.o: eiffel.c
+ $(CC) -DTYPE_REFERENCE_TOOL -I. -I$(srcdir) $(DEFS) $(CFLAGS) -o $@ -c eiffel.c
+
+$(OBJECTS): $(HEADERS) config.h
+
+#
+# generic install rules
+#
+install: @install_targets@
+
+install-strip: install
+
+install-ctags: install-cbin install-cman
+install-etags: install-ebin install-eman
+
+$(bindir) $(man1dir) $(libdir) $(incdir):
+ $(srcdir)/mkinstalldirs $@
+
+FORCE:
+
+#
+# install the executables
+#
+install-bin: install-cbin install-ebin install-lib
+install-cbin: $(DEST_CTAGS)
+install-ebin: $(DEST_ETAGS)
+install-lib: $(DEST_READ_LIB) $(DEST_READ_INC)
+
+$(DEST_CTAGS): $(CTAGS_EXEC) $(bindir) FORCE
+ $(INSTALL_PROG) $(CTAGS_EXEC) $@ && chmod 755 $@
+
+$(DEST_ETAGS):
+ - if [ -x $(DEST_CTAGS) ]; then \
+ cd $(bindir) && $(SLINK) $(CTAGS_EXEC) $(ETAGS_EXEC); \
+ fi
+
+#
+# install the man pages
+#
+install-man: install-cman install-eman
+install-cman: $(DEST_CMAN)
+install-eman: $(DEST_EMAN)
+
+$(DEST_CMAN): $(man1dir) $(MANPAGE) FORCE
+ - $(INSTALL_DATA) $(srcdir)/$(MANPAGE) $@ && chmod 644 $@
+
+$(DEST_EMAN):
+ - if [ -f $(DEST_CMAN) ]; then \
+ cd $(man1dir) && $(SLINK) $(CMAN) $(EMAN); \
+ fi
+
+#
+# install the library
+#
+$(DEST_READ_LIB): $(READ_LIB) $(libdir) FORCE
+ $(INSTALL_PROG) $(READ_LIB) $@ && chmod 644 $@
+
+$(DEST_READ_INC): $(READ_INC) $(incdir) FORCE
+ $(INSTALL_PROG) $(READ_INC) $@ && chmod 644 $@
+
+
+#
+# rules for uninstalling
+#
+uninstall: uninstall-bin uninstall-lib uninstall-man
+
+uninstall-bin:
+ - rm -f $(DEST_CTAGS) $(DEST_ETAGS)
+
+uninstall-lib:
+ - rm -f $(DEST_READ_LIB) $(DEST_READ_INC)
+
+uninstall-man:
+ - rm -f $(DEST_CMAN) $(DEST_EMAN)
+
+uninstall-ctags:
+ - rm -f $(DEST_CTAGS) $(DEST_CMAN)
+
+uninstall-etags:
+ - rm -f $(DEST_ETAGS) $(DEST_EMAN)
+
+#
+# miscellaneous rules
+#
+tags: $(CTAGS_EXEC)
+ ./$(CTAGS_EXEC) $(srcdir)/*
+
+TAGS: $(CTAGS_EXEC)
+ ./$(CTAGS_EXEC) -e $(srcdir)/*
+
+clean:
+ rm -f $(OBJECTS) $(CTAGS_EXEC) tags TAGS $(READ_LIB)
+ rm -f dctags$(EXEEXT) readtags$(EXEEXT)
+ rm -f etyperef$(EXEEXT) etyperef.$(OBJEXT)
+
+mostlyclean: clean
+
+distclean: clean
+ rm -f $(CONFIG_GEN)
+
+maintainerclean: distclean
+ rm -f $(AUTO_GEN)
+
+#
+# implicit rules
+#
+.c.$(OBJEXT):
+ $(CC) -I. -I$(srcdir) $(DEFS) $(CFLAGS) -c $<
+
+# vi:set tabstop=8:
diff --git a/NEWS b/NEWS
new file mode 100644
index 0000000..aeb8a07
--- /dev/null
+++ b/NEWS
@@ -0,0 +1,871 @@
+Current Version: 5.8
+
+ctags-5.8 (09 Jul 2009)
+* Removed ".ml" as a Lisp extension (now OCaml) [Lisp].
+* Added support for Ant language, contributed by David Fishburn.
+* Added support for DOS Batch language, contributed by David Fishburn.
+* Added support for Flex (Adobe) language, contributed by David Fishburn.
+* Added support for MATLAB language, contributed by David Fishburn.
+* Added support for Objective Caml (OCaml), provided by Vincent Berthoux [Patch #2738723].
+* Added support for TeX language, contributed by David Fishburn.
+* Added support for VHDL language, contributed by Nicolas Vincent [Bug #1943306].
+* Added support for Pyrex/Cython declarations [Python].
+* Added support for "v" kind, for variables [Python].
+* Added support for class and member variables [PHP, Bug #1037086].
+* Added support for recent enhancements to Eiffel language [Eiffel].
+* Added support for ASP classes, contributed by Zendhi Nagao; changes meaning of 'c' kind flag [ASP].
+* Added regex support when compiling with MinGW. Gnu regex module now included in all distributions.
+* Fixed detection of triple strings inside other strings [Python, Bug #1988130].
+* Fixed an endless loop with comments in triple strings [Python, Bug #1988027].
+* Fixed bug where functions were sometimes seen as methods [Python, Bug #1988026].
+* Fixed parsing of method parameter annotations, fix contributed by Paolo "blaisorblade" Giarrusso [Java, Bug #2049723, #2117073].
+* Fixed parsing of global scope qualifiers in base class lists [C++, Bug #1799343].
+* Fixed bug where namespace members were given kinds corresponding to globals [C++, Bug #1924919, #1575055].
+* Fixed parsing of "else" [C#, Bug #1830344].
+* Fixed parsing of derived enums [C#, Bug #1515910].
+* Fixed parsing of "foreach" [C#, Bug #1830343].
+* Fixed parsing of simple generic classes [C#, Bug #1515910].
+* Fixed bug with detecting identifiers inside variables [Python, Bug #1809024].
+* Fixed bug with detecting identifiers at the start of variables [Python, Bug #1856363].
+* Fixed parsing of triple single-quoted multi-line strings [Python, Bug #1906062].
+* Changed to newer version of autoconf, changing configure.in to configure.ac.
+
+ctags-5.7 (04 Sep 2007)
+* Added support for DIM AS [Freebasic, Bug #1741778].
+* Added support for arbitrary nesting depth [Python, Bug #1684786, Debian bug #409078].
+* Added support for verbatim string literals [C#, Bug #1515910].
+* Added support for .ctags as well as ctags.cnf on Windows [Bug #1246506].
+* Added support for non-extern, non-static functions returning wchar_t, contributed by Aaron Peromsik [C++, Patch #1458930].
+* Added support for numerous revision control systems including Bazaar and Mercurial [Bug #1472894].
+* Added support for enums [Java, Bug #1730485, Bug #1517143, Patch #1027395, Patch #1528507].
+* Added support for multiple-level namespace declarations [C#].
+* Added .svn to list of directories ignored during recursion (--recurse).
+* Added support for BlitzBasic, PureBasic and FreeBasic [FR #1100506].
+* Added support for interfaces and static/public/protected/private functions [PHP].
+* Added support for 'package' keyword [Perl].
+* Added support for multi-line subroutine, package, and constant definitions [Perl].
+* Added support for optional subroutine declarations [Perl].
+* Added support for formats [Perl].
+* Added support for new convert keyword [Eiffel].
+* Added optional tags for forward variable declarations (e.g. 'struct C;') [C, C++].
+* Changed parsing of option input file (-L) to strip trailing white space.
+* Ignore comments mixed into definitions and declarations [Perl].
+* Fixed detecting labels with whitespace after label name [Perl, Bug #1752361]
+* Fixed parsing of generic classes/interfaces [Java, Bug #1447756].
+* Fixed misidentification of fully qualified function calls as labels [Perl].
+* Fixed parsing of inner classes [Python, Bug #1411963].
+* Fixed line continuation [Python, Bug #928001, Patch #819471].
+* Fixed parsing of annotations [Java, Bug #1691412].
+* Fixed block-comment parsing [Verilog, Patch #1458042, Bugs #960316, #1111214, #1606569, #1615060].
+* Fixed typo in man page [Debian bug #366412].
+* Fixed missing chunk of text in man page and over-use of hyphens in UTF-8 locales [Debian bug #271323].
+* Fixed parsing of ` as a method name [Ruby].
+* Fixed parsing of keywords in string literals [Ruby, Bug #1742588].
+* Fixed potential segmentation violation [Bug #1672834, Bug #1222926].
+* Fixed parsing of destructors with whitespace after the '~' [C++, Bug #1585745].
+* Fixed default access of unions to be public [C++, Bug #1548443].
+* Fixed various memory leaks, mostly contributed by Dmitry Antipov.
+* Fixed parsing of `define [Verilog, Bug #961001].
+* Fixed crashes involving '/' [Verilog, Bug #1743330].
+* Fixed compilation problem on MinGW [Bug #1517424].
+* Fixed generation of HTML-formatted man page [Bug #1645864].
+* Fixed recognition of Python scripts having '#!/usr/bin/python' as first line [Bug #1764148].
+* Fixed parsing of Fortran comment-to-end-of-line with no newline before EOF [Debian bug #432872].
+* Fixed parsing of << [C/C++, Bugs #1020715, #1093123, #1770479, #1770607].
+* Fixed parsing of fully-qualified type names [Java, Bug #814263].
+* Fixed handling of lone carriage-return characters in file [Bug #1773926].
+
+ctags-5.6 (Mon May 29 2006)
+* Reformatted code for independence of tab stop setting.
+* Changed default configuration to disable installation of etags links.
+* Changed --langmap to first unmap each supplied extension from other languages.
+* Added support for ASP constants [ASP, Patch #961842].
+* Added support for GNU make extensions [Make].
+* Added .mk as extension recognized as a make language file [Make].
+* Added missing help for list-maps options [Bug #1201826].
+* Added new extension field "typeref" [thanks to Bram Moolenaar].
+* Extended functionality of Ruby parser with patch from Elliott Hughes [Ruby].
+* Fixed creation of TAGS file with etags-include but no files [Bug #941233].
+* Fixed problem reading last line of list file (-L) without final newline.
+* Fixed infinite loop that could occur on files without final newline [C, Java].
+* Fixed incorrect tag for first field of table [SQL].
+* Fixed missing tags for functions beginning with underscore [Sh].
+* Fixed missing tags for functions with variable arg list [C, Bug #1201689].
+* Fixed parsing problem with parentheses in argument list [C, Bug #1085585].
+* Fixed problem in preprocessor directive handling [C, Bug #1086609].
+
+ctags-5.5.4 (Thu Mar 25 2004)
+* Fixed broken -R option.
+
+ctags-5.5.3 (Sun Mar 14 2004)
+* Removed forgotten debug statement [Bug #811704].
+* Added support for Perl labels.
+* Added support for Perl "use constant" [Perl, Patch #853704, Feature Request
+ #710017].
+* Added support for package qualification of tags, removing useless "package"
+ tag kind [Perl, Feature Request #448887].
+* Added support for "and" keyword [SML, Bug #816636].
+* Added support for variables [PHP].
+* Fixed problem destroying tag file with certain info options [Bug #845502].
+* Fixed portability problem [DJGPP].
+* Fixed problem of double characters in signature field [C, Bug #852368].
+* Fixed problem manifested by errant preprocessor conditionals [Bug #839162].
+* Fixed incorrect line address in tag file for SQL tags [SQL, Bug #823000].
+* Fixed incorrect recognition of POD paragraph [Perl, Bug #842077].
+* Fixed spurious tags for C++ member templates [C++, Bug #849591].
+* Fixed missing tags related to template specializations [C++, Bug #872494].
+* Fixed spurious local tags for statements following labels [C].
+* Fixed missing tags for certain scoped functions [Vim].
+* Fixed infinite loop in Fortran parser.
+* Fixed missing tags for certain initializers [Fortran, Bug #877956].
+* Fixed problem with comment line after continuation character [Fortran,
+ Bug #858165].
+
+ctags-5.5.2 (Wed Sep 17 2003)
+* Added tags for local variables for C-based languages [C/C++/C#/Java/Vera,
+ Feature Request #449503].
+* Fixed compilation problem due to type change made to accommodate change of
+ return type of _findfirst() in VisualStudio.Net [Win32, Bug #775789].
+* Fixed problems with certain bit fields.
+
+ctags-5.5.1 (Wed Jul 30 2003)
+* Changed supported tag kinds for Verilog parser during overhaul.
+* Restored exit of program after --help, --license, and --version options [Bug
+ #717311, #751240].
+* Removed inclusion of general.h (GPL) from readtags.c (public domain).
+* Added support for tags for labels [PL/SQL].
+* Added support for tags for constant definitions [PHP].
+* Fixed redundant parsing of configuration file [Windows, Bug #768814].
+* Fixed missing tags for definitions spanning lines [Verilog, Bug #762027].
+* Fixed compilation error for uncommon hosts.
+* Fixed missing tags for Korn shell specific function definitions. [Sh,
+ Bug #769184]
+* Fixed missing tags when semicolon separator appears [Fortran, Bug #734933].
+* Fixed missing tags when endsubroutine keyword appears [Fortran, Bug #726712].
+* Fixed problem with fixed-form line continuation following comment [Fortran,
+ Bug #726875].
+* Fixed missing tags for nested blocks [PL/SQL, Bug #722501].
+* Fixed missing tags for function typedefs [C].
+* Fixed inability to map empty extension when path contained dot [Bug #742689].
+
+ctags-5.5 (Tue Apr 1 2003)
+* Changed kind indicator for methods from 'f' to 'm' [Tcl].
+* Changed tags within interfaces to be disabled by default (like prototypes in
+ C/C++) [Fortran].
+* Removed explicit descriptions of individual --<LANG>-types options from
+ --help output. See new --list-languages and --list-kinds options.
+* Removed explicit list of supported languages and supported tag kinds and
+ mapping patterns from man page. See new --list-languages, --list-kinds, and
+ --list-maps options.
+* Renamed --<LANG>-types option to --<LANG>-kinds (still accepts old name).
+* Added --list-kinds option.
+* Added --list-maps option.
+* Added --list-languages option.
+* Added support for dimensioned variables, contributed by Simon Bohlin [ASP].
+* Added support for C# language.
+* Added support for Erlang language, contributed by Brent Fulgham.
+* Added support for HTML language files.
+* Added support for JavaScript language files.
+* Added support for SML (Standard ML) language, contributed by Venkatesh Prasad.
+* Added mapping for .plx to Perl.
+* Added tags for autocommand groups [Vim, Patch #664685].
+* Added support for numerous language extensions [Fortran].
+* Added '$', 'D', and 'd' in column 1 as comment characters [Fortran].
+* Added special handling of --options=NONE to disable automatic reading of
+ options from configuration files or environment.
+* Added check for case-insensitive filenames to configure.
+* Fixed problem with lower case <SID> tag [Vim, Bug #657327].
+* Fixed problem recognizing indented code [Vim, Patch #664685].
+* Fixed problem with infinite loop in certain comments [PL/SQL, Bug #629115].
+* Fixed problem of incorrect extension field [C, Bug #639639].
+* Fixed problem of empty scoping extension field [C, Bug #639644].
+* Fixed missing tags for functions split across lines [PHP, Bug #681824].
+* Fixed missing tags for nested subprograms using 'contains' [Fortran,
+ Bug #670433].
+* Fixed missing tags when variable has same name as keyword [Fortran].
+* Fixed spurious tag when an array-spec occurs within an entity-decl [Fortran].
+* Fixed mishandling of multiline raw strings [Python, Bug #699171].
+* Fixed missing scope extension field on namespaces [C++, C#, Bug #665086].
+* Fixed several bugs causing missed tags [Fortran].
+* Fixed problem with --langmap option preventing clearing of map [Bug #688442].
+* Fixed recognition of Unicode-8 characters [Java].
+* Fixed man page errors and omissions.
+* Fixed bug in readFieldValue() in readtags library.
+* Fixed bug in option parsing in readtags command-line program.
+* Fixed portability problems with DJGPP [Bug #692569].
+* Fixed portability problems with Cygwin.
+
+ctags-5.4 (Thu Oct 17 2002)
+* Improved ability for tagsOpen() in readtags library to report failure to
+ open tag file, adding new fields to tagFileInfo structure.
+* Improved Cobol support to include data, files, groups, and sections [Cobol].
+* Added '$' as a valid character for C identifiers [VMS].
+* Added support for recording routine argument declarations for C-like
+ languages. See the --fields option and man page section TAG FILE FORMAT for
+ more information [C, C++, Java].
+* Added class and method support to TCL parser [TCL].
+* Added support for PL/SQL language.
+* Added support for Vera language, inspired by Dave Eggum [Vera].
+* Fixed problem terminating Perl POD block [Perl, Bug #612621].
+* Fixed problem re whitespace preceding subprogram name [Pascal, Bug #612019].
+* Fixed problem with leading spaces before instruction [TCL, Bug #615928].
+* Fixed problem with double precision functions [Fortran, Bug #620288].
+* Fixed inverted test causing TMPDIR to be used for temporary files when
+ ctags is setuid instead of when not setuid [Bug #623713].
+
+ctags-5.3.1 (Thu Sep 12 2002)
+* Renamed tagsSetSorted() to tagsSetSortType() and "sorted" member of
+ tagFileInfo structure of readtags library to "sort".
+* Added new function, tagsFirst() to readtags library.
+* Fixed incorrect tag kinds [Verilog].
+* Fixed null tags for unnamed BLOCK DATA statements [Fortran].
+* Fixed missing tags for function preceded by "<SID>" [Vim].
+* Fixed missing tags for equate statements not in column 1 [Asm, Bug #538629].
+* Fixed Ruby parser (why didn't the compiler report my screw-up?) [Ruby].
+
+ctags-5.3 (Wed Jul 17 2002)
+* Allowed --etags-include option without input files.
+* Changed Asm parser back to a C-based parser to remove redundant tags,
+ and extended its support for more variants [Asm].
+* Changed to using _tempnam() to create temporary files on Windows, allowing
+ "TMP" environment variable to set temporary directory.
+* Changed the -x output to match that of traditional ctags when the --format=1
+ option is supplied. The new format was also changed slightly to conform more
+ closely to the original format, with the addition of the extra tag type field.
+* Added support for Verilog language, submitted by Nam SungHyun.
+* Added support for RISC OS platform, contributed by Andrew Wingate.
+* Added support for "#pragma weak", generating macro tags for weak symbols [C].
+* Added support for mixins and class methods to Ruby parser, submitted by
+ Matthias Veit [Ruby].
+* Added support to ctags and readtags library for case-folded sorting of tag
+ files, submitted by Flemming Madsen.
+* Added identification of class methods [Python].
+* Fixed portability problems [Bugs #541997, #571240].
+* Fixed bug in configure script [Solaris, Bug #542966].
+* Fixed invalid package name tags [Perl, Bug #535068].
+* Fixed failure to output relative paths into etags TAGS files on Win32
+ [Bug #568365].
+* Fixed incorrect line address in cases of line continuation [Fortran].
+* Fixed missing tags for certain cases of invalid syntax [C].
+* Fixed missing tags in Fortran with HPF extensions [Fortran, Bug #565813].
+* Fixed spurious tag for clients portion of feature clause when following
+ an empty feature clause [Eiffel].
+
+ctags-5.2.3 (Sun Feb 24 2002)
+* Fixed portability problem in makefile [Solaris, FreeBSD].
+* Fixed infinite loop for certain cases of invalid syntax [Eiffel].
+* Changed Asm parser to regex, extending its support for more variants [Asm].
+
+ctags-5.2.2 (Sat Feb 16 2002)
+* Fixed spurious tags following empty feature clause [Eiffel].
+* Fixed missing tags for classes specifying generic creation routine [Eiffel].
+* Fixed missing tags when label not followed by white space [YACC].
+* Fixed for portability [Solaris, MacOS X].
+* Added support for type reference tool [Eiffel].
+
+ctags-5.2.1 (Sun Jan 20 2002)
+* Portability fixes [Mingw32].
+* Added "RCS" and "CVS" to list of directories excluded by default.
+* Fixed missing tags for function pointers declared const or volatile
+ [C, Bug #503764].
+
+ctags-5.2 (Sun Dec 23 2001)
+* Portability fixes [HP-UX, Solaris, VMS, OS/2].
+* Made code compilable by a C++ compiler.
+* Changed reading of option files to ignore blank lines.
+* Changed and enhanced interface to readtags library (see readtags.h).
+* Changed from using addLanguageRegex() to addTagRegex() in regex-based
+ parsers.
+* Added support for Lua language, submitted by Max Ischenko.
+* Added instructions to man page on using tags with NEdit.
+* Added setargv.obj to link for wildcard expansion [MSVC].
+* Added capability to have regex invoke a callback in a regex parser.
+* Fixed regex tag problem which left newlines in back-references.
+* Fixed missing class-qualified tags [Eiffel].
+* Fixed spurious tags for entries in final indexing clause [Eiffel].
+* Fixed problem with invalid filenames in preprocessor line directives.
+* Fixed bug parsing scoped variables (e.g. "b:variable") [Vim, Bug #487608].
+* Fixed problem compiling readtags.c on some hosts.
+* Fixed memory overwrite problem in readtags library.
+
+ctags-5.1 (Tue Nov 06 2001)
+* Changed name of option configuration files for MSDOS, MSWindows, and OS/2.
+* Changed regex support to enforce REG_NEWLINE. This fixes problem where the
+ newline character was explicitly being matched by user patterns [Bug #431477].
+* Added new public domain library for reading tag files (see readtags.h).
+* Added support for variables and namespaces, provided by Jay Glanville [Vim].
+* Added report of non-options in option configuration files and CTAGS
+ environment variable.
+* Added support for YACC language, submitted by Nick Hibma [YACC].
+* Added support for Perl packages, submitted by Nick Hibma [Perl].
+* Added '$' as valid identifier character for DEC C compiler [VMS, Bug #425147].
+* Added compilation date and time to --version output.
+* Added configure check for HP-UX to determine if ANSI options needed [HP-UX].
+* Removed tags for forward class/struct declarations [C/C++, Bug #432563].
+* Eliminated ;" separator from end of tag line when no extension fields are
+ present.
+* Fixed segmentation violation for some Lisp files [Lisp].
+* Fixed segmentation violation occurring when file referenced in #line
+ directive was from an unknown language.
+* Fixed loss of sync when parsing bit fields named with C++ reserved word [C].
+* Fixed compilation problem on gcc-2.7.2.
+* Fixed problem parsing verbatim strings [Eiffel].
+* Fixed problem with PHP references [PHP].
+* Fixed handling of Perl __DATA__ sections [Perl].
+* Fixed problem resulting from white space in tag name due to regex name
+ specifier.
+* Fixed double reading of $HOME/.ctags when current directory is $HOME.
+* Fixed problem reading option configuration files using CR-LF newlines.
+* Fixed problem preventing output control over tag kinds of regex patterns
+ [Bug #429869]
+* Fixed incorrect parsing of Vim functions with ':' modifiers [Bug #466517].
+
+ctags-5.0.1 (Sun Apr 15 2001)
+* Fixed problem checking recursive links [SunOS 4.x].
+* Improved security on hosts where mkstemp() is not available.
+
+ctags-5.0 (Sun Mar 18 2001)
+* Restructured code to simplify support for new language parsers.
+* Changed source code to use ANSI-style function definitions.
+* Changed scope-qualified tag entries to omit enumeration name [C/C++].
+* Changed reading of files supplied to -I option to read one token per line.
+* Changed reading of option files to read one argument per line.
+* Changed default extension fields, now controlled by new option --fields.
+* Changed detection of etags invocation to accept any name containing "etags".
+* Removed -p option, which only caused confusion and is rendered obsolete by
+ the change immediately above.
+* Removed 'A' flag to the --c-types, --eiffel-types, and --java-types
+ options, replacing its functionality with the new --fields option.
+* Removed 'C' flag to the --c-types, --eiffel-types, and --java-types
+ options, replacing its functionality with the new --extra option.
+* Deprecated -i option, which was long ago replaced with the --c-types option.
+* Deprecated --file-tags option, now incorporated into new --extra option.
+* Deprecated --kind-long option, now incorporated into new --fields option.
+* Renamed --lang[uage] option to --language-force.
+* Renamed makefiles for non-Unix platforms.
+* Improved parsing of assembly language files [Asm].
+* Improved parsing of Fortran language files, adding new tag kinds [Fortran].
+* Added documentation explaining how to extend ctags with new parsers.
+* Added support for regular expressions, using either Posix or Gnu interface.
+* Added support for mapping file names to languages using shell patterns.
+* Added support for ASP scripts, submitted by Patrick Dehne [ASP].
+* Added support for Makefiles [Make].
+* Added support for Pascal language [Pascal].
+* Added support for PHP scripts, submitted by Jesus Castagnetto [PHP].
+* Added support for REXX language [REXX], based on submission by Alexander Mai.
+* Added support for Ruby, submitted by Thaddeus Covert [Ruby].
+* Added support for S-Lang, submitted by Francesc Rocher [SLang].
+* Added support for Macintosh platform using MPW (by Maarten Hekkelman).
+* Added .tk as recognized extension [Tcl].
+* Added .cp and .hp as C++ extensions [C++].
+* Added .zsh as shell script extension [Sh].
+* Added support for trigraphs for C-based languages [C/C++].
+* Added language recognition for shell scripts using "#!/usr/bin/env command".
+* Added check for recursive directory links.
+* Added support for "[" form of verbatim strings [Eiffel].
+* Added --exclude option to exclude directories while recursing.
+* Added --fields option to specify extension fields to include in output.
+* Added --extra option to allow control over extra tags.
+* Added --regex-<LANG> option to define language-specific regular expressions.
+* Added --<LANG>-types options for all supported languages.
+* Added --langdef option to define new languages to be parsed with regex.
+* Added --languages option to restrict set of languages scanned for tags.
+* Added --tag-relative option to make file paths recorded in tag file relative
+ to location of tag file itself instead of the current working directory when
+ file arguments are specified using relative paths.
+* Added restriction of permissions of created temporary files, for security,
+ when mkstemp() is not available.
+* Reimplemented line directive handling to work for all languages.
+* Fixed tag generation for packages [Java].
+* Fixed Lisp parser [Lisp].
+* Fixed Mingw32 port [Win32].
+* Fixed bug in procedure name parsing [Tcl].
+* Fixed bug resulting in wrong column being checked for paragraphs [Cobol].
+* Fixed bug in language dispatch for executable "#!" scripts [Unix].
+* Fixed bugs resulting in incorrect scope entries in tag file [C++/Java].
+* Fixed warning caused by reinstallation of etags link [Unix].
+* Fixed destruction of existing tag file when no files supplied on invocation.
+* Fixed problem in Makefile.in which prevented configuring and building in
+ non-source directory. Also changed Makefile.in to generate and use correct
+ object and executable file extensions when run on Win32.
+
+ctags-4.0.3 (Sun Jul 16 2000)
+* Fixed compiler warnings [Amiga].
+* Fixed problem in configure.in causing struct stat st_ino member test to fail.
+* Fixed problem with TAGS entries for files using DOS-style (CR-LF) new lines.
+* Improved algorithm for locating Perl functions and skipping pods.
+* Improved algorithm for locating shell functions [Sh].
+* Renamed Makefile.amiga to Makefile.manx [Amiga].
+* Added Makefile.sas for SAS C compiler [Amiga].
+* Updated Makefile.qdos [QDOS].
+* Improved support for DECC compiler [VAX].
+
+ctags-4.0.2 (Mon Jul 10 2000)
+* Now silently ignore -w option for backwards compatibility with SVR4 ctags.
+* Fixed bug resulting in no extension flags when using --kind-long option.
+
+ctags-4.0.1 (Wed Jun 28 2000)
+* Fixed segmentation violation when using --file-tags.
+
+ctags-4.0 (Thu Jun 22 2000)
+* Fixed infinite loop on certain syntactically invalid class constructs [C++].
+* Fixed problem of incorrect tags for some pure virtual functions [C++].
+* Fixed inability to clear all tag types when using --c-types= (all languages).
+* Fixed problem of arguments to parameterized class being reported as
+ ancestors in the "inherits" extension flag.
+* Fixed missed tags for typedef-ed function pointers having a PROTO((a,b))
+ style argument list.
+* Fixed missing file tags for referenced files when using --line-directives
+ option [C/C++].
+* Fixed failure to recognize drive-qualified file name as a file name when
+ supplied as argument to -I option [Win32].
+* Fixed problem with missing comma in "inherits" extension flag [Java].
+* Fixed problem with incorrect or redundant parents listed for "inherits"
+ extension flag [Java].
+* Added check to avoid recursive symbolic links to directories.
+* Added warning message for -i option, which is deprecated and being dropped.
+* Added support for Assembler, COBOL, LISP, PERL, and Scheme, taken from Gnu
+ etags.
+* Added support for AWK, Bourne/Korn/Z Shell, Python, TCL, and Vim scripts.
+* Added support for the BETA language, submitted by Erik Corry.
+* Added ability to determine language from interpreter specified in first line
+ of executable files if they are not recognized by their extension.
+* Added --options option.
+* Added ability to specify files having no extension with -h and --langmap
+ options.
+* Added compile time option to separate path components with a unix-style path
+ separator for sharing tag file across platforms, enabled by defining the
+ label UNIX_PATH_SEPARATOR [Win32].
+* Fixed portability issues [VMS].
+
+ctags-3.5.2 (Mon Apr 24 2000)
+* Fixed problem preventing Emacs-style tags from being written to stdout.
+
+ctags-3.5.1 (Sun Apr 23 2000)
+* Fixed infinite loop in writing Emacs-style TAGS file on platforms using
+ tmpnam() instead of mkstemp() [Win32].
+* Fixed minor problems in Borland makefiles [Win32].
+* Fixed compiler warning [DJGPP].
+
+ctags-3.5 (Fri Apr 14 2000)
+* Fixed core dump when including access field in tag file [Java].
+* Fixed failure to identify end of statement for block statements [Java].
+* Fixed bug with lone "end" in feature adaptation part of inheritance clause
+ [Eiffel].
+* Fixed problem preventing const functions from being recognized as pure
+ virtual [C/C++].
+* Fixed problem with no tags found after certain macro calls [C/C++].
+* Fixed bug in descrip.mms build file [VMS].
+* Changed to use mkstemp() (when available) to create temporary files for
+ security reasons and allow configuring default temporary directory, and to
+ override this directory at run-time by setting TMPDIR environment variable.
+* Added support for extracting inheritance information into new "inherits"
+ extension flag [C++, Java].
+* Added Makefile.bc5 for Borland C++ version 5.5 compiler (free version).
+* Added new question to FAQ regarding Xemacs.
+* Updated FAQ regarding new release of NEdit.
+* Renamed Borland 3.1 makefile from Makefile.bcc to Makefile.bc3.
+* Renamed Microsoft Visual C++ makefile from Makefile.w32 to Makefile.mvc.
+
+ctags-3.4 (Thu Jan 13 2000)
+* Fixed sorting problems when LC_ALL environment variable was set to foreign
+ locale (not fixed by previous release).
+* Fixed nested scoping reported in extension flags and class-qualified tags.
+* Eliminated generation of class-qualified tag entries when --c-types=+C
+ option is in effect but scope is empty (e.g. "::main").
+* Added support for default access of class members in Java.
+* Added new extension flag "implementation", which indicates if a routine or
+ class is virtual or abstract.
+* Minor changes for OS/2 compilation.
+
+ctags-3.3.3 (Thu Dec 16 1999)
+* Changed how input is read for -L and --filter options to permit file names
+ containing spaces (see man page).
+* Fixed scope recorded for C++ class elements, especially in namespaces.
+* Fixed spurious tag generated for MODULE PROCEDURE in interfaces [Fortran].
+* Fixed sorting problems when LC_ALL environment variable was set to foreign
+ locale.
+* Fixed crash on Windows when compiled with Mingw32 gcc compiler.
+* Fixed compilation problems on Cray.
+
+ctags-3.3.2 (Fri Sep 24 1999)
+* Fixed compile problem on AIX 4.1.
+* Improved recovery from syntax error [Fortran].
+* Changed name of configure option (now --enable-custom-config).
+* Changed Makefile.bcc to optimize for space, since code size exceeded 64KB.
+
+ctags-3.3.1 (Mon Sep 20 1999)
+* Fixed segmentation violation occurring when directory recursion was selected.
+* Fixed misleading message when out of memory during internal sort.
+
+ctags-3.3 (Fri Sep 17 1999)
+* Fixed missing class-qualified tags [Java].
+* Fixed missing tag for functions having function pointer argument [C].
+* Fixed parsing of conversion functions [C++].
+* Added missing space following "operator" keyword to the tag names generated
+ for function call operators [C++].
+* Fixed string parsing to retry file as free source form upon EOF [Fortran].
+* Fixed missing tags following comments [Fortran].
+* Fixed missing labels for free source form [Fortran].
+* Removed 72 character limit for fixed form source lines, since many compilers
+ relax this limit and it is commonly taken advantage of. This was sometimes
+ causing fixed form source to be parsed as free form source [Fortran].
+* Changed misleading message when file could not be accessed.
+* Changed behavior of --verbose option to display option processing.
+* Changed -I option to permit clearing the token list with "-I-".
+* Changed --lang option to accept new "auto" parameter.
+* Changed --langmap option to accept new "default" parameter.
+* Changed --eiffel-types option to accept new 'C' flag to generate
+ class-qualified tags.
+* Changed -h option to accept new "default" parameter.
+* Changed option processing. Most options may now appear anywhere on the
+ command line, affecting only those files which follow them.
+* Added ability to specify default options in any of the files /etc/ctags.conf,
+ /usr/local/etc/ctags.conf, $HOME/.ctags, .ctags, and one optional file,
+ which can be supplied at configure time.
+* Added --filter option.
+* Added --filter-terminator option.
+
+ctags-3.2.4 (Thu Jul 01 1999)
+* Changed name of macro in Makefile.in to avoid being overridden by CTAGS
+ environment variable.
+
+ctags-3.2.3 (Mon Jun 21 1999)
+* Small portability change for EMX compiler on OS/2.
+* Slight change to W32 and BCC makefiles.
+
+ctags-3.2.2 (Sat May 29 1999)
+* Fixed endless error loop in the case of unreadable file.
+* Fixed redundant include entries in TAGS file when using --etags-include.
+
+ctags-3.2.1 (Wed May 09 1999)
+* Fixed problem reading -I token list from file.
+* Fixed problem with "using" declarations which corrupted tag file [C++].
+* Fixed configure.in to more reliably recognize existing prototypes.
+* Added ability to ignore preprocessor directives in Fortran files.
+* Added support for egcs/MingW32 compiler [Win32].
+
+ctags-3.2 (Wed Mar 03 1999)
+* Fixed spurious tags related to export specifiers of feature clauses [Eiffel].
+* Fixed problem with template in ctor-initializer [C++].
+* Fixed typo causing compiler error [MSVC].
+* Extended -I option to allow token replacement [thanks to Flemming Madsen].
+* Added --etags-include option to support TAGS file includes.
+* Added support for QDOS [thanks to Thierry Godefroy].
+
+ctags-3.1.2 (Tue Jan 26 1999)
+* Changed extension flags to eliminate space between label and value to remain
+ true to the intent of the agreement on the extended format made with editor
+ authors.
+* Added --links option to permit ignoring symbolic links.
+* Fixed missing tags upon ANSI style variable function argument lists.
+* Fixed missing tags for methods with fully qualified type names in argument
+ list [Java].
+* Fixed double tags generated for enumerators followed by comma.
+* Fixed missing path prefix for -p option [Win 95/NT].
+
+ctags-3.1 (Wed Jan 20 1999)
+* Changed -h and --langmap options to accept a plus sign as the first character
+ of their arguments to indicate that arguments should be added to current.
+* Changed default for member tags to 'on' [C/C++].
+* Changed default for local entities to 'off' [Eiffel].
+* Added tags for forward class/struct/union/enum declarations when using
+ --c-types=+x [C/C++].
+* Fixed memory overwrite bug causing general protection fault [Win 95/NT].
+* Fixed missing tags for methods with throws clause [Java].
+* Fixed bad tags generated for null macro names [C].
+* Fixed spurious tag for features and entities of BIT type [Eiffel].
+* Fixed spurious tags when local entity declaration list was empty [Eiffel].
+* Fixed missing tags for constructors and destructors [C++].
+* Fixed failure to recognize function when declaration for first argument
+ was of template type [C++].
+
+ctags-3.0.3 (Mon Dec 21 1998)
+* Fixed mistake made in previous version which caused macro tags to be missed.
+* Fixed parsing of --langmap option.
+
+ctags-3.0.2 (Mon Dec 21 1998)
+* Added tags for names undefined with #undef [C/C++].
+* Added tags for renamed features (Eiffel).
+* Improved Emacs-style tag file contents (per Ian Zimmerman).
+* Fixed problem handling deferred, external, once, obsolete features in Eiffel.
+* Fixed porting problem [OSF1 V4.0].
+
+ctags-3.0.1 (Sat Dec 12 1998)
+* Fixed problem with certain macros and functions with no declared return type.
+* Fixed problem causing endless loop on MSDOS/Win32 by restoring use of binary
+ mode on opening of source files.
+* Fixed porting problems [SunOS 4.1.x and MSVC++ 5.0].
+
+ctags-3.0 (Sun Dec 06 1998)
+* Added support for the Eiffel language (everyone should learn Eiffel).
+* Added support for the Fortran language.
+* Added --c-types option to specify tag types to be included for C/C++.
+* Added --eiffel-types option to specify tag types to be included for Eiffel.
+* Added --fortran-types option to specify tag types to be included for Fortran.
+* Added --file-scope option to place verbose tag description into tag file.
+* Added --file-tags option to place tags for source file names into tag file.
+* Added --java-types option to specify tag types to be included for Java.
+* Added --kind-long option to place verbose tag description into tag file.
+* Added --linedirectives option to enable processing of #line directives so
+ that running ctags on preprocessor output can generate line numbers and file
+ names which correspond to the original source files.
+* Added -V option to enable verbose message for each file considered.
+* Added special handling for macros of form "INIT(= value)".
+* Added ability to suffix an ignored identifier (-I option) with the '+'
+ character, thus instructing ctags to also ignore any argument list which
+ may follow the identifier.
+* Changed the -i option, moving Java language options to the new --java-types
+ option. The -i option is now deprecated in favor of the new language
+ specific tag type options.
+* Changed behavior of handling of ignored identifiers (-I option) to still
+ generate a tag for any macro definition for that identifier.
+* Changed handling of -h option so that include files are no longer assumed to
+ be C++ files.
+* Changed tags for operators to always precede the operator with the string
+ "operator ", thus making it consistent for all operators. [C++]
+* Changed C/C++ parsing, catching many more tricky constructs.
+* Changed extension flags to place a space between the label and the value for
+ readability.
+* Fixed core dump which occurred when using -iF (now --file-tags) together
+ with -e (etags) on a zero-length file.
+* Fixed missing or incorrect tags for conversion operators or operator "()".
+* Fixed incorrect parent class in extension flags for type declarations of the
+ form "class Bar { OtherClass::sometype foo; }".
+* Fixed missing tags for "friend" and "static" prototypes in header files.
+* Fixed problem of external "sort" reporting locale not available on HPUX.
+* Fixed -p option.
+* Fixed VMS support. It should now work for any source file type.
+
+ctags-2.3.2 (Wed Sep 09 1998)
+* Fixed -h option; broken since version 1.7, yet only just reported.
+
+ctags-2.3.1 (Sun Aug 30 1998)
+* Fixed improper handling of derived structs.
+* Fixed wrong class name tag when a nested-name-specifier was present in class
+ declaration.
+* Added parent information into tag extension flags for data structures to
+ match that already present for members.
+* Add missing documentation for --langmap option in the --help output.
+* Eliminated compiler warning [gcc 2.8.1].
+
+ctags-2.3 (Thu Aug 20 1998)
+* Eliminated compiler warnings [SGI MIPSpro].
+
+ctags-2.2.7 (Mon Aug 17 1998)
+* Fixed porting problem [Borland C++].
+
+ctags-2.2.6 (Wed Aug 12 1998)
+* Fixed core dump encountered on some platforms when the CTAGS environment
+ variable was set but empty.
+* Fixed porting problem [MSVC].
+* Added directory recursion support for Amiga.
+
+ctags-2.2.3 (Sun Aug 02 1998)
+ctags-2.2.2 (Fri Jul 24 1998)
+* Fixed porting problems [AIX, HP-UX, OSF/1, SunOS, MSVC].
+
+ctags-2.2.1 (Fri Jul 24 1998)
+* Now uses a default directory name of "." when using -R or --recurse option
+ (e.g. "ctags -R" is equivalent to "ctags -R .").
+* Directories named "SCCS" are skipped when using the -R or --recurse option
+ under Unix.
+* Fixed porting problems [HP-UX, IRIX, SunOS, MSDOS/Windows].
+
+ctags-2.2 (Mon Jul 20 1998)
+* Added the --recurse and -R options to allow recursing into directories.
+ This allows running ctags on an entire source directory tree using the
+ single command "ctags -R <dir>". Currently, this option is only supported on
+ UNIX, MSDOS, Windows 95/NT, and OS/2. Other platforms will have to wait.
+* Changed writing of Emacs-style TAGS file to binary mode [MSDOS/Windows].
+* Fixed porting problems [HP-UX, OSF/1].
+
+ctags-2.1.1 (Mon Jul 06 1998)
+* Changed -h option to allow only periods to separate extensions.
+* Added the --langmap option to allow overriding the default associations
+ between source language and file extension.
+* Added configuration check and code work-around for putenv() prototypes
+ missing the const from the argument declaration [IRIX 5.2 and CRAY J90].
+* Added makefile for VMS.
+* Fixed porting problem [HP-UX].
+
+ctags-2.1 (Wed Jul 01 1998)
+* Added Java support.
+* Eliminated the --keywords option introduced in 2.0.4, replacing it with
+ automatic detection of the language (i.e. recognized keywords) based upon
+ the file extension.
+* Added the --lang option for manually overriding the automatic selection of
+ the language.
+* Added new flag 'i' to the -i option to allow generating tags for Java
+ interfaces.
+* Added new flag 'n' to the -i option to allow generating tags for C++
+ namespaces.
+* Added new flag 'x' to the -i option to allow generating tags for extern
+ variable declarations.
+* Added new extension flags, "private", "protected", and "public", which
+ indicate the visibility of class members when it can be determined.
+* Changed behavior of flag 'C' of the -i option to add tags of form
+ "class.member" for Java.
+* Changed how files on command line are handled. Ctags will now only scan
+ those files whose extensions it knows about unless the --lang option is
+ specified. This allows running ctags on all files in a directory without
+ having to be specific (e.g. "ctags *").
+* Removed support for duplicate tag warnings and the -w and -W options. These
+ options are silently ignored for now.
+
+ctags-2.0.4 (Sat May 23 1998)
+* Added sorting time to the output of the --totals option.
+* Added the --keywords option to allow restricting the recognized
+ declaration keywords in order to handle legacy source code which uses
+ newer keywords for variable and parameter names.
+* Ignore list now also applies to macro tags.
+* /dev/stdout now properly handled as parameter to -f/-o option.
+* Fixed problem handling an operator definition in C++ when white space
+ appeared between the "operator" keyword and the operator (e.g. "=").
+* Fixed handling of non-symbolic operators (e.g. "new", "delete", etc.).
+* Fixed sort order problem for some locale settings.
+* Fixed segmentation violation when using ignore list (-I) on SunOS 4.x.
+* Fixed a segmentation violation caused by a stack overwrite when testing a
+ particular kind of non-standard tag file format.
+
+ctags-2.0.3 (Sun Mar 12 1998)
+* Added configure check for "strip" program.
+* Added new flag 'C' to the -i option to allow adding extra tags to the tag
+ file in the form "class::member" for class methods and members.
+
+ctags-2.0.2 (Wed Feb 25 1998)
+* Added stripping of installed binary for "install" target.
+
+ctags-2.0.1 (Thu Feb 19 1998)
+* Added support for C++.
+* Added new flag 'F' to the -i option to allow adding an extra tag for the
+ basename of each source file supplied to ctags. This provides the ability to
+ jump to a source file in Vi using ":tag file.c".
+* Added new flag 'm' to generate tags for class, structure, and union members
+ (disabled by default).
+* Added several new flags to the -i option to allow finer specification of
+ which types of tags to include/exclude.
+* Added ".hh" extension to the default list of files recognized as header
+ files.
+* Added explicit handling of special gcc construct __attribute((..)),
+ which could lead to incorrect tag generation.
+* Added configure option --disable-extended-format to allow building ctags
+ with the extended format disabled by default.
+* Added configure option --enable-macro-patterns to change the default
+ behavior of ctags to generate patterns instead of line numbers for macro
+ (define) tags.
+* Changed configure option --enable-internal-sort to --disable-external-sort.
+* Changed makefile for OS/2.
+* Removed support for the -d, -t and -T options which had been deprecated
+ for some time.
+* Removed ANNOUNCE file in distribution, consolidating it with the README
+ file.
+* Replaced CHANGES file with NEWS for more GNU-like standard distribution.
+* Improved the detection of macros of the type generated by Microsoft Visual C
+ when generating source code. These caused subsequent statements to fail to
+ have tags generated for them. Still not bullet proof, though.
+* Fixed a problem which prevented the use of / as a path separator under MSDOS
+ and Win 95/NT.
+* Fixed problem of blank lines occurring in the tag file.
+* Fixed recognition of declarations with parentheses.
+* Fixed problem of missing tags for objects within extern "C" blocks.
+* Fixed problem in source file counts when using --totals option.
+* Extended the length of tag type field in -x output to be more verbose.
+* Fixed option initialization error which caused static tags to be excluded.
+
+ctags-1.7 (Mon Oct 13 1997)
+* Tag files now use a new extended format which is backwards compatible with
+ existing Vi implementations, yet provides extended information which can be
+ used by supporting editors.
+* Added documentation pseudo-tags (e.g. !_TAG_FILE_FORMAT) to tag file.
+* Added the --excmd option as alternative to the -n and -N options.
+* Added the --format option to allow forcing the old-style tag file format.
+* Added the --if0 to control how "#if 0" branches are handled.
+* Added the --sort option as alternative to -u option.
+* Added the --totals option to print statistics of tag generation.
+* Added the --version option.
+* Improved handling of preprocessor conditionals.
+* Code within an #if 0 is now never scanned for non-macro tags by default
+ since tags within that code could conceivably be overridden by more
+ desirable tags in the #else branch to follow. Macro tags in these branches
+ are always included.
+* Fixed problem which could leave invalid tag entries in tag file when an
+ internal re-scan of a source file occurred.
+* Fixed problem with internal sort mechanism when appending tags to existing
+ file.
+* Changed external sort command to filter out duplicate identical tags lines
+ (including the pattern) unless warnings for duplicate tags are enabled.
+* Added data to emacs style tag entries to more closely match that produced by
+ the GNU etags.
+* Removed fixed limits on maximum tag line length.
+* Correction to Amiga makefile.
+* Improvements to Win32 portability with changes to Makefile.w32.
+
+ctags-1.6 (Tue May 13 1997)
+* Now using GNU autoconf to permit automatic host configuration.
+* Added the -e option to generate Emacs style tag files.
+* Added ETAGS environment variable to be used when -e option is in effect.
+* Added the -p option to supply a default path for source files.
+* Fixed problem of incorrect line numbers in tag file when form feeds and
+ vertical tabs occurred in the source file.
+* Fixed problem preventing ignoring of identifiers specified via the -I option
+ when the identifiers were followed by a parameter list in the source code.
+* Changed the search patterns generated for #define macros when using -N
+ option. It now includes the character following the name (or "$" if end of
+ line) instead of the "\>" used previously. Some editors did not support this
+ metacharacter for tag searches.
+* Removed -u (unique) flag from sort command line invocation to retain exactly
+ identical tag patterns in the same file. This is in preparation for editor
+ support of duplicate tags.
+* Fixed problem resulting in no tags generated following a function-like
+ macro reference outside of a function block.
+* Fixed problem of no tags generated for typedef for function or function
+ pointer.
+* Fixed problem of no tags generated after encountering strange function
+ declarations (e.g. "foo(params) saywhat (void);")
+
+ctags-1.5 (Sat Oct 5 1996)
+* Added generation of tags for objects inside C++ extern blocks (e.g.
+ 'extern "C" {...}' construct).
+* Added generation of tags for function definitions inside brace-enclosed
+ ("{}") blocks, since function definitions are allowed inside classes and
+ extern blocks.
+* Added the -N option to force patterns to be used for all tags (including
+ macro tags).
+* Changed the search patterns generated for macro definitions to be terminated
+ with "\>", which allows the pattern to still match even when the #define
+ line beyond the macro name is changed.
+* Fixed problem resulting in no tags generated for files containing a
+ function-like macro, such as 'MODULE_ID("$Id")', even when ignoring the
+ keyword. This also fixed a problem that caused tags to be missed for
+ initialized function pointer definitions.
+* Redirected error messages to stdout for MSDOS version.
+
+ctags-1.4 (Sun Aug 18 1996)
+* Added recursive parsing of class/struct/enum blocks to look for
+ class/struct/enum tags and enumeration values.
+* Added the -I option to specify keywords to ignore in the source files.
+* Fixed problem resulting in no tag generated when declaring a pointer const
+ or volatile.
+* Fixed problem resulting in no tag generated for comma terminated function
+ declarations.
+
+ctags-1.3 (Sun Jun 16 1996)
+* Fixed problem which caused "struct tag;" to be misinterpreted as a
+ variable definition.
+* Added the -n option to use line numbers in the tag file instead of patterns.
+* Added the -? option as an alternative for the --help option.
+
+ctags-1.2 (Wed Jun 5 1996)
+* Fixed a problem caused by an uninitialized variable.
+
+ctags-1.1 (Tue Jun 4 1996)
+* Fixed problem reading parameter list to the -h option.
+
+ctags-1.0a (Mon Jun 3 1996)
+* Added ctags.lsm to distribution.
+
+ctags-1.0 (Fri May 31 1996)
+* First public release.
+
+vim:tw=78
diff --git a/README b/README
new file mode 100644
index 0000000..f9554f3
--- /dev/null
+++ b/README
@@ -0,0 +1,73 @@
+Exuberant Ctags
+===============
+Author: Darren Hiebert <dhiebert at users.sourceforge.net>
+ http://ctags.sourceforge.net
+ Instant Messaging:
+ Yahoo! ID : dbhiebert
+ AIM ScreenName: darrenhiebert
+
+Exuberant Ctags is a multilanguage reimplementation of the much-underused
+ctags(1) program and is intended to be the mother of all ctags programs. It
+generates indexes of source code definitions which are used by a number of
+editors and tools. The motivation which drove the development of Exuberant
+Ctags was the need for a ctags program which supported generation of tags
+for all possible C language constructs (which no other ctags offers), and
+because most were easily fooled by a number of preprocessor constructs.
+
+
+Exuberant Ctags offers the following features:
+
+1. It supports the following languages: Assembler, AWK, ASP, BETA,
+ Bourne/Korn/Z Shell, C, C++, C#, COBOL, Eiffel, Erlang, Fortran, Java, Lisp,
+ Lua, Makefile, Pascal, Perl, PHP, PL/SQL, Python, REXX, Ruby, Scheme,
+ S-Lang, SML (Standard ML), Tcl, Vera, Verilog, VHDL, Vim, and YACC.
+
+2. It is capable of generating tags for virtually all C language constructs.
+
+3. It is very robust in parsing code. In particular, the C/C++ parser is
+ far less easily fooled by code containing #if preprocessor conditional
+ constructs, using a conditional path selection algorithm to resolve
+ complicated situations, and a fall-back algorithm when this one fails.
+
+4. Supports output of Emacs-style TAGS files (i.e. "etags").
+
+5. User-defined languages, using Posix regular expressions.
+
+6. Supports UNIX, MSDOS, Windows 95/98/NT/2000/XP, OS/2, QNX, Amiga, QDOS,
+ RISC OS, VMS, Macintosh, and Cray. Some pre-compiled binaries are
+ available on the web site.
+
+
+Visit the Exuberant Ctags web site:
+
+ http://ctags.sourceforge.net
+
+
+Which brings us to the most obvious question:
+
+ Q: Why is it called "Exuberant" ctags?
+ A: Because one of the meanings of the word is:
+
+ exuberant : produced in extreme abundance : PLENTIFUL syn see PROFUSE
+
+Compare the tag file produced by Exuberant Ctags with that produced by any
+other ctags and you will see how appropriate the name is.
+
+
+This source code is distributed according to the terms of the GNU General
+Public License. It is provided on an as-is basis and no responsibility is
+accepted for its failure to perform as expected. It is worth at least as
+much as you paid for it!
+
+Exuberant Ctags was originally derived from and inspired by the ctags
+program by Steve Kirkendall (kirkenda@cs.pdx.edu) that comes with the Elvis
+vi clone (though almost none of the original code remains). This, too, is
+freely available.
+
+Please report any problems you find. The two problems I expect to be most
+likely are either a tag which you expected but is missing, or a tag created
+in error (shouldn't really be a tag). Please include a sample of code (the
+definition) for the object which misbehaves.
+
+--
+vim:tw=76:sw=4:et:
diff --git a/ant.c b/ant.c
new file mode 100644
index 0000000..eedfcec
--- /dev/null
+++ b/ant.c
@@ -0,0 +1,42 @@
+/*
+* $Id$
+*
+* Copyright (c) 2008, David Fishburn
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* This module contains functions for generating tags for Ant language files.
+*/
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#include <string.h>
+#include "parse.h"
+
+/*
+* FUNCTION DEFINITIONS
+*/
+
+static void installAntRegex (const langType language) /* registers the two Ant tag regexes for `language` via addTagRegex */
+{
+ addTagRegex (language, /* matches a line opening a project element; group 1 captures its name attribute value */
+ "^[ \t]*<[ \t]*project.*name=\"([^\"]+)\".*", "\\1", "p,project,projects", NULL);
+ addTagRegex (language, /* same pattern shape for a target element; group 1 is again the name attribute value */
+ "^[ \t]*<[ \t]*target.*name=\"([^\"]+)\".*", "\\1", "t,target,targets", NULL);
+} /* NOTE(review): presumably the second argument is the tag name and the third the kind spec -- confirm against addTagRegex */
+
+extern parserDefinition* AntParser (void) /* creates and returns the parser definition named "Ant"; (void) gives the definition a real prototype */
+{
+ static const char *const extensions [] = { "build.xml", NULL }; /* NOTE(review): a whole file name is listed as an extension -- confirm the matcher handles it */
+ parserDefinition* const def = parserNew ("Ant");
+ def->extensions = extensions; /* static storage, so the pointer stays valid after return */
+ def->initialize = installAntRegex; /* installAntRegex is stored as the initialize callback */
+ def->regex = TRUE; /* presumably marks the parser as regex-driven -- confirm in parse.h */
+ return def;
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/argproc.c b/argproc.c
new file mode 100644
index 0000000..e06182f
--- /dev/null
+++ b/argproc.c
@@ -0,0 +1,505 @@
+/*
+* $Id: argproc.c 443 2006-05-30 04:37:13Z darren $
+*
+* Copyright (c) 1989, Mark Pizzolato (mark@infopiz.uucp)
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* Provided by Stephen P. Wall <swall@redcom.com>
+* Extracted from the VMS port of GNU patch-2.1.
+*
+* This module provides redirection support for the VAX DECC port of
+* Exuberant Ctags.
+*/
+/*
+ * @(#)argproc.c 1.0 89/02/01 Mark Pizzolato (mark@infopiz.uucp)
+ */
+
+#ifndef lint
+char argproc_version [] = "@(#)argproc.c VMS uucp Version infopiz-1.0";
+#endif
+
+#include <ctype.h>
+#include <descrip.h>
+#include <dvidef.h>
+#include <errno.h>
+#include <iodef.h>
+#include <lib$routines.h>
+#include <starlet.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <syidef.h> /* System Information Definitions */
+
+#define EXIT_OK 1 /* image exit code */
+#define EXIT_ERR 0x10000000 /* image exit code */
+
+/*
+ * getredirection() is intended to aid in porting C programs
+ * to VMS (Vax-11 C) which does not support '>' and '<'
+ * I/O redirection, along with a command line pipe mechanism
+ * using the '|' AND background command execution '&'.
+ * The piping mechanism will probably work with almost any 'filter' type
+ * of program. With suitable modification, it may be useful for other
+ * portability problems as well.
+ *
+ * Author: Mark Pizzolato mark@infopiz.UUCP
+ * Mods: Steve Wall Don't return a full path unless the
+ * original filename included a path.
+ */
+/*
+ * Node of the singly linked list in which getredirection() collects the
+ * rebuilt argument vector. Nodes are appended by add_item().
+ */
+struct list_item
+ {
+ struct list_item *next;
+ char *value;
+ };
+
+/* Forward declarations (old-style, no parameter information). */
+static expand_wild_cards ();
+static char *pipe_and_fork ();
+
+int
+getredirection (ac, av)
+int *ac;
+char ***av;
+/*
+ * Process VMS redirection arguments. Exit if any error is seen.
+ * If getredirection() processes an argument, it is erased
+ * from the vector. getredirection() stores a new argc and argv value
+ * through ac and av and returns 0.
+ * In the event that a background command is requested (by a trailing "&"),
+ * this routine creates a background subprocess, and simply exits the program.
+ *
+ * Warning: do not try to simplify the code for vms. The code
+ * presupposes that getredirection() is called before any data is
+ * read from stdin or written to stdout.
+ *
+ * Normal usage is as follows:
+ *
+ * main (argc, argv)
+ * int argc;
+ * char *argv [];
+ * {
+ * getredirection (&argc, &argv);
+ * }
+ */
+{
+    int argc = *ac;                     /* Argument Count */
+    char **argv = *av;                  /* Argument Vector */
+    char *ap = NULL;                    /* Argument pointer */
+    int j;                              /* argv [] index */
+    int item_count = 0;                 /* Count of Items in List */
+    struct list_item *list_head = 0;    /* First Item in List */
+    struct list_item *list_tail = 0;    /* Last Item in List */
+    char *in = NULL;                    /* Input File Name */
+    char *out = NULL;                   /* Output File Name */
+    char *outmode = "w";                /* Mode to Open Output File */
+    int cmargc = 0;                     /* Piped Command Arg Count */
+    char **cmargv = NULL;               /* Piped Command Arg Vector */
+
+    /*
+     * First handle the case where the last thing on the line ends with
+     * a '&'. This indicates the desire for the command to be run in a
+     * subprocess, so we satisfy that desire.
+     */
+    if (argc > 0)
+    {
+        extern background_process ();
+        size_t len;
+
+        ap = argv [argc-1];
+        if (0 == strcmp ("&", ap))
+            exit (background_process (--argc, argv));
+        len = strlen (ap);
+        /* An empty last argument has no final character to examine. */
+        if (len > 0 && '&' == ap [len-1])
+        {
+            ap [len-1] = '\0';
+            exit (background_process (argc, argv));
+        }
+    }
+    /*
+     * Now we handle the general redirection cases that involve '>', '>>',
+     * '<', and pipes '|'.
+     */
+    for (j = 0; j < argc; ++j)
+    {
+        if (0 == strcmp ("<", argv [j]))
+        {
+            if (j+1 >= argc)
+            {
+                errno = EINVAL;
+                perror ("No input file");
+                exit (EXIT_ERR);
+            }
+            in = argv [++j];
+            continue;
+        }
+        if ('<' == *(ap = argv [j]))
+        {
+            in = 1 + ap;
+            continue;
+        }
+        if (0 == strcmp (">", ap))
+        {
+            if (j+1 >= argc)
+            {
+                errno = EINVAL;
+                perror ("No output file");
+                exit (EXIT_ERR);
+            }
+            out = argv [++j];
+            continue;
+        }
+        if ('>' == *ap)
+        {
+            if ('>' == ap [1])
+            {
+                outmode = "a";
+                if ('\0' == ap [2])
+                {
+                    /* ">>" alone: the file name is the next argument,
+                     * which must exist (same check as for ">"). */
+                    if (j+1 >= argc)
+                    {
+                        errno = EINVAL;
+                        perror ("No output file");
+                        exit (EXIT_ERR);
+                    }
+                    out = argv [++j];
+                }
+                else
+                    out = 2 + ap;
+            }
+            else
+                out = 1 + ap;
+            continue;
+        }
+        if (0 == strcmp ("|", argv [j]))
+        {
+            if (j+1 >= argc)
+            {
+                errno = EPIPE;
+                perror ("No command to Pipe to");
+                exit (EXIT_ERR);
+            }
+            cmargc = argc- (j+1);
+            cmargv = &argv [j+1];
+            argc = j;       /* everything after the '|' belongs to the pipe */
+            continue;
+        }
+        if ('|' == *(ap = argv [j]))
+        {
+            ++argv [j];     /* step over the '|' glued to the command name */
+            cmargc = argc-j;
+            cmargv = &argv [j];
+            argc = j;
+            continue;
+        }
+        expand_wild_cards (ap, &list_head, &list_tail, &item_count);
+    }
+    /*
+     * Allocate and fill in the new argument vector. Some Unix systems
+     * terminate the list with an extra null pointer, so one is provided.
+     */
+    argv = *av = calloc (item_count+1, sizeof (char *));
+    if (NULL == argv)
+    {
+        errno = ENOMEM;
+        perror ("");
+        exit (EXIT_ERR);
+    }
+    for (j = 0; j < item_count; ++j, list_head = list_head->next)
+        argv [j] = list_head->value;
+    *ac = item_count;
+    if (cmargv != NULL)
+    {
+        char subcmd [1024];
+        size_t needed;
+
+        if (out != NULL)
+        {
+            errno = EINVAL;
+            perror ("Invalid '|' and '>' specified");
+            exit (EXIT_ERR);
+        }
+        /* Each further argument costs its length plus a blank and two
+         * quotes; refuse to build a command which would not fit. */
+        needed = strlen (cmargv [0]);
+        for (j = 1; j < cmargc; ++j)
+            needed += strlen (cmargv [j]) + 3;
+        if (needed >= sizeof (subcmd))
+        {
+            errno = EINVAL;
+            perror ("Piped command too long");
+            exit (EXIT_ERR);
+        }
+        strcpy (subcmd, cmargv [0]);
+        for (j = 1; j < cmargc; ++j)
+        {
+            strcat (subcmd, " \"");
+            strcat (subcmd, cmargv [j]);
+            strcat (subcmd, "\"");
+        }
+        out = pipe_and_fork (subcmd);
+    }
+    if ((in != NULL) && (NULL == freopen (in, "r", stdin, "mbc=32", "mbf=2")))
+    {
+        perror (in);        /* Can't find file */
+        exit (EXIT_ERR);    /* Is a fatal error */
+    }
+    if ((out != NULL) && (NULL == freopen (out, outmode, stdout, "mbc=32", "mbf=2")))
+    {
+        perror (out);       /* Error, can't write or append */
+        exit (EXIT_ERR);    /* Is a fatal error */
+    }
+#ifdef DEBUG
+    fprintf (stderr, "Arglist:\n");
+    for (j = 0; j < *ac; ++j)
+        fprintf (stderr, "argv[%d] = '%s'\n", j, argv [j]);
+#endif
+    return 0;
+}
+
+/*
+ * Appends a new node holding `value' to the list described by *head and
+ * *tail and increments *count. The value pointer is stored, not copied.
+ * Exits the program if the node cannot be allocated.
+ */
+static void add_item (head, tail, value, count)
+struct list_item **head;
+struct list_item **tail;
+char *value;
+int *count;
+{
+    struct list_item *node = calloc (1, sizeof (*node));
+
+    if (NULL == node)
+    {
+        errno = ENOMEM;
+        perror ("");
+        exit (EXIT_ERR);
+    }
+    node->value = value;
+    if (*head == 0)
+        *head = node;           /* first item: it is both head and tail */
+    else
+        (*tail)->next = node;
+    *tail = node;
+    ++ (*count);
+}
+
+/*
+ * Adds `item' to the argument list. If it contains no wild card character
+ * ('*' or '%') it is added unchanged; otherwise one entry is added for each
+ * file reported by lib$find_file(), or the item itself if nothing matched.
+ * Version numbers and directory paths are kept in the results only when the
+ * caller's specification contained them. Results are converted to lower case.
+ */
+static expand_wild_cards (item, head, tail, count)
+char *item;
+struct list_item **head;
+struct list_item **tail;
+int *count;
+{
+int expcount = 0;
+int context = 0;
+int status_value;
+char *had_version;
+int had_path;
+$DESCRIPTOR (filespec, item);
+/*$DESCRIPTOR (defaultspec, "SYS$DISK:[]*.*;");*/
+$DESCRIPTOR (defaultspec, "");
+$DESCRIPTOR (resultspec, "");
+
+    if (strcspn (item, "*%") == strlen (item))
+    {
+        add_item (head, tail, item, count);
+        return;
+    }
+    resultspec.dsc$b_dtype = DSC$K_DTYPE_T;
+    resultspec.dsc$b_class = DSC$K_CLASS_D;
+    resultspec.dsc$a_pointer = NULL;
+    filespec.dsc$w_length = strlen (item);
+    /*
+     * Only return version specs, if the caller specified a version
+     */
+    had_version = strchr (item, ';');
+    /*
+     * Only return full path if the caller specified a path
+     */
+    had_path = (strchr (item, ']') || strchr (item, ':'));
+    while (1 == (1&lib$find_file (&filespec, &resultspec, &context,
+                                  &defaultspec, 0, &status_value, &0)))
+    {
+        char *string;
+        char *c;
+
+        if (NULL == (string = calloc (1, resultspec.dsc$w_length+1)))
+        {
+            errno = ENOMEM;
+            perror ("");
+            exit (EXIT_ERR);
+        }
+        strncpy (string, resultspec.dsc$a_pointer, resultspec.dsc$w_length);
+        string [resultspec.dsc$w_length] = '\0';
+        if (NULL == had_version)
+        {
+            /* Strip the version only if the result really carries one. */
+            char *version = strrchr (string, ';');
+            if (version != NULL)
+                *version = '\0';
+        }
+        if (!had_path)
+        {
+            char *s = strrchr (string, ']');
+            if (s == NULL)
+                s = strrchr (string, ':');
+            /* Source and destination overlap, so strcpy() must not be used. */
+            if (s != NULL)
+                memmove (string, s+1, strlen (s+1) + 1);
+        }
+        /*
+         * Be consistent with what the C RTL has already done to the rest of
+         * the argv items and lowercase all of these names.
+         */
+        for (c = string; *c; ++c)
+            if (isupper ((unsigned char) *c))
+                *c = tolower ((unsigned char) *c);
+        add_item (head, tail, string, count);
+        ++expcount;
+    }
+    if (expcount == 0)
+        add_item (head, tail, item, count);
+    lib$sfree1_dd (&resultspec);
+    lib$find_file_end (&context);
+}
+
+static int child_st [2]; /* Event Flag set when child process completes */
+
+static short child_chan;/* I/O Channel for Pipe Mailbox */
+
+/*
+ * Exit handler referenced by exit_block, which pipe_and_fork() declares
+ * with sys$dclexh(). While child_st [0] is still zero it issues an
+ * IO$_WRITEOF request on the pipe mailbox channel, deassigns the channel,
+ * closes stdout and then waits on child_st with sys$synch().
+ * The `status' argument is not used.
+ */
+static exit_handler (status)
+int *status;
+{
+short iosb [4];
+
+ if (0 == child_st [0])
+ {
+#ifdef DEBUG
+ fprintf (stderr, "Waiting for Child Process to Finnish . . .\n");
+#endif
+ sys$qiow (0, child_chan, IO$_WRITEOF, iosb, 0, 0, 0, 0, 0, 0, 0, 0);
+ sys$dassgn (child_chan);
+ fclose (stdout);
+ sys$synch (0, child_st);
+ }
+}
+
+
+/*
+ * Completion routine handed to lib$spawn() by pipe_and_fork(). Marks the
+ * child as finished by setting child_st [0] to 1 unless it is already
+ * non-zero. The `chan' argument is not used.
+ */
+static sig_child (chan)
+int chan;
+{
+#ifdef DEBUG
+    fprintf (stderr, "Child Completion AST\n");
+#endif
+    if (! child_st [0])
+    {
+        child_st [0] = 1;
+    }
+}
+
+/*
+ * Control block passed to sys$dclexh() by pipe_and_fork(). It names
+ * exit_handler() as the routine to run, with one argument: the address of
+ * the block's own exit_status field.
+ */
+static struct exit_control_block
+ {
+ struct exit_control_block *flink;
+ int (*exit_routine) ();
+ int arg_count;
+ int *status_address;
+ int exit_status;
+ } exit_block =
+ {
+ 0,
+ exit_handler,
+ 1,
+ &exit_block.exit_status,
+ 0
+ };
+
+/*
+ * Creates a mailbox, spawns `cmd' as a subprocess with the mailbox as its
+ * second lib$spawn() argument, and declares exit_handler() through
+ * sys$dclexh(). Returns the mailbox device name, held in a static buffer;
+ * getredirection() reopens stdout on that name. Exits the program if any
+ * system service fails.
+ */
+static char *pipe_and_fork (cmd)
+char *cmd;
+{
+ $DESCRIPTOR (cmddsc, cmd);
+ static char mbxname [64];
+ $DESCRIPTOR (mbxdsc, mbxname);
+ short iosb [4];
+ int status;
+ int pid;
+ /* Item list for sys$getdviw: fetch the device name into mbxname. */
+ struct
+ {
+ short dna_buflen;
+ short dna_itmcod;
+ char *dna_buffer;
+ unsigned short *dna_retlen;
+ int listend;
+ } itmlst =
+ {
+ sizeof (mbxname),
+ DVI$_DEVNAM,
+ mbxname,
+ &mbxdsc.dsc$w_length,
+ 0
+ };
+ int mbxsize;
+ /* Item list for sys$getsyiw: fetch MAXBUF into mbxsize. */
+ struct
+ {
+ short mbf_buflen;
+ short mbf_itmcod;
+ int *mbf_maxbuf;
+ unsigned short *mbf_retlen;
+ int listend;
+ } syiitmlst =
+ {
+ sizeof (mbxsize),
+ SYI$_MAXBUF,
+ &mbxsize,
+ 0,
+ 0
+ };
+
+ cmddsc.dsc$w_length = strlen (cmd);
+ /*
+ * Get the SYSGEN parameter MAXBUF, and use the smaller of it and 2048 as
+ * the size of the 'pipe' mailbox.
+ */
+ if (1 == (1& (vaxc$errno = sys$getsyiw (0, 0, 0, &syiitmlst, iosb, 0, 0, 0))))
+ vaxc$errno = iosb [0];
+ if (0 == (1&vaxc$errno))
+ {
+ errno = EVMSERR;
+ perror ("Can't get SYSGEN parameter value for MAXBUF");
+ exit (EXIT_ERR);
+ }
+ if (mbxsize > 2048)
+ mbxsize = 2048;
+ if (0 == (1& (vaxc$errno = sys$crembx (0, &child_chan, mbxsize, mbxsize, 0, 0, 0))))
+ {
+ errno = EVMSERR;
+ perror ("Can't create pipe mailbox");
+ exit (EXIT_ERR);
+ }
+ if (1 == (1& (vaxc$errno = sys$getdviw (0, child_chan, 0, &itmlst, iosb,
+ 0, 0, 0))))
+ vaxc$errno = iosb [0];
+ if (0 == (1&vaxc$errno))
+ {
+ errno = EVMSERR;
+ perror ("Can't get pipe mailbox device name");
+ exit (EXIT_ERR);
+ }
+ /* The returned length was stored in mbxdsc; terminate the C string. */
+ mbxname [mbxdsc.dsc$w_length] = '\0';
+#ifdef DEBUG
+ fprintf (stderr, "Pipe Mailbox Name = '%s'\n", mbxname);
+#endif
+ if (0 == (1& (vaxc$errno = lib$spawn (&cmddsc, &mbxdsc, 0, &1,
+ 0, &pid, child_st, &0, sig_child,
+ &child_chan))))
+ {
+ errno = EVMSERR;
+ perror ("Can't spawn subprocess");
+ exit (EXIT_ERR);
+ }
+#ifdef DEBUG
+ fprintf (stderr, "Subprocess's Pid = %08X\n", pid);
+#endif
+ sys$dclexh (&exit_block);
+ return (mbxname);
+}
+
+/*
+ * Rebuilds the command line as "$" followed by argv [0] and each further
+ * argument in double quotes, stores it in the symbol BACKGROUND$COMMAND and
+ * spawns a subprocess on that symbol with NLA0: as its input. Prints the
+ * subprocess id on stderr and returns EXIT_OK. Exits the program if the
+ * command does not fit the buffer or a library call fails.
+ */
+background_process (argc, argv)
+int argc;
+char **argv;
+{
+char command [2048] = "$";
+$DESCRIPTOR (value, command);
+$DESCRIPTOR (cmd, "BACKGROUND$COMMAND");
+$DESCRIPTOR (null, "NLA0:");
+int pid;
+size_t needed;
+int i;
+
+    /* Each further argument costs its length plus a blank and two quotes;
+     * refuse to build a command which would overflow the buffer. */
+    needed = strlen (command) + strlen (argv [0]);
+    for (i = 1; i < argc; ++i)
+        needed += strlen (argv [i]) + 3;
+    if (needed >= sizeof (command))
+    {
+        errno = EINVAL;
+        perror ("Command line too long for subprocess");
+        exit (EXIT_ERR);
+    }
+    strcat (command, argv [0]);
+    for (i = 1; i < argc; ++i)
+    {
+        strcat (command, " \"");
+        strcat (command, argv [i]);
+        strcat (command, "\"");
+    }
+    value.dsc$w_length = strlen (command);
+    if (0 == (1& (vaxc$errno = lib$set_symbol (&cmd, &value))))
+    {
+        errno = EVMSERR;
+        perror ("Can't create symbol for subprocess command");
+        exit (EXIT_ERR);
+    }
+    if (0 == (1& (vaxc$errno = lib$spawn (&cmd, &null, 0, &17, 0, &pid))))
+    {
+        errno = EVMSERR;
+        perror ("Can't spawn subprocess");
+        exit (EXIT_ERR);
+    }
+#ifdef DEBUG
+    fprintf (stderr, "%s\n", command);
+#endif
+    fprintf (stderr, "%08X\n", pid);
+    return (EXIT_OK);
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/args.c b/args.c
new file mode 100644
index 0000000..a9336d2
--- /dev/null
+++ b/args.c
@@ -0,0 +1,274 @@
+/*
+* $Id: args.c 536 2007-06-02 06:09:00Z elliotth $
+*
+* Copyright (c) 1999-2002, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* This module contains functions for reading command line arguments.
+*/
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "args.h"
+#include "debug.h"
+#include "routines.h"
+
+/*
+* FUNCTION DEFINITIONS
+*/
+
+/*
+*   Returns a newly allocated copy of the next white space delimited word
+*   found at *next, or NULL if only white space remains. *next is advanced
+*   to the character following the word (or to the terminating NUL).
+*/
+static char *nextStringArg (const char** const next)
+{
+    char* result = NULL;
+    const char* start;
+
+    Assert (*next != NULL);
+    /* ctype functions require a value representable as unsigned char */
+    for (start = *next ; isspace ((unsigned char) *start) ; ++start)
+        ;
+    if (*start == '\0')
+        *next = start;
+    else
+    {
+        size_t length;
+        const char* end;
+
+        for (end = start ;
+             *end != '\0' && ! isspace ((unsigned char) *end) ; ++end)
+            ;
+        length = end - start;
+        Assert (length > 0);
+        result = xMalloc (length + 1, char);
+        strncpy (result, start, length);
+        result [length] = '\0';
+        *next = end;
+    }
+    return result;
+}
+
+/*
+*   Returns a newly allocated copy of the text at *next up to, but not
+*   including, the end of the line, or NULL if that text is empty. *next is
+*   advanced past the line terminator, which may be "\n", "\r" or "\r\n".
+*/
+static char* nextStringLine (const char** const next)
+{
+    char* result = NULL;
+    size_t length;
+    const char* end;
+
+    Assert (*next != NULL);
+    /* Stop at '\r' as well; otherwise the '\r' handling below could never
+     * run and the carriage return would end up inside the result. */
+    for (end = *next ; *end != '\n' && *end != '\r' && *end != '\0' ; ++end)
+        ;
+    length = end - *next;
+    if (length > 0)
+    {
+        result = xMalloc (length + 1, char);
+        strncpy (result, *next, length);
+        result [length] = '\0';
+    }
+    if (*end == '\n')
+        ++end;
+    else if (*end == '\r')
+    {
+        ++end;
+        if (*end == '\n')
+            ++end;
+    }
+    *next = end;
+    return result;
+}
+
+/*
+*   Fetches the next item from a string source: a whole line when the
+*   argument list is in line mode, otherwise a single word.
+*/
+static char* nextString (const Arguments* const current, const char** const next)
+{
+    return current->lineMode ? nextStringLine (next) : nextStringArg (next);
+}
+
+/*
+*   Reads the next white space delimited word from fp and returns it in
+*   newly allocated memory. Returns NULL if the stream is already at end of
+*   file or only white space remains.
+*/
+static char* nextFileArg (FILE* const fp)
+{
+    char* word = NULL;
+    vString* buffer;
+    int ch;
+
+    Assert (fp != NULL);
+    if (feof (fp))
+        return NULL;
+
+    buffer = vStringNew ();
+    /* skip leading white space */
+    ch = fgetc (fp);
+    while (isspace (ch))
+        ch = fgetc (fp);
+
+    if (ch != EOF)
+    {
+        /* ch is the first character of the word here */
+        while (ch != EOF && ! isspace (ch))
+        {
+            vStringPut (buffer, ch);
+            ch = fgetc (fp);
+        }
+        vStringTerminate (buffer);
+        Assert (vStringLength (buffer) > 0);
+        word = xMalloc (vStringLength (buffer) + 1, char);
+        strcpy (word, vStringValue (buffer));
+    }
+    vStringDelete (buffer);
+    return word;
+}
+
+/*
+*   Reads the next non-empty line from fp and returns it in newly allocated
+*   memory, after vStringStripTrailing() has been applied to it. Leading line
+*   terminators are skipped. Returns NULL if the stream is at end of file and
+*   nothing was read.
+*/
+static char* nextFileLine (FILE* const fp)
+{
+    char* result = NULL;
+
+    /* Check the stream before its first use, as nextFileArg() does. */
+    Assert (fp != NULL);
+    if (! feof (fp))
+    {
+        vString* vs = vStringNew ();
+        int c;
+
+        c = fgetc (fp);
+        while (c != EOF)
+        {
+            if (c != '\n' && c != '\r')
+                vStringPut (vs, c);
+            else if (vStringLength (vs) > 0)
+                break;      /* terminator of a line which has content */
+            c = fgetc (fp);
+        }
+        if (c != EOF || vStringLength (vs) > 0)
+        {
+            if (c == '\r')
+            {
+                /* consume the '\n' of a "\r\n" pair, if there is one */
+                c = fgetc (fp);
+                if (c != '\n')
+                    c = ungetc (c, fp);
+            }
+            vStringTerminate (vs);
+            vStringStripTrailing (vs);
+            result = xMalloc (vStringLength (vs) + 1, char);
+            strcpy (result, vStringValue (vs));
+        }
+        vStringDelete (vs);
+    }
+    return result;
+}
+
+/*
+*   Fetches the next item from a file source: a whole line when the argument
+*   list is in line mode, otherwise a single word.
+*/
+static char* nextFileString (const Arguments* const current, FILE* const fp)
+{
+    return current->lineMode ? nextFileLine (fp) : nextFileArg (fp);
+}
+
+/*
+*   Creates an argument list which reads its items from `string'. The string
+*   itself is referenced, not copied. The first item is fetched immediately.
+*/
+extern Arguments* argNewFromString (const char* const string)
+{
+    Arguments* const args = xMalloc (1, Arguments);
+
+    memset (args, 0, sizeof (Arguments));
+    args->type = ARG_STRING;
+    args->u.stringArgs.next = string;
+    args->u.stringArgs.item = string;
+    args->u.stringArgs.string = string;
+    args->item = nextString (args, &args->u.stringArgs.next);
+    return args;
+}
+
+/*
+*   Creates an argument list which walks the vector `argv'. The current item
+*   is the first element of the vector; nothing is copied.
+*/
+extern Arguments* argNewFromArgv (char* const* const argv)
+{
+    Arguments* const args = xMalloc (1, Arguments);
+
+    memset (args, 0, sizeof (Arguments));
+    args->type = ARG_ARGV;
+    args->u.argvArgs.argv = argv;
+    args->u.argvArgs.item = argv;
+    args->item = *argv;
+    return args;
+}
+
+/*
+*   Creates an argument list which reads white space delimited words from
+*   the stream `fp'. The first item is read immediately.
+*/
+extern Arguments* argNewFromFile (FILE* const fp)
+{
+    Arguments* const args = xMalloc (1, Arguments);
+
+    memset (args, 0, sizeof (Arguments));
+    args->u.fileArgs.fp = fp;
+    args->type = ARG_FILE;
+    args->item = nextFileString (args, fp);
+    return args;
+}
+
+/*
+*   Creates an argument list which reads whole lines from the stream `fp'.
+*   The first item is read immediately.
+*/
+extern Arguments* argNewFromLineFile (FILE* const fp)
+{
+    Arguments* const args = xMalloc (1, Arguments);
+
+    memset (args, 0, sizeof (Arguments));
+    args->u.fileArgs.fp = fp;
+    args->lineMode = TRUE;
+    args->type = ARG_FILE;
+    args->item = nextFileString (args, fp);
+    return args;
+}
+
+/*
+*   Returns the current item. The list must not be exhausted.
+*/
+extern char *argItem (const Arguments* const current)
+{
+    char *item;
+
+    Assert (current != NULL);
+    Assert (! argOff (current));
+    item = current->item;
+    return item;
+}
+
+/*
+*   Reports whether the list is exhausted, i.e. has no current item.
+*/
+extern boolean argOff (const Arguments* const current)
+{
+    boolean exhausted;
+
+    Assert (current != NULL);
+    exhausted = (boolean) (current->item == NULL);
+    return exhausted;
+}
+
+/*
+*   Makes subsequent reads from string and file sources return single words.
+*/
+extern void argSetWordMode (Arguments* const current)
+{
+    Assert (current != NULL);
+
+    current->lineMode = FALSE;
+}
+
+/*
+*   Makes subsequent reads from string and file sources return whole lines.
+*/
+extern void argSetLineMode (Arguments* const current)
+{
+    Assert (current != NULL);
+
+    current->lineMode = TRUE;
+}
+
+/*
+*   Advances to the next item. For string and file sources the current item
+*   is released before the next one is read; for an argument vector the item
+*   pointer simply moves to the next element.
+*/
+extern void argForth (Arguments* const current)
+{
+    Assert (current != NULL);
+    Assert (! argOff (current));
+
+    if (current->type == ARG_ARGV)
+    {
+        current->u.argvArgs.item += 1;
+        current->item = *current->u.argvArgs.item;
+    }
+    else if (current->type == ARG_STRING)
+    {
+        if (current->item != NULL)
+            eFree (current->item);
+        current->u.stringArgs.item = current->u.stringArgs.next;
+        current->item = nextString (current, &current->u.stringArgs.next);
+    }
+    else if (current->type == ARG_FILE)
+    {
+        if (current->item != NULL)
+            eFree (current->item);
+        current->item = nextFileString (current, current->u.fileArgs.fp);
+    }
+    else
+    {
+        Assert ("Invalid argument type" == NULL);
+    }
+}
+
+/*
+*   Destroys the argument list. A pending item is released when the list
+*   owns it, which is the case for string and file sources (their items are
+*   allocated by nextString() and nextFileString()); items of an argument
+*   vector belong to the caller. The stream of a file source is not closed.
+*/
+extern void argDelete (Arguments* const current)
+{
+    Assert (current != NULL);
+    if ((current->type == ARG_STRING || current->type == ARG_FILE) &&
+        current->item != NULL)
+        eFree (current->item);
+    memset (current, 0, sizeof (Arguments));
+    eFree (current);
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/args.h b/args.h
new file mode 100644
index 0000000..985a06c
--- /dev/null
+++ b/args.h
@@ -0,0 +1,63 @@
+/*
+* $Id: args.h 443 2006-05-30 04:37:13Z darren $
+*
+* Copyright (c) 1999-2002, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* Defines external interface to command line argument reading.
+*/
+#ifndef _ARGS_H
+#define _ARGS_H
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#include <stdio.h>
+
+/*
+* DATA DECLARATIONS
+*/
+
+/* Source from which an argument list obtains its items. */
+typedef enum { ARG_NONE, ARG_STRING, ARG_ARGV, ARG_FILE } argType;
+
+/* State of an argument list; `type' selects the active member of `u'. */
+typedef struct sArgs {
+ argType type;
+ union {
+ struct sStringArgs {
+ const char* string; /* the complete source string */
+ const char* next; /* where the following item will be read from */
+ const char* item; /* where the current item was read from */
+ } stringArgs;
+ struct sArgvArgs {
+ char* const* argv; /* the complete vector */
+ char* const* item; /* element holding the current item */
+ } argvArgs;
+ struct sFileArgs {
+ FILE* fp; /* stream the items are read from */
+ } fileArgs;
+ } u;
+ char* item; /* current item; NULL once the list is exhausted */
+ boolean lineMode; /* TRUE: items are whole lines, not words */
+} Arguments;
+
+/*
+* FUNCTION PROTOTYPES
+*/
+/* Constructors: each allocates a list and makes its first item current. */
+extern Arguments* argNewFromString (const char* const string);
+extern Arguments* argNewFromArgv (char* const* const argv);
+extern Arguments* argNewFromFile (FILE* const fp);
+extern Arguments* argNewFromLineFile (FILE* const fp);
+/* Accessors for the current item and the end-of-list condition. */
+extern char *argItem (const Arguments* const current);
+extern boolean argOff (const Arguments* const current);
+/* Select whether following items are single words or whole lines. */
+extern void argSetWordMode (Arguments* const current);
+extern void argSetLineMode (Arguments* const current);
+/* Advance to the next item; destroy the list. */
+extern void argForth (Arguments* const current);
+extern void argDelete (Arguments* const current);
+
+#endif /* _ARGS_H */
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/asm.c b/asm.c
new file mode 100644
index 0000000..8c1ff2b
--- /dev/null
+++ b/asm.c
@@ -0,0 +1,387 @@
+/*
+* $Id: asm.c 536 2007-06-02 06:09:00Z elliotth $
+*
+* Copyright (c) 2000-2003, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* This module contains functions for generating tags for assembly language
+* files.
+*/
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#include <string.h>
+
+#include "debug.h"
+#include "keyword.h"
+#include "parse.h"
+#include "read.h"
+#include "routines.h"
+#include "vstring.h"
+
+/*
+* DATA DECLARATIONS
+*/
+/* Tag kinds; the non-negative values are used as indexes into AsmKinds. */
+typedef enum {
+ K_NONE = -1, K_DEFINE, K_LABEL, K_MACRO, K_TYPE
+} AsmKind;
+
+/* Recognized operators; the non-negative values index the OpKinds table. */
+typedef enum {
+ OP_UNDEFINED = -1,
+ OP_ALIGN,
+ OP_COLON_EQUAL,
+ OP_END,
+ OP_ENDM,
+ OP_ENDMACRO,
+ OP_ENDP,
+ OP_ENDS,
+ OP_EQU,
+ OP_EQUAL,
+ OP_LABEL,
+ OP_MACRO,
+ OP_PROC,
+ OP_RECORD,
+ OP_SECTIONS,
+ OP_SET,
+ OP_STRUCT,
+ OP_LAST
+} opKeyword;
+
+/* Associates the spelling of an operator with its opKeyword value. */
+typedef struct {
+ const char *operator;
+ opKeyword keyword;
+} asmKeyword;
+
+/* Associates an operator with the kind of tag it produces. */
+typedef struct {
+ opKeyword keyword;
+ AsmKind kind;
+} opKind;
+
+/*
+* DATA DEFINITIONS
+*/
+static langType Lang_asm;
+
+/* Kind table handed to makeSimpleTag() together with an AsmKind value.
+ * NOTE(review): presumably indexed by AsmKind, so the entries must stay in
+ * enumeration order -- confirm against makeSimpleTag(). */
+static kindOption AsmKinds [] = {
+ { TRUE, 'd', "define", "defines" },
+ { TRUE, 'l', "label", "labels" },
+ { TRUE, 'm', "macro", "macros" },
+ { TRUE, 't', "type", "types (structs and records)" }
+};
+
+/* Operator spellings registered by buildAsmKeywordHash(). They are written
+ * in lower case; analyzeOperator() lower-cases an operator before it is
+ * looked up. */
+static const asmKeyword AsmKeywords [] = {
+ { "align", OP_ALIGN },
+ { "endmacro", OP_ENDMACRO },
+ { "endm", OP_ENDM },
+ { "end", OP_END },
+ { "endp", OP_ENDP },
+ { "ends", OP_ENDS },
+ { "equ", OP_EQU },
+ { "label", OP_LABEL },
+ { "macro", OP_MACRO },
+ { ":=", OP_COLON_EQUAL },
+ { "=", OP_EQUAL },
+ { "proc", OP_PROC },
+ { "record", OP_RECORD },
+ { "sections", OP_SECTIONS },
+ { "set", OP_SET },
+ { "struct", OP_STRUCT }
+};
+
+/* Tag kind produced by each operator; K_NONE means no tag. operatorKind()
+ * indexes this table with the opKeyword value and asserts that the entry
+ * found carries the same keyword. */
+static const opKind OpKinds [] = {
+ /* must be ordered same as opKeyword enumeration */
+ { OP_ALIGN, K_NONE },
+ { OP_COLON_EQUAL, K_DEFINE },
+ { OP_END, K_NONE },
+ { OP_ENDM, K_NONE },
+ { OP_ENDMACRO, K_NONE },
+ { OP_ENDP, K_NONE },
+ { OP_ENDS, K_NONE },
+ { OP_EQU, K_DEFINE },
+ { OP_EQUAL, K_DEFINE },
+ { OP_LABEL, K_LABEL },
+ { OP_MACRO, K_MACRO },
+ { OP_PROC, K_LABEL },
+ { OP_RECORD, K_TYPE },
+ { OP_SECTIONS, K_NONE },
+ { OP_SET, K_DEFINE },
+ { OP_STRUCT, K_TYPE }
+};
+
+/*
+* FUNCTION DEFINITIONS
+*/
+/*
+*   Registers every entry of AsmKeywords as a keyword of the assembly
+*   language.
+*/
+static void buildAsmKeywordHash (void)
+{
+    const asmKeyword* entry = AsmKeywords;
+    const asmKeyword* const end =
+        AsmKeywords + sizeof (AsmKeywords) / sizeof (AsmKeywords [0]);
+
+    for ( ; entry != end ; ++entry)
+        addKeyword (entry->operator, Lang_asm, (int) entry->keyword);
+}
+
+/*
+*   Looks up the lower-cased spelling of `op' among the assembly keywords and
+*   returns the result of that lookup as an opKeyword.
+*/
+static opKeyword analyzeOperator (const vString *const op)
+{
+    vString *const lowered = vStringNew ();
+    opKeyword id;
+
+    vStringCopyToLower (lowered, op);
+    id = (opKeyword) lookupKeyword (vStringValue (lowered), Lang_asm);
+    vStringDelete (lowered);
+
+    return id;
+}
+
+/*
+*   Tells whether `c' may start a symbol: a letter, '_' or '$'.
+*/
+static boolean isInitialSymbolCharacter (int c)
+{
+    if (c == '\0')
+        return FALSE;
+    if (isalpha (c))
+        return TRUE;
+    return (boolean) (strchr ("_$", c) != NULL);
+}
+
+/*
+*   Tells whether `c' may appear inside a symbol: a letter, a digit, '_',
+*   '$' or '?' (the latter is allowed in the AMD 29K family).
+*/
+static boolean isSymbolCharacter (int c)
+{
+    if (c == '\0')
+        return FALSE;
+    if (isalnum (c))
+        return TRUE;
+    return (boolean) (strchr ("_$?", c) != NULL);
+}
+
+/*
+*   Examines the text following a '#'. If the directive is "define", the
+*   symbol which follows it is tagged as a define. Returns TRUE if the
+*   directive was "define".
+*/
+static boolean readPreProc (const unsigned char *const line)
+{
+    const unsigned char *cp;
+    vString *const word = vStringNew ();
+    boolean isDefine;
+
+    /* collect the directive name */
+    for (cp = line ; isSymbolCharacter ((int) *cp) ; ++cp)
+        vStringPut (word, *cp);
+    vStringTerminate (word);
+
+    isDefine = (boolean) (strcmp (vStringValue (word), "define") == 0);
+    if (isDefine)
+    {
+        while (isspace ((int) *cp))
+            ++cp;
+        /* reuse the buffer for the name being defined */
+        vStringClear (word);
+        for ( ; isSymbolCharacter ((int) *cp) ; ++cp)
+            vStringPut (word, *cp);
+        vStringTerminate (word);
+        makeSimpleTag (word, AsmKinds, K_DEFINE);
+    }
+    vStringDelete (word);
+    return isDefine;
+}
+
+/*
+*   Maps an operator to the kind of tag it introduces. *found is set to
+*   tell whether the operator is a known keyword; K_NONE is returned for
+*   unknown operators and for keywords which produce no tag.
+*/
+static AsmKind operatorKind (
+        const vString *const operator,
+        boolean *const found)
+{
+    const opKeyword kw = analyzeOperator (operator);
+    AsmKind kind;
+
+    *found = (boolean) (kw != OP_UNDEFINED);
+    if (! *found)
+        return K_NONE;
+
+    kind = OpKinds [kw].kind;
+    Assert (OpKinds [kw].keyword == kw);
+    return kind;
+}
+
+/*
+*   Recognizes data definition operators of the forms "DB", "DB.L" and
+*   "DCB.W" (68000): the operator starts with 'D' or 'd' and is either two
+*   characters long, four long with a '.' third, or five long with a '.'
+*   fourth.
+*/
+static boolean isDefineOperator (const vString *const operator)
+{
+    const unsigned char *const text =
+        (unsigned char*) vStringValue (operator);
+    const size_t length = vStringLength (operator);
+
+    if (length == 0 || toupper ((int) *text) != 'D')
+        return FALSE;
+
+    switch (length)
+    {
+        case 2:  return TRUE;
+        case 4:  return (boolean) ((int) text [2] == '.');
+        case 5:  return (boolean) ((int) text [3] == '.');
+        default: return FALSE;
+    }
+}
+
+/*
+*   Decides whether `name' deserves a tag and of which kind:
+*   - a known operator gives the kind listed for it (none for K_NONE);
+*   - a data definition operator gives a define, unless the name was read
+*     after the operator;
+*   - otherwise a label candidate which is not itself a known operator
+*     gives a label.
+*   Nothing is done for an empty name.
+*/
+static void makeAsmTag (
+        const vString *const name,
+        const vString *const operator,
+        const boolean labelCandidate,
+        const boolean nameFollows)
+{
+    boolean known;
+    AsmKind kind;
+
+    if (vStringLength (name) == 0)
+        return;
+
+    kind = operatorKind (operator, &known);
+    if (known)
+    {
+        if (kind != K_NONE)
+            makeSimpleTag (name, AsmKinds, kind);
+        return;
+    }
+    if (isDefineOperator (operator))
+    {
+        if (! nameFollows)
+            makeSimpleTag (name, AsmKinds, K_DEFINE);
+        return;
+    }
+    if (labelCandidate)
+    {
+        /* only the `known' flag matters here; the kind is discarded */
+        operatorKind (name, &known);
+        if (! known)
+            makeSimpleTag (name, AsmKinds, K_LABEL);
+    }
+}
+
+/*
+*   Reads a symbol starting at `start' into `sym', which is cleared first
+*   and stays empty if no symbol starts there. Returns the position following
+*   the symbol, or `start' if there is none.
+*/
+static const unsigned char *readSymbol (
+        const unsigned char *const start,
+        vString *const sym)
+{
+    const unsigned char *cp = start;
+
+    vStringClear (sym);
+    if (! isInitialSymbolCharacter ((int) *cp))
+        return cp;
+
+    for ( ; isSymbolCharacter ((int) *cp) ; ++cp)
+        vStringPut (sym, *cp);
+    vStringTerminate (sym);
+    return cp;
+}
+
+/*
+*   Copies the run of non-space characters at `start' into `operator',
+*   which is cleared first. Returns the position following that run.
+*/
+static const unsigned char *readOperator (
+        const unsigned char *const start,
+        vString *const operator)
+{
+    const unsigned char *cp;
+
+    vStringClear (operator);
+    for (cp = start ; *cp != '\0' && ! isspace ((int) *cp) ; ++cp)
+        vStringPut (operator, *cp);
+    vStringTerminate (operator);
+    return cp;
+}
+
+/*
+*   Scans the input file line by line and creates tags for assembly
+*   language definitions. For each line which is not a comment it either
+*   handles a '#' preprocessor directive, or reads an optional symbol, an
+*   operator and, if no symbol preceded the operator, a symbol following it,
+*   and passes them to makeAsmTag().
+*/
+static void findAsmTags (void)
+{
+    vString *name = vStringNew ();
+    vString *operator = vStringNew ();
+    const unsigned char *line;
+    boolean inCComment = FALSE;
+
+    while ((line = fileReadLine ()) != NULL)
+    {
+        const unsigned char *cp = line;
+        /* a symbol starting in the first column may be a label */
+        boolean labelCandidate = (boolean) (! isspace ((int) *cp));
+        boolean nameFollows = FALSE;
+        const boolean isComment = (boolean)
+            (*cp != '\0' && strchr (";*@", *cp) != NULL);
+
+        /* skip comments */
+        if (strncmp ((const char*) cp, "/*", (size_t) 2) == 0)
+        {
+            inCComment = TRUE;
+            cp += 2;
+        }
+        if (inCComment)
+        {
+            /* Test for the terminating NUL before stepping: the rest of
+             * the line may be empty, and cp must not move past the end. */
+            while (*cp != '\0')
+            {
+                if (strncmp ((const char*) cp, "*/", (size_t) 2) == 0)
+                {
+                    inCComment = FALSE;
+                    cp += 2;
+                    break;
+                }
+                ++cp;
+            }
+        }
+        if (isComment || inCComment)
+            continue;
+
+        /* read preprocessor defines */
+        if (*cp == '#')
+        {
+            ++cp;
+            readPreProc (cp);
+            continue;
+        }
+
+        /* skip white space */
+        while (isspace ((int) *cp))
+            ++cp;
+
+        /* read symbol */
+        cp = readSymbol (cp, name);
+        if (vStringLength (name) > 0 && *cp == ':')
+        {
+            labelCandidate = TRUE;
+            ++cp;
+        }
+
+        /* the symbol must be followed by white space or the end of line */
+        if (! isspace ((int) *cp) && *cp != '\0')
+            continue;
+
+        /* skip white space */
+        while (isspace ((int) *cp))
+            ++cp;
+
+        /* skip leading dot */
+#if 0
+        if (*cp == '.')
+            ++cp;
+#endif
+
+        cp = readOperator (cp, operator);
+
+        /* attempt second read of symbol */
+        if (vStringLength (name) == 0)
+        {
+            while (isspace ((int) *cp))
+                ++cp;
+            cp = readSymbol (cp, name);
+            nameFollows = TRUE;
+        }
+        makeAsmTag (name, operator, labelCandidate, nameFollows);
+    }
+    vStringDelete (name);
+    vStringDelete (operator);
+}
+
+/*
+*   Parser initialization callback: remembers the language id assigned to
+*   this parser, then registers its keywords under that id.
+*/
+static void initialize (const langType language)
+{
+    Lang_asm = language;    /* must be set before the keywords are added */
+
+    buildAsmKeywordHash ();
+}
+
+/*
+*   Creates the parser definition for assembly language files, selected by
+*   the listed extensions and file name patterns.
+*/
+extern parserDefinition* AsmParser (void)
+{
+    static const char *const asmExtensions [] = {
+        "asm", "ASM", "s", "S", NULL
+    };
+    static const char *const asmPatterns [] = {
+        "*.A51",
+        "*.29[kK]",
+        "*.[68][68][kKsSxX]",
+        "*.[xX][68][68]",
+        NULL
+    };
+    parserDefinition* const parser = parserNew ("Asm");
+
+    parser->kinds = AsmKinds;
+    parser->kindCount = KIND_COUNT (AsmKinds);
+    parser->extensions = asmExtensions;
+    parser->patterns = asmPatterns;
+    parser->parser = findAsmTags;
+    parser->initialize = initialize;
+    return parser;
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/asp.c b/asp.c
new file mode 100644
index 0000000..7290ad8
--- /dev/null
+++ b/asp.c
@@ -0,0 +1,328 @@
+/*
+* $Id: asp.c 711 2009-07-04 16:52:11Z dhiebert $
+*
+* Copyright (c) 2000, Patrick Dehne <patrick@steidle.net>
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* This module contains functions for generating tags for the ASP (Active
+* Server Pages) web page scripting language.
+*/
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#include <string.h>
+
+#include "parse.h"
+#include "read.h"
+#include "vstring.h"
+
+/*
+* DATA DEFINITIONS
+*/
+/* Tag kinds, passed to makeSimpleTag() together with AspKinds.
+ * NOTE(review): presumably used as indexes into AspKinds, so both lists
+ * must keep the same order -- confirm against makeSimpleTag(). */
+typedef enum {
+ K_CONST, K_CLASS, K_FUNCTION, K_SUB, K_DIM
+} aspKind;
+
+static kindOption AspKinds [] = {
+ { TRUE, 'd', "constant", "constants"},
+ { TRUE, 'c', "class", "classes"},
+ { TRUE, 'f', "function", "functions"},
+ { TRUE, 's', "subroutine", "subroutines"},
+ { TRUE, 'v', "variable", "variables"}
+};
+
+/*
+* FUNCTION DEFINITIONS
+*/
+
+/* Tells whether the text at cp starts with `word', ignoring case. */
+static boolean aspStartsWith (
+        const unsigned char *const cp, const char *const word)
+{
+    return (boolean)
+        (strncasecmp ((const char*) cp, word, strlen (word)) == 0);
+}
+
+/* Skips white space at cp, reads the identifier found there into `name',
+ * makes a tag of the given kind from it and clears `name' again. Returns the
+ * position following the identifier. */
+static const unsigned char *makeAspTag (
+        const unsigned char *cp, vString *const name, const aspKind kind)
+{
+    while (isspace ((int) *cp))
+        ++cp;
+    while (isalnum ((int) *cp) || *cp == '_')
+    {
+        vStringPut (name, (int) *cp);
+        ++cp;
+    }
+    vStringTerminate (name);
+    makeSimpleTag (name, AspKinds, kind);
+    vStringClear (name);
+    return cp;
+}
+
+/* Handles the text following a declaration keyword: if white space follows,
+ * the next identifier is tagged with `kind'. Returns the new position. */
+static const unsigned char *parseAspDeclaration (
+        const unsigned char *cp, vString *const name, const aspKind kind)
+{
+    if (isspace ((int) *cp))
+        cp = makeAspTag (cp, name, kind);
+    return cp;
+}
+
+/* Handles the text following "public" or "private": if white space follows,
+ * a function, a sub or (otherwise) a variable is tagged. Returns the new
+ * position. */
+static const unsigned char *parseAspMember (
+        const unsigned char *cp, vString *const name)
+{
+    if (! isspace ((int) *cp))
+        return cp;
+    while (isspace ((int) *cp))
+        ++cp;
+    if (aspStartsWith (cp, "function"))
+        return makeAspTag (cp + 8, name, K_FUNCTION);
+    if (aspStartsWith (cp, "sub"))
+        return makeAspTag (cp + 3, name, K_SUB);
+    return makeAspTag (cp, name, K_DIM);
+}
+
+/* Handles the text following "end" or "exit": skips white space, if any,
+ * and returns TRUE if "function" or "sub" follows it, in which case the rest
+ * of the line is to be ignored. *cpp is left after the white space. */
+static boolean isAspEndOrExitTarget (const unsigned char **const cpp)
+{
+    const unsigned char *cp = *cpp;
+    boolean result = FALSE;
+
+    if (isspace ((int) *cp))
+    {
+        while (isspace ((int) *cp))
+            ++cp;
+        result = (boolean) (aspStartsWith (cp, "function") ||
+                            aspStartsWith (cp, "sub"));
+    }
+    *cpp = cp;
+    return result;
+}
+
+/*
+*   Scans the input file line by line and creates tags for constants,
+*   classes, functions, subroutines and variables. String literals are
+*   skipped and a single quote starts a comment which runs to the end of the
+*   line.
+*/
+static void findAspTags (void)
+{
+    vString *name = vStringNew ();
+    const unsigned char *line;
+
+    while ((line = fileReadLine ()) != NULL)
+    {
+        const unsigned char *cp = line;
+
+        while (*cp != '\0')
+        {
+            /* jump over whitespace */
+            while (isspace ((int)*cp))
+                cp++;
+
+            /* jump over strings */
+            if (*cp == '"')
+            {
+                cp++;
+                while (*cp!='"' && *cp!='\0')
+                    cp++;
+                /* Consume the closing quote too; otherwise it would be
+                 * taken for the start of another string on the next pass. */
+                if (*cp == '"')
+                    cp++;
+            }
+
+            /* jump over comments */
+            else if (*cp == '\'')
+                break;
+
+            /* jump over end function/sub lines */
+            else if (aspStartsWith (cp, "end"))
+            {
+                cp += 3;
+                if (isAspEndOrExitTarget (&cp))
+                    break;
+            }
+
+            /* jump over exit function/sub lines */
+            else if (aspStartsWith (cp, "exit"))
+            {
+                cp += 4;
+                if (isAspEndOrExitTarget (&cp))
+                    break;
+            }
+
+            /* class member? */
+            else if (aspStartsWith (cp, "public"))
+                cp = parseAspMember (cp + 6, name);
+            else if (aspStartsWith (cp, "private"))
+                cp = parseAspMember (cp + 7, name);
+
+            /* function? */
+            else if (aspStartsWith (cp, "function"))
+                cp = parseAspDeclaration (cp + 8, name, K_FUNCTION);
+
+            /* sub? */
+            else if (aspStartsWith (cp, "sub"))
+                cp = parseAspDeclaration (cp + 3, name, K_SUB);
+
+            /* dim variable? */
+            else if (aspStartsWith (cp, "dim"))
+                cp = parseAspDeclaration (cp + 3, name, K_DIM);
+
+            /* class declaration? */
+            else if (aspStartsWith (cp, "class"))
+                cp = parseAspDeclaration (cp + 5, name, K_CLASS);
+
+            /* const declaration? */
+            else if (aspStartsWith (cp, "const"))
+                cp = parseAspDeclaration (cp + 5, name, K_CONST);
+
+            /* nothing relevant */
+            else if (*cp != '\0')
+                cp++;
+        }
+    }
+    vStringDelete (name);
+}
+
+/*
+*   Creates the parser definition for ASP files ("asp" and "asa").
+*/
+extern parserDefinition* AspParser (void)
+{
+    static const char *const aspExtensions [] = { "asp", "asa", NULL };
+    parserDefinition* const parser = parserNew ("Asp");
+
+    parser->extensions = aspExtensions;
+    parser->kinds = AspKinds;
+    parser->kindCount = KIND_COUNT (AspKinds);
+    parser->parser = findAspTags;
+    return parser;
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
+
diff --git a/awk.c b/awk.c
new file mode 100644
index 0000000..d825d6f
--- /dev/null
+++ b/awk.c
@@ -0,0 +1,81 @@
+/*
+* $Id: awk.c 443 2006-05-30 04:37:13Z darren $
+*
+* Copyright (c) 2000-2002, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* This module contains functions for generating tags for AWK functions.
+*/
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#include <string.h>
+
+#include "parse.h"
+#include "read.h"
+#include "vstring.h"
+
+/*
+* DATA DEFINITIONS
+*/
+/* Tag kinds produced for AWK. K_FUNCTION is handed to makeSimpleTag()
+ * together with AwkKinds, so it presumably selects the row of that table. */
+typedef enum eAwkKinds {
+ K_FUNCTION
+} awkKind;
+
+/* Kind table for AWK: a single kind, flagged TRUE, with letter 'f'.
+ * NOTE(review): the field order is presumably enabled / letter / name /
+ * description -- confirm against the kindOption declaration. */
+static kindOption AwkKinds [] = {
+ { TRUE, 'f', "function", "functions" }
+};
+
+/*
+* FUNCTION DEFINITIONS
+*/
+
+/* Reads the input line by line and emits a function tag for every line that
+ * begins (in column one) with the keyword "function", followed by white
+ * space, an identifier made of alphanumerics and '_', optional white space
+ * and an opening parenthesis.
+ */
+static void findAwkTags (void)
+{
+ vString *name = vStringNew ();
+ const unsigned char *line;
+
+ while ((line = fileReadLine ()) != NULL)
+ {
+ if (strncmp ((const char*) line, "function", (size_t) 8) == 0 &&
+ isspace ((int) line [8]))
+ {
+ const unsigned char *cp = line + 8;
+
+ while (isspace ((int) *cp))
+ ++cp;
+ while (isalnum ((int) *cp) || *cp == '_')
+ {
+ vStringPut (name, (int) *cp);
+ ++cp;
+ }
+ vStringTerminate (name);
+ while (isspace ((int) *cp))
+ ++cp;
+ /* only a name followed by '(' is tagged as a function */
+ if (*cp == '(')
+ makeSimpleTag (name, AwkKinds, K_FUNCTION);
+ /* the buffer is reused for the next line; nothing else on this
+ * line is examined, so cp needs no further advancing */
+ vStringClear (name);
+ }
+ }
+ vStringDelete (name);
+}
+
+/* Builds the parser definition for AWK: registers the kind table, the
+ * recognised file name extensions and the tag-finding callback.
+ * Declared with an explicit (void) parameter list so that the definition is
+ * a real prototype, like the other parser constructors.
+ */
+extern parserDefinition* AwkParser (void)
+{
+ static const char *const extensions [] = { "awk", "gawk", "mawk", NULL };
+ parserDefinition* def = parserNew ("Awk");
+ def->kinds = AwkKinds;
+ def->kindCount = KIND_COUNT (AwkKinds);
+ def->extensions = extensions;
+ def->parser = findAwkTags;
+ return def;
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/basic.c b/basic.c
new file mode 100644
index 0000000..a117afa
--- /dev/null
+++ b/basic.c
@@ -0,0 +1,203 @@
+/*
+ * $Id:$
+ *
+ * Copyright (c) 2000-2006, Darren Hiebert, Elias Pschernig
+ *
+ * This source code is released for free distribution under the terms of the
+ * GNU General Public License.
+ *
+ * This module contains functions for generating tags for BlitzBasic
+ * (BlitzMax), PureBasic and FreeBasic language files. For now, this is kept
+ * quite simple - but feel free to ask for more things added any time -
+ * patches are of course most welcome.
+ */
+
+/*
+ * INCLUDE FILES
+ */
+#include "general.h" /* must always come first */
+
+#include <string.h>
+
+#include "options.h"
+#include "parse.h"
+#include "read.h"
+#include "routines.h"
+#include "vstring.h"
+
+/*
+ * DATA DEFINITIONS
+ */
+/* Tag kinds for the Basic dialects. The enumerators are listed in the same
+ * order as the rows of BasicKinds (constant, function, label, type,
+ * variable, enum) and are passed to makeSimpleTag() together with that
+ * table. */
+typedef enum {
+ K_CONST,
+ K_FUNCTION,
+ K_LABEL,
+ K_TYPE,
+ K_VARIABLE,
+ K_ENUM
+} BasicKind;
+
+/* One keyword that introduces a taggable name. */
+typedef struct {
+ char const *token; /* keyword text; compared against the lower-cased input */
+ BasicKind kind; /* kind of the tag generated for the following name */
+ int skip; /* number of extra names match_keyword() skips before the tag name */
+} KeyWord;
+
+/* Kind table shared by all Basic dialects; row order matches BasicKind.
+ * All kinds are flagged TRUE. */
+static kindOption BasicKinds[] = {
+ {TRUE, 'c', "constant", "constants"},
+ {TRUE, 'f', "function", "functions"},
+ {TRUE, 'l', "label", "labels"},
+ {TRUE, 't', "type", "types"},
+ {TRUE, 'v', "variable", "variables"},
+ {TRUE, 'g', "enum", "enumerations"}
+};
+
+/* Keywords used for files with the "bb" extension (see findBasicTags).
+ * The list ends with an entry whose token is NULL. */
+static KeyWord blitzbasic_keywords[] = {
+ {"const", K_CONST, 0},
+ {"global", K_VARIABLE, 0},
+ {"dim", K_VARIABLE, 0},
+ {"function", K_FUNCTION, 0},
+ {"type", K_TYPE, 0},
+ {NULL, 0, 0}
+};
+
+/* Keywords used for files with the "pb" extension (see findBasicTags).
+ * The list ends with an entry whose token is NULL. */
+static KeyWord purebasic_keywords[] = {
+ {"newlist", K_VARIABLE, 0},
+ {"global", K_VARIABLE, 0},
+ {"dim", K_VARIABLE, 0},
+ {"procedure", K_FUNCTION, 0},
+ {"interface", K_TYPE, 0},
+ {"structure", K_TYPE, 0},
+ {NULL, 0, 0}
+};
+
+/* Keywords used for every extension other than "bb" and "pb" (see
+ * findBasicTags). The list is tried in order and the first match wins, so
+ * "dim as" must stay ahead of "dim"; its skip value of 1 makes
+ * match_keyword() step over one name (the one following "as") before taking
+ * the tag name. The list ends with an entry whose token is NULL. */
+static KeyWord freebasic_keywords[] = {
+ {"const", K_CONST, 0},
+ {"dim as", K_VARIABLE, 1},
+ {"dim", K_VARIABLE, 0},
+ {"common", K_VARIABLE, 0},
+ {"function", K_FUNCTION, 0},
+ {"sub", K_FUNCTION, 0},
+ {"private sub", K_FUNCTION, 0},
+ {"public sub", K_FUNCTION, 0},
+ {"private function", K_FUNCTION, 0},
+ {"public function", K_FUNCTION, 0},
+ {"type", K_TYPE, 0},
+ {"enum", K_ENUM, 0},
+ {NULL, 0, 0}
+};
+
+/*
+ * FUNCTION DEFINITIONS
+ */
+
+/* Match the name of a tag (function, variable, type, ...) starting at pos.
+ * Leading white space is skipped; the name then runs up to, but not
+ * including, the next white space, '(' or ',' or the end of the string.
+ * Any previous content of name is discarded.
+ * Returns the position of the first character after the name.
+ */
+static char const *extract_name (char const *pos, vString * name)
+{
+ /* the <ctype.h> functions require a value representable as unsigned
+ * char; passing a negative plain char is undefined behaviour */
+ while (isspace ((unsigned char) *pos))
+ pos++;
+ vStringClear (name);
+ for (; *pos && !isspace ((unsigned char) *pos) && *pos != '(' && *pos != ','; pos++)
+ vStringPut (name, *pos);
+ vStringTerminate (name);
+ return pos;
+}
+
+/* Match a keyword starting at p (case insensitive).
+ * kw->token is compared against the lower-cased input, character by
+ * character. On a match, 1 + kw->skip names are read after the keyword and
+ * the last one is tagged with kw->kind.
+ * Returns 1 if the keyword matched, 0 otherwise.
+ */
+static int match_keyword (const char *p, KeyWord const *kw)
+{
+ const size_t length = strlen (kw->token); /* measured once, not per iteration */
+ vString *name;
+ size_t i;
+ int j;
+ for (i = 0; i < length; i++)
+ {
+ /* cast: tolower() is undefined for negative plain char values.
+ * The terminating NUL of p never equals a token character, so the
+ * loop stops before running past the end of p. */
+ if (tolower ((unsigned char) p[i]) != kw->token[i])
+ return 0;
+ }
+ name = vStringNew ();
+ p += i;
+ /* each pass overwrites name, so only the last extracted name is kept */
+ for (j = 0; j < 1 + kw->skip; j++)
+ {
+ p = extract_name (p, name);
+ }
+ makeSimpleTag (name, BasicKinds, kw->kind);
+ vStringDelete (name);
+ return 1;
+}
+
+/* Match a "label:" style label.
+ * If the last non-blank character of p is ':', everything before it is
+ * tagged as a label.
+ */
+static void match_colon_label (char const *p)
+{
+ char const *end;
+ /* an empty string has no last character; without this check end would
+ * be computed as p - 1 */
+ if (*p == '\0')
+ return;
+ end = p + strlen (p) - 1;
+ /* never step back past the start of the string */
+ while (end > p && isspace ((unsigned char) *end))
+ end--;
+ if (*end == ':')
+ {
+ vString *name = vStringNew ();
+ vStringNCatS (name, p, end - p);
+ makeSimpleTag (name, BasicKinds, K_LABEL);
+ vStringDelete (name);
+ }
+}
+
+/* Match a ".label" style label: when p starts with a dot, the name that
+ * follows the dot is tagged as a label. */
+static void match_dot_label (char const *p)
+{
+ vString *label;
+
+ if (*p != '.')
+ return;
+
+ label = vStringNew ();
+ extract_name (p + 1, label);
+ makeSimpleTag (label, BasicKinds, K_LABEL);
+ vStringDelete (label);
+}
+
+/* Reads the input line by line and generates the tags for one Basic file.
+ * The keyword list is chosen from the file name extension: "bb" selects the
+ * BlitzBasic list, "pb" the PureBasic list, anything else the FreeBasic
+ * list. Every non-empty line is tested against the keywords (first match
+ * wins) and then for a label: ".label" for "bb" files, "label:" otherwise.
+ */
+static void findBasicTags (void)
+{
+ const char *line;
+ const char *extension = fileExtension (vStringValue (File.name));
+ /* the extension is fixed for the whole file, so compare it only once */
+ const int isBlitzBasic = (strcmp (extension, "bb") == 0);
+ KeyWord *keywords;
+
+ if (isBlitzBasic)
+ keywords = blitzbasic_keywords;
+ else if (strcmp (extension, "pb") == 0)
+ keywords = purebasic_keywords;
+ else
+ keywords = freebasic_keywords;
+
+ while ((line = (const char *) fileReadLine ()) != NULL)
+ {
+ const char *p = line;
+ KeyWord const *kw;
+
+ /* cast: isspace() is undefined for negative plain char values */
+ while (isspace ((unsigned char) *p))
+ p++;
+
+ /* Empty line? */
+ if (!*p)
+ continue;
+
+ /* In Basic, keywords always are at the start of the line. */
+ for (kw = keywords; kw->token; kw++)
+ if (match_keyword (p, kw)) break;
+
+ /* Is it a label? */
+ if (isBlitzBasic)
+ match_dot_label (p);
+ else
+ match_colon_label (p);
+ }
+}
+
+/* Builds the parser definition for the Basic dialects: registers the kind
+ * table, the recognised file name extensions and the tag-finding callback.
+ */
+parserDefinition *BasicParser (void)
+{
+ /* NULL-terminated list of file name extensions handled by this parser */
+ static char const *extensions[] = { "bas", "bi", "bb", "pb", NULL };
+ parserDefinition *const basic = parserNew ("Basic");
+
+ basic->parser = findBasicTags;
+ basic->extensions = extensions;
+ basic->kindCount = KIND_COUNT (BasicKinds);
+ basic->kinds = BasicKinds;
+ return basic;
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/beta.c b/beta.c
new file mode 100644
index 0000000..da195a1
--- /dev/null
+++ b/beta.c
@@ -0,0 +1,321 @@
+/*
+* $Id: beta.c 536 2007-06-02 06:09:00Z elliotth $
+*
+* Copyright (c) 1999-2000, Mjølner Informatics
+*
+* Written by Erik Corry <corry@mjolner.dk>
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* This module contains functions for generating tags for BETA language
+* files.
+*/
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#include <string.h>
+
+#include "entry.h"
+#include "parse.h"
+#include "read.h"
+#include "routines.h"
+#include "vstring.h"
+
+/*
+* MACROS
+*/
+/* Non-zero when c is a BETA identifier character ([A-Za-z0-9_]). The
+ * argument is cast to unsigned char so that every char value is a valid
+ * index into the 256-entry identarray. */
+#define isbident(c) (identarray [(unsigned char) (c)])
+
+/*
+* DATA DEFINITIONS
+*/
+/* Tag kinds for BETA; used as indices into BetaKinds (same order). */
+typedef enum {
+ K_FRAGMENT, K_PATTERN, K_SLOT, K_VIRTUAL
+} betaKind;
+
+/* Kind table for BETA, indexed by betaKind. makeBetaTag() reads the
+ * enabled, name and letter members of a row; "pattern" is the only kind
+ * whose first initializer is FALSE. */
+static kindOption BetaKinds [] = {
+ { TRUE, 'f', "fragment", "fragment definitions"},
+ { FALSE, 'p', "pattern", "all patterns"},
+ { TRUE, 's', "slot", "slots (fragment uses)"},
+ { TRUE, 'v', "virtual", "patterns (virtual or rebound)"}
+};
+
+/* Lookup table for isbident(): entry c is 1 when character code c belongs
+ * to [A-Z_a-z0-9] and 0 otherwise. One row per 16 character codes. */
+static const char identarray [256] = {
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0-15 */
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16-31 */
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 32-47 !"#$%&'()*+,-./ */
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 48-63 0123456789:;<=>? */
+0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 64-79 @ABCDEFGHIJKLMNO */
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 80-95 PQRSTUVWXYZ [\]^_ */
+0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 96-111 `abcdefghijklmno */
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 112-127 pqrstuvwxyz{|}~ */
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 128- */
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* -255 */
+
+/*
+* FUNCTION DEFINITIONS
+*/
+
+/* Emits a tag called name of the given kind, unless that kind is disabled
+ * in BetaKinds. */
+static void makeBetaTag (const char* const name, const betaKind kind)
+{
+ const kindOption *const option = &BetaKinds [kind];
+ tagEntryInfo e;
+
+ if (! option->enabled)
+ return;
+
+ initTagEntry (&e, name);
+ e.kindName = option->name;
+ e.kind = option->letter;
+ makeTagEntry (&e);
+}
+
+/* Scans a BETA source file line by line and emits tags for
+ * - fragment definitions: lines that start and end with "--",
+ * - slots: "<<SLOT name: ...>>",
+ * - patterns: a name followed by ':' (see the comment at the pattern test
+ * for the forms that are accepted).
+ * A line ends at '\n', '\r' or end of file, so a CR LF pair produces an
+ * additional empty line. Comment state (braces and paren-star comments) is
+ * carried over to the next line; string state is reset at every end of line.
+ */
+static void findBetaTags (void)
+{
+ vString *line = vStringNew ();
+ boolean incomment = FALSE;
+ boolean inquote = FALSE;
+ boolean dovirtuals = BetaKinds [K_VIRTUAL].enabled;
+ boolean dopatterns = BetaKinds [K_PATTERN].enabled;
+
+ do
+ {
+ boolean foundfragmenthere = FALSE;
+ /* find fragment definition (line that starts and ends with --) */
+ int last;
+ int first;
+ int c;
+
+ vStringClear (line);
+
+ while ((c = fileGetc ()) != EOF && c != '\n' && c != '\r')
+ vStringPut (line, c);
+
+ vStringTerminate (line);
+
+ last = vStringLength (line) - 1;
+ first = 0;
+ /* skip white space at start and end of line; "last > 0" (rather than
+ * "last") keeps an empty line, where last is -1, from indexing in
+ * front of the buffer. The unsigned char casts keep bytes >= 0x80
+ * from reaching isspace() as negative values. */
+ while (last > 0 && isspace ((unsigned char) vStringChar (line, last))) last--;
+ while (first < last && isspace ((unsigned char) vStringChar (line, first))) first++;
+ /* if line still has a reasonable length and ... */
+ if (last - first > 4 &&
+ (vStringChar (line, first) == '-' &&
+ vStringChar (line, first + 1) == '-' &&
+ vStringChar (line, last) == '-' &&
+ vStringChar (line, last - 1) == '-'))
+ {
+ if (!incomment && !inquote)
+ {
+ foundfragmenthere = TRUE;
+ /* skip past -- and whitespace. Also skip back past 'dopart'
+ or 'attributes' to the :. We have to do this because there
+ is no sensible way to include whitespace in a ctags token
+ so the conventional space after the ':' would mess us up */
+ last -= 2;
+ first += 2;
+ while (last && vStringChar (line, last) != ':') last--;
+ while (last && (isspace ((unsigned char) vStringChar (line, last-1)))) last--;
+ while (first < last &&
+ (isspace ((unsigned char) vStringChar (line, first)) ||
+ vStringChar (line, first) == '-'))
+ first++;
+ /* If there's anything left it is a fragment title */
+ if (first < last - 1)
+ {
+ /* cut the line at 'last' so the title is NUL-terminated */
+ vStringChar (line, last) = 0;
+ if (strcasecmp ("LIB", vStringValue (line) + first) &&
+ strcasecmp ("PROGRAM", vStringValue (line) + first))
+ {
+ makeBetaTag (vStringValue (line) + first, K_FRAGMENT);
+ }
+ }
+ }
+ } else {
+ int pos = 0;
+ int len = vStringLength (line);
+ /* resume in the state the previous line ended in */
+ if (inquote) goto stringtext;
+ if (incomment) goto commenttext;
+ programtext:
+ for ( ; pos < len; pos++)
+ {
+ if (vStringChar (line, pos) == '\'')
+ {
+ pos++;
+ inquote = TRUE;
+ goto stringtext;
+ }
+ if (vStringChar (line, pos) == '{')
+ {
+ pos++;
+ incomment = TRUE;
+ goto commenttext;
+ }
+ if (vStringChar (line, pos) == '(' && pos < len - 1 &&
+ vStringChar (line, pos+1) == '*')
+ {
+ pos +=2;
+ incomment = TRUE;
+ goto commenttext;
+ }
+ /*
+ * SLOT definition looks like this:
+ * <<SLOT nameofslot: dopart>>
+ * or
+ * <<SLOT nameofslot: descriptor>>
+ */
+ if (!foundfragmenthere &&
+ vStringChar (line, pos) == '<' &&
+ pos+1 < len &&
+ vStringChar (line, pos+1) == '<' &&
+ strstr (vStringValue (line) + pos, ">>"))
+ {
+ /* Found slot name, get start and end */
+ int eoname;
+ char c2;
+ pos += 2; /* skip past << */
+ /* skip past space before SLOT */
+ while (pos < len && isspace ((unsigned char) vStringChar (line, pos)))
+ pos++;
+ /* skip past SLOT */
+ if (pos+4 <= len &&
+ !strncasecmp (vStringValue(line) + pos, "SLOT", (size_t)4))
+ pos += 4;
+ /* skip past space after SLOT */
+ while (pos < len && isspace ((unsigned char) vStringChar (line, pos)))
+ pos++;
+ eoname = pos;
+ /* skip to end of name */
+ while (eoname < len &&
+ (c2 = vStringChar (line, eoname)) != '>' &&
+ c2 != ':' &&
+ !isspace ((unsigned char) c2))
+ eoname++;
+ if (eoname < len)
+ {
+ /* cut the line right after the slot name */
+ vStringChar (line, eoname) = 0;
+ if (strcasecmp ("LIB", vStringValue (line) + pos) &&
+ strcasecmp ("PROGRAM", vStringValue (line) + pos) &&
+ strcasecmp ("SLOT", vStringValue (line) + pos))
+ {
+ makeBetaTag (vStringValue (line) + pos, K_SLOT);
+ }
+ }
+ if (eoname+1 < len) {
+ pos = eoname + 1;
+ } else {
+ pos = len;
+ continue;
+ }
+ }
+ /* Only patterns that are virtual, extensions of virtuals or
+ * final bindings are normally included so as not to overload
+ * totally.
+ * That means one of the forms name:: name:< or name::<
+ */
+ if (!foundfragmenthere &&
+ vStringChar (line, pos) == ':' &&
+ (dopatterns ||
+ (dovirtuals &&
+ (vStringChar (line, pos+1) == ':' ||
+ vStringChar (line, pos+1) == '<')
+ )
+ )
+ )
+ {
+ /* Found pattern name, get start and end */
+ int eoname = pos;
+ int soname;
+ while (eoname && isspace ((unsigned char) vStringChar (line, eoname-1)))
+ eoname--;
+ foundanothername:
+ /* terminate right after name */
+ vStringChar (line, eoname) = 0;
+ soname = eoname;
+ while (soname &&
+ isbident (vStringChar (line, soname-1)))
+ {
+ soname--;
+ }
+ if (soname != eoname)
+ {
+ makeBetaTag (vStringValue (line) + soname, K_PATTERN);
+ /* scan back past white space */
+ while (soname &&
+ isspace ((unsigned char) vStringChar (line, soname-1)))
+ soname--;
+ if (soname && vStringChar (line, soname-1) == ',')
+ {
+ /* we found a new pattern name before comma */
+ eoname = soname;
+ goto foundanothername;
+ }
+ }
+ }
+ }
+ goto endofline;
+ commenttext:
+ for ( ; pos < len; pos++)
+ {
+ if (vStringChar (line, pos) == '*' && pos < len - 1 &&
+ vStringChar (line, pos+1) == ')')
+ {
+ pos += 2;
+ incomment = FALSE;
+ goto programtext;
+ }
+ if (vStringChar (line, pos) == '}')
+ {
+ pos++;
+ incomment = FALSE;
+ goto programtext;
+ }
+ }
+ goto endofline;
+ stringtext:
+ for ( ; pos < len; pos++)
+ {
+ if (vStringChar (line, pos) == '\\')
+ {
+ /* a backslash hides the following character */
+ if (pos < len - 1) pos++;
+ }
+ else if (vStringChar (line, pos) == '\'')
+ {
+ pos++;
+ /* support obsolete '' syntax */
+ if (pos < len && vStringChar (line, pos) == '\'')
+ {
+ continue;
+ }
+ inquote = FALSE;
+ goto programtext;
+ }
+ }
+ }
+ endofline:
+ inquote = FALSE; /* This shouldn't really make a difference */
+ } while (!feof (File.fp));
+ vStringDelete (line);
+}
+
+/* Builds the parser definition for BETA: registers the kind table, the
+ * recognised file name extension and the tag-finding callback.
+ */
+extern parserDefinition* BetaParser (void)
+{
+ /* NULL-terminated list of file name extensions handled by this parser */
+ static const char *const extensions [] = { "bet", NULL };
+ parserDefinition* const beta = parserNew ("BETA");
+
+ beta->parser = findBetaTags;
+ beta->extensions = extensions;
+ beta->kindCount = KIND_COUNT (BetaKinds);
+ beta->kinds = BetaKinds;
+ return beta;
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/c.c b/c.c
new file mode 100644
index 0000000..0cf0a14
--- /dev/null
+++ b/c.c
@@ -0,0 +1,2932 @@
+/*
+* $Id: c.c 689 2008-12-13 21:17:36Z elliotth $
+*
+* Copyright (c) 1996-2003, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* This module contains functions for parsing and scanning C, C++ and Java
+* source files.
+*/
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#include <string.h>
+#include <setjmp.h>
+
+#include "debug.h"
+#include "entry.h"
+#include "get.h"
+#include "keyword.h"
+#include "options.h"
+#include "parse.h"
+#include "read.h"
+#include "routines.h"
+
+/*
+* MACROS
+*/
+
+/* The token slot of statement st selected by st->tokenIndex. */
+#define activeToken(st) ((st)->token [(int) (st)->tokenIndex])
+/* Declaration type of the enclosing statement, or DECL_NONE when st has no
+ * parent. Evaluates st more than once. */
+#define parentDecl(st) ((st)->parent == NULL ? \
+ DECL_NONE : (st)->parent->declaration)
+/* TRUE when token has type t. */
+#define isType(token,t) (boolean) ((token)->type == (t))
+/* TRUE when st has a parent whose declaration is DECL_ENUM. */
+#define insideEnumBody(st) ((st)->parent == NULL ? FALSE : \
+ (boolean) ((st)->parent->declaration == DECL_ENUM))
+/* TRUE when c is STRING_SYMBOL, st has no qualifying name yet and its scope
+ * is SCOPE_EXTERN. */
+#define isExternCDecl(st,c) (boolean) ((c) == STRING_SYMBOL && \
+ ! (st)->haveQualifyingName && (st)->scope == SCOPE_EXTERN)
+
+/* TRUE when strchr() finds c in the string s; since strchr() also matches
+ * the terminating NUL, c == '\0' yields TRUE as well. */
+#define isOneOf(c,s) (boolean) (strchr ((s), (c)) != NULL)
+
+/* True when c is not EOF and its value as unsigned char is at least 0xc0.
+ * Evaluates c twice. */
+#define isHighChar(c) ((c) != EOF && (unsigned char)(c) >= 0xc0)
+
+/*
+* DATA DECLARATIONS
+*/
+
+/* Number of token slots kept per statement (size of statementInfo.token);
+ * advanceToken() and retardToken() wrap tokenIndex around at this value. */
+enum { NumTokens = 3 };
+
+/* Exception codes.
+ * NOTE(review): presumably the values delivered through the file-scope
+ * jmp_buf "Exception" -- the longjmp/setjmp calls are not visible here. */
+typedef enum eException {
+ ExceptionNone, ExceptionEOF, ExceptionFormattingError,
+ ExceptionBraceFormattingError
+} exception_t;
+
+/* Used to specify type of keyword. KEYWORD_NONE is -1 (the value that
+ * initToken() stores in a token), so the real keyword IDs count up from 0.
+ * These IDs are paired with their spelling in KeywordTable.
+ */
+typedef enum eKeywordId {
+ KEYWORD_NONE = -1,
+ KEYWORD_ATTRIBUTE, KEYWORD_ABSTRACT,
+ KEYWORD_BOOLEAN, KEYWORD_BYTE, KEYWORD_BAD_STATE, KEYWORD_BAD_TRANS,
+ KEYWORD_BIND, KEYWORD_BIND_VAR, KEYWORD_BIT,
+ KEYWORD_CASE, KEYWORD_CATCH, KEYWORD_CHAR, KEYWORD_CLASS, KEYWORD_CONST,
+ KEYWORD_CONSTRAINT, KEYWORD_COVERAGE_BLOCK, KEYWORD_COVERAGE_DEF,
+ KEYWORD_DEFAULT, KEYWORD_DELEGATE, KEYWORD_DELETE, KEYWORD_DO,
+ KEYWORD_DOUBLE,
+ KEYWORD_ELSE, KEYWORD_ENUM, KEYWORD_EXPLICIT, KEYWORD_EXTERN,
+ KEYWORD_EXTENDS, KEYWORD_EVENT,
+ KEYWORD_FINAL, KEYWORD_FLOAT, KEYWORD_FOR, KEYWORD_FOREACH,
+ KEYWORD_FRIEND, KEYWORD_FUNCTION,
+ KEYWORD_GOTO,
+ KEYWORD_IF, KEYWORD_IMPLEMENTS, KEYWORD_IMPORT, KEYWORD_INLINE, KEYWORD_INT,
+ KEYWORD_INOUT, KEYWORD_INPUT, KEYWORD_INTEGER, KEYWORD_INTERFACE,
+ KEYWORD_INTERNAL,
+ KEYWORD_LOCAL, KEYWORD_LONG,
+ KEYWORD_M_BAD_STATE, KEYWORD_M_BAD_TRANS, KEYWORD_M_STATE, KEYWORD_M_TRANS,
+ KEYWORD_MUTABLE,
+ KEYWORD_NAMESPACE, KEYWORD_NEW, KEYWORD_NEWCOV, KEYWORD_NATIVE,
+ KEYWORD_OPERATOR, KEYWORD_OUTPUT, KEYWORD_OVERLOAD, KEYWORD_OVERRIDE,
+ KEYWORD_PACKED, KEYWORD_PORT, KEYWORD_PACKAGE, KEYWORD_PRIVATE,
+ KEYWORD_PROGRAM, KEYWORD_PROTECTED, KEYWORD_PUBLIC,
+ KEYWORD_REGISTER, KEYWORD_RETURN,
+ KEYWORD_SHADOW, KEYWORD_STATE,
+ KEYWORD_SHORT, KEYWORD_SIGNED, KEYWORD_STATIC, KEYWORD_STRING,
+ KEYWORD_STRUCT, KEYWORD_SWITCH, KEYWORD_SYNCHRONIZED,
+ KEYWORD_TASK, KEYWORD_TEMPLATE, KEYWORD_THIS, KEYWORD_THROW,
+ KEYWORD_THROWS, KEYWORD_TRANSIENT, KEYWORD_TRANS, KEYWORD_TRANSITION,
+ KEYWORD_TRY, KEYWORD_TYPEDEF, KEYWORD_TYPENAME,
+ KEYWORD_UINT, KEYWORD_ULONG, KEYWORD_UNION, KEYWORD_UNSIGNED, KEYWORD_USHORT,
+ KEYWORD_USING,
+ KEYWORD_VIRTUAL, KEYWORD_VOID, KEYWORD_VOLATILE,
+ KEYWORD_WCHAR_T, KEYWORD_WHILE
+} keywordId;
+
+/* Used to determine whether keyword is valid for the current language and
+ * what its ID is. One row of KeywordTable.
+ */
+typedef struct sKeywordDesc {
+ const char *name; /* spelling of the keyword */
+ keywordId id; /* ID assigned to the keyword */
+ /* one flag per language, in the column order given by the KeywordTable
+ * heading: ANSI C, C++, C#, Java, Vera */
+ short isValid [5]; /* indicates languages for which kw is valid */
+} keywordDesc;
+
+/* Used for reporting the type of object parsed by nextToken ().
+ * TOKEN_NONE is the type initToken() gives to a cleared token; TOKEN_COUNT
+ * is not a token type but the number of enumerators that precede it.
+ */
+typedef enum eTokenType {
+ TOKEN_NONE, /* none */
+ TOKEN_ARGS, /* a parenthetical pair and its contents */
+ TOKEN_BRACE_CLOSE,
+ TOKEN_BRACE_OPEN,
+ TOKEN_COLON, /* the colon character */
+ TOKEN_COMMA, /* the comma character */
+ TOKEN_DOUBLE_COLON, /* double colon indicates nested-name-specifier */
+ TOKEN_KEYWORD,
+ TOKEN_NAME, /* an unknown name */
+ TOKEN_PACKAGE, /* a Java package name */
+ TOKEN_PAREN_NAME, /* a single name in parentheses */
+ TOKEN_SEMICOLON, /* the semicolon character */
+ TOKEN_SPEC, /* a storage class specifier, qualifier, type, etc. */
+ TOKEN_COUNT
+} tokenType;
+
+/* This describes the scoping of the current statement (stored in
+ * statementInfo.scope). SCOPE_COUNT is the number of real values.
+ */
+typedef enum eTagScope {
+ SCOPE_GLOBAL, /* no storage class specified */
+ SCOPE_STATIC, /* static storage class */
+ SCOPE_EXTERN, /* external storage class */
+ SCOPE_FRIEND, /* declares access only */
+ SCOPE_TYPEDEF, /* scoping depends upon context */
+ SCOPE_COUNT
+} tagScope;
+
+/* Kind of declaration a statement represents; stored in
+ * statementInfo.declaration and inspected through the parentDecl() and
+ * insideEnumBody() macros. DECL_COUNT is the number of real values. */
+typedef enum eDeclaration {
+ DECL_NONE,
+ DECL_BASE, /* base type (default) */
+ DECL_CLASS,
+ DECL_ENUM,
+ DECL_EVENT,
+ DECL_FUNCTION,
+ DECL_IGNORE, /* non-taggable "declaration" */
+ DECL_INTERFACE,
+ DECL_NAMESPACE,
+ DECL_NOMANGLE, /* C++ name demangling block */
+ DECL_PACKAGE,
+ DECL_PROGRAM, /* Vera program */
+ DECL_STRUCT,
+ DECL_TASK, /* Vera task */
+ DECL_UNION,
+ DECL_COUNT
+} declType;
+
+/* Access (visibility) of a member. The enumerator order must stay in step
+ * with the names[] array in accessString(), which is indexed by these
+ * values and asserts that it has ACCESS_COUNT entries. */
+typedef enum eVisibilityType {
+ ACCESS_UNDEFINED,
+ ACCESS_LOCAL,
+ ACCESS_PRIVATE,
+ ACCESS_PROTECTED,
+ ACCESS_PUBLIC,
+ ACCESS_DEFAULT, /* Java-specific */
+ ACCESS_COUNT
+} accessType;
+
+/* Information about the parent class of a member (if any).
+ * Embedded by value in statementInfo as the "member" field.
+ */
+typedef struct sMemberInfo {
+ accessType access; /* access of current statement */
+ accessType accessDefault; /* access default for current statement */
+} memberInfo;
+
+/* One token of the statement being parsed. Instances are allocated by
+ * newToken(), reset by initToken() and released by deleteToken(); the name
+ * string is owned by the token. */
+typedef struct sTokenInfo {
+ tokenType type;
+ keywordId keyword;
+ vString* name; /* the name of the token */
+ unsigned long lineNumber; /* line number of tag */
+ fpos_t filePosition; /* file position of line containing name */
+} tokenInfo;
+
+/* How a member is implemented; stored in statementInfo.implementation.
+ * IMP_COUNT is the number of real values. */
+typedef enum eImplementation {
+ IMP_DEFAULT,
+ IMP_ABSTRACT,
+ IMP_VIRTUAL,
+ IMP_PURE_VIRTUAL,
+ IMP_COUNT
+} impType;
+
+/* Describes the statement currently undergoing analysis.
+ * The token array is used as a ring of NumTokens slots: tokenIndex selects
+ * the active one (activeToken()), advanceToken()/retardToken() move it with
+ * wrap-around and prevToken() looks back from it. Nested statements are
+ * linked to the enclosing one through "parent".
+ */
+typedef struct sStatementInfo {
+ tagScope scope;
+ declType declaration; /* specifier associated with TOKEN_SPEC */
+ boolean gotName; /* was a name parsed yet? */
+ boolean haveQualifyingName; /* do we have a name we are considering? */
+ boolean gotParenName; /* was a name inside parentheses parsed yet? */
+ boolean gotArgs; /* was a list of parameters parsed yet? */
+ boolean isPointer; /* is 'name' a pointer? */
+ boolean inFunction; /* are we inside of a function? */
+ boolean assignment; /* have we handled an '='? */
+ boolean notVariable; /* has a variable declaration been disqualified ? */
+ impType implementation; /* abstract or concrete implementation? */
+ unsigned int tokenIndex; /* currently active token */
+ tokenInfo* token [(int) NumTokens];
+ tokenInfo* context; /* accumulated scope of current statement */
+ tokenInfo* blockName; /* name of current block */
+ memberInfo member; /* information regarding parent class/struct */
+ vString* parentClasses; /* parent classes */
+ struct sStatementInfo *parent; /* statement we are nested within */
+} statementInfo;
+
+/* Describes the type of tag being generated.
+ * This is a language-independent list; TAG_COUNT has to remain the final
+ * enumerator so that it equals the number of tag types.
+ */
+typedef enum eTagType {
+ TAG_UNDEFINED,
+ TAG_CLASS, /* class name */
+ TAG_ENUM, /* enumeration name */
+ TAG_ENUMERATOR, /* enumerator (enumeration value) */
+ TAG_EVENT, /* event */
+ TAG_FIELD, /* field (Java) */
+ TAG_FUNCTION, /* function definition */
+ TAG_INTERFACE, /* interface declaration */
+ TAG_LOCAL, /* local variable definition */
+ TAG_MEMBER, /* structure, class or interface member */
+ TAG_METHOD, /* method declaration */
+ TAG_NAMESPACE, /* namespace name */
+ TAG_PACKAGE, /* package name */
+ TAG_PROGRAM, /* program name */
+ TAG_PROPERTY, /* property name */
+ TAG_PROTOTYPE, /* function prototype or declaration */
+ TAG_STRUCT, /* structure name */
+ TAG_TASK, /* task name */
+ TAG_TYPEDEF, /* typedef name */
+ TAG_UNION, /* union name */
+ TAG_VARIABLE, /* variable definition */
+ TAG_EXTERN_VAR, /* external variable declaration */
+ TAG_COUNT /* must be last */
+} tagType;
+
+/* NOTE(review): presumably collects what was learned while analysing one
+ * parenthesised group (cf. TOKEN_ARGS and TOKEN_PAREN_NAME); the code that
+ * fills in and reads these fields is not visible in this part of the file,
+ * so the field meanings below are taken from their names only -- confirm. */
+typedef struct sParenInfo {
+ boolean isPointer;
+ boolean isParamList;
+ boolean isKnrParamList;
+ boolean isNameCandidate;
+ boolean invalidContents;
+ boolean nestedArgs;
+ unsigned int parameterCount;
+} parenInfo;
+
+/*
+* DATA DEFINITIONS
+*/
+
+/* Jump buffer for non-local exits.
+ * NOTE(review): presumably used together with the exception_t codes; the
+ * setjmp/longjmp calls are outside this part of the file. */
+static jmp_buf Exception;
+
+/* Language identifiers, one per language this module parses.
+ * NOTE(review): they are not initialised here; presumably set when the
+ * individual parsers are initialised -- confirm. */
+static langType Lang_c;
+static langType Lang_cpp;
+static langType Lang_csharp;
+static langType Lang_java;
+static langType Lang_vera;
+/* NOTE(review): presumably the buffer and the on/off flag used while a
+ * function signature is being recorded -- their use is not visible here. */
+static vString *Signature;
+static boolean CollectingSignature;
+
+/* Number used to uniquely identify anonymous structs and unions. */
+static int AnonymousID = 0;
+
+/* Used to index into the CKinds table. The enumerators from CK_CLASS on
+ * are listed in the same order as the table rows; CK_UNDEFINED (-1) is not
+ * a valid index. */
+typedef enum {
+ CK_UNDEFINED = -1,
+ CK_CLASS, CK_DEFINE, CK_ENUMERATOR, CK_FUNCTION,
+ CK_ENUMERATION, CK_LOCAL, CK_MEMBER, CK_NAMESPACE, CK_PROTOTYPE,
+ CK_STRUCT, CK_TYPEDEF, CK_UNION, CK_VARIABLE,
+ CK_EXTERN_VARIABLE
+} cKind;
+
+/* Kind table indexed by cKind. The rows for local variables, prototypes
+ * and extern variables are the ones whose first initializer is FALSE. */
+static kindOption CKinds [] = {
+ { TRUE, 'c', "class", "classes"},
+ { TRUE, 'd', "macro", "macro definitions"},
+ { TRUE, 'e', "enumerator", "enumerators (values inside an enumeration)"},
+ { TRUE, 'f', "function", "function definitions"},
+ { TRUE, 'g', "enum", "enumeration names"},
+ { FALSE, 'l', "local", "local variables"},
+ { TRUE, 'm', "member", "class, struct, and union members"},
+ { TRUE, 'n', "namespace", "namespaces"},
+ { FALSE, 'p', "prototype", "function prototypes"},
+ { TRUE, 's', "struct", "structure names"},
+ { TRUE, 't', "typedef", "typedefs"},
+ { TRUE, 'u', "union", "union names"},
+ { TRUE, 'v', "variable", "variable definitions"},
+ { FALSE, 'x', "externvar", "external and forward variable declarations"},
+};
+
+/* Used to index into the CsharpKinds table. The enumerators from CSK_CLASS
+ * on are listed in the same order as the table rows; CSK_UNDEFINED (-1) is
+ * not a valid index. */
+typedef enum {
+ CSK_UNDEFINED = -1,
+ CSK_CLASS, CSK_DEFINE, CSK_ENUMERATOR, CSK_EVENT, CSK_FIELD,
+ CSK_ENUMERATION, CSK_INTERFACE, CSK_LOCAL, CSK_METHOD,
+ CSK_NAMESPACE, CSK_PROPERTY, CSK_STRUCT, CSK_TYPEDEF
+} csharpKind;
+
+/* Kind table indexed by csharpKind. Only the row for local variables has
+ * FALSE as its first initializer. */
+static kindOption CsharpKinds [] = {
+ { TRUE, 'c', "class", "classes"},
+ { TRUE, 'd', "macro", "macro definitions"},
+ { TRUE, 'e', "enumerator", "enumerators (values inside an enumeration)"},
+ { TRUE, 'E', "event", "events"},
+ { TRUE, 'f', "field", "fields"},
+ { TRUE, 'g', "enum", "enumeration names"},
+ { TRUE, 'i', "interface", "interfaces"},
+ { FALSE, 'l', "local", "local variables"},
+ { TRUE, 'm', "method", "methods"},
+ { TRUE, 'n', "namespace", "namespaces"},
+ { TRUE, 'p', "property", "properties"},
+ { TRUE, 's', "struct", "structure names"},
+ { TRUE, 't', "typedef", "typedefs"},
+};
+
+/* Used to index into the JavaKinds table.
+ * NOTE(review): JavaKinds has eight rows (class .. package), so JK_ACCESS
+ * and JK_CLASS_PREFIX have no row of their own; verify that they are never
+ * used as indices into that table. */
+typedef enum {
+ JK_UNDEFINED = -1,
+ JK_CLASS, JK_ENUM_CONSTANT, JK_FIELD, JK_ENUM, JK_INTERFACE,
+ JK_LOCAL, JK_METHOD, JK_PACKAGE, JK_ACCESS, JK_CLASS_PREFIX
+} javaKind;
+
+/* Kind table for Java; the rows follow the order of javaKind from JK_CLASS
+ * to JK_PACKAGE. Only the row for local variables has FALSE as its first
+ * initializer. */
+static kindOption JavaKinds [] = {
+ { TRUE, 'c', "class", "classes"},
+ { TRUE, 'e', "enum constant", "enum constants"},
+ { TRUE, 'f', "field", "fields"},
+ { TRUE, 'g', "enum", "enum types"},
+ { TRUE, 'i', "interface", "interfaces"},
+ { FALSE, 'l', "local", "local variables"},
+ { TRUE, 'm', "method", "methods"},
+ { TRUE, 'p', "package", "packages"},
+};
+
+/* Used to index into the VeraKinds table. The enumerators from VK_CLASS on
+ * are listed in the same order as the table rows; VK_UNDEFINED (-1) is not
+ * a valid index. */
+typedef enum {
+ VK_UNDEFINED = -1,
+ VK_CLASS, VK_DEFINE, VK_ENUMERATOR, VK_FUNCTION,
+ VK_ENUMERATION, VK_LOCAL, VK_MEMBER, VK_PROGRAM, VK_PROTOTYPE,
+ VK_TASK, VK_TYPEDEF, VK_VARIABLE,
+ VK_EXTERN_VARIABLE
+} veraKind;
+
+/* Kind table indexed by veraKind. Unlike CKinds, prototypes use the letter
+ * 'P' and typedefs 'T', because 'p' and 't' are taken by programs and
+ * tasks. */
+static kindOption VeraKinds [] = {
+ { TRUE, 'c', "class", "classes"},
+ { TRUE, 'd', "macro", "macro definitions"},
+ { TRUE, 'e', "enumerator", "enumerators (values inside an enumeration)"},
+ { TRUE, 'f', "function", "function definitions"},
+ { TRUE, 'g', "enum", "enumeration names"},
+ { FALSE, 'l', "local", "local variables"},
+ { TRUE, 'm', "member", "class, struct, and union members"},
+ { TRUE, 'p', "program", "programs"},
+ { FALSE, 'P', "prototype", "function prototypes"},
+ { TRUE, 't', "task", "tasks"},
+ { TRUE, 'T', "typedef", "typedefs"},
+ { TRUE, 'v', "variable", "variable definitions"},
+ { FALSE, 'x', "externvar", "external variable declarations"}
+};
+
+/* Table of all keywords known to this module. Each row gives the spelling,
+ * the keyword ID and five 0/1 flags telling for which languages the word is
+ * a keyword; the flag columns are, left to right: ANSI C, C++, C#, Java,
+ * Vera (see the column heading below).
+ * NOTE(review): the rows are only roughly alphabetical ("do" precedes
+ * "default", "transition" precedes "transient"), so consumers presumably
+ * do not depend on the ordering -- confirm before relying on it.
+ */
+static const keywordDesc KeywordTable [] = {
+ /* C++ */
+ /* ANSI C | C# Java */
+ /* | | | | Vera */
+ /* keyword keyword ID | | | | | */
+ { "__attribute__", KEYWORD_ATTRIBUTE, { 1, 1, 1, 0, 0 } },
+ { "abstract", KEYWORD_ABSTRACT, { 0, 0, 1, 1, 0 } },
+ { "bad_state", KEYWORD_BAD_STATE, { 0, 0, 0, 0, 1 } },
+ { "bad_trans", KEYWORD_BAD_TRANS, { 0, 0, 0, 0, 1 } },
+ { "bind", KEYWORD_BIND, { 0, 0, 0, 0, 1 } },
+ { "bind_var", KEYWORD_BIND_VAR, { 0, 0, 0, 0, 1 } },
+ { "bit", KEYWORD_BIT, { 0, 0, 0, 0, 1 } },
+ { "boolean", KEYWORD_BOOLEAN, { 0, 0, 0, 1, 0 } },
+ { "byte", KEYWORD_BYTE, { 0, 0, 0, 1, 0 } },
+ { "case", KEYWORD_CASE, { 1, 1, 1, 1, 0 } },
+ { "catch", KEYWORD_CATCH, { 0, 1, 1, 0, 0 } },
+ { "char", KEYWORD_CHAR, { 1, 1, 1, 1, 0 } },
+ { "class", KEYWORD_CLASS, { 0, 1, 1, 1, 1 } },
+ { "const", KEYWORD_CONST, { 1, 1, 1, 1, 0 } },
+ { "constraint", KEYWORD_CONSTRAINT, { 0, 0, 0, 0, 1 } },
+ { "coverage_block", KEYWORD_COVERAGE_BLOCK, { 0, 0, 0, 0, 1 } },
+ { "coverage_def", KEYWORD_COVERAGE_DEF, { 0, 0, 0, 0, 1 } },
+ { "do", KEYWORD_DO, { 1, 1, 1, 1, 0 } },
+ { "default", KEYWORD_DEFAULT, { 1, 1, 1, 1, 0 } },
+ { "delegate", KEYWORD_DELEGATE, { 0, 0, 1, 0, 0 } },
+ { "delete", KEYWORD_DELETE, { 0, 1, 0, 0, 0 } },
+ { "double", KEYWORD_DOUBLE, { 1, 1, 1, 1, 0 } },
+ { "else", KEYWORD_ELSE, { 1, 1, 1, 1, 0 } },
+ { "enum", KEYWORD_ENUM, { 1, 1, 1, 1, 1 } },
+ { "event", KEYWORD_EVENT, { 0, 0, 1, 0, 1 } },
+ { "explicit", KEYWORD_EXPLICIT, { 0, 1, 1, 0, 0 } },
+ { "extends", KEYWORD_EXTENDS, { 0, 0, 0, 1, 1 } },
+ { "extern", KEYWORD_EXTERN, { 1, 1, 1, 0, 1 } },
+ { "final", KEYWORD_FINAL, { 0, 0, 0, 1, 0 } },
+ { "float", KEYWORD_FLOAT, { 1, 1, 1, 1, 0 } },
+ { "for", KEYWORD_FOR, { 1, 1, 1, 1, 0 } },
+ { "foreach", KEYWORD_FOREACH, { 0, 0, 1, 0, 0 } },
+ { "friend", KEYWORD_FRIEND, { 0, 1, 0, 0, 0 } },
+ { "function", KEYWORD_FUNCTION, { 0, 0, 0, 0, 1 } },
+ { "goto", KEYWORD_GOTO, { 1, 1, 1, 1, 0 } },
+ { "if", KEYWORD_IF, { 1, 1, 1, 1, 0 } },
+ { "implements", KEYWORD_IMPLEMENTS, { 0, 0, 0, 1, 0 } },
+ { "import", KEYWORD_IMPORT, { 0, 0, 0, 1, 0 } },
+ { "inline", KEYWORD_INLINE, { 0, 1, 0, 0, 0 } },
+ { "inout", KEYWORD_INOUT, { 0, 0, 0, 0, 1 } },
+ { "input", KEYWORD_INPUT, { 0, 0, 0, 0, 1 } },
+ { "int", KEYWORD_INT, { 1, 1, 1, 1, 0 } },
+ { "integer", KEYWORD_INTEGER, { 0, 0, 0, 0, 1 } },
+ { "interface", KEYWORD_INTERFACE, { 0, 0, 1, 1, 1 } },
+ { "internal", KEYWORD_INTERNAL, { 0, 0, 1, 0, 0 } },
+ { "local", KEYWORD_LOCAL, { 0, 0, 0, 0, 1 } },
+ { "long", KEYWORD_LONG, { 1, 1, 1, 1, 0 } },
+ { "m_bad_state", KEYWORD_M_BAD_STATE, { 0, 0, 0, 0, 1 } },
+ { "m_bad_trans", KEYWORD_M_BAD_TRANS, { 0, 0, 0, 0, 1 } },
+ { "m_state", KEYWORD_M_STATE, { 0, 0, 0, 0, 1 } },
+ { "m_trans", KEYWORD_M_TRANS, { 0, 0, 0, 0, 1 } },
+ { "mutable", KEYWORD_MUTABLE, { 0, 1, 0, 0, 0 } },
+ { "namespace", KEYWORD_NAMESPACE, { 0, 1, 1, 0, 0 } },
+ { "native", KEYWORD_NATIVE, { 0, 0, 0, 1, 0 } },
+ { "new", KEYWORD_NEW, { 0, 1, 1, 1, 0 } },
+ { "newcov", KEYWORD_NEWCOV, { 0, 0, 0, 0, 1 } },
+ { "operator", KEYWORD_OPERATOR, { 0, 1, 1, 0, 0 } },
+ { "output", KEYWORD_OUTPUT, { 0, 0, 0, 0, 1 } },
+ { "overload", KEYWORD_OVERLOAD, { 0, 1, 0, 0, 0 } },
+ { "override", KEYWORD_OVERRIDE, { 0, 0, 1, 0, 0 } },
+ { "package", KEYWORD_PACKAGE, { 0, 0, 0, 1, 0 } },
+ { "packed", KEYWORD_PACKED, { 0, 0, 0, 0, 1 } },
+ { "port", KEYWORD_PORT, { 0, 0, 0, 0, 1 } },
+ { "private", KEYWORD_PRIVATE, { 0, 1, 1, 1, 0 } },
+ { "program", KEYWORD_PROGRAM, { 0, 0, 0, 0, 1 } },
+ { "protected", KEYWORD_PROTECTED, { 0, 1, 1, 1, 1 } },
+ { "public", KEYWORD_PUBLIC, { 0, 1, 1, 1, 1 } },
+ { "register", KEYWORD_REGISTER, { 1, 1, 0, 0, 0 } },
+ { "return", KEYWORD_RETURN, { 1, 1, 1, 1, 0 } },
+ { "shadow", KEYWORD_SHADOW, { 0, 0, 0, 0, 1 } },
+ { "short", KEYWORD_SHORT, { 1, 1, 1, 1, 0 } },
+ { "signed", KEYWORD_SIGNED, { 1, 1, 0, 0, 0 } },
+ { "state", KEYWORD_STATE, { 0, 0, 0, 0, 1 } },
+ { "static", KEYWORD_STATIC, { 1, 1, 1, 1, 1 } },
+ { "string", KEYWORD_STRING, { 0, 0, 1, 0, 1 } },
+ { "struct", KEYWORD_STRUCT, { 1, 1, 1, 0, 0 } },
+ { "switch", KEYWORD_SWITCH, { 1, 1, 1, 1, 0 } },
+ { "synchronized", KEYWORD_SYNCHRONIZED, { 0, 0, 0, 1, 0 } },
+ { "task", KEYWORD_TASK, { 0, 0, 0, 0, 1 } },
+ { "template", KEYWORD_TEMPLATE, { 0, 1, 0, 0, 0 } },
+ { "this", KEYWORD_THIS, { 0, 1, 1, 1, 0 } },
+ { "throw", KEYWORD_THROW, { 0, 1, 1, 1, 0 } },
+ { "throws", KEYWORD_THROWS, { 0, 0, 0, 1, 0 } },
+ { "trans", KEYWORD_TRANS, { 0, 0, 0, 0, 1 } },
+ { "transition", KEYWORD_TRANSITION, { 0, 0, 0, 0, 1 } },
+ { "transient", KEYWORD_TRANSIENT, { 0, 0, 0, 1, 0 } },
+ { "try", KEYWORD_TRY, { 0, 1, 1, 0, 0 } },
+ { "typedef", KEYWORD_TYPEDEF, { 1, 1, 1, 0, 1 } },
+ { "typename", KEYWORD_TYPENAME, { 0, 1, 0, 0, 0 } },
+ { "uint", KEYWORD_UINT, { 0, 0, 1, 0, 0 } },
+ { "ulong", KEYWORD_ULONG, { 0, 0, 1, 0, 0 } },
+ { "union", KEYWORD_UNION, { 1, 1, 0, 0, 0 } },
+ { "unsigned", KEYWORD_UNSIGNED, { 1, 1, 1, 0, 0 } },
+ { "ushort", KEYWORD_USHORT, { 0, 0, 1, 0, 0 } },
+ { "using", KEYWORD_USING, { 0, 1, 1, 0, 0 } },
+ { "virtual", KEYWORD_VIRTUAL, { 0, 1, 1, 0, 1 } },
+ { "void", KEYWORD_VOID, { 1, 1, 1, 1, 1 } },
+ { "volatile", KEYWORD_VOLATILE, { 1, 1, 1, 1, 0 } },
+ { "wchar_t", KEYWORD_WCHAR_T, { 1, 1, 1, 0, 0 } },
+ { "while", KEYWORD_WHILE, { 1, 1, 1, 1, 0 } }
+};
+
+/*
+* FUNCTION PROTOTYPES
+*/
+static void createTags (const unsigned int nestLevel, statementInfo *const parent);
+
+/*
+* FUNCTION DEFINITIONS
+*/
+
+/* Reports whether tags for the CK_DEFINE kind are currently enabled, as
+ * recorded in the CKinds table.
+ */
+extern boolean includingDefineTags (void)
+{
+    const boolean defineEnabled = CKinds [CK_DEFINE].enabled;
+    return defineEnabled;
+}
+
+/*
+* Token management
+*/
+
+/* Resets a token to the "no token" state: the name buffer is emptied, type
+ * and keyword are cleared, and the position fields are stamped with the
+ * current source line number and input file position.
+ */
+static void initToken (tokenInfo* const token)
+{
+    vStringClear (token->name);
+    token->filePosition = getInputFilePosition ();
+    token->lineNumber   = getSourceLineNumber ();
+    token->keyword      = KEYWORD_NONE;
+    token->type         = TOKEN_NONE;
+}
+
+/* Moves the statement's token index one slot forward, wrapping to slot 0
+ * after the last of the NumTokens slots, and re-initializes the token in the
+ * newly selected slot.
+ */
+static void advanceToken (statementInfo* const st)
+{
+    const unsigned int lastSlot = (unsigned int) NumTokens - 1;
+
+    st->tokenIndex = (st->tokenIndex >= lastSlot) ? 0 : st->tokenIndex + 1;
+    initToken (st->token [st->tokenIndex]);
+}
+
+/* Returns the token stored "n" slots behind the current token index of the
+ * statement, wrapping around the NumTokens slots. "n" must be smaller than
+ * NumTokens.
+ */
+static tokenInfo *prevToken (const statementInfo *const st, unsigned int n)
+{
+    const unsigned int slots = (unsigned int) NumTokens;
+
+    Assert (n < slots);
+    return st->token [(st->tokenIndex + slots - n) % slots];
+}
+
+/* Re-initializes the statement's active token and then gives it the
+ * requested type.
+ */
+static void setToken (statementInfo *const st, const tokenType type)
+{
+    tokenInfo *const current = activeToken (st);
+
+    initToken (current);
+    current->type = type;
+}
+
+/* Moves the statement's token index one slot backward, wrapping from slot 0
+ * to the last of the NumTokens slots, and resets the token found there to
+ * TOKEN_NONE.
+ */
+static void retardToken (statementInfo *const st)
+{
+    st->tokenIndex = (st->tokenIndex == 0)
+        ? (unsigned int) NumTokens - 1
+        : st->tokenIndex - 1;
+    setToken (st, TOKEN_NONE);
+}
+
+/* Allocates a token together with its name buffer and returns it in the
+ * initialized state. Release it with deleteToken().
+ */
+static tokenInfo *newToken (void)
+{
+    tokenInfo *const fresh = xMalloc (1, tokenInfo);
+
+    fresh->name = vStringNew ();
+    initToken (fresh);
+    return fresh;
+}
+
+/* Frees a token and its name buffer. A NULL token is ignored. */
+static void deleteToken (tokenInfo *const token)
+{
+    if (token == NULL)
+        return;
+
+    vStringDelete (token->name);
+    eFree (token);
+}
+
+/* Returns the printable name of an access specifier. The table must have
+ * exactly ACCESS_COUNT entries, indexed by the accessType value.
+ */
+static const char *accessString (const accessType access)
+{
+    static const char *const names [] = {
+        "?", "local", "private", "protected", "public", "default"
+    };
+    const int index = (int) access;
+
+    Assert (sizeof (names) / sizeof (names [0]) == ACCESS_COUNT);
+    Assert (index < ACCESS_COUNT);
+    return names [index];
+}
+
+/* Returns the printable name of an implementation type. The table must have
+ * exactly IMP_COUNT entries, indexed by the impType value.
+ */
+static const char *implementationString (const impType imp)
+{
+    static const char *const names [] = {
+        "?", "abstract", "virtual", "pure virtual"
+    };
+    const int index = (int) imp;
+
+    Assert (sizeof (names) / sizeof (names [0]) == IMP_COUNT);
+    Assert (index < IMP_COUNT);
+    return names [index];
+}
+
+/*
+* Debugging functions
+*/
+
+#ifdef DEBUG
+
+/* Debug helper: yields "TRUE" for a non-zero argument, "FALSE" otherwise. */
+#define boolString(c) (((c) != 0) ? "TRUE" : "FALSE")
+
+/* Debug helper: returns the printable name of a token type. The table must
+ * have exactly TOKEN_COUNT entries, indexed by the tokenType value.
+ */
+static const char *tokenString (const tokenType type)
+{
+    static const char *const names [] = {
+        "none", "args", "}", "{", "colon", "comma", "double colon", "keyword",
+        "name", "package", "paren-name", "semicolon", "specifier"
+    };
+    const int index = (int) type;
+
+    Assert (sizeof (names) / sizeof (names [0]) == TOKEN_COUNT);
+    Assert (index < TOKEN_COUNT);
+    return names [index];
+}
+
+/* Debug helper: returns the printable name of a tag scope. The table must
+ * have exactly SCOPE_COUNT entries, indexed by the tagScope value.
+ */
+static const char *scopeString (const tagScope scope)
+{
+    static const char *const names [] = {
+        "global", "static", "extern", "friend", "typedef"
+    };
+    const int index = (int) scope;
+
+    Assert (sizeof (names) / sizeof (names [0]) == SCOPE_COUNT);
+    Assert (index < SCOPE_COUNT);
+    return names [index];
+}
+
+/* Debug helper: returns the printable name of a declaration type. The table
+ * must have exactly DECL_COUNT entries, indexed by the declType value.
+ */
+static const char *declString (const declType declaration)
+{
+    static const char *const names [] = {
+        "?", "base", "class", "enum", "event", "function", "ignore",
+        "interface", "namespace", "no mangle", "package", "program",
+        "struct", "task", "union",
+    };
+    const int index = (int) declaration;
+
+    Assert (sizeof (names) / sizeof (names [0]) == DECL_COUNT);
+    Assert (index < DECL_COUNT);
+    return names [index];
+}
+
+/* Debug helper: looks the keyword id up in KeywordTable and returns the name
+ * of the first matching entry, or "none" when no entry carries that id.
+ */
+static const char *keywordString (const keywordId keyword)
+{
+    const keywordDesc *const end =
+        KeywordTable + sizeof (KeywordTable) / sizeof (KeywordTable [0]);
+    const keywordDesc *entry;
+
+    for (entry = KeywordTable ; entry != end ; ++entry)
+    {
+        if (entry->id == keyword)
+            return entry->name;
+    }
+    return "none";
+}
+
+/* Debug helper: prints one token to stdout. Name tokens show their text,
+ * keyword tokens show the keyword's name, and every other token shows only
+ * its type and line number.
+ */
+static void __unused__ pt (tokenInfo *const token)
+{
+    const boolean isName = (boolean) isType (token, TOKEN_NAME);
+    const boolean isKeyword =
+        (boolean) (! isName && isType (token, TOKEN_KEYWORD));
+
+    if (isName || isKeyword)
+        printf ("type: %-12s: %-13s line: %lu\n",
+                tokenString (token->type),
+                isName ? vStringValue (token->name)
+                       : keywordString (token->keyword),
+                token->lineNumber);
+    else
+        printf ("type: %-12s line: %lu\n",
+                tokenString (token->type), token->lineNumber);
+}
+
+/* Debug helper: prints the state of a statement to stdout -- its flags and
+ * access settings, the active token, each of the preceding tokens held in
+ * the token slots, and finally the context token.
+ */
+static void __unused__ ps (statementInfo *const st)
+{
+    const unsigned int slots = (unsigned int) NumTokens;
+    unsigned int back;
+
+    printf ("scope: %s decl: %s gotName: %s gotParenName: %s\n",
+            scopeString (st->scope), declString (st->declaration),
+            boolString (st->gotName), boolString (st->gotParenName));
+    printf ("haveQualifyingName: %s\n", boolString (st->haveQualifyingName));
+    printf ("access: %s default: %s\n", accessString (st->member.access),
+            accessString (st->member.accessDefault));
+
+    printf ("token : ");
+    pt (activeToken (st));
+
+    for (back = 1 ; back < slots ; ++back)
+    {
+        printf ("prev %u : ", back);
+        pt (prevToken (st, back));
+    }
+
+    printf ("context: ");
+    pt (st->context);
+}
+
+#endif
+
+/*
+* Statement management
+*/
+
+/* Tells whether the token's keyword is one of class, enum, interface,
+ * namespace, struct or union.
+ */
+static boolean isContextualKeyword (const tokenInfo *const token)
+{
+    const keywordId kw = token->keyword;
+
+    return (boolean) (kw == KEYWORD_CLASS     ||
+                      kw == KEYWORD_ENUM      ||
+                      kw == KEYWORD_INTERFACE ||
+                      kw == KEYWORD_NAMESPACE ||
+                      kw == KEYWORD_STRUCT    ||
+                      kw == KEYWORD_UNION);
+}
+
+/* Tells whether the statement declares a class, enum, interface, namespace,
+ * struct or union. A NULL statement yields FALSE.
+ */
+static boolean isContextualStatement (const statementInfo *const st)
+{
+    if (st == NULL)
+        return FALSE;
+
+    switch (st->declaration)
+    {
+        case DECL_CLASS:
+        case DECL_ENUM:
+        case DECL_INTERFACE:
+        case DECL_NAMESPACE:
+        case DECL_STRUCT:
+        case DECL_UNION:
+            return TRUE;
+
+        default:
+            return FALSE;
+    }
+}
+
+/* A statement is a member when its context token is a name, or when it has a
+ * parent statement that is contextual (see isContextualStatement()).
+ */
+static boolean isMember (const statementInfo *const st)
+{
+    if (isType (st->context, TOKEN_NAME))
+        return TRUE;
+
+    return (boolean) (st->parent != NULL && isContextualStatement (st->parent));
+}
+
+/* Chooses the default access for members of the statement from the parent's
+ * declaration and stores it as both the default and the current access:
+ *   class                    - "default" for Java, private otherwise
+ *   enum                     - public for Java, undefined otherwise
+ *   interface, struct, union - public
+ *   anything else, no parent - undefined
+ */
+static void initMemberInfo (statementInfo *const st)
+{
+    const statementInfo *const outer = st->parent;
+    accessType initial = ACCESS_UNDEFINED;
+
+    if (outer != NULL)
+    {
+        switch (outer->declaration)
+        {
+            case DECL_CLASS:
+                initial = isLanguage (Lang_java) ? ACCESS_DEFAULT
+                                                 : ACCESS_PRIVATE;
+                break;
+
+            case DECL_ENUM:
+                if (isLanguage (Lang_java))
+                    initial = ACCESS_PUBLIC;
+                break;
+
+            case DECL_INTERFACE:
+            case DECL_STRUCT:
+            case DECL_UNION:
+                initial = ACCESS_PUBLIC;
+                break;
+
+            default:    /* includes DECL_NAMESPACE: stays undefined */
+                break;
+        }
+    }
+    st->member.access        = initial;
+    st->member.accessDefault = initial;
+}
+
+/* Prepares a statement for parsing the next declaration.
+ *
+ * The parse flags, the token slots, the context token and the list of parent
+ * classes are always reset; inFunction is inherited from the parent statement
+ * when there is one. Unless "partial" is set, the scope, the declaration
+ * type, the block name and the current member access are reset as well.
+ */
+static void reinitStatement (statementInfo *const st, const boolean partial)
+{
+    unsigned int slot;
+
+    st->tokenIndex         = 0;
+    st->haveQualifyingName = FALSE;
+    st->gotName            = FALSE;
+    st->gotArgs            = FALSE;
+    st->gotParenName       = FALSE;
+    st->implementation     = IMP_DEFAULT;
+    st->notVariable        = FALSE;
+    st->assignment         = FALSE;
+    st->isPointer          = FALSE;
+    st->inFunction = (st->parent != NULL) ? st->parent->inFunction : FALSE;
+
+    for (slot = 0 ; slot < (unsigned int) NumTokens ; ++slot)
+        initToken (st->token [slot]);
+    initToken (st->context);
+    vStringClear (st->parentClasses);
+
+    if (! partial)
+    {
+        st->scope = SCOPE_GLOBAL;
+        st->declaration =
+            isContextualStatement (st->parent) ? DECL_BASE : DECL_NONE;
+
+        /* The block name survives a partial reset so that a variable
+         * following after a comma still has the structure name.
+         */
+        initToken (st->blockName);
+
+        st->member.access = st->member.accessDefault;
+    }
+}
+
+/* Attaches the statement to its parent (which may be NULL), derives the
+ * default member access from that parent, and performs a full reset.
+ * The parent must be recorded first: both later steps read st->parent.
+ */
+static void initStatement (statementInfo *const st, statementInfo *const parent)
+{
+    st->parent = parent;
+
+    initMemberInfo (st);
+    reinitStatement (st, FALSE);
+}
+
+/*
+* Tag generation functions
+*/
+/* Maps a generic tag type onto the matching index into the CKinds table.
+ * A tag type without a C kind hits the Assert and yields CK_UNDEFINED.
+ */
+static cKind cTagKind (const tagType type)
+{
+    switch (type)
+    {
+        case TAG_CLASS:      return CK_CLASS;
+        case TAG_ENUM:       return CK_ENUMERATION;
+        case TAG_ENUMERATOR: return CK_ENUMERATOR;
+        case TAG_FUNCTION:   return CK_FUNCTION;
+        case TAG_LOCAL:      return CK_LOCAL;
+        case TAG_MEMBER:     return CK_MEMBER;
+        case TAG_NAMESPACE:  return CK_NAMESPACE;
+        case TAG_PROTOTYPE:  return CK_PROTOTYPE;
+        case TAG_STRUCT:     return CK_STRUCT;
+        case TAG_TYPEDEF:    return CK_TYPEDEF;
+        case TAG_UNION:      return CK_UNION;
+        case TAG_VARIABLE:   return CK_VARIABLE;
+        case TAG_EXTERN_VAR: return CK_EXTERN_VARIABLE;
+
+        default:             break;
+    }
+    Assert ("Bad C tag type" == NULL);
+    return CK_UNDEFINED;
+}
+
+/* Maps a generic tag type onto the matching index into the CsharpKinds
+ * table. A tag type without a C# kind hits the Assert and yields
+ * CSK_UNDEFINED.
+ */
+static csharpKind csharpTagKind (const tagType type)
+{
+    switch (type)
+    {
+        case TAG_CLASS:      return CSK_CLASS;
+        case TAG_ENUM:       return CSK_ENUMERATION;
+        case TAG_ENUMERATOR: return CSK_ENUMERATOR;
+        case TAG_EVENT:      return CSK_EVENT;
+        case TAG_FIELD:      return CSK_FIELD;
+        case TAG_INTERFACE:  return CSK_INTERFACE;
+        case TAG_LOCAL:      return CSK_LOCAL;
+        case TAG_METHOD:     return CSK_METHOD;
+        case TAG_NAMESPACE:  return CSK_NAMESPACE;
+        case TAG_PROPERTY:   return CSK_PROPERTY;
+        case TAG_STRUCT:     return CSK_STRUCT;
+        case TAG_TYPEDEF:    return CSK_TYPEDEF;
+
+        default:             break;
+    }
+    Assert ("Bad C# tag type" == NULL);
+    return CSK_UNDEFINED;
+}
+
+/* Maps a generic tag type onto the matching index into the JavaKinds table.
+ * A tag type without a Java kind hits the Assert and yields JK_UNDEFINED.
+ */
+static javaKind javaTagKind (const tagType type)
+{
+    switch (type)
+    {
+        case TAG_CLASS:      return JK_CLASS;
+        case TAG_ENUM:       return JK_ENUM;
+        case TAG_ENUMERATOR: return JK_ENUM_CONSTANT;
+        case TAG_FIELD:      return JK_FIELD;
+        case TAG_INTERFACE:  return JK_INTERFACE;
+        case TAG_LOCAL:      return JK_LOCAL;
+        case TAG_METHOD:     return JK_METHOD;
+        case TAG_PACKAGE:    return JK_PACKAGE;
+
+        default:             break;
+    }
+    Assert ("Bad Java tag type" == NULL);
+    return JK_UNDEFINED;
+}
+
+/* Maps a generic tag type onto the matching index into the VeraKinds table.
+ * A tag type without a Vera kind hits the Assert and yields VK_UNDEFINED.
+ */
+static veraKind veraTagKind (const tagType type)
+{
+    switch (type)
+    {
+        case TAG_CLASS:      return VK_CLASS;
+        case TAG_ENUM:       return VK_ENUMERATION;
+        case TAG_ENUMERATOR: return VK_ENUMERATOR;
+        case TAG_FUNCTION:   return VK_FUNCTION;
+        case TAG_LOCAL:      return VK_LOCAL;
+        case TAG_MEMBER:     return VK_MEMBER;
+        case TAG_PROGRAM:    return VK_PROGRAM;
+        case TAG_PROTOTYPE:  return VK_PROTOTYPE;
+        case TAG_TASK:       return VK_TASK;
+        case TAG_TYPEDEF:    return VK_TYPEDEF;
+        case TAG_VARIABLE:   return VK_VARIABLE;
+        case TAG_EXTERN_VAR: return VK_EXTERN_VARIABLE;
+
+        default:             break;
+    }
+    Assert ("Bad Vera tag type" == NULL);
+    return VK_UNDEFINED;
+}
+
+/* Returns the kind name for a tag type, taken from the kind table of the
+ * language being parsed (C#, Java, Vera, or the C table for anything else).
+ */
+static const char *tagName (const tagType type)
+{
+    if (isLanguage (Lang_csharp))
+        return CsharpKinds [csharpTagKind (type)].name;
+    if (isLanguage (Lang_java))
+        return JavaKinds [javaTagKind (type)].name;
+    if (isLanguage (Lang_vera))
+        return VeraKinds [veraTagKind (type)].name;
+
+    return CKinds [cTagKind (type)].name;
+}
+
+/* Returns the kind letter for a tag type, taken from the kind table of the
+ * language being parsed (C#, Java, Vera, or the C table for anything else).
+ */
+static int tagLetter (const tagType type)
+{
+    if (isLanguage (Lang_csharp))
+        return CsharpKinds [csharpTagKind (type)].letter;
+    if (isLanguage (Lang_java))
+        return JavaKinds [javaTagKind (type)].letter;
+    if (isLanguage (Lang_vera))
+        return VeraKinds [veraTagKind (type)].letter;
+
+    return CKinds [cTagKind (type)].letter;
+}
+
+/* Decides whether a tag of the given type should be generated. File-scope
+ * tags are rejected outright when Option.include.fileScope is off; otherwise
+ * the answer is the "enabled" flag of the kind in the table of the language
+ * being parsed.
+ */
+static boolean includeTag (const tagType type, const boolean isFileScope)
+{
+    if (isFileScope && ! Option.include.fileScope)
+        return FALSE;
+
+    if (isLanguage (Lang_csharp))
+        return CsharpKinds [csharpTagKind (type)].enabled;
+    if (isLanguage (Lang_java))
+        return JavaKinds [javaTagKind (type)].enabled;
+    if (isLanguage (Lang_vera))
+        return VeraKinds [veraTagKind (type)].enabled;
+
+    return CKinds [cTagKind (type)].enabled;
+}
+
+/* Converts a declaration type into the tag type generated for it. A
+ * declaration without a corresponding tag type hits the Assert and yields
+ * TAG_UNDEFINED.
+ */
+static tagType declToTagType (const declType declaration)
+{
+    switch (declaration)
+    {
+        case DECL_CLASS:     return TAG_CLASS;
+        case DECL_ENUM:      return TAG_ENUM;
+        case DECL_EVENT:     return TAG_EVENT;
+        case DECL_FUNCTION:  return TAG_FUNCTION;
+        case DECL_INTERFACE: return TAG_INTERFACE;
+        case DECL_NAMESPACE: return TAG_NAMESPACE;
+        case DECL_PROGRAM:   return TAG_PROGRAM;
+        case DECL_TASK:      return TAG_TASK;
+        case DECL_STRUCT:    return TAG_STRUCT;
+        case DECL_UNION:     return TAG_UNION;
+
+        default:             break;
+    }
+    Assert ("Unexpected declaration" == NULL);
+    return TAG_UNDEFINED;
+}
+
+/* Returns the text for the "access" extension field of a member: "friend"
+ * for a C++ statement with friend scope, otherwise the name of the member's
+ * access specifier, or NULL when that access is undefined.
+ */
+static const char* accessField (const statementInfo *const st)
+{
+    if (isLanguage (Lang_cpp) && st->scope == SCOPE_FRIEND)
+        return "friend";
+    if (st->member.access == ACCESS_UNDEFINED)
+        return NULL;
+
+    return accessString (st->member.access);
+}
+
+/* Appends the scope separator of the language being parsed to "scope":
+ * "::" for C and C++, "." for Java and C#. Nothing is appended for any
+ * other language.
+ */
+static void addContextSeparator (vString *const scope)
+{
+    const char *separator = NULL;
+
+    if (isLanguage (Lang_c) || isLanguage (Lang_cpp))
+        separator = "::";
+    else if (isLanguage (Lang_java) || isLanguage (Lang_csharp))
+        separator = ".";
+
+    if (separator != NULL)
+        vStringCatS (scope, separator);
+}
+
+/* Fills in the extension fields of a tag entry.
+ *
+ *  tag     - entry being populated
+ *  type    - kind of tag being generated
+ *  st      - statement the tag was found in
+ *  scope   - scope string for the tag (may be empty)
+ *  typeRef - scratch buffer supplied by the caller; when a scope has to be
+ *            prepended to the type name, the tag's typeRef field points into
+ *            this buffer, so it must outlive the use of the tag entry
+ */
+static void addOtherFields (tagEntryInfo* const tag, const tagType type,
+                            const statementInfo *const st,
+                            vString *const scope, vString *const typeRef)
+{
+    /* For selected tag types, append an extension flag designating the
+     * parent object in which the tag is defined.
+     */
+    switch (type)
+    {
+        default: break;
+
+        case TAG_FUNCTION:
+        case TAG_METHOD:
+        case TAG_PROTOTYPE:
+            if (vStringLength (Signature) > 0)
+                tag->extensionFields.signature = vStringValue (Signature);
+            /* fall through: callables also receive the fields below */
+        case TAG_CLASS:
+        case TAG_ENUM:
+        case TAG_ENUMERATOR:
+        case TAG_EVENT:
+        case TAG_FIELD:
+        case TAG_INTERFACE:
+        case TAG_MEMBER:
+        case TAG_NAMESPACE:
+        case TAG_PROPERTY:
+        case TAG_STRUCT:
+        case TAG_TASK:
+        case TAG_TYPEDEF:
+        case TAG_UNION:
+            /* st->parent may be NULL (isMember() and findScopeHierarchy()
+             * both test for that), so check before looking at its
+             * declaration.
+             */
+            if (vStringLength (scope) > 0 &&
+                (isMember (st) ||
+                 (st->parent != NULL &&
+                  st->parent->declaration == DECL_NAMESPACE)))
+            {
+                if (isType (st->context, TOKEN_NAME))
+                    tag->extensionFields.scope [0] = tagName (TAG_CLASS);
+                else
+                    tag->extensionFields.scope [0] =
+                        tagName (declToTagType (parentDecl (st)));
+                tag->extensionFields.scope [1] = vStringValue (scope);
+            }
+            if ((type == TAG_CLASS || type == TAG_INTERFACE ||
+                 type == TAG_STRUCT) && vStringLength (st->parentClasses) > 0)
+            {
+                tag->extensionFields.inheritance =
+                    vStringValue (st->parentClasses);
+            }
+            if (st->implementation != IMP_DEFAULT &&
+                (isLanguage (Lang_cpp) || isLanguage (Lang_csharp) ||
+                 isLanguage (Lang_java)))
+            {
+                tag->extensionFields.implementation =
+                    implementationString (st->implementation);
+            }
+            if (isMember (st))
+            {
+                tag->extensionFields.access = accessField (st);
+            }
+            break;
+    }
+
+    /* Add typename info, type of the tag and name of struct/union/etc. */
+    if ((type == TAG_TYPEDEF || type == TAG_VARIABLE || type == TAG_MEMBER)
+        && isContextualStatement (st))
+    {
+        char *p;
+
+        tag->extensionFields.typeRef [0] =
+            tagName (declToTagType (st->declaration));
+        p = vStringValue (st->blockName->name);
+
+        /* If there was no {} block get the name from the token before the
+         * name (current token is ';' or ',', previous token is the name).
+         */
+        if (p == NULL || *p == '\0')
+        {
+            tokenInfo *const prev2 = prevToken (st, 2);
+            if (isType (prev2, TOKEN_NAME))
+                p = vStringValue (prev2->name);
+        }
+
+        /* Prepend the scope name if there is one. */
+        if (vStringLength (scope) > 0)
+        {
+            vStringCopy (typeRef, scope);
+            addContextSeparator (typeRef);
+            vStringCatS (typeRef, p);
+            p = vStringValue (typeRef);
+        }
+        tag->extensionFields.typeRef [1] = p;
+    }
+}
+
+/* Builds the scope string of a statement into "string".
+ *
+ * The string starts out as the statement's context name (if it has one).
+ * Walking from the parent statement outwards, every contextual or "program"
+ * ancestor is then prepended in the form [context-name SEP] block-name,
+ * with a separator placed before whatever had been collected so far.
+ */
+static void findScopeHierarchy (vString *const string,
+                                const statementInfo *const st)
+{
+    const statementInfo *outer;
+    vString *collected;
+
+    vStringClear (string);
+    if (isType (st->context, TOKEN_NAME))
+        vStringCopy (string, st->context->name);
+    if (st->parent == NULL)
+        return;
+
+    collected = vStringNew ();
+    for (outer = st->parent ; outer != NULL ; outer = outer->parent)
+    {
+        /* isContextualStatement() already covers DECL_NAMESPACE. */
+        if (! isContextualStatement (outer) &&
+            outer->declaration != DECL_PROGRAM)
+            continue;
+
+        /* Set aside what has been built, then rebuild with this level
+         * in front of it.
+         */
+        vStringCopy (collected, string);
+        vStringClear (string);
+        Assert (isType (outer->blockName, TOKEN_NAME));
+        if (isType (outer->context, TOKEN_NAME) &&
+            vStringLength (outer->context->name) > 0)
+        {
+            vStringCat (string, outer->context->name);
+            addContextSeparator (string);
+        }
+        vStringCat (string, outer->blockName->name);
+        if (vStringLength (collected) > 0)
+            addContextSeparator (string);
+        vStringCat (string, collected);
+    }
+    vStringDelete (collected);
+}
+
+/* Emits an additional, scope-qualified copy of the tag entry "e" when
+ * Option.include.qualifiedTags is on and "scope" is non-empty.
+ *
+ * For enumerators the last scope component (the enumeration name) is dropped
+ * first. That component is located by searching for ':', so when the scope
+ * holds no ':' nothing is left to qualify with and no extra entry is made.
+ *
+ * On return e->name is the same pointer it was on entry.
+ */
+static void makeExtraTagEntry (const tagType type, tagEntryInfo *const e,
+                               vString *const scope)
+{
+    if (Option.include.qualifiedTags &&
+        scope != NULL && vStringLength (scope) > 0)
+    {
+        vString *const scopedName = vStringNew ();
+
+        if (type != TAG_ENUMERATOR)
+            vStringCopy (scopedName, scope);
+        else
+        {
+            /* remove last component (i.e. enumeration name) from scope */
+            const char* const sc = vStringValue (scope);
+            const char* colon = strrchr (sc, ':');
+            if (colon != NULL)
+            {
+                while (*colon == ':' && colon > sc)
+                    --colon;
+                vStringNCopy (scopedName, scope, colon + 1 - sc);
+            }
+        }
+        if (vStringLength (scopedName) > 0)
+        {
+            const char *const plainName = e->name;
+
+            addContextSeparator (scopedName);
+            vStringCatS (scopedName, plainName);
+            e->name = vStringValue (scopedName);
+            makeTagEntry (e);
+            /* scopedName is released below; do not leave the caller's
+             * entry pointing into the freed buffer.
+             */
+            e->name = plainName;
+        }
+        vStringDelete (scopedName);
+    }
+}
+
+/* Generates the tag for a name token, plus its scope-qualified variant.
+ *
+ * Nothing is emitted unless the token is a non-empty name and includeTag()
+ * accepts the tag type. A file-scope request is dropped when the input is a
+ * header file, since nothing is really of file scope there.
+ */
+static void makeTag (const tokenInfo *const token,
+                     const statementInfo *const st,
+                     boolean isFileScope, const tagType type)
+{
+    vString *scope;
+    vString *typeRef;
+    tagEntryInfo e;
+
+    if (isFileScope && isHeaderFile ())
+        isFileScope = FALSE;
+
+    if (! isType (token, TOKEN_NAME) || vStringLength (token->name) == 0 ||
+        ! includeTag (type, isFileScope))
+        return;
+
+    scope = vStringNew ();
+    /* "typeRef" keeps the type name produced by addOtherFields() alive
+     * until makeTagEntry() has used it.
+     */
+    typeRef = vStringNew ();
+
+    initTagEntry (&e, vStringValue (token->name));
+    e.lineNumber   = token->lineNumber;
+    e.filePosition = token->filePosition;
+    e.isFileScope  = isFileScope;
+    e.kindName     = tagName (type);
+    e.kind         = tagLetter (type);
+
+    findScopeHierarchy (scope, st);
+    addOtherFields (&e, type, st, scope, typeRef);
+
+    makeTagEntry (&e);
+    makeExtraTagEntry (type, &e, scope);
+
+    vStringDelete (scope);
+    vStringDelete (typeRef);
+}
+
+/* Tells whether the declaration type is one of base, class, enum, event,
+ * struct or union.
+ */
+static boolean isValidTypeSpecifier (const declType declaration)
+{
+    return (boolean) (declaration == DECL_BASE   ||
+                      declaration == DECL_CLASS  ||
+                      declaration == DECL_ENUM   ||
+                      declaration == DECL_EVENT  ||
+                      declaration == DECL_STRUCT ||
+                      declaration == DECL_UNION);
+}
+
+/* Emits a file-scope enumerator tag for the token, provided it is a name. */
+static void qualifyEnumeratorTag (const statementInfo *const st,
+                                  const tokenInfo *const nameToken)
+{
+    if (! isType (nameToken, TOKEN_NAME))
+        return;
+
+    makeTag (nameToken, st, TRUE, TAG_ENUMERATOR);
+}
+
+/* Emits the tag for a function definition, provided the token is a name.
+ *
+ * The tag is a method for Java and C#, a task for a Vera task declaration,
+ * and a function otherwise. It is of file scope when the member access is
+ * private, or when the statement is a non-member with static scope.
+ */
+static void qualifyFunctionTag (const statementInfo *const st,
+                                const tokenInfo *const nameToken)
+{
+    tagType type;
+    boolean isFileScope;
+
+    if (! isType (nameToken, TOKEN_NAME))
+        return;
+
+    isFileScope = (boolean) (st->member.access == ACCESS_PRIVATE ||
+                             (! isMember (st) && st->scope == SCOPE_STATIC));
+
+    if (isLanguage (Lang_java) || isLanguage (Lang_csharp))
+        type = TAG_METHOD;
+    else if (isLanguage (Lang_vera) && st->declaration == DECL_TASK)
+        type = TAG_TASK;
+    else
+        type = TAG_FUNCTION;
+
+    makeTag (nameToken, st, isFileScope, type);
+}
+
+/* Emits the tag for a function declaration, provided the token is a name.
+ *
+ * Java and C# treat it like a definition (see qualifyFunctionTag()). For the
+ * other languages a declaration in typedef scope produces a typedef tag, and
+ * one with a valid type specifier produces a prototype tag.
+ */
+static void qualifyFunctionDeclTag (const statementInfo *const st,
+                                    const tokenInfo *const nameToken)
+{
+    if (! isType (nameToken, TOKEN_NAME))
+        return;
+
+    if (isLanguage (Lang_java) || isLanguage (Lang_csharp))
+    {
+        qualifyFunctionTag (st, nameToken);
+        return;
+    }
+    if (st->scope == SCOPE_TYPEDEF)
+    {
+        makeTag (nameToken, st, TRUE, TAG_TYPEDEF);
+        return;
+    }
+    if (isValidTypeSpecifier (st->declaration) && ! isLanguage (Lang_csharp))
+        makeTag (nameToken, st, TRUE, TAG_PROTOTYPE);
+}
+
+/* Emits the tag for a compound declaration (the tag type follows from the
+ * statement's declaration), provided the token is a name. The tag is of
+ * file scope except for Java, C# and Vera.
+ */
+static void qualifyCompoundTag (const statementInfo *const st,
+                                const tokenInfo *const nameToken)
+{
+    tagType type;
+    boolean fileScoped;
+
+    if (! isType (nameToken, TOKEN_NAME))
+        return;
+
+    type = declToTagType (st->declaration);
+    fileScoped = (boolean) (! isLanguage (Lang_java)   &&
+                            ! isLanguage (Lang_csharp) &&
+                            ! isLanguage (Lang_vera));
+
+    if (type != TAG_UNDEFINED)
+        makeTag (nameToken, st, fileScoped, type);
+}
+
+/* Emits a compound tag for the block name when the statement declares a
+ * class, enum, interface, namespace, program, struct or union; does nothing
+ * for any other declaration.
+ */
+static void qualifyBlockTag (statementInfo *const st,
+                             const tokenInfo *const nameToken)
+{
+    switch (st->declaration)
+    {
+        case DECL_CLASS:
+        case DECL_ENUM:
+        case DECL_INTERFACE:
+        case DECL_NAMESPACE:
+        case DECL_PROGRAM:
+        case DECL_STRUCT:
+        case DECL_UNION:
+            break;
+
+        default:
+            return;
+    }
+    qualifyCompoundTag (st, nameToken);
+}
+
+/* Emits the tag for a variable-like name: a typedef, event, package, field,
+ * member, extern variable, local or variable, depending on the statement.
+ *
+ * We have to watch that we do not interpret a declaration of the form
+ * "struct tag;" as a variable definition. In such a case, the token
+ * preceding the name will be a keyword.
+ */
+static void qualifyVariableTag (const statementInfo *const st,
+                                const tokenInfo *const nameToken)
+{
+    boolean isStatic;
+
+    if (! isType (nameToken, TOKEN_NAME))
+        return;
+
+    if (st->scope == SCOPE_TYPEDEF)
+    {
+        makeTag (nameToken, st, TRUE, TAG_TYPEDEF);
+        return;
+    }
+    if (st->declaration == DECL_EVENT)
+    {
+        makeTag (nameToken, st, (boolean) (st->member.access == ACCESS_PRIVATE),
+                 TAG_EVENT);
+        return;
+    }
+    if (st->declaration == DECL_PACKAGE)
+    {
+        makeTag (nameToken, st, FALSE, TAG_PACKAGE);
+        return;
+    }
+    if (! isValidTypeSpecifier (st->declaration) || st->notVariable)
+        return;
+
+    if (isMember (st))
+    {
+        if (isLanguage (Lang_java) || isLanguage (Lang_csharp))
+            makeTag (nameToken, st,
+                     (boolean) (st->member.access == ACCESS_PRIVATE), TAG_FIELD);
+        else if (st->scope == SCOPE_GLOBAL || st->scope == SCOPE_STATIC)
+            makeTag (nameToken, st, TRUE, TAG_MEMBER);
+        return;
+    }
+
+    isStatic = (boolean) (st->scope == SCOPE_STATIC);
+    if (st->scope == SCOPE_EXTERN || ! st->haveQualifyingName)
+        makeTag (nameToken, st, FALSE, TAG_EXTERN_VAR);
+    else if (st->inFunction)
+        makeTag (nameToken, st, isStatic, TAG_LOCAL);
+    else
+        makeTag (nameToken, st, isStatic, TAG_VARIABLE);
+}
+
+/*
+* Parsing functions
+*/
+
+/* Reads characters until one of those listed in "chars" is found, and
+ * returns it. Reading also stops at EOF or at a '\0' character, which is
+ * then the value returned.
+ */
+static int skipToOneOf (const char *const chars)
+{
+    int c;
+
+    for (;;)
+    {
+        c = cppGetc ();
+        if (c == EOF || c == '\0' || strchr (chars, c) != NULL)
+            break;
+    }
+    return c;
+}
+
+/* Reads past white space and returns the first character that is not white
+ * space. While a signature is being collected, any run of skipped white
+ * space is recorded in it as a single blank.
+ */
+static int skipToNonWhite (void)
+{
+    boolean sawSpace = FALSE;
+    int c = cppGetc ();
+
+    while (isspace (c))
+    {
+        sawSpace = TRUE;
+        c = cppGetc ();
+    }
+    if (CollectingSignature && sawSpace)
+        vStringPut (Signature, ' ');
+
+    return c;
+}
+
+/* Skips forward to the next closing brace in column 1, i.e. a '}' that
+ * directly follows a newline, or to EOF. This is intended for cases where
+ * preprocessor constructs result in unbalanced braces.
+ */
+static void skipToFormattedBraceMatch (void)
+{
+    int previous = cppGetc ();
+    int current  = cppGetc ();
+
+    while (! (previous == EOF || (previous == '\n' && current == '}')))
+    {
+        previous = current;
+        current  = cppGetc ();
+    }
+}
+
+/* Skips to the character that closes the group named by "pair" (opening
+ * character first, closing character second), honouring nested groups. The
+ * opening character is assumed to have been read already.
+ *
+ * If braces are being matched while brace formatting is in effect and a
+ * brace turns up at a different #if nesting level than the one we started
+ * at, the brace in column 1 is taken as the match instead.
+ *
+ * Running into EOF first is reported through verbose() and raised as an
+ * exception via longjmp().
+ */
+static void skipToMatch (const char *const pair)
+{
+    const boolean braceMatching = (boolean) (strcmp ("{}", pair) == 0);
+    const boolean braceFormatting = (boolean) (isBraceFormat () && braceMatching);
+    const unsigned int startLevel = getDirectiveNestLevel ();
+    const int opener = pair [0], closer = pair [1];
+    const unsigned long startLine = getInputLineNumber ();
+    int depth = 1;
+    int c = '\0';
+
+    while (depth > 0 && (c = skipToNonWhite ()) != EOF)
+    {
+        if (CollectingSignature)
+            vStringPut (Signature, c);
+        if (c != opener && c != closer)
+            continue;
+
+        depth += (c == opener) ? 1 : -1;
+        if (braceFormatting && getDirectiveNestLevel () != startLevel)
+        {
+            skipToFormattedBraceMatch ();
+            break;
+        }
+    }
+    if (c != EOF)
+        return;
+
+    verbose ("%s: failed to find match for '%c' at line %lu\n",
+             getInputFileName (), opener, startLine);
+    if (braceMatching)
+        longjmp (Exception, (int) ExceptionBraceFormattingError);
+    else
+        longjmp (Exception, (int) ExceptionFormattingError);
+}
+
+/* Skips a parenthesized group if the next non-white character opens one;
+ * otherwise that character is pushed back.
+ */
+static void skipParens (void)
+{
+    const int c = skipToNonWhite ();
+
+    if (c != '(')
+    {
+        cppUngetc (c);
+        return;
+    }
+    skipToMatch ("()");
+}
+
+/* Skips a brace-enclosed block if the next non-white character opens one;
+ * otherwise that character is pushed back.
+ */
+static void skipBraces (void)
+{
+    const int c = skipToNonWhite ();
+
+    if (c != '{')
+    {
+        cppUngetc (c);
+        return;
+    }
+    skipToMatch ("{}");
+}
+
+/* Looks "name" up as a keyword of the source language and returns the
+ * result of lookupKeyword() as a keywordId.
+ */
+static keywordId analyzeKeyword (const char *const name)
+{
+    return (keywordId) lookupKeyword (name, getSourceLanguage ());
+}
+
+/* Classifies the identifier held in the token.
+ *
+ * For Java, or when isIgnoreToken() does not ask for the identifier to be
+ * ignored, the identifier (or the replacement text supplied by
+ * isIgnoreToken()) is looked up as a keyword and the token becomes either a
+ * keyword or a name token.
+ *
+ * An ignored identifier resets the token. If isIgnoreToken() also asked for
+ * parentheses to be ignored, a parenthesized group that follows is skipped;
+ * when no such group follows, the character that was examined is pushed
+ * back so that it is not lost.
+ */
+static void analyzeIdentifier (tokenInfo *const token)
+{
+    char *const name = vStringValue (token->name);
+    const char *replacement = NULL;
+    boolean parensToo = FALSE;
+
+    if (isLanguage (Lang_java) ||
+        ! isIgnoreToken (name, &parensToo, &replacement))
+    {
+        if (replacement != NULL)
+            token->keyword = analyzeKeyword (replacement);
+        else
+            token->keyword = analyzeKeyword (vStringValue (token->name));
+
+        if (token->keyword == KEYWORD_NONE)
+            token->type = TOKEN_NAME;
+        else
+            token->type = TOKEN_KEYWORD;
+    }
+    else
+    {
+        initToken (token);
+        if (parensToo)
+            skipParens ();  /* ungets the character when it is not '(' */
+    }
+}
+
+/* Reads an identifier whose first character is "firstChar" into the token
+ * and then classifies it with analyzeIdentifier().
+ *
+ * For Java and C#, high characters and '.' also count as part of the
+ * identifier. While a signature is being collected, every character after
+ * the first one is appended to it. The character that ends the identifier
+ * is pushed back.
+ */
+static void readIdentifier (tokenInfo *const token, const int firstChar)
+{
+    const boolean dottedNames =
+        (boolean) (isLanguage (Lang_java) || isLanguage (Lang_csharp));
+    vString *const name = token->name;
+    boolean leading = TRUE;
+    boolean more;
+    int c = firstChar;
+
+    initToken (token);
+
+    /* Bug #1585745: strangely, C++ destructors allow whitespace between
+     * the ~ and the class name. */
+    if (isLanguage (Lang_cpp) && firstChar == '~')
+    {
+        vStringPut (name, c);
+        c = skipToNonWhite ();
+    }
+
+    do
+    {
+        vStringPut (name, c);
+        if (CollectingSignature)
+        {
+            if (! leading)
+                vStringPut (Signature, c);
+            leading = FALSE;
+        }
+        c = cppGetc ();
+        more = (boolean) (isident (c) ||
+                          (dottedNames && (isHighChar (c) || c == '.')));
+    } while (more);
+
+    vStringTerminate (name);
+    cppUngetc (c);      /* unget non-identifier character */
+
+    analyzeIdentifier (token);
+}
+
+/* Reads a dotted package name starting with "firstChar" into the token's
+ * name. Identifier characters and '.' are accepted; the first character
+ * that is neither is pushed back. The token is reset beforehand and its
+ * type is left for the caller to set.
+ */
+static void readPackageName (tokenInfo *const token, const int firstChar)
+{
+    vString *const name = token->name;
+    int c;
+
+    initToken (token);
+
+    for (c = firstChar ; isident (c) || c == '.' ; c = cppGetc ())
+        vStringPut (name, c);
+
+    vStringTerminate (name);
+    cppUngetc (c);      /* unget non-package character */
+}
+
+/* Records the declaration type and, except for a namespace outside of C#,
+ * reads the complete dotted name that follows into the active token, which
+ * then becomes a name token.
+ */
+static void readPackageOrNamespace (statementInfo *const st, const declType declaration)
+{
+    tokenInfo *token;
+
+    st->declaration = declaration;
+
+    /* In C++ a namespace is specified one level at a time. */
+    if (declaration == DECL_NAMESPACE && ! isLanguage (Lang_csharp))
+        return;
+
+    /* In C#, a namespace can also be specified like a Java package name. */
+    token = activeToken (st);
+    Assert (isType (token, TOKEN_KEYWORD));
+    readPackageName (token, skipToNonWhite ());
+    token->type = TOKEN_NAME;
+    st->gotName = TRUE;
+    st->haveQualifyingName = TRUE;
+}
+
+/* Notes that the active token of the statement is a name. If a name had
+ * already been seen and no declaration type is known yet, the declaration
+ * becomes DECL_BASE.
+ */
+static void processName (statementInfo *const st)
+{
+    const boolean secondName =
+        (boolean) (st->gotName && st->declaration == DECL_NONE);
+
+    Assert (isType (activeToken (st), TOKEN_NAME));
+    if (secondName)
+        st->declaration = DECL_BASE;
+    st->haveQualifyingName = TRUE;
+    st->gotName = TRUE;
+}
+
+/* Completes the name of an operator function. On entry the active token
+ * holds the keyword "operator"; what follows in the input is appended to its
+ * name, separated by a blank, and the token is turned into a name token.
+ *
+ * Three forms are recognized: the function call operator "()", a word form
+ * (such as "new", "delete" or a conversion function), and a run of operator
+ * characters. Nothing is appended when the previous token is one of the
+ * keywords enum, struct or union.
+ */
+static void readOperator (statementInfo *const st)
+{
+    const char *const opChars = "+-*/%^&|~!=<>,[]";
+    const tokenInfo* const before = prevToken (st,1);
+    tokenInfo *const current = activeToken (st);
+    vString *const opName = current->name;
+    int c = skipToNonWhite ();
+    const boolean afterTagKeyword = (boolean)
+        (isType (before, TOKEN_KEYWORD) &&
+         (before->keyword == KEYWORD_ENUM ||
+          before->keyword == KEYWORD_STRUCT ||
+          before->keyword == KEYWORD_UNION));
+
+    /* When we arrive here, we have the keyword "operator" in 'opName'.
+     */
+    if (afterTagKeyword)
+        ; /* ignore "operator" keyword if preceded by these keywords */
+    else if (c == '(')
+    {
+        /* Verify whether this is a valid function call (i.e. "()") operator.
+         */
+        if (cppGetc () == ')')
+        {
+            vStringPut (opName, ' ');  /* always separate operator from keyword */
+            c = skipToNonWhite ();
+            if (c == '(')
+                vStringCatS (opName, "()");
+        }
+        else
+        {
+            skipToMatch ("()");
+            c = cppGetc ();
+        }
+    }
+    else if (isident1 (c))
+    {
+        /* Handle "new" and "delete" operators, and conversion functions
+         * (per 13.3.1.1.2 [2] of the C++ spec).
+         */
+        boolean pendingSpace = TRUE;  /* default causes insertion of space */
+        do
+        {
+            if (isspace (c))
+                pendingSpace = TRUE;
+            else
+            {
+                if (pendingSpace)
+                {
+                    vStringPut (opName, ' ');
+                    pendingSpace = FALSE;
+                }
+                vStringPut (opName, c);
+            }
+            c = cppGetc ();
+        } while (! isOneOf (c, "(;") && c != EOF);
+        vStringTerminate (opName);
+    }
+    else if (isOneOf (c, opChars))
+    {
+        vStringPut (opName, ' ');  /* always separate operator from keyword */
+        do
+        {
+            vStringPut (opName, c);
+            c = cppGetc ();
+        } while (isOneOf (c, opChars));
+        vStringTerminate (opName);
+    }
+
+    cppUngetc (c);
+
+    current->type    = TOKEN_NAME;
+    current->keyword = KEYWORD_NONE;
+    processName (st);
+}
+
+/* Copies every field of "src" into "dest"; the name is copied by value into
+ * the destination's own name buffer.
+ */
+static void copyToken (tokenInfo *const dest, const tokenInfo *const src)
+{
+    vStringCopy (dest->name, src->name);
+    dest->lineNumber   = src->lineNumber;
+    dest->filePosition = src->filePosition;
+    dest->keyword      = src->keyword;
+    dest->type         = src->type;
+}
+
+/* Applies an access specifier to a member statement; statements that are
+ * not members are left alone.
+ *
+ * In C++ the specifier also becomes the default for what follows, and when
+ * it is directly followed by ':' the statement is reset in full. Any other
+ * following character is pushed back.
+ */
+static void setAccess (statementInfo *const st, const accessType access)
+{
+    if (! isMember (st))
+        return;
+
+    if (isLanguage (Lang_cpp))
+    {
+        const int c = skipToNonWhite ();
+
+        if (c == ':')
+            reinitStatement (st, FALSE);
+        else
+            cppUngetc (c);
+
+        st->member.accessDefault = access;
+    }
+    st->member.access = access;
+}
+
+/* Reads and throws away a list of identifiers, each of which may be
+ * followed by a single '.' or ','. The first character that cannot start an
+ * identifier is pushed back. "token" serves as scratch space.
+ */
+static void discardTypeList (tokenInfo *const token)
+{
+    int c;
+
+    for (c = skipToNonWhite () ; isident1 (c) ; )
+    {
+        readIdentifier (token, c);
+        c = skipToNonWhite ();
+        if (c == '.' || c == ',')
+            c = skipToNonWhite ();
+    }
+    cppUngetc (c);
+}
+
+/* Appends the token's name to the statement's list of parent classes. A
+ * comma is inserted first when both the name and the existing list are
+ * non-empty.
+ */
+static void addParentClass (statementInfo *const st, tokenInfo *const token)
+{
+    const boolean needComma = (boolean)
+        (vStringLength (token->name) > 0 &&
+         vStringLength (st->parentClasses) > 0);
+
+    if (needComma)
+        vStringPut (st->parentClasses, ',');
+    vStringCat (st->parentClasses, token->name);
+}
+
+/* Reads a list of parent class names up to the opening '{' (or EOF) and
+ * collects them in the statement's parentClasses string. The '{' or EOF
+ * that ends the list is pushed back.
+ *
+ * Identifiers that turn out to be names, and occurrences of the "qualifier"
+ * character, are accumulated into the pending parent name. The pending name
+ * is added to the list and cleared when an identifier is read that is not a
+ * name, or when some other character is met while the last identifier read
+ * was a name. A '<' skips ahead to its matching '>'.
+ */
+static void readParents (statementInfo *const st, const int qualifier)
+{
+    tokenInfo *const lastRead = newToken ();
+    tokenInfo *const pending = newToken ();
+    int c;
+
+    for (;;)
+    {
+        c = skipToNonWhite ();
+        if (isident1 (c))
+        {
+            readIdentifier (lastRead, c);
+            if (isType (lastRead, TOKEN_NAME))
+                vStringCat (pending->name, lastRead->name);
+            else
+            {
+                addParentClass (st, pending);
+                initToken (pending);
+            }
+        }
+        else if (c == qualifier)
+            vStringPut (pending->name, c);
+        else if (c == '<')
+            skipToMatch ("<>");
+        else if (isType (lastRead, TOKEN_NAME))
+        {
+            addParentClass (st, pending);
+            initToken (pending);
+        }
+
+        if (c == '{' || c == EOF)
+            break;
+    }
+    cppUngetc (c);
+    deleteToken (pending);
+    deleteToken (lastRead);
+}
+
+/* Marks the statement as one to be ignored and reads ahead to the ';' that
+ * ends it (skipToOneOf() also stops at EOF).
+ */
+static void skipStatement (statementInfo *const st)
+{
+    st->declaration = DECL_IGNORE;
+    (void) skipToOneOf (";");
+}
+
+/* Handles the "interface" keyword: the statement becomes an interface
+ * declaration.
+ */
+static void processInterface (statementInfo *const st)
+{
+    st->declaration = DECL_INTERFACE;
+}
+
+/* Updates the statement state according to the keyword of the token that was
+ * just read. A token without a keyword (KEYWORD_NONE) is handed to
+ * processName(); keywords that have no case below are ignored.
+ */
+static void processToken (tokenInfo *const token, statementInfo *const st)
+{
+ switch (token->keyword) /* is it a reserved word? */
+ {
+ default: break;
+
+ case KEYWORD_NONE: processName (st); break;
+ case KEYWORD_ABSTRACT: st->implementation = IMP_ABSTRACT; break;
+ case KEYWORD_ATTRIBUTE: skipParens (); initToken (token); break;
+ case KEYWORD_BIND: st->declaration = DECL_BASE; break;
+ case KEYWORD_BIT: st->declaration = DECL_BASE; break;
+ case KEYWORD_CATCH: skipParens (); skipBraces (); break;
+ case KEYWORD_CHAR: st->declaration = DECL_BASE; break;
+ case KEYWORD_CLASS: st->declaration = DECL_CLASS; break;
+ case KEYWORD_CONST: st->declaration = DECL_BASE; break;
+ case KEYWORD_DOUBLE: st->declaration = DECL_BASE; break;
+ case KEYWORD_ENUM: st->declaration = DECL_ENUM; break;
+ /* "extends"/"implements": collect the parent names ('.' is the name
+ * qualifier), then blank the active token. */
+ case KEYWORD_EXTENDS: readParents (st, '.');
+ setToken (st, TOKEN_NONE); break;
+ case KEYWORD_FLOAT: st->declaration = DECL_BASE; break;
+ case KEYWORD_FUNCTION: st->declaration = DECL_BASE; break;
+ case KEYWORD_FRIEND: st->scope = SCOPE_FRIEND; break;
+ case KEYWORD_GOTO: skipStatement (st); break;
+ case KEYWORD_IMPLEMENTS:readParents (st, '.');
+ setToken (st, TOKEN_NONE); break;
+ case KEYWORD_IMPORT: skipStatement (st); break;
+ case KEYWORD_INT: st->declaration = DECL_BASE; break;
+ case KEYWORD_INTEGER: st->declaration = DECL_BASE; break;
+ case KEYWORD_INTERFACE: processInterface (st); break;
+ case KEYWORD_LOCAL: setAccess (st, ACCESS_LOCAL); break;
+ case KEYWORD_LONG: st->declaration = DECL_BASE; break;
+ case KEYWORD_OPERATOR: readOperator (st); break;
+ case KEYWORD_PRIVATE: setAccess (st, ACCESS_PRIVATE); break;
+ case KEYWORD_PROGRAM: st->declaration = DECL_PROGRAM; break;
+ case KEYWORD_PROTECTED: setAccess (st, ACCESS_PROTECTED); break;
+ case KEYWORD_PUBLIC: setAccess (st, ACCESS_PUBLIC); break;
+ case KEYWORD_RETURN: skipStatement (st); break;
+ case KEYWORD_SHORT: st->declaration = DECL_BASE; break;
+ case KEYWORD_SIGNED: st->declaration = DECL_BASE; break;
+ case KEYWORD_STRING: st->declaration = DECL_BASE; break;
+ case KEYWORD_STRUCT: st->declaration = DECL_STRUCT; break;
+ case KEYWORD_TASK: st->declaration = DECL_TASK; break;
+ case KEYWORD_THROWS: discardTypeList (token); break;
+ case KEYWORD_UNION: st->declaration = DECL_UNION; break;
+ case KEYWORD_UNSIGNED: st->declaration = DECL_BASE; break;
+ case KEYWORD_USING: skipStatement (st); break;
+ case KEYWORD_VOID: st->declaration = DECL_BASE; break;
+ case KEYWORD_VOLATILE: st->declaration = DECL_BASE; break;
+ case KEYWORD_VIRTUAL: st->implementation = IMP_VIRTUAL; break;
+ case KEYWORD_WCHAR_T: st->declaration = DECL_BASE; break;
+
+ case KEYWORD_NAMESPACE: readPackageOrNamespace (st, DECL_NAMESPACE); break;
+ case KEYWORD_PACKAGE: readPackageOrNamespace (st, DECL_PACKAGE); break;
+
+ /* "event" only changes the declaration kind when parsing C#. */
+ case KEYWORD_EVENT:
+ if (isLanguage (Lang_csharp))
+ st->declaration = DECL_EVENT;
+ break;
+
+ /* "typedef" starts the statement over and marks its scope. */
+ case KEYWORD_TYPEDEF:
+ reinitStatement (st, FALSE);
+ st->scope = SCOPE_TYPEDEF;
+ break;
+
+ /* "extern" restarts the statement, except in C# once a name has
+ * already been seen (st->gotName). */
+ case KEYWORD_EXTERN:
+ if (! isLanguage (Lang_csharp) || !st->gotName)
+ {
+ reinitStatement (st, FALSE);
+ st->scope = SCOPE_EXTERN;
+ st->declaration = DECL_BASE;
+ }
+ break;
+
+ /* "static" restarts the statement with static scope, but is left
+ * alone for Java and C#. */
+ case KEYWORD_STATIC:
+ if (! (isLanguage (Lang_java) || isLanguage (Lang_csharp)))
+ {
+ reinitStatement (st, FALSE);
+ st->scope = SCOPE_STATIC;
+ st->declaration = DECL_BASE;
+ }
+ break;
+
+ /* Control-flow keywords: skip the parenthesized expression that
+ * follows, if any.
+ * NOTE(review): when the next non-white character is not '(' it is
+ * consumed and not pushed back. */
+ case KEYWORD_FOR:
+ case KEYWORD_FOREACH:
+ case KEYWORD_IF:
+ case KEYWORD_SWITCH:
+ case KEYWORD_WHILE:
+ {
+ int c = skipToNonWhite ();
+ if (c == '(')
+ skipToMatch ("()");
+ break;
+ }
+ }
+}
+
+/*
+* Parenthesis handling functions
+*/
+
+/* Begins a fresh statement while keeping the token that was just read: the
+ * active token is copied aside, the statement is reinitialized, the copy is
+ * stored into the new active token, and that token is then run through
+ * processToken() again.
+ */
+static void restartStatement (statementInfo *const st)
+{
+	tokenInfo *const snapshot = newToken ();
+	tokenInfo *current;
+
+	copyToken (snapshot, activeToken (st));
+	DebugStatement ( if (debug (DEBUG_PARSE)) printf ("<ES>");)
+	reinitStatement (st, FALSE);
+
+	/* The active token must be looked up again after reinitialization. */
+	current = activeToken (st);
+	copyToken (current, snapshot);
+	deleteToken (snapshot);
+	processToken (current, st);
+}
+
+/* Skips over the mem-initializer-list of a ctor-initializer, defined as:
+ *
+ * mem-initializer-list:
+ *     mem-initializer, mem-initializer-list
+ *
+ * mem-initializer:
+ *     [::] [nested-name-spec] class-name (...)
+ *     identifier
+ *
+ * Identifiers are read into `token'. The first character that does not
+ * belong to the list is pushed back onto the input.
+ */
+static void skipMemIntializerList (tokenInfo *const token)
+{
+	int ch;
+
+	do
+	{
+		/* Consume the (possibly qualified) name: identifiers and ':'. */
+		for (ch = skipToNonWhite () ;
+			 isident1 (ch) || ch == ':' ;
+			 ch = skipToNonWhite ())
+		{
+			if (ch != ':')
+				readIdentifier (token, ch);
+		}
+
+		/* Optional template argument list. */
+		if (ch == '<')
+		{
+			skipToMatch ("<>");
+			ch = skipToNonWhite ();
+		}
+
+		/* Optional initializer arguments. */
+		if (ch == '(')
+		{
+			skipToMatch ("()");
+			ch = skipToNonWhite ();
+		}
+	} while (ch == ',');
+	cppUngetc (ch);
+}
+
+/* Skips a parenthesized group that is not treated as a parameter list. When
+ * the token two positions before the active one is a name, retardToken() is
+ * called on the statement first.
+ */
+static void skipMacro (statementInfo *const st)
+{
+	if (isType (prevToken (st, 2), TOKEN_NAME))
+		retardToken (st);
+	skipToMatch ("()");
+}
+
+/* Skips over characters following the parameter list. This will be either
+ * non-ANSI style function declarations or C++ stuff. Our choices:
+ *
+ * C (K&R):
+ * int func ();
+ * int func (one, two) int one; float two; {...}
+ * C (ANSI):
+ * int func (int one, float two);
+ * int func (int one, float two) {...}
+ * C++:
+ * int foo (...) [const|volatile] [throw (...)];
+ * int foo (...) [const|volatile] [throw (...)] [ctor-initializer] {...}
+ * int foo (...) [const|volatile] [throw (...)] try [ctor-initializer] {...}
+ * catch (...) {...}
+ *
+ * `info' describes the parameter list that was just parsed. On return the
+ * statement has either been restarted (restartStatement) or its active token
+ * has been set to TOKEN_NONE. Returns FALSE if EOF was reached while
+ * scanning, TRUE otherwise.
+ */
+static boolean skipPostArgumentStuff (
+ statementInfo *const st, parenInfo *const info)
+{
+ tokenInfo *const token = activeToken (st);
+ /* number of K&R parameter declarations still expected */
+ unsigned int parameters = info->parameterCount;
+ /* identifiers (and, once non-zero, parenthesized groups) seen so far */
+ unsigned int elementCount = 0;
+ boolean restart = FALSE;
+ boolean end = FALSE;
+ int c = skipToNonWhite ();
+
+ do
+ {
+ switch (c)
+ {
+ case ')': break;
+ case ':': skipMemIntializerList (token);break; /* ctor-initializer */
+ case '[': skipToMatch ("[]"); break;
+ /* '=', '{' and '}' end the scan; they are pushed back for the caller */
+ case '=': cppUngetc (c); end = TRUE; break;
+ case '{': cppUngetc (c); end = TRUE; break;
+ case '}': cppUngetc (c); end = TRUE; break;
+
+ case '(':
+ if (elementCount > 0)
+ ++elementCount;
+ skipToMatch ("()");
+ break;
+
+ case ';':
+ /* With no parameters expected, or fewer than two elements seen,
+ * the ';' ends the statement and is pushed back. Otherwise it
+ * closes one K&R parameter declaration and is consumed. */
+ if (parameters == 0 || elementCount < 2)
+ {
+ cppUngetc (c);
+ end = TRUE;
+ }
+ else if (--parameters == 0)
+ end = TRUE;
+ break;
+
+ default:
+ if (isident1 (c))
+ {
+ readIdentifier (token, c);
+ switch (token->keyword)
+ {
+ case KEYWORD_ATTRIBUTE: skipParens (); break;
+ case KEYWORD_THROW: skipParens (); break;
+ case KEYWORD_TRY: break;
+
+ /* cv-qualifiers are appended to a non-empty signature */
+ case KEYWORD_CONST:
+ case KEYWORD_VOLATILE:
+ if (vStringLength (Signature) > 0)
+ {
+ vStringPut (Signature, ' ');
+ vStringCat (Signature, token->name);
+ }
+ break;
+
+ case KEYWORD_CATCH:
+ case KEYWORD_CLASS:
+ case KEYWORD_EXPLICIT:
+ case KEYWORD_EXTERN:
+ case KEYWORD_FRIEND:
+ case KEYWORD_INLINE:
+ case KEYWORD_MUTABLE:
+ case KEYWORD_NAMESPACE:
+ case KEYWORD_NEW:
+ case KEYWORD_NEWCOV:
+ case KEYWORD_OPERATOR:
+ case KEYWORD_OVERLOAD:
+ case KEYWORD_PRIVATE:
+ case KEYWORD_PROTECTED:
+ case KEYWORD_PUBLIC:
+ case KEYWORD_STATIC:
+ case KEYWORD_TEMPLATE:
+ case KEYWORD_TYPEDEF:
+ case KEYWORD_TYPENAME:
+ case KEYWORD_USING:
+ case KEYWORD_VIRTUAL:
+ /* Never allowed within parameter declarations. */
+ restart = TRUE;
+ end = TRUE;
+ break;
+
+ default:
+ if (isType (token, TOKEN_NONE))
+ ;
+ else if (info->isKnrParamList && info->parameterCount > 0)
+ ++elementCount;
+ else
+ {
+ /* If we encounter any other identifier immediately
+ * following an empty parameter list, this is almost
+ * certainly one of those Microsoft macro "thingies"
+ * that the automatic source code generation sticks
+ * in. Terminate the current statement.
+ */
+ restart = TRUE;
+ end = TRUE;
+ }
+ break;
+ }
+ }
+ }
+ if (! end)
+ {
+ c = skipToNonWhite ();
+ if (c == EOF)
+ end = TRUE;
+ }
+ } while (! end);
+
+ /* Either reprocess the last token as the start of a new statement, or
+ * blank the active token. */
+ if (restart)
+ restartStatement (st);
+ else
+ setToken (st, TOKEN_NONE);
+
+ return (boolean) (c != EOF);
+}
+
+/* Skips an optional Java "throws" clause after a parameter list. If the next
+ * identifier is the "throws" keyword, the identifiers that follow, separated
+ * by '.' or ',', are consumed. The last character read is pushed back and the
+ * active token is set to TOKEN_NONE.
+ */
+static void skipJavaThrows (statementInfo *const st)
+{
+	tokenInfo *const scratch = activeToken (st);
+	int ch = skipToNonWhite ();
+
+	if (isident1 (ch))
+	{
+		readIdentifier (scratch, ch);
+		if (scratch->keyword == KEYWORD_THROWS)
+		{
+			for (;;)
+			{
+				ch = skipToNonWhite ();
+				if (isident1 (ch))
+				{
+					readIdentifier (scratch, ch);
+					ch = skipToNonWhite ();
+				}
+				if (ch != '.' && ch != ',')
+					break;
+			}
+		}
+	}
+	cppUngetc (ch);
+	setToken (st, TOKEN_NONE);
+}
+
+/* Decides what to do with the input that follows a parameter list. The next
+ * non-white character is peeked at (read and pushed back):
+ *   - one of "{;,=": nothing more to do here;
+ *   - Java: skip an optional "throws" clause;
+ *   - otherwise: skip K&R declarations / C++ trailers, raising
+ *     ExceptionFormattingError if that scan fails.
+ */
+static void analyzePostParens (statementInfo *const st, parenInfo *const info)
+{
+	const unsigned long startLine = getInputLineNumber ();
+	const int next = skipToNonWhite ();
+
+	cppUngetc (next);
+	if (isOneOf (next, "{;,="))
+		return;
+
+	if (isLanguage (Lang_java))
+	{
+		skipJavaThrows (st);
+		return;
+	}
+
+	if (skipPostArgumentStuff (st, info))
+		return;
+
+	verbose (
+		"%s: confusing argument declarations beginning at line %lu\n",
+		getInputFileName (), startLine);
+	longjmp (Exception, (int) ExceptionFormattingError);
+}
+
+/* Returns TRUE when the language being parsed is C++, C# or Java. */
+static boolean languageSupportsGenerics (void)
+{
+	if (isLanguage (Lang_cpp))
+		return TRUE;
+	if (isLanguage (Lang_csharp))
+		return TRUE;
+	return isLanguage (Lang_java);
+}
+
+/* Handles the input that follows a '<' which has already been read:
+ *   "<>"           : both characters are consumed;
+ *   "<<" or "<<="  : the shift operator is consumed;
+ *   "<="           : the '=' is pushed back;
+ *   anything else  : the character is pushed back and, for languages with
+ *                    generics, the input is skipped to the matching '>'.
+ */
+static void processAngleBracket (void)
+{
+	const int first = cppGetc ();
+
+	switch (first)
+	{
+		case '>':
+			/* already found match for template */
+			break;
+
+		case '<':
+		{
+			/* skip "<<" or "<<=". */
+			const int second = cppGetc ();
+			if (second != '=')
+				cppUngetc (second);
+			break;
+		}
+
+		case '=':
+			cppUngetc (first);
+			break;
+
+		default:
+			cppUngetc (first);
+			if (languageSupportsGenerics ())
+				skipToMatch ("<>");		/* this is a template */
+			break;
+	}
+}
+
+/* Handles the text following a Java '@':
+ *
+ *   @Override
+ *   @Target(ElementType.METHOD)
+ *   @SuppressWarnings(value = "unchecked")
+ *
+ * The identifier after '@' is read into the active token. "@interface"
+ * defines a new annotation type and is processed as an interface; for any
+ * other annotation the arguments are skipped (Bug #1691412).
+ */
+static void parseJavaAnnotation (statementInfo *const st)
+{
+	tokenInfo *const name = activeToken (st);
+	const int first = skipToNonWhite ();
+
+	readIdentifier (name, first);
+	if (name->keyword != KEYWORD_INTERFACE)
+		skipParens ();
+	else
+		processInterface (st);
+}
+
+/* Scans the contents of a parenthesized group (nesting depth starts at 1)
+ * and records in `info' what kind of group it appears to be: a parameter
+ * list, a K&R parameter list, a parenthesized name candidate, a pointer
+ * declarator, and so on. While scanning, the text read is collected into the
+ * global Signature string; the signature is cleared again if the group turns
+ * out to be a K&R parameter list.
+ *
+ * NOTE(review): nextChar is never reassigned after its initialization, so
+ * the return value is always '\0'.
+ */
+static int parseParens (statementInfo *const st, parenInfo *const info)
+{
+ tokenInfo *const token = activeToken (st);
+ unsigned int identifierCount = 0;
+ unsigned int depth = 1;
+ boolean firstChar = TRUE;
+ int nextChar = '\0';
+
+ CollectingSignature = TRUE;
+ vStringClear (Signature);
+ vStringPut (Signature, '(');
+ /* start at one parameter; reset to zero below if ')' comes first */
+ info->parameterCount = 1;
+ do
+ {
+ int c = skipToNonWhite ();
+ vStringPut (Signature, c);
+
+ switch (c)
+ {
+ case '&':
+ case '*':
+ info->isPointer = TRUE;
+ info->isKnrParamList = FALSE;
+ if (identifierCount == 0)
+ info->isParamList = FALSE;
+ initToken (token);
+ break;
+
+ case ':':
+ info->isKnrParamList = FALSE;
+ break;
+
+ case '.':
+ /* Only a complete "..." is added to the signature; a lone '.'
+ * or ".." rules out a K&R parameter list. */
+ info->isNameCandidate = FALSE;
+ c = cppGetc ();
+ if (c != '.')
+ {
+ cppUngetc (c);
+ info->isKnrParamList = FALSE;
+ }
+ else
+ {
+ c = cppGetc ();
+ if (c != '.')
+ {
+ cppUngetc (c);
+ info->isKnrParamList = FALSE;
+ }
+ else
+ vStringCatS (Signature, "..."); /* variable arg list */
+ }
+ break;
+
+ case ',':
+ info->isNameCandidate = FALSE;
+ if (info->isKnrParamList)
+ {
+ ++info->parameterCount;
+ identifierCount = 0;
+ }
+ break;
+
+ case '=':
+ info->isKnrParamList = FALSE;
+ info->isNameCandidate = FALSE;
+ /* '=' as the very first character: not a parameter list; skip
+ * the rest of the group as a macro. */
+ if (firstChar)
+ {
+ info->isParamList = FALSE;
+ skipMacro (st);
+ depth = 0;
+ }
+ break;
+
+ case '[':
+ info->isKnrParamList = FALSE;
+ skipToMatch ("[]");
+ break;
+
+ case '<':
+ info->isKnrParamList = FALSE;
+ processAngleBracket ();
+ break;
+
+ case ')':
+ if (firstChar)
+ info->parameterCount = 0;
+ --depth;
+ break;
+
+ case '(':
+ info->isKnrParamList = FALSE;
+ if (firstChar)
+ {
+ /* "((": push the inner '(' back and skip the whole group
+ * as a macro. */
+ info->isNameCandidate = FALSE;
+ cppUngetc (c);
+ vStringClear (Signature);
+ skipMacro (st);
+ depth = 0;
+ vStringChop (Signature);
+ }
+ else if (isType (token, TOKEN_PAREN_NAME))
+ {
+ c = skipToNonWhite ();
+ if (c == '*') /* check for function pointer */
+ {
+ skipToMatch ("()");
+ c = skipToNonWhite ();
+ if (c == '(')
+ skipToMatch ("()");
+ else
+ cppUngetc (c);
+ }
+ else
+ {
+ /* Push back both characters and stop scanning: the
+ * nested group is flagged for the caller. */
+ cppUngetc (c);
+ cppUngetc ('(');
+ info->nestedArgs = TRUE;
+ }
+ }
+ else
+ ++depth;
+ break;
+
+ default:
+ if (c == '@' && isLanguage (Lang_java))
+ {
+ parseJavaAnnotation(st);
+ }
+ else if (isident1 (c))
+ {
+ /* more than one identifier per parameter rules out K&R */
+ if (++identifierCount > 1)
+ info->isKnrParamList = FALSE;
+ readIdentifier (token, c);
+ if (isType (token, TOKEN_NAME) && info->isNameCandidate)
+ token->type = TOKEN_PAREN_NAME;
+ else if (isType (token, TOKEN_KEYWORD))
+ {
+ if (token->keyword != KEYWORD_CONST &&
+ token->keyword != KEYWORD_VOLATILE)
+ {
+ info->isKnrParamList = FALSE;
+ info->isNameCandidate = FALSE;
+ }
+ }
+ }
+ else
+ {
+ /* any other character invalidates the group */
+ info->isParamList = FALSE;
+ info->isKnrParamList = FALSE;
+ info->isNameCandidate = FALSE;
+ info->invalidContents = TRUE;
+ }
+ break;
+ }
+ firstChar = FALSE;
+ /* Keep examining characters only while the group might still be a K&R
+ * parameter list or a parenthesized name. */
+ } while (! info->nestedArgs && depth > 0 &&
+ (info->isKnrParamList || info->isNameCandidate));
+
+ /* Skip whatever remains of the group(s) still open. */
+ if (! info->nestedArgs) while (depth > 0)
+ {
+ skipToMatch ("()");
+ --depth;
+ }
+
+ if (! info->isNameCandidate)
+ initToken (token);
+
+ vStringTerminate (Signature);
+ if (info->isKnrParamList)
+ vStringClear (Signature);
+ CollectingSignature = FALSE;
+ return nextChar;
+}
+
+/* Resets `info' before a parenthesized group is parsed. The group starts
+ * out assumed to be a parameter list and a name candidate; it is assumed to
+ * be a K&R parameter list only when the language is C.
+ */
+static void initParenInfo (parenInfo *const info)
+{
+	/* optimistic assumptions, withdrawn by parseParens() as needed */
+	info->isParamList     = TRUE;
+	info->isNameCandidate = TRUE;
+	info->isKnrParamList  = isLanguage (Lang_c);
+
+	/* nothing observed yet */
+	info->isPointer       = FALSE;
+	info->invalidContents = FALSE;
+	info->nestedArgs      = FALSE;
+	info->parameterCount  = 0;
+}
+
+/* Handles a '(' met by nextToken(). The parenthesized group is parsed with
+ * parseParens() and then classified, using the character that follows it, as
+ * one of: invalid contents (statement reinitialized), a parenthesized name
+ * (processed as a name), an argument list (TOKEN_ARGS), or nothing of
+ * interest (TOKEN_NONE).
+ */
+static void analyzeParens (statementInfo *const st)
+{
+ tokenInfo *const prev = prevToken (st, 1);
+
+ if (st->inFunction && ! st->assignment)
+ st->notVariable = TRUE;
+ if (! isType (prev, TOKEN_NONE)) /* in case of ignored enclosing macros */
+ {
+ tokenInfo *const token = activeToken (st);
+ parenInfo info;
+ int c;
+
+ initParenInfo (&info);
+ parseParens (st, &info);
+ /* peek at the first non-white character after the group */
+ c = skipToNonWhite ();
+ cppUngetc (c);
+ if (info.invalidContents)
+ reinitStatement (st, FALSE);
+ else if (info.isNameCandidate && isType (token, TOKEN_PAREN_NAME) &&
+ ! st->gotParenName &&
+ (! info.isParamList || ! st->haveQualifyingName ||
+ c == '(' ||
+ (c == '=' && st->implementation != IMP_VIRTUAL) ||
+ (st->declaration == DECL_NONE && isOneOf (c, ",;"))))
+ {
+ /* The group held a name in parentheses: treat it as the name. */
+ token->type = TOKEN_NAME;
+ processName (st);
+ st->gotParenName = TRUE;
+ if (! (c == '(' && info.nestedArgs))
+ st->isPointer = info.isPointer;
+ }
+ else if (! st->gotArgs && info.isParamList)
+ {
+ /* First parameter list of the statement. */
+ st->gotArgs = TRUE;
+ setToken (st, TOKEN_ARGS);
+ advanceToken (st);
+ if (st->scope != SCOPE_TYPEDEF)
+ analyzePostParens (st, &info);
+ }
+ else
+ setToken (st, TOKEN_NONE);
+ }
+}
+
+/*
+* Token parsing functions
+*/
+
+/* Appends the name in `token' to the context (qualifying scope) of the
+ * statement. Tokens that are not names are ignored. When the context is
+ * already non-empty a separator is inserted first: "::" for C and C++, "."
+ * for Java and C#, nothing for any other language.
+ */
+static void addContext (statementInfo *const st, const tokenInfo* const token)
+{
+	vString *scope;
+
+	if (! isType (token, TOKEN_NAME))
+		return;
+
+	scope = st->context->name;
+	if (vStringLength (scope) > 0)
+	{
+		const char *separator = NULL;
+
+		if (isLanguage (Lang_c) || isLanguage (Lang_cpp))
+			separator = "::";
+		else if (isLanguage (Lang_java) || isLanguage (Lang_csharp))
+			separator = ".";
+
+		if (separator != NULL)
+			vStringCatS (scope, separator);
+	}
+	vStringCat (scope, token->name);
+	st->context->type = TOKEN_NAME;
+}
+
+/* Returns TRUE if a declaration of kind `decl' may name parents: always for
+ * classes, structs and interfaces, and for enums only when parsing C#.
+ */
+static boolean inheritingDeclaration (declType decl)
+{
+	switch (decl)
+	{
+		case DECL_ENUM:
+			/* C# supports inheritance for enums. C++0x will too, but not yet. */
+			return (boolean) (isLanguage (Lang_csharp));
+
+		case DECL_CLASS:
+		case DECL_STRUCT:
+		case DECL_INTERFACE:
+			return TRUE;
+
+		default:
+			return FALSE;
+	}
+}
+
+/* Handles a ':' met by nextToken(). Depending on what follows and on the
+ * statement state it is one of:
+ *   - "::", which sets TOKEN_DOUBLE_COLON;
+ *   - the start of a parent list of a C++/C# class-like declaration;
+ *   - inside a struct: the rest is skipped up to the next ',' or ';';
+ *   - otherwise: the statement is reinitialized when the previous token is
+ *     the "default" keyword, the one before it is "case", or the statement
+ *     has a parent.
+ */
+static void processColon (statementInfo *const st)
+{
+	/* For C++ the very next character is examined; for other languages
+	 * white space is skipped first. */
+	const int next = (isLanguage (Lang_cpp) ? cppGetc () : skipToNonWhite ());
+
+	if (next == ':')
+	{
+		setToken (st, TOKEN_DOUBLE_COLON);
+		st->haveQualifyingName = FALSE;
+		return;
+	}
+
+	cppUngetc (next);
+	if ((isLanguage (Lang_cpp) || isLanguage (Lang_csharp)) &&
+		inheritingDeclaration (st->declaration))
+	{
+		readParents (st, ':');
+		return;
+	}
+
+	if (parentDecl (st) == DECL_STRUCT)
+	{
+		switch (skipToOneOf (",;"))
+		{
+			case ',': setToken (st, TOKEN_COMMA);     break;
+			case ';': setToken (st, TOKEN_SEMICOLON); break;
+			default:  break;
+		}
+		return;
+	}
+
+	if (prevToken (st, 1)->keyword == KEYWORD_DEFAULT ||
+		prevToken (st, 2)->keyword == KEYWORD_CASE ||
+		st->parent != NULL)
+	{
+		reinitStatement (st, FALSE);
+	}
+}
+
+/* Skips over any initializing value which may follow an '=' character in a
+ * variable definition, and returns the character that ended it: ',' or ';',
+ * or '}' when inside an enum body. Bracketed groups are skipped as a whole.
+ * A '0' seen while the statement is marked virtual marks it pure virtual.
+ * Raises ExceptionFormattingError on EOF, and ExceptionBraceFormattingError
+ * on an unexpected '}' when isBraceFormat() is false.
+ */
+static int skipInitializer (statementInfo *const st)
+{
+	for (;;)
+	{
+		const int ch = skipToNonWhite ();
+
+		if (ch == EOF)
+			longjmp (Exception, (int) ExceptionFormattingError);
+
+		switch (ch)
+		{
+			case ',':
+			case ';':
+				return ch;
+
+			case '0':
+				if (st->implementation == IMP_VIRTUAL)
+					st->implementation = IMP_PURE_VIRTUAL;
+				break;
+
+			case '[': skipToMatch ("[]"); break;
+			case '(': skipToMatch ("()"); break;
+			case '{': skipToMatch ("{}"); break;
+			case '<': processAngleBracket(); break;
+
+			case '}':
+				if (insideEnumBody (st))
+					return ch;
+				if (! isBraceFormat ())
+				{
+					verbose ("%s: unexpected closing brace at line %lu\n",
+						getInputFileName (), getInputLineNumber ());
+					longjmp (Exception, (int) ExceptionBraceFormattingError);
+				}
+				break;
+
+			default:
+				break;
+		}
+	}
+}
+
+/* Handles an '=' met by nextToken(). If another '=' follows immediately
+ * ("=="), nothing else is done. Otherwise the initializer is skipped, the
+ * statement is flagged as an assignment, and the active token is set from
+ * the character that ended the initializer. An extern scope is changed to
+ * global scope.
+ */
+static void processInitializer (statementInfo *const st)
+{
+	const boolean enumBody = insideEnumBody (st);
+	int ch = cppGetc ();
+
+	if (ch == '=')
+		return;
+
+	cppUngetc (ch);
+	ch = skipInitializer (st);
+	st->assignment = TRUE;
+
+	switch (ch)
+	{
+		case ';':
+			setToken (st, TOKEN_SEMICOLON);
+			break;
+
+		case ',':
+			setToken (st, TOKEN_COMMA);
+			break;
+
+		case '}':
+			/* The closing brace of an enum body is pushed back and the
+			 * last enumerator is ended as if by a comma. */
+			if (enumBody)
+			{
+				cppUngetc (ch);
+				setToken (st, TOKEN_COMMA);
+			}
+			break;
+
+		default:
+			break;
+	}
+
+	if (st->scope == SCOPE_EXTERN)
+		st->scope = SCOPE_GLOBAL;
+}
+
+/* Reads the identifier starting with character `c' into the active token
+ * and, unless the resulting token has type TOKEN_NONE, processes it.
+ */
+static void parseIdentifier (statementInfo *const st, const int c)
+{
+	tokenInfo *const target = activeToken (st);
+
+	readIdentifier (target, c);
+	if (isType (target, TOKEN_NONE))
+		return;
+	processToken (target, st);
+}
+
+/* Handles every character that nextToken() does not dispatch explicitly:
+ * identifiers, member access ('.', "->"), the operators starting with '!'
+ * or '>', Java annotations, and extern "C" declarations.
+ */
+static void parseGeneralToken (statementInfo *const st, const int c)
+{
+	/* Must be captured before parsing, which can change the active token. */
+	const tokenInfo *const before = prevToken (st, 1);
+
+	if (isident1 (c) || (isLanguage (Lang_java) && isHighChar (c)))
+	{
+		parseIdentifier (st, c);
+		/* Two names in a row: the collected context is discarded. */
+		if (isType (st->context, TOKEN_NAME) &&
+			isType (activeToken (st), TOKEN_NAME) &&
+			isType (before, TOKEN_NAME))
+		{
+			initToken (st->context);
+		}
+		return;
+	}
+
+	switch (c)
+	{
+		case '.':
+		case '-':
+			if (! st->assignment)
+				st->notVariable = TRUE;
+			if (c == '-')
+			{
+				/* consume the '>' of "->" */
+				const int follow = cppGetc ();
+				if (follow != '>')
+					cppUngetc (follow);
+			}
+			break;
+
+		case '!':
+		case '>':
+		{
+			/* consume the '=' of "!=" or ">=" */
+			const int follow = cppGetc ();
+			if (follow != '=')
+				cppUngetc (follow);
+			break;
+		}
+
+		case '@':
+			if (isLanguage (Lang_java))
+			{
+				parseJavaAnnotation (st);
+				break;
+			}
+			/* fall through */
+
+		default:
+			if (isExternCDecl (st, c))
+			{
+				st->declaration = DECL_NOMANGLE;
+				st->scope = SCOPE_GLOBAL;
+			}
+			break;
+	}
+}
+
+/* Reads characters from the pre-processor and assembles tokens, setting
+ * the current statement state. Returns once the active token has a type
+ * other than TOKEN_NONE; raises ExceptionEOF at end of input.
+ */
+static void nextToken (statementInfo *const st)
+{
+	do
+	{
+		const int ch = skipToNonWhite ();
+
+		if (ch == EOF)
+			longjmp (Exception, (int) ExceptionEOF);
+
+		switch (ch)
+		{
+			case '(': analyzeParens (st);                 break;
+			case '<': processAngleBracket ();             break;
+			case '*': st->haveQualifyingName = FALSE;     break;
+			case ',': setToken (st, TOKEN_COMMA);         break;
+			case ':': processColon (st);                  break;
+			case ';': setToken (st, TOKEN_SEMICOLON);     break;
+			case '=': processInitializer (st);            break;
+			case '[': skipToMatch ("[]");                 break;
+			case '{': setToken (st, TOKEN_BRACE_OPEN);    break;
+			case '}': setToken (st, TOKEN_BRACE_CLOSE);   break;
+			default:  parseGeneralToken (st, ch);         break;
+		}
+	} while (isType (activeToken (st), TOKEN_NONE));
+}
+
+/*
+* Scanning support functions
+*/
+
+/* The most recently created statement that has not been deleted yet.
+ * newStatement() points it at each new statement; deleteStatement() resets
+ * it to the parent of the statement it frees.
+ */
+static statementInfo *CurrentStatement = NULL;
+
+/* Allocates and initializes a statement whose parent is `parent', together
+ * with its tokens, context token, block-name token and parent-class string,
+ * and makes it the current statement.
+ */
+static statementInfo *newStatement (statementInfo *const parent)
+{
+	statementInfo *const fresh = xMalloc (1, statementInfo);
+	unsigned int slot = 0;
+
+	while (slot < (unsigned int) NumTokens)
+	{
+		fresh->token [slot] = newToken ();
+		++slot;
+	}
+
+	fresh->context = newToken ();
+	fresh->blockName = newToken ();
+	fresh->parentClasses = vStringNew ();
+
+	initStatement (fresh, parent);
+	CurrentStatement = fresh;
+
+	return fresh;
+}
+
+/* Frees the current statement and everything newStatement() allocated for
+ * it, then makes its parent the current statement.
+ */
+static void deleteStatement (void)
+{
+	statementInfo *const doomed = CurrentStatement;
+	statementInfo *const outer = doomed->parent;
+	unsigned int slot;
+
+	for (slot = 0 ; slot < (unsigned int) NumTokens ; ++slot)
+	{
+		deleteToken (doomed->token [slot]);
+		doomed->token [slot] = NULL;
+	}
+
+	deleteToken (doomed->blockName);
+	doomed->blockName = NULL;
+
+	deleteToken (doomed->context);
+	doomed->context = NULL;
+
+	vStringDelete (doomed->parentClasses);
+	doomed->parentClasses = NULL;
+
+	eFree (doomed);
+	CurrentStatement = outer;
+}
+
+/* Frees the current statement and all of its ancestors. */
+static void deleteAllStatements (void)
+{
+	while (CurrentStatement != NULL)
+	{
+		deleteStatement ();
+	}
+}
+
+/* Returns TRUE if the active token ends the statement: always for a
+ * semicolon, and for a closing brace under the conditions described below.
+ */
+static boolean isStatementEnd (const statementInfo *const st)
+{
+	const tokenInfo *const last = activeToken (st);
+
+	if (isType (last, TOKEN_SEMICOLON))
+		return TRUE;
+	if (! isType (last, TOKEN_BRACE_CLOSE))
+		return FALSE;
+
+	/* Java and C# do not require semicolons to end a block. Neither do C++
+	 * namespaces. All other blocks require a semicolon to terminate them.
+	 */
+	return (boolean) (isLanguage (Lang_java) || isLanguage (Lang_csharp) ||
+		! isContextualStatement (st));
+}
+
+/* Moves the statement on after a token has been handled:
+ *   - after a comma the statement is reinitialized with TRUE as the second
+ *     argument of reinitStatement();
+ *   - at the end of the statement it is reinitialized with FALSE and the
+ *     pre-processor is told the statement ended;
+ *   - otherwise the pre-processor is told a statement is in progress and
+ *     the statement advances to its next token.
+ */
+static void checkStatementEnd (statementInfo *const st)
+{
+	if (isType (activeToken (st), TOKEN_COMMA))
+	{
+		reinitStatement (st, TRUE);
+		return;
+	}
+
+	if (! isStatementEnd (st))
+	{
+		cppBeginStatement ();
+		advanceToken (st);
+		return;
+	}
+
+	DebugStatement ( if (debug (DEBUG_PARSE)) printf ("<ES>"); )
+	reinitStatement (st, FALSE);
+	cppEndStatement ();
+}
+
+/* Handles the body of a brace-delimited block whose opening brace has just
+ * been read. Bodies of classes, enums, interfaces, namespaces, extern "C"
+ * blocks, structs and unions are always parsed recursively. Any other body
+ * is parsed only when local tags are requested and skipped otherwise; for
+ * functions and tasks the statement is also flagged as being inside a
+ * function. Afterwards the statement advances one token and that token is
+ * set to TOKEN_BRACE_CLOSE.
+ */
+static void nest (statementInfo *const st, const unsigned int nestLevel)
+{
+	const declType kind = st->declaration;
+	boolean parseBody;
+
+	switch (kind)
+	{
+		case DECL_CLASS:
+		case DECL_ENUM:
+		case DECL_INTERFACE:
+		case DECL_NAMESPACE:
+		case DECL_NOMANGLE:
+		case DECL_STRUCT:
+		case DECL_UNION:
+			parseBody = TRUE;
+			break;
+
+		default:
+			if (kind == DECL_FUNCTION || kind == DECL_TASK)
+				st->inFunction = TRUE;
+			parseBody = includeTag (TAG_LOCAL, FALSE);
+			break;
+	}
+
+	if (parseBody)
+		createTags (nestLevel, st);
+	else
+		skipToMatch ("{}");
+
+	advanceToken (st);
+	setToken (st, TOKEN_BRACE_CLOSE);
+}
+
+/* Examines the active token together with the two tokens before it and, if
+ * they form something worth tagging, emits the tag through makeTag() or one
+ * of the qualify*Tag() helpers. Also records the name of a block being
+ * opened in st->blockName.
+ */
+static void tagCheck (statementInfo *const st)
+{
+ const tokenInfo *const token = activeToken (st);
+ const tokenInfo *const prev = prevToken (st, 1);
+ const tokenInfo *const prev2 = prevToken (st, 2);
+
+ switch (token->type)
+ {
+ /* A name inside an enum body is an enumerator. */
+ case TOKEN_NAME:
+ if (insideEnumBody (st))
+ qualifyEnumeratorTag (st, token);
+ break;
+#if 0
+ case TOKEN_PACKAGE:
+ if (st->haveQualifyingName)
+ makeTag (token, st, FALSE, TAG_PACKAGE);
+ break;
+#endif
+ case TOKEN_BRACE_OPEN:
+ /* "name (args) {": a function definition. */
+ if (isType (prev, TOKEN_ARGS))
+ {
+ if (st->haveQualifyingName)
+ {
+ if (! isLanguage (Lang_vera))
+ st->declaration = DECL_FUNCTION;
+ if (isType (prev2, TOKEN_NAME))
+ copyToken (st->blockName, prev2);
+ qualifyFunctionTag (st, prev2);
+ }
+ }
+ else if (isContextualStatement (st) ||
+ st->declaration == DECL_NAMESPACE ||
+ st->declaration == DECL_PROGRAM)
+ {
+ if (isType (prev, TOKEN_NAME))
+ copyToken (st->blockName, prev);
+ else
+ {
+ /* For an anonymous struct or union we use a unique ID,
+ * a number, so that the members can be found.
+ */
+ char buf [20]; /* length of "__anon" + digits + null */
+ sprintf (buf, "__anon%d", ++AnonymousID);
+ vStringCopyS (st->blockName->name, buf);
+ st->blockName->type = TOKEN_NAME;
+ st->blockName->keyword = KEYWORD_NONE;
+ }
+ qualifyBlockTag (st, prev);
+ }
+ else if (isLanguage (Lang_csharp))
+ makeTag (prev, st, FALSE, TAG_PROPERTY);
+ break;
+
+ case TOKEN_SEMICOLON:
+ case TOKEN_COMMA:
+ if (insideEnumBody (st))
+ ;
+ else if (isType (prev, TOKEN_NAME))
+ {
+ if (isContextualKeyword (prev2))
+ makeTag (prev, st, TRUE, TAG_EXTERN_VAR);
+ else
+ qualifyVariableTag (st, prev);
+ }
+ /* "name (args);": a variable if the parenthesized part was a
+ * pointer declarator, otherwise a function declaration. */
+ else if (isType (prev, TOKEN_ARGS) && isType (prev2, TOKEN_NAME))
+ {
+ if (st->isPointer)
+ qualifyVariableTag (st, prev2);
+ else
+ qualifyFunctionDeclTag (st, prev2);
+ }
+ if (isLanguage (Lang_java) && token->type == TOKEN_SEMICOLON && insideEnumBody (st))
+ {
+ /* In Java, after an initial enum-like part,
+ * a semicolon introduces a class-like part.
+ * See Bug #1730485 for the full rationale. */
+ st->parent->declaration = DECL_CLASS;
+ }
+ break;
+
+ default: break;
+ }
+}
+
+/* Parses the current file and decides whether to write out any tags that
+ * are discovered. Called with nestLevel 0 for the file as a whole and
+ * recursively, through nest(), for the body of each nested block.
+ *
+ * A nested call returns at the closing brace of its block. At nestLevel 0
+ * a closing brace is an error and raises ExceptionBraceFormattingError.
+ */
+static void createTags (const unsigned int nestLevel,
+	statementInfo *const parent)
+{
+	statementInfo *const st = newStatement (parent);
+
+	DebugStatement ( if (nestLevel > 0) debugParseNest (TRUE, nestLevel); )
+	for (;;)
+	{
+		tokenInfo *current;
+
+		nextToken (st);
+		current = activeToken (st);
+
+		if (isType (current, TOKEN_BRACE_CLOSE))
+		{
+			if (nestLevel > 0)
+				break;
+			verbose ("%s: unexpected closing brace at line %lu\n",
+				getInputFileName (), getInputLineNumber ());
+			longjmp (Exception, (int) ExceptionBraceFormattingError);
+		}
+
+		if (isType (current, TOKEN_DOUBLE_COLON))
+		{
+			/* The name before "::" becomes part of the context. */
+			addContext (st, prevToken (st, 1));
+			advanceToken (st);
+			continue;
+		}
+
+		tagCheck (st);
+		if (isType (current, TOKEN_BRACE_OPEN))
+			nest (st, nestLevel + 1);
+		checkStatementEnd (st);
+	}
+	deleteStatement ();
+	DebugStatement ( if (nestLevel > 0) debugParseNest (FALSE, nestLevel - 1); )
+}
+
+/* Runs one parsing pass over the current input file.
+ *
+ * passCount is the number of this pass (asserted to be below 3); whether it
+ * is greater than 1 is forwarded to cppInit() as its first argument.
+ *
+ * Returns TRUE to ask for another pass. This happens only when the first
+ * pass (passCount == 1) was aborted with ExceptionBraceFormattingError. Any
+ * exception, including the normal ExceptionEOF, frees all statements that
+ * are still allocated.
+ */
+static boolean findCTags (const unsigned int passCount)
+{
+	/* Set before setjmp() and not touched again until after a longjmp()
+	 * has returned here, so its value is always determinate. */
+	boolean retry = FALSE;
+
+	Assert (passCount < 3);
+	cppInit ((boolean) (passCount > 1), isLanguage (Lang_csharp));
+	Signature = vStringNew ();
+
+	/* The C standard defines setjmp() only in a few contexts, such as the
+	 * entire controlling expression of a switch. Assigning its result to a
+	 * variable is not one of them, hence the switch.
+	 */
+	switch (setjmp (Exception))
+	{
+		case ExceptionNone:		/* direct return from setjmp() */
+			createTags (0, NULL);
+			break;
+
+		case ExceptionBraceFormattingError:
+			deleteAllStatements ();
+			if (passCount == 1)
+			{
+				retry = TRUE;
+				verbose ("%s: retrying file with fallback brace matching algorithm\n",
+					getInputFileName ());
+			}
+			break;
+
+		default:
+			deleteAllStatements ();
+			break;
+	}
+	vStringDelete (Signature);
+	cppTerminate ();
+	return retry;
+}
+
+/* Registers with `language' every KeywordTable entry whose isValid [idx]
+ * flag is set.
+ */
+static void buildKeywordHash (const langType language, unsigned int idx)
+{
+	const keywordDesc *entry = KeywordTable;
+	const keywordDesc *const end =
+		KeywordTable + sizeof (KeywordTable) / sizeof (KeywordTable [0]);
+
+	for ( ; entry != end ; ++entry)
+	{
+		if (entry->isValid [idx])
+			addKeyword (entry->name, language, (int) entry->id);
+	}
+}
+
+/* Remembers the language id assigned to C and registers the keywords whose
+ * isValid [0] flag is set.
+ */
+static void initializeCParser (const langType language)
+{
+	const unsigned int column = 0;	/* index of C in keywordDesc.isValid [] */
+
+	Lang_c = language;
+	buildKeywordHash (language, column);
+}
+
+/* Remembers the language id assigned to C++ and registers the keywords whose
+ * isValid [1] flag is set.
+ */
+static void initializeCppParser (const langType language)
+{
+	const unsigned int column = 1;	/* index of C++ in keywordDesc.isValid [] */
+
+	Lang_cpp = language;
+	buildKeywordHash (language, column);
+}
+
+/* Remembers the language id assigned to C# and registers the keywords whose
+ * isValid [2] flag is set.
+ */
+static void initializeCsharpParser (const langType language)
+{
+	const unsigned int column = 2;	/* index of C# in keywordDesc.isValid [] */
+
+	Lang_csharp = language;
+	buildKeywordHash (language, column);
+}
+
+/* Remembers the language id assigned to Java and registers the keywords
+ * whose isValid [3] flag is set.
+ */
+static void initializeJavaParser (const langType language)
+{
+	const unsigned int column = 3;	/* index of Java in keywordDesc.isValid [] */
+
+	Lang_java = language;
+	buildKeywordHash (language, column);
+}
+
+/* Remembers the language id assigned to Vera and registers the keywords
+ * whose isValid [4] flag is set.
+ */
+static void initializeVeraParser (const langType language)
+{
+	const unsigned int column = 4;	/* index of Vera in keywordDesc.isValid [] */
+
+	Lang_vera = language;
+	buildKeywordHash (language, column);
+}
+
+/* Creates the parser definition for C: files with extension "c", tag kinds
+ * from CKinds, parsed by findCTags().
+ */
+extern parserDefinition* CParser (void)
+{
+	static const char *const extensions [] = { "c", NULL };
+	parserDefinition *const parser = parserNew ("C");
+
+	parser->extensions = extensions;
+	parser->kinds      = CKinds;
+	parser->kindCount  = KIND_COUNT (CKinds);
+	parser->initialize = initializeCParser;
+	parser->parser2    = findCTags;
+	return parser;
+}
+
+/* Creates the parser definition for C++. It shares the CKinds tag kinds and
+ * findCTags() with the C parser. The upper-case extensions "C" and "H" are
+ * registered only when CASE_INSENSITIVE_FILENAMES is not defined.
+ */
+extern parserDefinition* CppParser (void)
+{
+	static const char *const extensions [] = {
+		"c++", "cc", "cp", "cpp", "cxx", "h", "h++", "hh", "hp", "hpp", "hxx",
+#ifndef CASE_INSENSITIVE_FILENAMES
+		"C", "H",
+#endif
+		NULL
+	};
+	parserDefinition *const parser = parserNew ("C++");
+
+	parser->extensions = extensions;
+	parser->kinds      = CKinds;
+	parser->kindCount  = KIND_COUNT (CKinds);
+	parser->initialize = initializeCppParser;
+	parser->parser2    = findCTags;
+	return parser;
+}
+
+/* Creates the parser definition for C#: files with extension "cs", tag
+ * kinds from CsharpKinds, parsed by findCTags().
+ */
+extern parserDefinition* CsharpParser (void)
+{
+	static const char *const extensions [] = { "cs", NULL };
+	parserDefinition *const parser = parserNew ("C#");
+
+	parser->extensions = extensions;
+	parser->kinds      = CsharpKinds;
+	parser->kindCount  = KIND_COUNT (CsharpKinds);
+	parser->initialize = initializeCsharpParser;
+	parser->parser2    = findCTags;
+	return parser;
+}
+
+/* Creates the parser definition for Java: files with extension "java", tag
+ * kinds from JavaKinds, parsed by findCTags().
+ */
+extern parserDefinition* JavaParser (void)
+{
+	static const char *const extensions [] = { "java", NULL };
+	parserDefinition *const parser = parserNew ("Java");
+
+	parser->extensions = extensions;
+	parser->kinds      = JavaKinds;
+	parser->kindCount  = KIND_COUNT (JavaKinds);
+	parser->initialize = initializeJavaParser;
+	parser->parser2    = findCTags;
+	return parser;
+}
+
+/* Creates the parser definition for Vera: files with extension "vr", "vri"
+ * or "vrh", tag kinds from VeraKinds, parsed by findCTags().
+ */
+extern parserDefinition* VeraParser (void)
+{
+	static const char *const extensions [] = { "vr", "vri", "vrh", NULL };
+	parserDefinition *const parser = parserNew ("Vera");
+
+	parser->extensions = extensions;
+	parser->kinds      = VeraKinds;
+	parser->kindCount  = KIND_COUNT (VeraKinds);
+	parser->initialize = initializeVeraParser;
+	parser->parser2    = findCTags;
+	return parser;
+}
+
+/* vi:set tabstop=4 shiftwidth=4 noexpandtab: */
diff --git a/cobol.c b/cobol.c
new file mode 100644
index 0000000..e3cdb3e
--- /dev/null
+++ b/cobol.c
@@ -0,0 +1,50 @@
+/*
+* $Id: cobol.c 443 2006-05-30 04:37:13Z darren $
+*
+* Copyright (c) 2000-2003, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* This module contains functions for generating tags for COBOL language
+* files.
+*/
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+#include "parse.h"
+
+/*
+* FUNCTION DEFINITIONS
+*/
+
+/* Registers the tag regular expressions for COBOL with `language'. Each
+ * rule tags the text of its first capture group ("\\1") and is registered
+ * with the flags "i". The rules are registered in table order.
+ */
+static void installCobolRegex (const langType language)
+{
+	static const struct {
+		const char *pattern;	/* regular expression to match */
+		const char *kind;		/* "letter,name,description" of the tag kind */
+	} rules [] = {
+		{ "^[ \t]*[0-9]+[ \t]+([A-Z0-9][A-Z0-9-]*)[ \t]+(BLANK|OCCURS|IS|JUST|PIC|REDEFINES|RENAMES|SIGN|SYNC|USAGE|VALUE)",
+		  "d,data,data items" },
+		{ "^[ \t]*[FSR]D[ \t]+([A-Z0-9][A-Z0-9-]*)\\.",
+		  "f,file,file descriptions (FD, SD, RD)" },
+		{ "^[ \t]*[0-9]+[ \t]+([A-Z0-9][A-Z0-9-]*)\\.",
+		  "g,group,group items" },
+		{ "^[ \t]*([A-Z0-9][A-Z0-9-]*)\\.",
+		  "p,paragraph,paragraphs" },
+		{ "^[ \t]*PROGRAM-ID\\.[ \t]+([A-Z0-9][A-Z0-9-]*)\\.",
+		  "P,program,program ids" },
+		{ "^[ \t]*([A-Z0-9][A-Z0-9-]*)[ \t]+SECTION\\.",
+		  "s,section,sections" }
+	};
+	size_t i;
+
+	for (i = 0 ; i < sizeof (rules) / sizeof (rules [0]) ; ++i)
+		addTagRegex (language, rules [i].pattern, "\\1", rules [i].kind, "i");
+}
+
+/* Creates the parser definition for COBOL. The parser is regex based: it
+ * registers the extensions "cbl", "cob", "CBL" and "COB", and its patterns
+ * are installed by installCobolRegex().
+ *
+ * The parameter list is spelled (void), as in the other parser definitions,
+ * so that the definition is a prototype rather than an old-style declarator
+ * with unspecified parameters.
+ */
+extern parserDefinition* CobolParser (void)
+{
+	static const char *const extensions [] = {
+		"cbl", "cob", "CBL", "COB", NULL };
+	parserDefinition* def = parserNew ("Cobol");
+	def->extensions = extensions;
+	def->initialize = installCobolRegex;
+	def->regex = TRUE;
+	return def;
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/config.h.in b/config.h.in
new file mode 100644
index 0000000..a45375c
--- /dev/null
+++ b/config.h.in
@@ -0,0 +1,277 @@
+/* config.h.in. Generated from configure.ac by autoheader. */
+
+/* Define this label if your system uses case-insensitive file names */
+#undef CASE_INSENSITIVE_FILENAMES
+
+/* Define this label if you wish to check the regcomp() function at run time
+ for correct behavior. This function is currently broken on Cygwin. */
+#undef CHECK_REGCOMP
+
+/* You can define this label to be a string containing the name of a
+ site-specific configuration file containing site-wide default options. The
+ files /etc/ctags.conf and /usr/local/etc/ctags.conf are already checked, so
+ only define one here if you need a file somewhere else. */
+#undef CUSTOM_CONFIGURATION_FILE
+
+
+/* Define this as desired.
+ * 1: Original ctags format
+ * 2: Extended ctags format with extension flags in EX-style comment.
+ */
+#define DEFAULT_FILE_FORMAT 2
+
+
+
+/* Define this label to use the system sort utility (which is probably more
+* efficient) over the internal sorting algorithm.
+*/
+#ifndef INTERNAL_SORT
+# undef EXTERNAL_SORT
+#endif
+
+
+/* Define to 1 if you have the `chmod' function. */
+#undef HAVE_CHMOD
+
+/* Define to 1 if you have the `chsize' function. */
+#undef HAVE_CHSIZE
+
+/* Define to 1 if you have the `clock' function. */
+#undef HAVE_CLOCK
+
+/* Define to 1 if you have the <dirent.h> header file. */
+#undef HAVE_DIRENT_H
+
+/* Define to 1 if you have the <fcntl.h> header file. */
+#undef HAVE_FCNTL_H
+
+/* Define to 1 if you have the `fgetpos' function. */
+#undef HAVE_FGETPOS
+
+/* Define to 1 if you have the `findfirst' function. */
+#undef HAVE_FINDFIRST
+
+/* Define to 1 if you have the `fnmatch' function. */
+#undef HAVE_FNMATCH
+
+/* Define to 1 if you have the <fnmatch.h> header file. */
+#undef HAVE_FNMATCH_H
+
+/* Define to 1 if you have the `ftruncate' function. */
+#undef HAVE_FTRUNCATE
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+#undef HAVE_INTTYPES_H
+
+/* Define to 1 if you have the <memory.h> header file. */
+#undef HAVE_MEMORY_H
+
+/* Define to 1 if you have the `mkstemp' function. */
+#undef HAVE_MKSTEMP
+
+/* Define to 1 if you have the `opendir' function. */
+#undef HAVE_OPENDIR
+
+/* Define to 1 if you have the `putenv' function. */
+#undef HAVE_PUTENV
+
+/* Define to 1 if you have the `regcomp' function. */
+#undef HAVE_REGCOMP
+
+/* Define to 1 if you have the `remove' function. */
+#undef HAVE_REMOVE
+
+/* Define to 1 if you have the `setenv' function. */
+#undef HAVE_SETENV
+
+/* Define to 1 if you have the <stat.h> header file. */
+#undef HAVE_STAT_H
+
+/* Define this macro if the field "st_ino" exists in struct stat in
+ <sys/stat.h>. */
+#undef HAVE_STAT_ST_INO
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#undef HAVE_STDINT_H
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#undef HAVE_STDLIB_H
+
+/* Define to 1 if you have the `strcasecmp' function. */
+#undef HAVE_STRCASECMP
+
+/* Define to 1 if you have the `strerror' function. */
+#undef HAVE_STRERROR
+
+/* Define to 1 if you have the `stricmp' function. */
+#undef HAVE_STRICMP
+
+/* Define to 1 if you have the <strings.h> header file. */
+#undef HAVE_STRINGS_H
+
+/* Define to 1 if you have the <string.h> header file. */
+#undef HAVE_STRING_H
+
+/* Define to 1 if you have the `strncasecmp' function. */
+#undef HAVE_STRNCASECMP
+
+/* Define to 1 if you have the `strnicmp' function. */
+#undef HAVE_STRNICMP
+
+/* Define to 1 if you have the `strstr' function. */
+#undef HAVE_STRSTR
+
+/* Define to 1 if you have the <sys/dir.h> header file. */
+#undef HAVE_SYS_DIR_H
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#undef HAVE_SYS_STAT_H
+
+/* Define to 1 if you have the <sys/times.h> header file. */
+#undef HAVE_SYS_TIMES_H
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+#undef HAVE_SYS_TYPES_H
+
+/* Define to 1 if you have the `tempnam' function. */
+#undef HAVE_TEMPNAM
+
+/* Define to 1 if you have the `times' function. */
+#undef HAVE_TIMES
+
+/* Define to 1 if you have the <time.h> header file. */
+#undef HAVE_TIME_H
+
+/* Define to 1 if you have the `truncate' function. */
+#undef HAVE_TRUNCATE
+
+/* Define to 1 if you have the <types.h> header file. */
+#undef HAVE_TYPES_H
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#undef HAVE_UNISTD_H
+
+/* Define to 1 if you have the `_findfirst' function. */
+#undef HAVE__FINDFIRST
+
+/* Define as the maximum integer on your system if not defined in <limits.h>. */
+#undef INT_MAX
+
+/* Define to the appropriate size for tmpnam() if <stdio.h> does not define
+ this. */
+#undef L_tmpnam
+
+/* Define this label if you want macro tags (defined labels) to use patterns
+ in the EX command by default (original ctags behavior is to use line
+ numbers). */
+#undef MACROS_USE_PATTERNS
+
+/* If you receive error or warning messages indicating that you are missing a
+ prototype for, or a type mismatch using, the following function, define
+ this label and remake. */
+#undef NEED_PROTO_FGETPOS
+
+/* If you receive error or warning messages indicating that you are missing a
+ prototype for, or a type mismatch using, the following function, define
+ this label and remake. */
+#undef NEED_PROTO_FTRUNCATE
+
+/* If you receive error or warning messages indicating that you are missing a
+ prototype for, or a type mismatch using, the following function, define
+ this label and remake. */
+#undef NEED_PROTO_GETENV
+
+/* If you receive error or warning messages indicating that you are missing a
+ prototype for, or a type mismatch using, the following function, define
+ this label and remake. */
+#undef NEED_PROTO_LSTAT
+
+/* If you receive error or warning messages indicating that you are missing a
+ prototype for, or a type mismatch using, the following function, define
+ this label and remake. */
+#undef NEED_PROTO_MALLOC
+
+/* If you receive error or warning messages indicating that you are missing a
+ prototype for, or a type mismatch using, the following function, define
+ this label and remake. */
+#undef NEED_PROTO_REMOVE
+
+/* If you receive error or warning messages indicating that you are missing a
+ prototype for, or a type mismatch using, the following function, define
+ this label and remake. */
+#undef NEED_PROTO_STAT
+
+/* If you receive error or warning messages indicating that you are missing a
+ prototype for, or a type mismatch using, the following function, define
+ this label and remake. */
+#undef NEED_PROTO_TRUNCATE
+
+/* If you receive error or warning messages indicating that you are missing a
+ prototype for, or a type mismatch using, the following function, define
+ this label and remake. */
+#undef NEED_PROTO_UNLINK
+
+/* Define this if you have a prototype for putenv() in <stdlib.h> that doesn't
+ declare its argument as "const char *". */
+#undef NON_CONST_PUTENV_PROTOTYPE
+
+/* Package name. */
+#undef PACKAGE
+
+/* Define to the address where bug reports for this package should be sent. */
+#undef PACKAGE_BUGREPORT
+
+/* Define to the full name of this package. */
+#undef PACKAGE_NAME
+
+/* Define to the full name and version of this package. */
+#undef PACKAGE_STRING
+
+/* Define to the one symbol short name of this package. */
+#undef PACKAGE_TARNAME
+
+/* Define to the version of this package. */
+#undef PACKAGE_VERSION
+
+/* Define this label if regcomp() is broken. */
+#undef REGCOMP_BROKEN
+
+/* Define this value used by fseek() appropriately if <stdio.h> (or <unistd.h>
+ on SunOS 4.1.x) does not define it. */
+#undef SEEK_SET
+
+/* Define to 1 if you have the ANSI C header files. */
+#undef STDC_HEADERS
+
+/* Define this label if your system supports starting scripts with a line of
+ the form "#! /bin/sh" to select the interpreter to use for the script. */
+#undef SYS_INTERPRETER
+
+/* If you wish to change the directory in which temporary files are stored,
+ define this label to the directory desired. */
+#undef TMPDIR
+
+/* Package version. */
+#undef VERSION
+
+/* This corrects the problem of missing prototypes for certain functions in
+ some GNU installations (e.g. SunOS 4.1.x). */
+#undef __USE_FIXED_PROTOTYPES__
+
+/* Define to the appropriate type if <time.h> does not define this. */
+#undef clock_t
+
+/* Define to empty if `const' does not conform to ANSI C. */
+#undef const
+
+/* Define to long if <stdio.h> does not define this. */
+#undef fpos_t
+
+/* Define to `long int' if <sys/types.h> does not define. */
+#undef off_t
+
+/* Define remove to unlink if you have unlink(), but not remove(). */
+#undef remove
+
+/* Define to `unsigned int' if <sys/types.h> does not define. */
+#undef size_t
diff --git a/configure b/configure
new file mode 100755
index 0000000..f137cf1
--- /dev/null
+++ b/configure
@@ -0,0 +1,7704 @@
+#! /bin/sh
+# Guess values for system-dependent variables and create Makefiles.
+# Generated by GNU Autoconf 2.61.
+#
+# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
+# 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
+# This configure script is free software; the Free Software Foundation
+# gives unlimited permission to copy, distribute and modify it.
+## --------------------- ##
+## M4sh Initialization. ##
+## --------------------- ##
+
+# Be more Bourne compatible
+DUALCASE=1; export DUALCASE # for MKS sh
+if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
+ emulate sh
+ NULLCMD=:
+ # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which
+ # is contrary to our usage. Disable this feature.
+ alias -g '${1+"$@"}'='"$@"'
+ setopt NO_GLOB_SUBST
+else
+ case `(set -o) 2>/dev/null` in
+ *posix*) set -o posix ;;
+esac
+
+fi
+
+
+
+
+# PATH needs CR
+# Avoid depending upon Character Ranges.
+as_cr_letters='abcdefghijklmnopqrstuvwxyz'
+as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+as_cr_Letters=$as_cr_letters$as_cr_LETTERS
+as_cr_digits='0123456789'
+as_cr_alnum=$as_cr_Letters$as_cr_digits
+
+# The user is always right.
+if test "${PATH_SEPARATOR+set}" != set; then
+ echo "#! /bin/sh" >conf$$.sh
+ echo "exit 0" >>conf$$.sh
+ chmod +x conf$$.sh
+ if (PATH="/nonexistent;."; conf$$.sh) >/dev/null 2>&1; then
+ PATH_SEPARATOR=';'
+ else
+ PATH_SEPARATOR=:
+ fi
+ rm -f conf$$.sh
+fi
+
+# Support unset when possible.
+if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then
+ as_unset=unset
+else
+ as_unset=false
+fi
+
+
+# IFS
+# We need space, tab and new line, in precisely that order. Quoting is
+# there to prevent editors from complaining about space-tab.
+# (If _AS_PATH_WALK were called with IFS unset, it would disable word
+# splitting by setting IFS to empty value.)
+as_nl='
+'
+IFS=" "" $as_nl"
+
+# Find who we are. Look in the path if we contain no directory separator.
+case $0 in
+ *[\\/]* ) as_myself=$0 ;;
+ *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break
+done
+IFS=$as_save_IFS
+
+ ;;
+esac
+# We did not find ourselves, most probably we were run as `sh COMMAND'
+# in which case we are not to be found in the path.
+if test "x$as_myself" = x; then
+ as_myself=$0
+fi
+if test ! -f "$as_myself"; then
+ echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2
+ { (exit 1); exit 1; }
+fi
+
+# Work around bugs in pre-3.0 UWIN ksh.
+for as_var in ENV MAIL MAILPATH
+do ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var
+done
+PS1='$ '
+PS2='> '
+PS4='+ '
+
+# NLS nuisances.
+for as_var in \
+ LANG LANGUAGE LC_ADDRESS LC_ALL LC_COLLATE LC_CTYPE LC_IDENTIFICATION \
+ LC_MEASUREMENT LC_MESSAGES LC_MONETARY LC_NAME LC_NUMERIC LC_PAPER \
+ LC_TELEPHONE LC_TIME
+do
+ if (set +x; test -z "`(eval $as_var=C; export $as_var) 2>&1`"); then
+ eval $as_var=C; export $as_var
+ else
+ ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var
+ fi
+done
+
+# Required to use basename.
+if expr a : '\(a\)' >/dev/null 2>&1 &&
+ test "X`expr 00001 : '.*\(...\)'`" = X001; then
+ as_expr=expr
+else
+ as_expr=false
+fi
+
+if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then
+ as_basename=basename
+else
+ as_basename=false
+fi
+
+
+# Name of the executable.
+as_me=`$as_basename -- "$0" ||
+$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \
+ X"$0" : 'X\(//\)$' \| \
+ X"$0" : 'X\(/\)' \| . 2>/dev/null ||
+echo X/"$0" |
+ sed '/^.*\/\([^/][^/]*\)\/*$/{
+ s//\1/
+ q
+ }
+ /^X\/\(\/\/\)$/{
+ s//\1/
+ q
+ }
+ /^X\/\(\/\).*/{
+ s//\1/
+ q
+ }
+ s/.*/./; q'`
+
+# CDPATH.
+$as_unset CDPATH
+
+
+if test "x$CONFIG_SHELL" = x; then
+ if (eval ":") 2>/dev/null; then
+ as_have_required=yes
+else
+ as_have_required=no
+fi
+
+ if test $as_have_required = yes && (eval ":
+(as_func_return () {
+ (exit \$1)
+}
+as_func_success () {
+ as_func_return 0
+}
+as_func_failure () {
+ as_func_return 1
+}
+as_func_ret_success () {
+ return 0
+}
+as_func_ret_failure () {
+ return 1
+}
+
+exitcode=0
+if as_func_success; then
+ :
+else
+ exitcode=1
+ echo as_func_success failed.
+fi
+
+if as_func_failure; then
+ exitcode=1
+ echo as_func_failure succeeded.
+fi
+
+if as_func_ret_success; then
+ :
+else
+ exitcode=1
+ echo as_func_ret_success failed.
+fi
+
+if as_func_ret_failure; then
+ exitcode=1
+ echo as_func_ret_failure succeeded.
+fi
+
+if ( set x; as_func_ret_success y && test x = \"\$1\" ); then
+ :
+else
+ exitcode=1
+ echo positional parameters were not saved.
+fi
+
+test \$exitcode = 0) || { (exit 1); exit 1; }
+
+(
+ as_lineno_1=\$LINENO
+ as_lineno_2=\$LINENO
+ test \"x\$as_lineno_1\" != \"x\$as_lineno_2\" &&
+ test \"x\`expr \$as_lineno_1 + 1\`\" = \"x\$as_lineno_2\") || { (exit 1); exit 1; }
+") 2> /dev/null; then
+ :
+else
+ as_candidate_shells=
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ case $as_dir in
+ /*)
+ for as_base in sh bash ksh sh5; do
+ as_candidate_shells="$as_candidate_shells $as_dir/$as_base"
+ done;;
+ esac
+done
+IFS=$as_save_IFS
+
+
+ for as_shell in $as_candidate_shells $SHELL; do
+ # Try only shells that exist, to save several forks.
+ if { test -f "$as_shell" || test -f "$as_shell.exe"; } &&
+ { ("$as_shell") 2> /dev/null <<\_ASEOF
+if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
+ emulate sh
+ NULLCMD=:
+ # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which
+ # is contrary to our usage. Disable this feature.
+ alias -g '${1+"$@"}'='"$@"'
+ setopt NO_GLOB_SUBST
+else
+ case `(set -o) 2>/dev/null` in
+ *posix*) set -o posix ;;
+esac
+
+fi
+
+
+:
+_ASEOF
+}; then
+ CONFIG_SHELL=$as_shell
+ as_have_required=yes
+ if { "$as_shell" 2> /dev/null <<\_ASEOF
+if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
+ emulate sh
+ NULLCMD=:
+ # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which
+ # is contrary to our usage. Disable this feature.
+ alias -g '${1+"$@"}'='"$@"'
+ setopt NO_GLOB_SUBST
+else
+ case `(set -o) 2>/dev/null` in
+ *posix*) set -o posix ;;
+esac
+
+fi
+
+
+:
+(as_func_return () {
+ (exit $1)
+}
+as_func_success () {
+ as_func_return 0
+}
+as_func_failure () {
+ as_func_return 1
+}
+as_func_ret_success () {
+ return 0
+}
+as_func_ret_failure () {
+ return 1
+}
+
+exitcode=0
+if as_func_success; then
+ :
+else
+ exitcode=1
+ echo as_func_success failed.
+fi
+
+if as_func_failure; then
+ exitcode=1
+ echo as_func_failure succeeded.
+fi
+
+if as_func_ret_success; then
+ :
+else
+ exitcode=1
+ echo as_func_ret_success failed.
+fi
+
+if as_func_ret_failure; then
+ exitcode=1
+ echo as_func_ret_failure succeeded.
+fi
+
+if ( set x; as_func_ret_success y && test x = "$1" ); then
+ :
+else
+ exitcode=1
+ echo positional parameters were not saved.
+fi
+
+test $exitcode = 0) || { (exit 1); exit 1; }
+
+(
+ as_lineno_1=$LINENO
+ as_lineno_2=$LINENO
+ test "x$as_lineno_1" != "x$as_lineno_2" &&
+ test "x`expr $as_lineno_1 + 1`" = "x$as_lineno_2") || { (exit 1); exit 1; }
+
+_ASEOF
+}; then
+ break
+fi
+
+fi
+
+ done
+
+ if test "x$CONFIG_SHELL" != x; then
+ for as_var in BASH_ENV ENV
+ do ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var
+ done
+ export CONFIG_SHELL
+ exec "$CONFIG_SHELL" "$as_myself" ${1+"$@"}
+fi
+
+
+ if test $as_have_required = no; then
+ echo This script requires a shell more modern than all the
+ echo shells that I found on your system. Please install a
+ echo modern shell, or manually run the script under such a
+ echo shell if you do have one.
+ { (exit 1); exit 1; }
+fi
+
+
+fi
+
+fi
+
+
+
+(eval "as_func_return () {
+ (exit \$1)
+}
+as_func_success () {
+ as_func_return 0
+}
+as_func_failure () {
+ as_func_return 1
+}
+as_func_ret_success () {
+ return 0
+}
+as_func_ret_failure () {
+ return 1
+}
+
+exitcode=0
+if as_func_success; then
+ :
+else
+ exitcode=1
+ echo as_func_success failed.
+fi
+
+if as_func_failure; then
+ exitcode=1
+ echo as_func_failure succeeded.
+fi
+
+if as_func_ret_success; then
+ :
+else
+ exitcode=1
+ echo as_func_ret_success failed.
+fi
+
+if as_func_ret_failure; then
+ exitcode=1
+ echo as_func_ret_failure succeeded.
+fi
+
+if ( set x; as_func_ret_success y && test x = \"\$1\" ); then
+ :
+else
+ exitcode=1
+ echo positional parameters were not saved.
+fi
+
+test \$exitcode = 0") || {
+ echo No shell found that supports shell functions.
+ echo Please tell autoconf@gnu.org about your system,
+ echo including any error possibly output before this
+ echo message
+}
+
+
+
+ as_lineno_1=$LINENO
+ as_lineno_2=$LINENO
+ test "x$as_lineno_1" != "x$as_lineno_2" &&
+ test "x`expr $as_lineno_1 + 1`" = "x$as_lineno_2" || {
+
+ # Create $as_me.lineno as a copy of $as_myself, but with $LINENO
+ # uniformly replaced by the line number. The first 'sed' inserts a
+ # line-number line after each line using $LINENO; the second 'sed'
+ # does the real work. The second script uses 'N' to pair each
+ # line-number line with the line containing $LINENO, and appends
+ # trailing '-' during substitution so that $LINENO is not a special
+ # case at line end.
+ # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the
+ # scripts with optimization help from Paolo Bonzini. Blame Lee
+ # E. McMahon (1931-1989) for sed's syntax. :-)
+ sed -n '
+ p
+ /[$]LINENO/=
+ ' <$as_myself |
+ sed '
+ s/[$]LINENO.*/&-/
+ t lineno
+ b
+ :lineno
+ N
+ :loop
+ s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/
+ t loop
+ s/-\n.*//
+ ' >$as_me.lineno &&
+ chmod +x "$as_me.lineno" ||
+ { echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2
+ { (exit 1); exit 1; }; }
+
+ # Don't try to exec as it changes $[0], causing all sort of problems
+ # (the dirname of $[0] is not the place where we might find the
+ # original and so on. Autoconf is especially sensitive to this).
+ . "./$as_me.lineno"
+ # Exit status is that of the last command.
+ exit
+}
+
+
+if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then
+ as_dirname=dirname
+else
+ as_dirname=false
+fi
+
+ECHO_C= ECHO_N= ECHO_T=
+case `echo -n x` in
+-n*)
+ case `echo 'x\c'` in
+ *c*) ECHO_T=' ';; # ECHO_T is single tab character.
+ *) ECHO_C='\c';;
+ esac;;
+*)
+ ECHO_N='-n';;
+esac
+
+if expr a : '\(a\)' >/dev/null 2>&1 &&
+ test "X`expr 00001 : '.*\(...\)'`" = X001; then
+ as_expr=expr
+else
+ as_expr=false
+fi
+
+rm -f conf$$ conf$$.exe conf$$.file
+if test -d conf$$.dir; then
+ rm -f conf$$.dir/conf$$.file
+else
+ rm -f conf$$.dir
+ mkdir conf$$.dir
+fi
+echo >conf$$.file
+if ln -s conf$$.file conf$$ 2>/dev/null; then
+ as_ln_s='ln -s'
+ # ... but there are two gotchas:
+ # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail.
+ # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable.
+ # In both cases, we have to default to `cp -p'.
+ ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe ||
+ as_ln_s='cp -p'
+elif ln conf$$.file conf$$ 2>/dev/null; then
+ as_ln_s=ln
+else
+ as_ln_s='cp -p'
+fi
+rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file
+rmdir conf$$.dir 2>/dev/null
+
+if mkdir -p . 2>/dev/null; then
+ as_mkdir_p=:
+else
+ test -d ./-p && rmdir ./-p
+ as_mkdir_p=false
+fi
+
+if test -x / >/dev/null 2>&1; then
+ as_test_x='test -x'
+else
+ if ls -dL / >/dev/null 2>&1; then
+ as_ls_L_option=L
+ else
+ as_ls_L_option=
+ fi
+ as_test_x='
+ eval sh -c '\''
+ if test -d "$1"; then
+ test -d "$1/.";
+ else
+ case $1 in
+ -*)set "./$1";;
+ esac;
+ case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in
+ ???[sx]*):;;*)false;;esac;fi
+ '\'' sh
+ '
+fi
+as_executable_p=$as_test_x
+
+# Sed expression to map a string onto a valid CPP name.
+as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'"
+
+# Sed expression to map a string onto a valid variable name.
+as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'"
+
+
+
+exec 7<&0 </dev/null 6>&1
+
+# Name of the host.
+# hostname on some systems (SVR3.2, Linux) returns a bogus exit status,
+# so uname gets run too.
+ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q`
+
+#
+# Initializations.
+#
+ac_default_prefix=/usr/local
+ac_clean_files=
+ac_config_libobj_dir=.
+LIBOBJS=
+cross_compiling=no
+subdirs=
+MFLAGS=
+MAKEFLAGS=
+SHELL=${CONFIG_SHELL-/bin/sh}
+
+# Identity of this package.
+PACKAGE_NAME=
+PACKAGE_TARNAME=
+PACKAGE_VERSION=
+PACKAGE_STRING=
+PACKAGE_BUGREPORT=
+
+ac_unique_file="ctags.h"
+ac_header_list=
+# Factoring default headers for most tests.
+ac_includes_default="\
+#include <stdio.h>
+#ifdef HAVE_SYS_TYPES_H
+# include <sys/types.h>
+#endif
+#ifdef HAVE_SYS_STAT_H
+# include <sys/stat.h>
+#endif
+#ifdef STDC_HEADERS
+# include <stdlib.h>
+# include <stddef.h>
+#else
+# ifdef HAVE_STDLIB_H
+# include <stdlib.h>
+# endif
+#endif
+#ifdef HAVE_STRING_H
+# if !defined STDC_HEADERS && defined HAVE_MEMORY_H
+# include <memory.h>
+# endif
+# include <string.h>
+#endif
+#ifdef HAVE_STRINGS_H
+# include <strings.h>
+#endif
+#ifdef HAVE_INTTYPES_H
+# include <inttypes.h>
+#endif
+#ifdef HAVE_STDINT_H
+# include <stdint.h>
+#endif
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#endif"
+
+ac_subst_vars='SHELL
+PATH_SEPARATOR
+PACKAGE_NAME
+PACKAGE_TARNAME
+PACKAGE_VERSION
+PACKAGE_STRING
+PACKAGE_BUGREPORT
+exec_prefix
+prefix
+program_transform_name
+bindir
+sbindir
+libexecdir
+datarootdir
+datadir
+sysconfdir
+sharedstatedir
+localstatedir
+includedir
+oldincludedir
+docdir
+infodir
+htmldir
+dvidir
+pdfdir
+psdir
+libdir
+localedir
+mandir
+DEFS
+ECHO_C
+ECHO_N
+ECHO_T
+LIBS
+build_alias
+host_alias
+target_alias
+install_targets
+CC
+CFLAGS
+LDFLAGS
+CPPFLAGS
+ac_ct_CC
+EXEEXT
+OBJEXT
+LN_S
+STRIP
+sort_found
+CPP
+GREP
+EGREP
+LIBOBJS
+LTLIBOBJS'
+ac_subst_files=''
+ ac_precious_vars='build_alias
+host_alias
+target_alias
+CC
+CFLAGS
+LDFLAGS
+LIBS
+CPPFLAGS
+CPP'
+
+
+# Initialize some variables set by options.
+ac_init_help=
+ac_init_version=false
+# The variables have the same names as the options, with
+# dashes changed to underlines.
+cache_file=/dev/null
+exec_prefix=NONE
+no_create=
+no_recursion=
+prefix=NONE
+program_prefix=NONE
+program_suffix=NONE
+program_transform_name=s,x,x,
+silent=
+site=
+srcdir=
+verbose=
+x_includes=NONE
+x_libraries=NONE
+
+# Installation directory options.
+# These are left unexpanded so users can "make install exec_prefix=/foo"
+# and all the variables that are supposed to be based on exec_prefix
+# by default will actually change.
+# Use braces instead of parens because sh, perl, etc. also accept them.
+# (The list follows the same order as the GNU Coding Standards.)
+bindir='${exec_prefix}/bin'
+sbindir='${exec_prefix}/sbin'
+libexecdir='${exec_prefix}/libexec'
+datarootdir='${prefix}/share'
+datadir='${datarootdir}'
+sysconfdir='${prefix}/etc'
+sharedstatedir='${prefix}/com'
+localstatedir='${prefix}/var'
+includedir='${prefix}/include'
+oldincludedir='/usr/include'
+docdir='${datarootdir}/doc/${PACKAGE}'
+infodir='${datarootdir}/info'
+htmldir='${docdir}'
+dvidir='${docdir}'
+pdfdir='${docdir}'
+psdir='${docdir}'
+libdir='${exec_prefix}/lib'
+localedir='${datarootdir}/locale'
+mandir='${datarootdir}/man'
+
+ac_prev=
+ac_dashdash=
+for ac_option
+do
+ # If the previous option needs an argument, assign it.
+ if test -n "$ac_prev"; then
+ eval $ac_prev=\$ac_option
+ ac_prev=
+ continue
+ fi
+
+ case $ac_option in
+ *=*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;;
+ *) ac_optarg=yes ;;
+ esac
+
+ # Accept the important Cygnus configure options, so we can diagnose typos.
+
+ case $ac_dashdash$ac_option in
+ --)
+ ac_dashdash=yes ;;
+
+ -bindir | --bindir | --bindi | --bind | --bin | --bi)
+ ac_prev=bindir ;;
+ -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*)
+ bindir=$ac_optarg ;;
+
+ -build | --build | --buil | --bui | --bu)
+ ac_prev=build_alias ;;
+ -build=* | --build=* | --buil=* | --bui=* | --bu=*)
+ build_alias=$ac_optarg ;;
+
+ -cache-file | --cache-file | --cache-fil | --cache-fi \
+ | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c)
+ ac_prev=cache_file ;;
+ -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \
+ | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*)
+ cache_file=$ac_optarg ;;
+
+ --config-cache | -C)
+ cache_file=config.cache ;;
+
+ -datadir | --datadir | --datadi | --datad)
+ ac_prev=datadir ;;
+ -datadir=* | --datadir=* | --datadi=* | --datad=*)
+ datadir=$ac_optarg ;;
+
+ -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \
+ | --dataroo | --dataro | --datar)
+ ac_prev=datarootdir ;;
+ -datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \
+ | --dataroot=* | --dataroo=* | --dataro=* | --datar=*)
+ datarootdir=$ac_optarg ;;
+
+ -disable-* | --disable-*)
+ ac_feature=`expr "x$ac_option" : 'x-*disable-\(.*\)'`
+ # Reject names that are not valid shell variable names.
+ expr "x$ac_feature" : ".*[^-._$as_cr_alnum]" >/dev/null &&
+ { echo "$as_me: error: invalid feature name: $ac_feature" >&2
+ { (exit 1); exit 1; }; }
+ ac_feature=`echo $ac_feature | sed 's/[-.]/_/g'`
+ eval enable_$ac_feature=no ;;
+
+ -docdir | --docdir | --docdi | --doc | --do)
+ ac_prev=docdir ;;
+ -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*)
+ docdir=$ac_optarg ;;
+
+ -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv)
+ ac_prev=dvidir ;;
+ -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*)
+ dvidir=$ac_optarg ;;
+
+ -enable-* | --enable-*)
+ ac_feature=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'`
+ # Reject names that are not valid shell variable names.
+ expr "x$ac_feature" : ".*[^-._$as_cr_alnum]" >/dev/null &&
+ { echo "$as_me: error: invalid feature name: $ac_feature" >&2
+ { (exit 1); exit 1; }; }
+ ac_feature=`echo $ac_feature | sed 's/[-.]/_/g'`
+ eval enable_$ac_feature=\$ac_optarg ;;
+
+ -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \
+ | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \
+ | --exec | --exe | --ex)
+ ac_prev=exec_prefix ;;
+ -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \
+ | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \
+ | --exec=* | --exe=* | --ex=*)
+ exec_prefix=$ac_optarg ;;
+
+ -gas | --gas | --ga | --g)
+ # Obsolete; use --with-gas.
+ with_gas=yes ;;
+
+ -help | --help | --hel | --he | -h)
+ ac_init_help=long ;;
+ -help=r* | --help=r* | --hel=r* | --he=r* | -hr*)
+ ac_init_help=recursive ;;
+ -help=s* | --help=s* | --hel=s* | --he=s* | -hs*)
+ ac_init_help=short ;;
+
+ -host | --host | --hos | --ho)
+ ac_prev=host_alias ;;
+ -host=* | --host=* | --hos=* | --ho=*)
+ host_alias=$ac_optarg ;;
+
+ -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht)
+ ac_prev=htmldir ;;
+ -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \
+ | --ht=*)
+ htmldir=$ac_optarg ;;
+
+ -includedir | --includedir | --includedi | --included | --include \
+ | --includ | --inclu | --incl | --inc)
+ ac_prev=includedir ;;
+ -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \
+ | --includ=* | --inclu=* | --incl=* | --inc=*)
+ includedir=$ac_optarg ;;
+
+ -infodir | --infodir | --infodi | --infod | --info | --inf)
+ ac_prev=infodir ;;
+ -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*)
+ infodir=$ac_optarg ;;
+
+ -libdir | --libdir | --libdi | --libd)
+ ac_prev=libdir ;;
+ -libdir=* | --libdir=* | --libdi=* | --libd=*)
+ libdir=$ac_optarg ;;
+
+ -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \
+ | --libexe | --libex | --libe)
+ ac_prev=libexecdir ;;
+ -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \
+ | --libexe=* | --libex=* | --libe=*)
+ libexecdir=$ac_optarg ;;
+
+ -localedir | --localedir | --localedi | --localed | --locale)
+ ac_prev=localedir ;;
+ -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*)
+ localedir=$ac_optarg ;;
+
+ -localstatedir | --localstatedir | --localstatedi | --localstated \
+ | --localstate | --localstat | --localsta | --localst | --locals)
+ ac_prev=localstatedir ;;
+ -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \
+ | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*)
+ localstatedir=$ac_optarg ;;
+
+ -mandir | --mandir | --mandi | --mand | --man | --ma | --m)
+ ac_prev=mandir ;;
+ -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*)
+ mandir=$ac_optarg ;;
+
+ -nfp | --nfp | --nf)
+ # Obsolete; use --without-fp.
+ with_fp=no ;;
+
+ -no-create | --no-create | --no-creat | --no-crea | --no-cre \
+ | --no-cr | --no-c | -n)
+ no_create=yes ;;
+
+ -no-recursion | --no-recursion | --no-recursio | --no-recursi \
+ | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r)
+ no_recursion=yes ;;
+
+ -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \
+ | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \
+ | --oldin | --oldi | --old | --ol | --o)
+ ac_prev=oldincludedir ;;
+ -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \
+ | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \
+ | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*)
+ oldincludedir=$ac_optarg ;;
+
+ -prefix | --prefix | --prefi | --pref | --pre | --pr | --p)
+ ac_prev=prefix ;;
+ -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*)
+ prefix=$ac_optarg ;;
+
+ -program-prefix | --program-prefix | --program-prefi | --program-pref \
+ | --program-pre | --program-pr | --program-p)
+ ac_prev=program_prefix ;;
+ -program-prefix=* | --program-prefix=* | --program-prefi=* \
+ | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*)
+ program_prefix=$ac_optarg ;;
+
+ -program-suffix | --program-suffix | --program-suffi | --program-suff \
+ | --program-suf | --program-su | --program-s)
+ ac_prev=program_suffix ;;
+ -program-suffix=* | --program-suffix=* | --program-suffi=* \
+ | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*)
+ program_suffix=$ac_optarg ;;
+
+ -program-transform-name | --program-transform-name \
+ | --program-transform-nam | --program-transform-na \
+ | --program-transform-n | --program-transform- \
+ | --program-transform | --program-transfor \
+ | --program-transfo | --program-transf \
+ | --program-trans | --program-tran \
+ | --progr-tra | --program-tr | --program-t)
+ ac_prev=program_transform_name ;;
+ -program-transform-name=* | --program-transform-name=* \
+ | --program-transform-nam=* | --program-transform-na=* \
+ | --program-transform-n=* | --program-transform-=* \
+ | --program-transform=* | --program-transfor=* \
+ | --program-transfo=* | --program-transf=* \
+ | --program-trans=* | --program-tran=* \
+ | --progr-tra=* | --program-tr=* | --program-t=*)
+ program_transform_name=$ac_optarg ;;
+
+ -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd)
+ ac_prev=pdfdir ;;
+ -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*)
+ pdfdir=$ac_optarg ;;
+
+ -psdir | --psdir | --psdi | --psd | --ps)
+ ac_prev=psdir ;;
+ -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*)
+ psdir=$ac_optarg ;;
+
+ -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+ | -silent | --silent | --silen | --sile | --sil)
+ silent=yes ;;
+
+ -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb)
+ ac_prev=sbindir ;;
+ -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \
+ | --sbi=* | --sb=*)
+ sbindir=$ac_optarg ;;
+
+ -sharedstatedir | --sharedstatedir | --sharedstatedi \
+ | --sharedstated | --sharedstate | --sharedstat | --sharedsta \
+ | --sharedst | --shareds | --shared | --share | --shar \
+ | --sha | --sh)
+ ac_prev=sharedstatedir ;;
+ -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \
+ | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \
+ | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \
+ | --sha=* | --sh=*)
+ sharedstatedir=$ac_optarg ;;
+
+ -site | --site | --sit)
+ ac_prev=site ;;
+ -site=* | --site=* | --sit=*)
+ site=$ac_optarg ;;
+
+ -srcdir | --srcdir | --srcdi | --srcd | --src | --sr)
+ ac_prev=srcdir ;;
+ -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*)
+ srcdir=$ac_optarg ;;
+
+ -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \
+ | --syscon | --sysco | --sysc | --sys | --sy)
+ ac_prev=sysconfdir ;;
+ -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \
+ | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*)
+ sysconfdir=$ac_optarg ;;
+
+ -target | --target | --targe | --targ | --tar | --ta | --t)
+ ac_prev=target_alias ;;
+ -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*)
+ target_alias=$ac_optarg ;;
+
+ -v | -verbose | --verbose | --verbos | --verbo | --verb)
+ verbose=yes ;;
+
+ -version | --version | --versio | --versi | --vers | -V)
+ ac_init_version=: ;;
+
+ -with-* | --with-*)
+ ac_package=`expr "x$ac_option" : 'x-*with-\([^=]*\)'`
+ # Reject names that are not valid shell variable names.
+ expr "x$ac_package" : ".*[^-._$as_cr_alnum]" >/dev/null &&
+ { echo "$as_me: error: invalid package name: $ac_package" >&2
+ { (exit 1); exit 1; }; }
+ ac_package=`echo $ac_package | sed 's/[-.]/_/g'`
+ eval with_$ac_package=\$ac_optarg ;;
+
+ -without-* | --without-*)
+ ac_package=`expr "x$ac_option" : 'x-*without-\(.*\)'`
+ # Reject names that are not valid shell variable names.
+ expr "x$ac_package" : ".*[^-._$as_cr_alnum]" >/dev/null &&
+ { echo "$as_me: error: invalid package name: $ac_package" >&2
+ { (exit 1); exit 1; }; }
+ ac_package=`echo $ac_package | sed 's/[-.]/_/g'`
+ eval with_$ac_package=no ;;
+
+ --x)
+ # Obsolete; use --with-x.
+ with_x=yes ;;
+
+ -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \
+ | --x-incl | --x-inc | --x-in | --x-i)
+ ac_prev=x_includes ;;
+ -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \
+ | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*)
+ x_includes=$ac_optarg ;;
+
+ -x-libraries | --x-libraries | --x-librarie | --x-librari \
+ | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l)
+ ac_prev=x_libraries ;;
+ -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \
+ | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*)
+ x_libraries=$ac_optarg ;;
+
+ -*) { echo "$as_me: error: unrecognized option: $ac_option
+Try \`$0 --help' for more information." >&2
+ { (exit 1); exit 1; }; }
+ ;;
+
+ *=*)
+ ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='`
+ # Reject names that are not valid shell variable names.
+ expr "x$ac_envvar" : ".*[^_$as_cr_alnum]" >/dev/null &&
+ { echo "$as_me: error: invalid variable name: $ac_envvar" >&2
+ { (exit 1); exit 1; }; }
+ eval $ac_envvar=\$ac_optarg
+ export $ac_envvar ;;
+
+ *)
+ # FIXME: should be removed in autoconf 3.0.
+ echo "$as_me: WARNING: you should use --build, --host, --target" >&2
+ expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null &&
+ echo "$as_me: WARNING: invalid host type: $ac_option" >&2
+ : ${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}
+ ;;
+
+ esac
+done
+
+if test -n "$ac_prev"; then
+ ac_option=--`echo $ac_prev | sed 's/_/-/g'`
+ { echo "$as_me: error: missing argument to $ac_option" >&2
+ { (exit 1); exit 1; }; }
+fi
+
+# Be sure to have absolute directory names.
+# Each installation-directory variable listed below is validated in turn;
+# the first one holding an unacceptable value aborts configure.
+for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \
+ datadir sysconfdir sharedstatedir localstatedir includedir \
+ oldincludedir docdir infodir htmldir dvidir pdfdir psdir \
+ libdir localedir mandir
+do
+ # Fetch the current value of the variable whose name is in $ac_var.
+ eval ac_val=\$$ac_var
+ case $ac_val in
+ # Accepted: a value starting with `/', `\' or `$', or with any single
+ # character followed by `:' and a slash (e.g. a drive letter).
+ [\\/$]* | ?:[\\/]* ) continue;;
+ # `NONE' or an empty value is accepted only for variables whose name
+ # ends in `prefix'; for every other variable it falls through to the error.
+ NONE | '' ) case $ac_var in *prefix ) continue;; esac;;
+ esac
+ # Anything that reaches this point is rejected.
+ { echo "$as_me: error: expected an absolute directory name for --$ac_var: $ac_val" >&2
+ { (exit 1); exit 1; }; }
+done
+
+# There might be people who depend on the old broken behavior: `$host'
+# used to hold the argument of --host etc.
+# FIXME: To remove some day.
+# The three plain names are straight copies of the *_alias values.
+build=$build_alias
+host=$host_alias
+target=$target_alias
+
+# FIXME: To remove some day.
+# Guess the cross-compilation mode from --host/--build:
+#   --host without --build        -> cross_compiling=maybe (plus a warning)
+#   --host different from --build -> cross_compiling=yes
+# cross_compiling is left untouched in every other case.
+if test "x$host_alias" != x; then
+ if test "x$build_alias" = x; then
+ cross_compiling=maybe
+ echo "$as_me: WARNING: If you wanted to set the --build type, don't use --host.
+ If a cross compiler is detected then cross compile mode will be used." >&2
+ elif test "x$build_alias" != "x$host_alias"; then
+ cross_compiling=yes
+ fi
+fi
+
+# Tool names are later probed with this prefix: "<host_alias>-" when a host
+# alias was given, otherwise empty.
+ac_tool_prefix=
+test -n "$host_alias" && ac_tool_prefix=$host_alias-
+
+# With --quiet/--silent, everything written to descriptor 6 is discarded.
+test "$silent" = yes && exec 6>/dev/null
+
+
+# Record the working directory in $ac_pwd and verify that the name is usable:
+# `ls -di .' is run here and again after `cd "$ac_pwd"', and both outputs
+# must be identical. Any failing step in the chain aborts configure.
+ac_pwd=`pwd` && test -n "$ac_pwd" &&
+ac_ls_di=`ls -di .` &&
+ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` ||
+ { echo "$as_me: error: Working directory cannot be determined" >&2
+ { (exit 1); exit 1; }; }
+# The `X' prefix keeps `test' from mistaking either operand for an operator.
+test "X$ac_ls_di" = "X$ac_pwd_ls_di" ||
+ { echo "$as_me: error: pwd does not report name of working directory" >&2
+ { (exit 1); exit 1; }; }
+
+
+# Find the source files, if location was not specified.
+if test -z "$srcdir"; then
+ ac_srcdir_defaulted=yes
+ # Try the directory containing this script, then the parent directory.
+ ac_confdir=`$as_dirname -- "$0" ||
+$as_expr X"$0" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+ X"$0" : 'X\(//\)[^/]' \| \
+ X"$0" : 'X\(//\)$' \| \
+ X"$0" : 'X\(/\)' \| . 2>/dev/null ||
+echo X"$0" |
+ sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+ s//\1/
+ q
+ }
+ /^X\(\/\/\)[^/].*/{
+ s//\1/
+ q
+ }
+ /^X\(\/\/\)$/{
+ s//\1/
+ q
+ }
+ /^X\(\/\).*/{
+ s//\1/
+ q
+ }
+ s/.*/./; q'`
+ srcdir=$ac_confdir
+ if test ! -r "$srcdir/$ac_unique_file"; then
+ srcdir=..
+ fi
+else
+ ac_srcdir_defaulted=no
+fi
+if test ! -r "$srcdir/$ac_unique_file"; then
+ test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .."
+ { echo "$as_me: error: cannot find sources ($ac_unique_file) in $srcdir" >&2
+ { (exit 1); exit 1; }; }
+fi
+ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work"
+ac_abs_confdir=`(
+ cd "$srcdir" && test -r "./$ac_unique_file" || { echo "$as_me: error: $ac_msg" >&2
+ { (exit 1); exit 1; }; }
+ pwd)`
+# When building in place, set srcdir=.
+if test "$ac_abs_confdir" = "$ac_pwd"; then
+ srcdir=.
+fi
+# Remove unnecessary trailing slashes from srcdir.
+# Double slashes in file names in object file debugging info
+# mess up M-x gdb in Emacs.
+case $srcdir in
+*/) srcdir=`expr "X$srcdir" : 'X\(.*[^/]\)' \| "X$srcdir" : 'X\(.*\)'`;;
+esac
+for ac_var in $ac_precious_vars; do
+ eval ac_env_${ac_var}_set=\${${ac_var}+set}
+ eval ac_env_${ac_var}_value=\$${ac_var}
+ eval ac_cv_env_${ac_var}_set=\${${ac_var}+set}
+ eval ac_cv_env_${ac_var}_value=\$${ac_var}
+done
+
+#
+# Report the --help message.
+#
+if test "$ac_init_help" = "long"; then
+ # Omit some internal or obsolete options to make the list less imposing.
+ # This message is too long to be a string in the A/UX 3.1 sh.
+ cat <<_ACEOF
+\`configure' configures this package to adapt to many kinds of systems.
+
+Usage: $0 [OPTION]... [VAR=VALUE]...
+
+To assign environment variables (e.g., CC, CFLAGS...), specify them as
+VAR=VALUE. See below for descriptions of some of the useful variables.
+
+Defaults for the options are specified in brackets.
+
+Configuration:
+ -h, --help display this help and exit
+ --help=short display options specific to this package
+ --help=recursive display the short help of all the included packages
+ -V, --version display version information and exit
+ -q, --quiet, --silent do not print \`checking...' messages
+ --cache-file=FILE cache test results in FILE [disabled]
+ -C, --config-cache alias for \`--cache-file=config.cache'
+ -n, --no-create do not create output files
+ --srcdir=DIR find the sources in DIR [configure dir or \`..']
+
+Installation directories:
+ --prefix=PREFIX install architecture-independent files in PREFIX
+ [$ac_default_prefix]
+ --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX
+ [PREFIX]
+
+By default, \`make install' will install all the files in
+\`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify
+an installation prefix other than \`$ac_default_prefix' using \`--prefix',
+for instance \`--prefix=\$HOME'.
+
+For better control, use the options below.
+
+Fine tuning of the installation directories:
+ --bindir=DIR user executables [EPREFIX/bin]
+ --sbindir=DIR system admin executables [EPREFIX/sbin]
+ --libexecdir=DIR program executables [EPREFIX/libexec]
+ --sysconfdir=DIR read-only single-machine data [PREFIX/etc]
+ --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com]
+ --localstatedir=DIR modifiable single-machine data [PREFIX/var]
+ --libdir=DIR object code libraries [EPREFIX/lib]
+ --includedir=DIR C header files [PREFIX/include]
+ --oldincludedir=DIR C header files for non-gcc [/usr/include]
+ --datarootdir=DIR read-only arch.-independent data root [PREFIX/share]
+ --datadir=DIR read-only architecture-independent data [DATAROOTDIR]
+ --infodir=DIR info documentation [DATAROOTDIR/info]
+ --localedir=DIR locale-dependent data [DATAROOTDIR/locale]
+ --mandir=DIR man documentation [DATAROOTDIR/man]
+ --docdir=DIR documentation root [DATAROOTDIR/doc/PACKAGE]
+ --htmldir=DIR html documentation [DOCDIR]
+ --dvidir=DIR dvi documentation [DOCDIR]
+ --pdfdir=DIR pdf documentation [DOCDIR]
+ --psdir=DIR ps documentation [DOCDIR]
+_ACEOF
+
+ cat <<\_ACEOF
+_ACEOF
+fi
+
+if test -n "$ac_init_help"; then
+
+ cat <<\_ACEOF
+
+Optional Features:
+ --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no)
+ --enable-FEATURE[=ARG] include FEATURE [ARG=yes]
+ --enable-etags enable the installation of links for etags
+ --disable-extended-format
+ disable extension flags; use original ctags file
+ format only
+ --disable-external-sort use internal sort algorithm instead of sort program
+ --enable-custom-config=FILE
+ enable custom config file for site-wide defaults
+ --enable-macro-patterns use patterns as default method to locate macros
+ instead of line numbers
+ --enable-maintainer-mode
+ use maintainer makefile
+ --enable-shell-globbing=DIR
+ does shell expand wildcards (yes|no)? yes
+ --enable-tmpdir=DIR default directory for temporary files ARG=/tmp
+
+Optional Packages:
+ --with-PACKAGE[=ARG] use PACKAGE [ARG=yes]
+ --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no)
+ --with-posix-regex use Posix regex interface, if available
+ --with-readlib include readtags library object during install
+
+Some influential environment variables:
+ CC C compiler command
+ CFLAGS C compiler flags
+ LDFLAGS linker flags, e.g. -L<lib dir> if you have libraries in a
+ nonstandard directory <lib dir>
+ LIBS libraries to pass to the linker, e.g. -l<library>
+ CPPFLAGS C/C++/Objective C preprocessor flags, e.g. -I<include dir> if
+ you have headers in a nonstandard directory <include dir>
+ CPP C preprocessor
+
+Use these variables to override the choices made by `configure' or to help
+it to find libraries and programs with nonstandard names/locations.
+
+_ACEOF
+ac_status=$?
+fi
+
+if test "$ac_init_help" = "recursive"; then
+ # If there are subdirs, report their specific --help.
+ for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue
+ test -d "$ac_dir" || continue
+ ac_builddir=.
+
+case "$ac_dir" in
+.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;;
+*)
+ ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'`
+ # A ".." for each directory in $ac_dir_suffix.
+ ac_top_builddir_sub=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,/..,g;s,/,,'`
+ case $ac_top_builddir_sub in
+ "") ac_top_builddir_sub=. ac_top_build_prefix= ;;
+ *) ac_top_build_prefix=$ac_top_builddir_sub/ ;;
+ esac ;;
+esac
+ac_abs_top_builddir=$ac_pwd
+ac_abs_builddir=$ac_pwd$ac_dir_suffix
+# for backward compatibility:
+ac_top_builddir=$ac_top_build_prefix
+
+case $srcdir in
+ .) # We are building in place.
+ ac_srcdir=.
+ ac_top_srcdir=$ac_top_builddir_sub
+ ac_abs_top_srcdir=$ac_pwd ;;
+ [\\/]* | ?:[\\/]* ) # Absolute name.
+ ac_srcdir=$srcdir$ac_dir_suffix;
+ ac_top_srcdir=$srcdir
+ ac_abs_top_srcdir=$srcdir ;;
+ *) # Relative name.
+ ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix
+ ac_top_srcdir=$ac_top_build_prefix$srcdir
+ ac_abs_top_srcdir=$ac_pwd/$srcdir ;;
+esac
+ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix
+
+ cd "$ac_dir" || { ac_status=$?; continue; }
+ # Check for guested configure.
+ if test -f "$ac_srcdir/configure.gnu"; then
+ echo &&
+ $SHELL "$ac_srcdir/configure.gnu" --help=recursive
+ elif test -f "$ac_srcdir/configure"; then
+ echo &&
+ $SHELL "$ac_srcdir/configure" --help=recursive
+ else
+ echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2
+ fi || ac_status=$?
+ cd "$ac_pwd" || { ac_status=$?; break; }
+ done
+fi
+
+# When any kind of help was requested, stop here with the status collected
+# while printing it.
+test -n "$ac_init_help" && exit $ac_status
+# $ac_init_version is executed as a command; option parsing sets it to `:'
+# (which always succeeds) for --version / -V, and that makes this branch run.
+# The quoted here-document prints the banner literally, then configure exits.
+if $ac_init_version; then
+ cat <<\_ACEOF
+configure
+generated by GNU Autoconf 2.61
+
+Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
+2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
+This configure script is free software; the Free Software Foundation
+gives unlimited permission to copy, distribute and modify it.
+_ACEOF
+ exit
+fi
+cat >config.log <<_ACEOF
+This file contains any messages produced by compilers while
+running configure, to aid debugging if configure makes a mistake.
+
+It was created by $as_me, which was
+generated by GNU Autoconf 2.61. Invocation command line was
+
+ $ $0 $@
+
+_ACEOF
+exec 5>>config.log
+{
+cat <<_ASUNAME
+## --------- ##
+## Platform. ##
+## --------- ##
+
+hostname = `(hostname || uname -n) 2>/dev/null | sed 1q`
+uname -m = `(uname -m) 2>/dev/null || echo unknown`
+uname -r = `(uname -r) 2>/dev/null || echo unknown`
+uname -s = `(uname -s) 2>/dev/null || echo unknown`
+uname -v = `(uname -v) 2>/dev/null || echo unknown`
+
+/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown`
+/bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown`
+
+/bin/arch = `(/bin/arch) 2>/dev/null || echo unknown`
+/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown`
+/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown`
+/usr/bin/hostinfo = `(/usr/bin/hostinfo) 2>/dev/null || echo unknown`
+/bin/machine = `(/bin/machine) 2>/dev/null || echo unknown`
+/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown`
+/bin/universe = `(/bin/universe) 2>/dev/null || echo unknown`
+
+_ASUNAME
+
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ echo "PATH: $as_dir"
+done
+IFS=$as_save_IFS
+
+} >&5
+
+cat >&5 <<_ACEOF
+
+
+## ----------- ##
+## Core tests. ##
+## ----------- ##
+
+_ACEOF
+
+
+# Keep a trace of the command line.
+# Strip out --no-create and --no-recursion so they do not pile up.
+# Strip out --silent because we don't want to record it for future runs.
+# Also quote any args containing shell meta-characters.
+# Make two passes to allow for proper duplicate-argument suppression.
+ac_configure_args=
+ac_configure_args0=
+ac_configure_args1=
+ac_must_keep_next=false
+for ac_pass in 1 2
+do
+ for ac_arg
+ do
+ case $ac_arg in
+ -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;;
+ -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+ | -silent | --silent | --silen | --sile | --sil)
+ continue ;;
+ *\'*)
+ ac_arg=`echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;;
+ esac
+ case $ac_pass in
+ 1) ac_configure_args0="$ac_configure_args0 '$ac_arg'" ;;
+ 2)
+ ac_configure_args1="$ac_configure_args1 '$ac_arg'"
+ if test $ac_must_keep_next = true; then
+ ac_must_keep_next=false # Got value, back to normal.
+ else
+ case $ac_arg in
+ *=* | --config-cache | -C | -disable-* | --disable-* \
+ | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \
+ | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \
+ | -with-* | --with-* | -without-* | --without-* | --x)
+ case "$ac_configure_args0 " in
+ "$ac_configure_args1"*" '$ac_arg' "* ) continue ;;
+ esac
+ ;;
+ -* ) ac_must_keep_next=true ;;
+ esac
+ fi
+ ac_configure_args="$ac_configure_args '$ac_arg'"
+ ;;
+ esac
+ done
+done
+$as_unset ac_configure_args0 || test "${ac_configure_args0+set}" != set || { ac_configure_args0=; export ac_configure_args0; }
+$as_unset ac_configure_args1 || test "${ac_configure_args1+set}" != set || { ac_configure_args1=; export ac_configure_args1; }
+
+# When interrupted or exit'd, cleanup temporary files, and complete
+# config.log. We remove comments because anyway the quotes in there
+# would cause problems or look ugly.
+# WARNING: Use '\'' to represent an apostrophe within the trap.
+# WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug.
+trap 'exit_status=$?
+ # Save into config.log some information that might help in debugging.
+ {
+ echo
+
+ cat <<\_ASBOX
+## ---------------- ##
+## Cache variables. ##
+## ---------------- ##
+_ASBOX
+ echo
+ # The following way of writing the cache mishandles newlines in values,
+(
+ for ac_var in `(set) 2>&1 | sed -n '\''s/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'\''`; do
+ eval ac_val=\$$ac_var
+ case $ac_val in #(
+ *${as_nl}*)
+ case $ac_var in #(
+ *_cv_*) { echo "$as_me:$LINENO: WARNING: Cache variable $ac_var contains a newline." >&5
+echo "$as_me: WARNING: Cache variable $ac_var contains a newline." >&2;} ;;
+ esac
+ case $ac_var in #(
+ _ | IFS | as_nl) ;; #(
+ *) $as_unset $ac_var ;;
+ esac ;;
+ esac
+ done
+ (set) 2>&1 |
+ case $as_nl`(ac_space='\'' '\''; set) 2>&1` in #(
+ *${as_nl}ac_space=\ *)
+ sed -n \
+ "s/'\''/'\''\\\\'\'''\''/g;
+ s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\''\\2'\''/p"
+ ;; #(
+ *)
+ sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p"
+ ;;
+ esac |
+ sort
+)
+ echo
+
+ cat <<\_ASBOX
+## ----------------- ##
+## Output variables. ##
+## ----------------- ##
+_ASBOX
+ echo
+ for ac_var in $ac_subst_vars
+ do
+ eval ac_val=\$$ac_var
+ case $ac_val in
+ *\'\''*) ac_val=`echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;;
+ esac
+ echo "$ac_var='\''$ac_val'\''"
+ done | sort
+ echo
+
+ if test -n "$ac_subst_files"; then
+ cat <<\_ASBOX
+## ------------------- ##
+## File substitutions. ##
+## ------------------- ##
+_ASBOX
+ echo
+ for ac_var in $ac_subst_files
+ do
+ eval ac_val=\$$ac_var
+ case $ac_val in
+ *\'\''*) ac_val=`echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;;
+ esac
+ echo "$ac_var='\''$ac_val'\''"
+ done | sort
+ echo
+ fi
+
+ if test -s confdefs.h; then
+ cat <<\_ASBOX
+## ----------- ##
+## confdefs.h. ##
+## ----------- ##
+_ASBOX
+ echo
+ cat confdefs.h
+ echo
+ fi
+ test "$ac_signal" != 0 &&
+ echo "$as_me: caught signal $ac_signal"
+ echo "$as_me: exit $exit_status"
+ } >&5
+ rm -f core *.core core.conftest.* &&
+ rm -f -r conftest* confdefs* conf$$* $ac_clean_files &&
+ exit $exit_status
+' 0
+for ac_signal in 1 2 13 15; do
+ trap 'ac_signal='$ac_signal'; { (exit 1); exit 1; }' $ac_signal
+done
+ac_signal=0
+
+# confdefs.h avoids OS command line length limits that DEFS can exceed.
+# Start from a clean slate: drop leftovers of any earlier run.
+rm -f -r conftest* confdefs.h
+
+# Predefined preprocessor variables.
+# The five package identification macros are appended in one go; the
+# here-document is unquoted so each $PACKAGE_* value is expanded.
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_NAME "$PACKAGE_NAME"
+#define PACKAGE_TARNAME "$PACKAGE_TARNAME"
+#define PACKAGE_VERSION "$PACKAGE_VERSION"
+#define PACKAGE_STRING "$PACKAGE_STRING"
+#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT"
+_ACEOF
+
+
+# Let the site file select an alternate cache file if it wants to.
+# Prefer explicitly selected file to automatically selected ones.
+# The candidate list is built in the positional parameters, so whatever "$@"
+# held before this point is replaced. Candidates, in order of preference:
+#   1. $CONFIG_SITE, when non-empty;
+#   2. share/config.site and etc/config.site under $prefix, unless it is NONE;
+#   3. the same two files under $ac_default_prefix.
+if test -n "$CONFIG_SITE"; then
+ set x "$CONFIG_SITE"
+elif test "x$prefix" != xNONE; then
+ set x "$prefix/share/config.site" "$prefix/etc/config.site"
+else
+ set x "$ac_default_prefix/share/config.site" \
+ "$ac_default_prefix/etc/config.site"
+fi
+# Drop the leading `x' placeholder that was passed to `set'.
+shift
+# Every readable candidate is announced, copied into config.log with a
+# `| ' prefix on each line, and then sourced into this shell.
+for ac_site_file
+do
+ if test -r "$ac_site_file"; then
+ { echo "$as_me:$LINENO: loading site script $ac_site_file" >&5
+echo "$as_me: loading site script $ac_site_file" >&6;}
+ sed 's/^/| /' "$ac_site_file" >&5
+ . "$ac_site_file"
+ fi
+done
+
+# Load the results cache when a readable one exists; otherwise create it
+# empty. A readable file that is not a regular file is silently skipped.
+if test -r "$cache_file"; then
+  # Some versions of bash will fail to source /dev/null (special
+  # files actually), so we avoid doing that.
+  if test -f "$cache_file"; then
+    { echo "$as_me:$LINENO: loading cache $cache_file" >&5
+echo "$as_me: loading cache $cache_file" >&6;}
+    # A name without a directory part is sourced as ./NAME so that `.'
+    # does not search $PATH for it.
+    case $cache_file in
+      [\\/]* | ?:[\\/]* ) . "$cache_file";;
+      *) . "./$cache_file";;
+    esac
+  fi
+else
+  { echo "$as_me:$LINENO: creating cache $cache_file" >&5
+echo "$as_me: creating cache $cache_file" >&6;}
+  # Quote the redirection target: some shells (bash outside POSIX mode)
+  # reject an unquoted name containing blanks as an ambiguous redirect.
+  >"$cache_file"
+fi
+
+# Append the header names to $ac_header_list in a single assignment.
+ac_header_list="$ac_header_list dirent.h fcntl.h fnmatch.h stat.h stdlib.h string.h time.h types.h unistd.h sys/dir.h sys/stat.h sys/times.h sys/types.h"
+# Check that the precious variables saved in the cache have kept the same
+# value.
+ac_cache_corrupted=false
+for ac_var in $ac_precious_vars; do
+ eval ac_old_set=\$ac_cv_env_${ac_var}_set
+ eval ac_new_set=\$ac_env_${ac_var}_set
+ eval ac_old_val=\$ac_cv_env_${ac_var}_value
+ eval ac_new_val=\$ac_env_${ac_var}_value
+ case $ac_old_set,$ac_new_set in
+ set,)
+ { echo "$as_me:$LINENO: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5
+echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;}
+ ac_cache_corrupted=: ;;
+ ,set)
+ { echo "$as_me:$LINENO: error: \`$ac_var' was not set in the previous run" >&5
+echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;}
+ ac_cache_corrupted=: ;;
+ ,);;
+ *)
+ if test "x$ac_old_val" != "x$ac_new_val"; then
+ { echo "$as_me:$LINENO: error: \`$ac_var' has changed since the previous run:" >&5
+echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;}
+ { echo "$as_me:$LINENO: former value: $ac_old_val" >&5
+echo "$as_me: former value: $ac_old_val" >&2;}
+ { echo "$as_me:$LINENO: current value: $ac_new_val" >&5
+echo "$as_me: current value: $ac_new_val" >&2;}
+ ac_cache_corrupted=:
+ fi;;
+ esac
+ # Pass precious variables to config.status.
+ if test "$ac_new_set" = set; then
+ case $ac_new_val in
+ *\'*) ac_arg=$ac_var=`echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;;
+ *) ac_arg=$ac_var=$ac_new_val ;;
+ esac
+ case " $ac_configure_args " in
+ *" '$ac_arg' "*) ;; # Avoid dups. Use of quotes ensures accuracy.
+ *) ac_configure_args="$ac_configure_args '$ac_arg'" ;;
+ esac
+ fi
+done
+if $ac_cache_corrupted; then
+ { echo "$as_me:$LINENO: error: changes in the environment can compromise the build" >&5
+echo "$as_me: error: changes in the environment can compromise the build" >&2;}
+ { { echo "$as_me:$LINENO: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&5
+echo "$as_me: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&2;}
+ { (exit 1); exit 1; }; }
+fi
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+ac_config_headers="$ac_config_headers config.h"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+# Report system info
+# ------------------
+# The program name and version are the quoted strings on the PROGRAM_NAME
+# and PROGRAM_VERSION lines of ctags.h. The header is read from $srcdir so
+# that running configure from a separate build directory still finds it.
+program_name=`grep 'PROGRAM_NAME *"' "$srcdir/ctags.h" | sed -e 's/.*"\([^"]*\)".*/\1/'`
+program_version=`grep 'PROGRAM_VERSION *"' "$srcdir/ctags.h" | sed -e 's/.*"\([^"]*\)".*/\1/'`
+echo "$program_name, version $program_version"
+# Best effort only: errors from uname are discarded.
+uname -mrsv 2>/dev/null
+
+# Define convenience macros
+# -------------------------
+# CHECK_HEADER_DEFINE(LABEL, HEADER [,ACTION-IF-FOUND [, ACTION-IF-NOT-FOUND ] ])
+
+
+# Checks for configuration options
+# --------------------------------
+
+
+# Check whether --with-posix-regex was given.
+if test "${with_posix_regex+set}" = set; then
+ withval=$with_posix_regex;
+fi
+
+
+
+# Check whether --with-readlib was given.
+if test "${with_readlib+set}" = set; then
+ withval=$with_readlib;
+fi
+
+
+# AC_ARG_WITH(perl-regex,
+# [ --with-perl-regex use Perl pcre interface, if available])
+
+# Check whether --enable-etags was given.
+if test "${enable_etags+set}" = set; then
+ enableval=$enable_etags;
+fi
+
+
+# Check whether --enable-extended-format was given.
+if test "${enable_extended_format+set}" = set; then
+  enableval=$enable_extended_format;
+fi
+# Choose the default tag file format: 1 (original ctags format) only when
+# the extended format was explicitly disabled, 2 otherwise. Testing the
+# value rather than mere presence keeps an explicit
+# --enable-extended-format from selecting format 1.
+if test "x$enable_extended_format" = xno; then
+  cat >>confdefs.h <<\_ACEOF
+#define DEFAULT_FILE_FORMAT 1
+_ACEOF
+
+else
+  cat >>confdefs.h <<\_ACEOF
+#define DEFAULT_FILE_FORMAT 2
+_ACEOF
+
+fi
+
+
+# Check whether --enable-external-sort was given.
+if test "${enable_external_sort+set}" = set; then
+ enableval=$enable_external_sort;
+fi
+
+
+# Check whether --enable-custom-config was given.
+if test "${enable_custom_config+set}" = set; then
+ enableval=$enable_custom_config;
+fi
+
+
+# Check whether --enable-macro-patterns was given.
+if test "${enable_macro_patterns+set}" = set; then
+ enableval=$enable_macro_patterns;
+fi
+
+
+# Check whether --enable-maintainer-mode was given.
+if test "${enable_maintainer_mode+set}" = set; then
+ enableval=$enable_maintainer_mode;
+fi
+
+
+# Check whether --enable-shell-globbing was given.
+if test "${enable_shell_globbing+set}" = set; then
+ enableval=$enable_shell_globbing;
+fi
+
+
+# Check whether --enable-tmpdir was given.
+if test "${enable_tmpdir+set}" = set; then
+ enableval=$enable_tmpdir; tmpdir_specified=yes
+fi
+
+
+
+# Process configuration options
+# -----------------------------
+
+if test "$enable_maintainer_mode" = yes ; then
+ { echo "$as_me:$LINENO: result: enabling maintainer mode" >&5
+echo "${ECHO_T}enabling maintainer mode" >&6; }
+fi
+
+install_targets="install-ctags"
+{ echo "$as_me:$LINENO: checking whether to install link to etags" >&5
+echo $ECHO_N "checking whether to install link to etags... $ECHO_C" >&6; }
+if test yes = "$enable_etags"; then
+ { echo "$as_me:$LINENO: result: yes" >&5
+echo "${ECHO_T}yes" >&6; }
+ install_targets="$install_targets install-etags"
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+{ echo "$as_me:$LINENO: checking whether to install readtags object file" >&5
+echo $ECHO_N "checking whether to install readtags object file... $ECHO_C" >&6; }
+if test yes = "$with_readlib"; then
+ { echo "$as_me:$LINENO: result: yes" >&5
+echo "${ECHO_T}yes" >&6; }
+ install_targets="$install_targets install-lib"
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+if test "$enable_custom_config" = no -o "$enable_custom_config" = yes ; then
+ { echo "$as_me:$LINENO: result: no name supplied for custom configuration file" >&5
+echo "${ECHO_T}no name supplied for custom configuration file" >&6; }
+elif test -n "$enable_custom_config" ; then
+ cat >>confdefs.h <<_ACEOF
+#define CUSTOM_CONFIGURATION_FILE "$enable_custom_config"
+_ACEOF
+
+ { echo "$as_me:$LINENO: result: $enable_custom_config will be used as custom configuration file" >&5
+echo "${ECHO_T}$enable_custom_config will be used as custom configuration file" >&6; }
+fi
+
+if test "$enable_macro_patterns" = yes ; then
+ cat >>confdefs.h <<\_ACEOF
+#define MACROS_USE_PATTERNS 1
+_ACEOF
+
+ { echo "$as_me:$LINENO: result: tag file will use patterns for macros by default" >&5
+echo "${ECHO_T}tag file will use patterns for macros by default" >&6; }
+fi
+
+# Checks for programs
+# -------------------
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+if test -n "$ac_tool_prefix"; then
+ # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args.
+set dummy ${ac_tool_prefix}gcc; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_prog_CC+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$CC"; then
+ ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_prog_CC="${ac_tool_prefix}gcc"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+ { echo "$as_me:$LINENO: result: $CC" >&5
+echo "${ECHO_T}$CC" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_CC"; then
+ ac_ct_CC=$CC
+ # Extract the first word of "gcc", so it can be a program name with args.
+set dummy gcc; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_prog_ac_ct_CC+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$ac_ct_CC"; then
+ ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_prog_ac_ct_CC="gcc"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_CC=$ac_cv_prog_ac_ct_CC
+if test -n "$ac_ct_CC"; then
+ { echo "$as_me:$LINENO: result: $ac_ct_CC" >&5
+echo "${ECHO_T}$ac_ct_CC" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+ if test "x$ac_ct_CC" = x; then
+ CC=""
+ else
+ case $cross_compiling:$ac_tool_warned in
+yes:)
+{ echo "$as_me:$LINENO: WARNING: In the future, Autoconf will not detect cross-tools
+whose name does not start with the host triplet. If you think this
+configuration is useful to you, please write to autoconf@gnu.org." >&5
+echo "$as_me: WARNING: In the future, Autoconf will not detect cross-tools
+whose name does not start with the host triplet. If you think this
+configuration is useful to you, please write to autoconf@gnu.org." >&2;}
+ac_tool_warned=yes ;;
+esac
+ CC=$ac_ct_CC
+ fi
+else
+ CC="$ac_cv_prog_CC"
+fi
+
+if test -z "$CC"; then
+ if test -n "$ac_tool_prefix"; then
+ # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args.
+set dummy ${ac_tool_prefix}cc; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_prog_CC+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$CC"; then
+ ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_prog_CC="${ac_tool_prefix}cc"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+ { echo "$as_me:$LINENO: result: $CC" >&5
+echo "${ECHO_T}$CC" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+ fi
+fi
+# Fallback lookup of a plain "cc" on $PATH; runs only while CC is still empty.
+# An executable found at exactly /usr/ucb/cc is skipped, and the fact that it
+# was seen is remembered in ac_prog_rejected. The outcome is stored in
+# ac_cv_prog_CC, copied into CC and reported.
+if test -z "$CC"; then
+ # Extract the first word of "cc", so it can be a program name with args.
+set dummy cc; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_prog_CC+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$CC"; then
+ ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+ ac_prog_rejected=no
+# Split $PATH on $PATH_SEPARATOR; IFS is restored inside the loop and again
+# after it.
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ # An empty $PATH entry is treated as the current directory.
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then
+ ac_prog_rejected=yes
+ continue
+ fi
+ ac_cv_prog_CC="cc"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ # Leave both the extension loop and the directory loop.
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+if test $ac_prog_rejected = yes; then
+ # We found a bogon in the path, so make sure we never use it.
+ # Load the words of the chosen value into the positional parameters and
+ # drop the "dummy" placeholder; a non-zero count means something was chosen.
+ set dummy $ac_cv_prog_CC
+ shift
+ if test $# != 0; then
+ # We chose a different compiler from the bogus one.
+ # However, it has the same basename, so the bogon will be chosen
+ # first if we set CC to just the basename; use the full file name.
+ # The second shift discards the program word; any remaining words are
+ # appended after the full path.
+ shift
+ ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@"
+ fi
+fi
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+ { echo "$as_me:$LINENO: result: $CC" >&5
+echo "${ECHO_T}$CC" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+fi
+if test -z "$CC"; then
+ if test -n "$ac_tool_prefix"; then
+ for ac_prog in cl.exe
+ do
+ # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args.
+set dummy $ac_tool_prefix$ac_prog; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_prog_CC+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$CC"; then
+ ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_prog_CC="$ac_tool_prefix$ac_prog"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+ { echo "$as_me:$LINENO: result: $CC" >&5
+echo "${ECHO_T}$CC" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+ test -n "$CC" && break
+ done
+fi
+if test -z "$CC"; then
+ ac_ct_CC=$CC
+ for ac_prog in cl.exe
+do
+ # Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_prog_ac_ct_CC+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$ac_ct_CC"; then
+ ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_prog_ac_ct_CC="$ac_prog"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_CC=$ac_cv_prog_ac_ct_CC
+if test -n "$ac_ct_CC"; then
+ { echo "$as_me:$LINENO: result: $ac_ct_CC" >&5
+echo "${ECHO_T}$ac_ct_CC" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+ test -n "$ac_ct_CC" && break
+done
+
+ if test "x$ac_ct_CC" = x; then
+ CC=""
+ else
+ case $cross_compiling:$ac_tool_warned in
+yes:)
+{ echo "$as_me:$LINENO: WARNING: In the future, Autoconf will not detect cross-tools
+whose name does not start with the host triplet. If you think this
+configuration is useful to you, please write to autoconf@gnu.org." >&5
+echo "$as_me: WARNING: In the future, Autoconf will not detect cross-tools
+whose name does not start with the host triplet. If you think this
+configuration is useful to you, please write to autoconf@gnu.org." >&2;}
+ac_tool_warned=yes ;;
+esac
+ CC=$ac_ct_CC
+ fi
+fi
+
+fi
+
+
+# If every compiler lookup above left CC empty, write the error message to
+# file descriptor 5 and to standard error, then exit with status 1.
+test -z "$CC" && { { echo "$as_me:$LINENO: error: no acceptable C compiler found in \$PATH
+See \`config.log' for more details." >&5
+echo "$as_me: error: no acceptable C compiler found in \$PATH
+See \`config.log' for more details." >&2;}
+ { (exit 1); exit 1; }; }
+
+# Provide some information about the compiler.
+echo "$as_me:$LINENO: checking for C compiler version" >&5
+ac_compiler=`set X $ac_compile; echo $2`
+{ (ac_try="$ac_compiler --version >&5"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compiler --version >&5") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }
+{ (ac_try="$ac_compiler -v >&5"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compiler -v >&5") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }
+{ (ac_try="$ac_compiler -V >&5"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compiler -V >&5") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }
+
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+ac_clean_files_save=$ac_clean_files
+ac_clean_files="$ac_clean_files a.out a.exe b.out"
+# Try to create an executable without -o first, disregard a.out.
+# It will help us diagnose broken compilers, and finding out an intuition
+# of exeext.
+{ echo "$as_me:$LINENO: checking for C compiler default output file name" >&5
+echo $ECHO_N "checking for C compiler default output file name... $ECHO_C" >&6; }
+ac_link_default=`echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'`
+#
+# List of possible output files, starting from the most likely.
+# The algorithm is not robust to junk in `.', hence go to wildcards (a.*)
+# only as a last resort. b.out is created by i960 compilers.
+ac_files='a_out.exe a.exe conftest.exe a.out conftest a.* conftest.* b.out'
+#
+# The IRIX 6 linker writes into existing files which may not be
+# executable, retaining their permissions. Remove them first so a
+# subsequent execution test works.
+ac_rmfiles=
+for ac_file in $ac_files
+do
+ case $ac_file in
+ *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.o | *.obj ) ;;
+ * ) ac_rmfiles="$ac_rmfiles $ac_file";;
+ esac
+done
+rm -f $ac_rmfiles
+
+if { (ac_try="$ac_link_default"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link_default") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; then
+ # Autoconf-2.13 could set the ac_cv_exeext variable to `no'.
+# So ignore a value of `no', otherwise this would lead to `EXEEXT = no'
+# in a Makefile. We should not override ac_cv_exeext if it was cached,
+# so that the user can short-circuit this test for compilers unknown to
+# Autoconf.
+for ac_file in $ac_files ''
+do
+ test -f "$ac_file" || continue
+ case $ac_file in
+ *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.o | *.obj )
+ ;;
+ [ab].out )
+ # We found the default executable, but exeext='' is most
+ # certainly right.
+ break;;
+ *.* )
+ if test "${ac_cv_exeext+set}" = set && test "$ac_cv_exeext" != no;
+ then :; else
+ ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'`
+ fi
+ # We set ac_cv_exeext here because the later test for it is not
+ # safe: cross compilers may not add the suffix if given an `-o'
+ # argument, so we may need to know it at that point already.
+ # Even if this section looks crufty: it has the advantage of
+ # actually working.
+ break;;
+ * )
+ break;;
+ esac
+done
+test "$ac_cv_exeext" = no && ac_cv_exeext=
+
+else
+ ac_file=''
+fi
+
+{ echo "$as_me:$LINENO: result: $ac_file" >&5
+echo "${ECHO_T}$ac_file" >&6; }
+if test -z "$ac_file"; then
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+{ { echo "$as_me:$LINENO: error: C compiler cannot create executables
+See \`config.log' for more details." >&5
+echo "$as_me: error: C compiler cannot create executables
+See \`config.log' for more details." >&2;}
+ { (exit 77); exit 77; }; }
+fi
+
+ac_exeext=$ac_cv_exeext
+
+# Check that the compiler produces executables we can run. If not, either
+# the compiler is broken, or we cross compile.
+{ echo "$as_me:$LINENO: checking whether the C compiler works" >&5
+echo $ECHO_N "checking whether the C compiler works... $ECHO_C" >&6; }
+# FIXME: These cross compiler hacks should be removed for Autoconf 3.0
+# If not cross compiling, check that we can run a simple program.
+if test "$cross_compiling" != yes; then
+ if { ac_try='./$ac_file'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ cross_compiling=no
+ else
+ if test "$cross_compiling" = maybe; then
+ cross_compiling=yes
+ else
+ { { echo "$as_me:$LINENO: error: cannot run C compiled programs.
+If you meant to cross compile, use \`--host'.
+See \`config.log' for more details." >&5
+echo "$as_me: error: cannot run C compiled programs.
+If you meant to cross compile, use \`--host'.
+See \`config.log' for more details." >&2;}
+ { (exit 1); exit 1; }; }
+ fi
+ fi
+fi
+{ echo "$as_me:$LINENO: result: yes" >&5
+echo "${ECHO_T}yes" >&6; }
+
+rm -f a.out a.exe conftest$ac_cv_exeext b.out
+ac_clean_files=$ac_clean_files_save
+# Check that the compiler produces executables we can run. If not, either
+# the compiler is broken, or we cross compile.
+{ echo "$as_me:$LINENO: checking whether we are cross compiling" >&5
+echo $ECHO_N "checking whether we are cross compiling... $ECHO_C" >&6; }
+{ echo "$as_me:$LINENO: result: $cross_compiling" >&5
+echo "${ECHO_T}$cross_compiling" >&6; }
+
+{ echo "$as_me:$LINENO: checking for suffix of executables" >&5
+echo $ECHO_N "checking for suffix of executables... $ECHO_C" >&6; }
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; then
+ # If both `conftest.exe' and `conftest' are `present' (well, observable)
+# catch `conftest.exe'. For instance with Cygwin, `ls conftest' will
+# work properly (i.e., refer to `conftest.exe'), while it won't with
+# `rm'.
+for ac_file in conftest.exe conftest conftest.*; do
+ test -f "$ac_file" || continue
+ case $ac_file in
+ *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.o | *.obj ) ;;
+ *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'`
+ break;;
+ * ) break;;
+ esac
+done
+else
+ { { echo "$as_me:$LINENO: error: cannot compute suffix of executables: cannot compile and link
+See \`config.log' for more details." >&5
+echo "$as_me: error: cannot compute suffix of executables: cannot compile and link
+See \`config.log' for more details." >&2;}
+ { (exit 1); exit 1; }; }
+fi
+
+rm -f conftest$ac_cv_exeext
+{ echo "$as_me:$LINENO: result: $ac_cv_exeext" >&5
+echo "${ECHO_T}$ac_cv_exeext" >&6; }
+
+rm -f conftest.$ac_ext
+EXEEXT=$ac_cv_exeext
+ac_exeext=$EXEEXT
+{ echo "$as_me:$LINENO: checking for suffix of object files" >&5
+echo $ECHO_N "checking for suffix of object files... $ECHO_C" >&6; }
+if test "${ac_cv_objext+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.o conftest.obj
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; then
+ for ac_file in conftest.o conftest.obj conftest.*; do
+ test -f "$ac_file" || continue;
+ case $ac_file in
+ *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf ) ;;
+ *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'`
+ break;;
+ esac
+done
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+{ { echo "$as_me:$LINENO: error: cannot compute suffix of object files: cannot compile
+See \`config.log' for more details." >&5
+echo "$as_me: error: cannot compute suffix of object files: cannot compile
+See \`config.log' for more details." >&2;}
+ { (exit 1); exit 1; }; }
+fi
+
+rm -f conftest.$ac_cv_objext conftest.$ac_ext
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_objext" >&5
+echo "${ECHO_T}$ac_cv_objext" >&6; }
+OBJEXT=$ac_cv_objext
+ac_objext=$OBJEXT
+{ echo "$as_me:$LINENO: checking whether we are using the GNU C compiler" >&5
+echo $ECHO_N "checking whether we are using the GNU C compiler... $ECHO_C" >&6; }
+if test "${ac_cv_c_compiler_gnu+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+int
+main ()
+{
+#ifndef __GNUC__
+ choke me
+#endif
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
+ ac_compiler_gnu=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_compiler_gnu=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ac_cv_c_compiler_gnu=$ac_compiler_gnu
+
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_c_compiler_gnu" >&5
+echo "${ECHO_T}$ac_cv_c_compiler_gnu" >&6; }
+GCC=`test $ac_compiler_gnu = yes && echo yes`
+ac_test_CFLAGS=${CFLAGS+set}
+ac_save_CFLAGS=$CFLAGS
+{ echo "$as_me:$LINENO: checking whether $CC accepts -g" >&5
+echo $ECHO_N "checking whether $CC accepts -g... $ECHO_C" >&6; }
+if test "${ac_cv_prog_cc_g+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_save_c_werror_flag=$ac_c_werror_flag
+ ac_c_werror_flag=yes
+ ac_cv_prog_cc_g=no
+ CFLAGS="-g"
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
+ ac_cv_prog_cc_g=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ CFLAGS=""
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
+ :
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_c_werror_flag=$ac_save_c_werror_flag
+ CFLAGS="-g"
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
+ ac_cv_prog_cc_g=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ ac_c_werror_flag=$ac_save_c_werror_flag
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_prog_cc_g" >&5
+echo "${ECHO_T}$ac_cv_prog_cc_g" >&6; }
+if test "$ac_test_CFLAGS" = set; then
+ CFLAGS=$ac_save_CFLAGS
+elif test $ac_cv_prog_cc_g = yes; then
+ if test "$GCC" = yes; then
+ CFLAGS="-g -O2"
+ else
+ CFLAGS="-g"
+ fi
+else
+ if test "$GCC" = yes; then
+ CFLAGS="-O2"
+ else
+ CFLAGS=
+ fi
+fi
+{ echo "$as_me:$LINENO: checking for $CC option to accept ISO C89" >&5
+echo $ECHO_N "checking for $CC option to accept ISO C89... $ECHO_C" >&6; }
+if test "${ac_cv_prog_cc_c89+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_cv_prog_cc_c89=no
+ac_save_CC=$CC
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <stdarg.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */
+struct buf { int x; };
+FILE * (*rcsopen) (struct buf *, struct stat *, int);
+static char *e (p, i)
+ char **p;
+ int i;
+{
+ return p[i];
+}
+static char *f (char * (*g) (char **, int), char **p, ...)
+{
+ char *s;
+ va_list v;
+ va_start (v,p);
+ s = g (p, va_arg (v,int));
+ va_end (v);
+ return s;
+}
+
+/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has
+ function prototypes and stuff, but not '\xHH' hex character constants.
+ These don't provoke an error unfortunately, instead are silently treated
+ as 'x'. The following induces an error, until -std is added to get
+ proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an
+ array size at least. It's necessary to write '\x00'==0 to get something
+ that's true only with -std. */
+int osf4_cc_array ['\x00' == 0 ? 1 : -1];
+
+/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters
+ inside strings and character constants. */
+#define FOO(x) 'x'
+int xlc6_cc_array[FOO(a) == 'x' ? 1 : -1];
+
+int test (int i, double x);
+struct s1 {int (*f) (int a);};
+struct s2 {int (*f) (double a);};
+int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int);
+int argc;
+char **argv;
+int
+main ()
+{
+return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1];
+ ;
+ return 0;
+}
+_ACEOF
+for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \
+ -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__"
+do
+ CC="$ac_save_CC $ac_arg"
+ rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
+ ac_cv_prog_cc_c89=$ac_arg
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
+fi
+
+rm -f core conftest.err conftest.$ac_objext
+ test "x$ac_cv_prog_cc_c89" != "xno" && break
+done
+rm -f conftest.$ac_ext
+CC=$ac_save_CC
+
+fi
+# AC_CACHE_VAL
+case "x$ac_cv_prog_cc_c89" in
+ x)
+ { echo "$as_me:$LINENO: result: none needed" >&5
+echo "${ECHO_T}none needed" >&6; } ;;
+ xno)
+ { echo "$as_me:$LINENO: result: unsupported" >&5
+echo "${ECHO_T}unsupported" >&6; } ;;
+ *)
+ CC="$CC $ac_cv_prog_cc_c89"
+ { echo "$as_me:$LINENO: result: $ac_cv_prog_cc_c89" >&5
+echo "${ECHO_T}$ac_cv_prog_cc_c89" >&6; } ;;
+esac
+
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+# Platform-specific compiler adjustments, selected by the `uname' system name.
+#   HP-UX: when CC is exactly "cc", append "-Aa -D_HPUX_SOURCE" to CFLAGS.
+#   SunOS: on releases whose `uname -r' output contains "5.", when CC is
+#          exactly "cc" and `which cc' reports /usr/ucb/cc, append "-Xa" to CC.
+# Any other system name leaves CC and CFLAGS untouched.
+case `uname` in
+ HP-UX)
+ { echo "$as_me:$LINENO: checking HP-UX native compiler" >&5
+echo $ECHO_N "checking HP-UX native compiler... $ECHO_C" >&6; }
+ if test "$CC" = "cc"; then
+ { echo "$as_me:$LINENO: result: yes; adding compiler options for ANSI support" >&5
+echo "${ECHO_T}yes; adding compiler options for ANSI support" >&6; }
+ CFLAGS="$CFLAGS -Aa -D_HPUX_SOURCE"
+ else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+ fi
+ ;;
+ SunOS)
+ if uname -r | grep '5\.' >/dev/null 2>&1; then
+ { echo "$as_me:$LINENO: checking Solaris native compiler" >&5
+echo $ECHO_N "checking Solaris native compiler... $ECHO_C" >&6; }
+ # Two separate test invocations joined by && replace `test ... -a ...',
+ # which is not portable across shells.
+ if test "$CC" = "cc" && test "`which cc`" = "/usr/ucb/cc"; then
+ { echo "$as_me:$LINENO: result: yes; adding compiler option for ANSI support" >&5
+echo "${ECHO_T}yes; adding compiler option for ANSI support" >&6; }
+ CC="$CC -Xa"
+ else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+ fi
+ fi
+ ;;
+esac
+
+# Take the link command from $as_ln_s and report whether it is a genuine
+# `ln -s'; any other value is announced as the substitute that will be used.
+LN_S=$as_ln_s
+{ echo "$as_me:$LINENO: checking whether ln -s works" >&5
+echo $ECHO_N "checking whether ln -s works... $ECHO_C" >&6; }
+case $LN_S in
+ "ln -s")
+ { echo "$as_me:$LINENO: result: yes" >&5
+echo "${ECHO_T}yes" >&6; }
+ ;;
+ *)
+ { echo "$as_me:$LINENO: result: no, using $LN_S" >&5
+echo "${ECHO_T}no, using $LN_S" >&6; }
+ ;;
+esac
+
+# Look for a "strip" program. A cached ac_cv_prog_STRIP or a preset STRIP
+# takes precedence; otherwise every $PATH entry is searched, and if nothing
+# is found the value falls back to ":". The result is stored in STRIP.
+# Extract the first word of "strip", so it can be a program name with args.
+set dummy strip; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_prog_STRIP+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$STRIP"; then
+ ac_cv_prog_STRIP="$STRIP" # Let the user override the test.
+else
+# Split $PATH on $PATH_SEPARATOR; IFS is restored inside the loop and again
+# after it.
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ # An empty $PATH entry is treated as the current directory.
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_prog_STRIP="strip"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ # Leave both the extension loop and the directory loop.
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+ # Nothing found on $PATH: fall back to ":".
+ test -z "$ac_cv_prog_STRIP" && ac_cv_prog_STRIP=":"
+fi
+fi
+STRIP=$ac_cv_prog_STRIP
+if test -n "$STRIP"; then
+ { echo "$as_me:$LINENO: result: $STRIP" >&5
+echo "${ECHO_T}$STRIP" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+
+
+# Checks for operating environment
+# --------------------------------
+
+# Check for temporary directory
+# Candidates are tried in this order: $enable_tmpdir, then the TMPDIR, TMP
+# and TEMP environment variables, then "c:/" if that directory exists, and
+# finally "/tmp". The selected directory must exist; it is then recorded in
+# confdefs.h as the TMPDIR macro. Otherwise configure stops with exit
+# status 1.
+{ echo "$as_me:$LINENO: checking directory to use for temporary files" >&5
+echo $ECHO_N "checking directory to use for temporary files... $ECHO_C" >&6; }
+if test -n "$enable_tmpdir"; then
+ tmpdir="$enable_tmpdir"
+elif test -n "$TMPDIR"; then
+ tmpdir="$TMPDIR"
+elif test -n "$TMP"; then
+ tmpdir="$TMP"
+elif test -n "$TEMP"; then
+ tmpdir="$TEMP"
+elif test -d "c:/"; then
+ tmpdir="c:/"
+else
+ tmpdir="/tmp"
+fi
+# Quoted so that a directory name containing whitespace is tested as one word.
+if test -d "$tmpdir" ; then
+ { echo "$as_me:$LINENO: result: $tmpdir" >&5
+echo "${ECHO_T}$tmpdir" >&6; }
+ cat >>confdefs.h <<_ACEOF
+#define TMPDIR "$tmpdir"
+_ACEOF
+
+else
+ { { echo "$as_me:$LINENO: error: $tmpdir does not exist" >&5
+echo "$as_me: error: $tmpdir does not exist" >&2;}
+ { (exit 1); exit 1; }; }
+fi
+
+# Check whether system supports #! scripts
+# Unless a cached answer exists, a probe script is written whose first line
+# is `#! /bin/cat' and whose body is `exit 69'. If the #! line is honoured,
+# /bin/cat runs the file instead of a shell reaching `exit 69', so an exit
+# status of 69 is recorded as "no" and any other status as "yes".
+{ echo "$as_me:$LINENO: checking whether #! works in shell scripts" >&5
+echo $ECHO_N "checking whether #! works in shell scripts... $ECHO_C" >&6; }
+if test "${ac_cv_sys_interpreter+set}" != set; then
+ echo '#! /bin/cat
+exit 69
+' >conftest
+chmod u+x conftest
+(SHELL=/bin/sh; export SHELL; ./conftest >/dev/null 2>&1)
+case $? in
+ 69) ac_cv_sys_interpreter=no ;;
+ *) ac_cv_sys_interpreter=yes ;;
+esac
+rm -f conftest
+else
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_sys_interpreter" >&5
+echo "${ECHO_T}$ac_cv_sys_interpreter" >&6; }
+interpval=$ac_cv_sys_interpreter
+
+# Publish the positive answer to the C sources.
+if test "$interpval" = yes; then
+ cat >>confdefs.h <<\_ACEOF
+#define SYS_INTERPRETER 1
+_ACEOF
+
+fi
+
+# Test for case-insensitive filenames
+# A file is created under a lower-case name and then looked up under the
+# upper-case spelling of that name; finding it defines
+# CASE_INSENSITIVE_FILENAMES in confdefs.h.
+{ echo "$as_me:$LINENO: checking for case-insensitive filenames" >&5
+echo $ECHO_N "checking for case-insensitive filenames... $ECHO_C" >&6; }
+touch conftest.cif
+if test ! -f CONFTEST.CIF; then
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+else
+ { echo "$as_me:$LINENO: result: yes" >&5
+echo "${ECHO_T}yes" >&6; }
+ cat >>confdefs.h <<\_ACEOF
+#define CASE_INSENSITIVE_FILENAMES 1
+_ACEOF
+
+fi
+rm -f conftest.cif
+
+# Decide between the external sort utility and the internal sort algorithm.
+# Unless $enable_external_sort is "no", a "sort" program is searched for on
+# $PATH and then exercised with the options "-u -f -o" on a scratch file in
+# $tmpdir. Only when that run succeeds is EXTERNAL_SORT defined in
+# confdefs.h and enable_external_sort set to "yes"; in every other case the
+# closing test reports the internal algorithm.
+{ echo "$as_me:$LINENO: checking selected sort method" >&5
+echo $ECHO_N "checking selected sort method... $ECHO_C" >&6; }
+if test no = "$enable_external_sort"; then
+ { echo "$as_me:$LINENO: result: simple internal algorithm" >&5
+echo "${ECHO_T}simple internal algorithm" >&6; }
+else
+ { echo "$as_me:$LINENO: result: external sort utility" >&5
+echo "${ECHO_T}external sort utility" >&6; }
+ # Assume failure until the probe below has actually succeeded.
+ enable_external_sort=no
+ # Extract the first word of "sort", so it can be a program name with args.
+set dummy sort; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_prog_sort_found+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$sort_found"; then
+ ac_cv_prog_sort_found="$sort_found" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_prog_sort_found="yes"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+ test -z "$ac_cv_prog_sort_found" && ac_cv_prog_sort_found="no"
+fi
+fi
+sort_found=$ac_cv_prog_sort_found
+if test -n "$sort_found"; then
+ { echo "$as_me:$LINENO: result: $sort_found" >&5
+echo "${ECHO_T}$sort_found" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+ if test "$sort_found" = yes ; then
+ { echo "$as_me:$LINENO: checking if sort accepts our command line" >&5
+echo $ECHO_N "checking if sort accepts our command line... $ECHO_C" >&6; }
+ # The scratch path is quoted so a $tmpdir containing whitespace stays one
+ # word for touch, sort and rm.
+ touch "${tmpdir}/sort.test"
+ sort -u -f -o "${tmpdir}/sort.test" "${tmpdir}/sort.test" 1>/dev/null 2>&1
+ if test $? -ne 0 ; then
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+ else
+ { echo "$as_me:$LINENO: result: yes" >&5
+echo "${ECHO_T}yes" >&6; }
+ cat >>confdefs.h <<\_ACEOF
+#define EXTERNAL_SORT 1
+_ACEOF
+
+ enable_external_sort=yes
+ fi
+ rm -f "${tmpdir}/sort.test"
+ fi
+fi
+if test "$enable_external_sort" != yes ; then
+ { echo "$as_me:$LINENO: result: using internal sort algorithm as fallback" >&5
+echo "${ECHO_T}using internal sort algorithm as fallback" >&6; }
+fi
+
+
+# Checks for header files
+# -----------------------
+
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+{ echo "$as_me:$LINENO: checking how to run the C preprocessor" >&5
+echo $ECHO_N "checking how to run the C preprocessor... $ECHO_C" >&6; }
+# On Suns, sometimes $CPP names a directory.
+if test -n "$CPP" && test -d "$CPP"; then
+ CPP=
+fi
+if test -z "$CPP"; then
+ if test "${ac_cv_prog_CPP+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ # Double quotes because CPP needs to be expanded
+ for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp"
+ do
+ ac_preproc_ok=false
+for ac_c_preproc_warn_flag in '' yes
+do
+ # Use a header file that comes with gcc, so configuring glibc
+ # with a fresh cross-compiler works.
+ # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ # <limits.h> exists even on freestanding compilers.
+ # On the NeXT, cc -E runs the code through the compiler's parser,
+ # not just through cpp. "Syntax error" is here to catch this case.
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+ Syntax error
+_ACEOF
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null && {
+ test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ }; then
+ :
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ # Broken: fails on valid input.
+continue
+fi
+
+rm -f conftest.err conftest.$ac_ext
+
+ # OK, works on sane cases. Now check whether nonexistent headers
+ # can be detected and how.
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <ac_nonexistent.h>
+_ACEOF
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null && {
+ test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ }; then
+ # Broken: success on invalid input.
+continue
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ # Passes both tests.
+ac_preproc_ok=:
+break
+fi
+
+rm -f conftest.err conftest.$ac_ext
+
+done
+# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
+rm -f conftest.err conftest.$ac_ext
+if $ac_preproc_ok; then
+ break
+fi
+
+ done
+ ac_cv_prog_CPP=$CPP
+
+fi
+ CPP=$ac_cv_prog_CPP
+else
+ ac_cv_prog_CPP=$CPP
+fi
+{ echo "$as_me:$LINENO: result: $CPP" >&5
+echo "${ECHO_T}$CPP" >&6; }
+ac_preproc_ok=false
+for ac_c_preproc_warn_flag in '' yes
+do
+ # Use a header file that comes with gcc, so configuring glibc
+ # with a fresh cross-compiler works.
+ # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ # <limits.h> exists even on freestanding compilers.
+ # On the NeXT, cc -E runs the code through the compiler's parser,
+ # not just through cpp. "Syntax error" is here to catch this case.
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+ Syntax error
+_ACEOF
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null && {
+ test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ }; then
+ :
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ # Broken: fails on valid input.
+continue
+fi
+
+rm -f conftest.err conftest.$ac_ext
+
+ # OK, works on sane cases. Now check whether nonexistent headers
+ # can be detected and how.
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <ac_nonexistent.h>
+_ACEOF
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null && {
+ test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ }; then
+ # Broken: success on invalid input.
+continue
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ # Passes both tests.
+ac_preproc_ok=:
+break
+fi
+
+rm -f conftest.err conftest.$ac_ext
+
+done
+# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
+rm -f conftest.err conftest.$ac_ext
+if $ac_preproc_ok; then
+ :
+else
+ { { echo "$as_me:$LINENO: error: C preprocessor \"$CPP\" fails sanity check
+See \`config.log' for more details." >&5
+echo "$as_me: error: C preprocessor \"$CPP\" fails sanity check
+See \`config.log' for more details." >&2;}
+ { (exit 1); exit 1; }; }
+fi
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+{ echo "$as_me:$LINENO: checking for grep that handles long lines and -e" >&5
+echo $ECHO_N "checking for grep that handles long lines and -e... $ECHO_C" >&6; }
+if test "${ac_cv_path_GREP+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ # Extract the first word of "grep ggrep" to use in msg output
+if test -z "$GREP"; then
+set dummy grep ggrep; ac_prog_name=$2
+if test "${ac_cv_path_GREP+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_path_GREP_found=false
+# Loop through the user's path and test for each of PROGNAME-LIST
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_prog in grep ggrep; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext"
+ { test -f "$ac_path_GREP" && $as_test_x "$ac_path_GREP"; } || continue
+ # Prefer GNU grep: if $ac_path_GREP reports itself as GNU, select it and stop searching.
+ # Otherwise measure how long an input line it can handle and keep the best candidate.
+case `"$ac_path_GREP" --version 2>&1` in
+*GNU*)
+ ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;;
+*)
+ ac_count=0
+ echo $ECHO_N "0123456789$ECHO_C" >"conftest.in"
+ while :
+ do
+ cat "conftest.in" "conftest.in" >"conftest.tmp"
+ mv "conftest.tmp" "conftest.in"
+ cp "conftest.in" "conftest.nl"
+ echo 'GREP' >> "conftest.nl"
+ "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break
+ diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
+ ac_count=`expr $ac_count + 1`
+ if test $ac_count -gt ${ac_path_GREP_max-0}; then
+ # Best one so far, save it but keep looking for a better one
+ ac_cv_path_GREP="$ac_path_GREP"
+ ac_path_GREP_max=$ac_count
+ fi
+ # 10*(2^10) chars as input seems more than enough
+ test $ac_count -gt 10 && break
+ done
+ rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
+esac
+
+
+ $ac_path_GREP_found && break 3
+ done
+done
+
+done
+IFS=$as_save_IFS
+
+
+fi
+
+GREP="$ac_cv_path_GREP"
+if test -z "$GREP"; then
+ { { echo "$as_me:$LINENO: error: no acceptable $ac_prog_name could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&5
+echo "$as_me: error: no acceptable $ac_prog_name could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&2;}
+ { (exit 1); exit 1; }; }
+fi
+
+else
+ ac_cv_path_GREP=$GREP
+fi
+
+
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_path_GREP" >&5
+echo "${ECHO_T}$ac_cv_path_GREP" >&6; }
+ GREP="$ac_cv_path_GREP"
+
+
+{ echo "$as_me:$LINENO: checking for egrep" >&5
+echo $ECHO_N "checking for egrep... $ECHO_C" >&6; }
+if test "${ac_cv_path_EGREP+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if echo a | $GREP -E '(a|b)' >/dev/null 2>&1
+ then ac_cv_path_EGREP="$GREP -E"
+ else
+ # Extract the first word of "egrep" to use in msg output
+if test -z "$EGREP"; then
+set dummy egrep; ac_prog_name=$2
+if test "${ac_cv_path_EGREP+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_path_EGREP_found=false
+# Loop through the user's path and test for each of PROGNAME-LIST
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_prog in egrep; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext"
+ { test -f "$ac_path_EGREP" && $as_test_x "$ac_path_EGREP"; } || continue
+ # Prefer GNU egrep: if $ac_path_EGREP reports itself as GNU, select it and stop searching.
+ # Otherwise measure how long an input line it can handle and keep the best candidate.
+case `"$ac_path_EGREP" --version 2>&1` in
+*GNU*)
+ ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;;
+*)
+ ac_count=0
+ echo $ECHO_N "0123456789$ECHO_C" >"conftest.in"
+ while :
+ do
+ cat "conftest.in" "conftest.in" >"conftest.tmp"
+ mv "conftest.tmp" "conftest.in"
+ cp "conftest.in" "conftest.nl"
+ echo 'EGREP' >> "conftest.nl"
+ "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break
+ diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
+ ac_count=`expr $ac_count + 1`
+ if test $ac_count -gt ${ac_path_EGREP_max-0}; then
+ # Best one so far, save it but keep looking for a better one
+ ac_cv_path_EGREP="$ac_path_EGREP"
+ ac_path_EGREP_max=$ac_count
+ fi
+ # 10*(2^10) chars as input seems more than enough
+ test $ac_count -gt 10 && break
+ done
+ rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
+esac
+
+
+ $ac_path_EGREP_found && break 3
+ done
+done
+
+done
+IFS=$as_save_IFS
+
+
+fi
+
+EGREP="$ac_cv_path_EGREP"
+if test -z "$EGREP"; then
+ { { echo "$as_me:$LINENO: error: no acceptable $ac_prog_name could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&5
+echo "$as_me: error: no acceptable $ac_prog_name could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&2;}
+ { (exit 1); exit 1; }; }
+fi
+
+else
+ ac_cv_path_EGREP=$EGREP
+fi
+
+
+ fi
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_path_EGREP" >&5
+echo "${ECHO_T}$ac_cv_path_EGREP" >&6; }
+ EGREP="$ac_cv_path_EGREP"
+
+
+{ echo "$as_me:$LINENO: checking for ANSI C header files" >&5
+echo $ECHO_N "checking for ANSI C header files... $ECHO_C" >&6; }
+if test "${ac_cv_header_stdc+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <float.h>
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
+ ac_cv_header_stdc=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_header_stdc=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+if test $ac_cv_header_stdc = yes; then
+ # SunOS 4.x string.h does not declare mem*, contrary to ANSI.
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <string.h>
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ $EGREP "memchr" >/dev/null 2>&1; then
+ :
+else
+ ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+fi
+
+if test $ac_cv_header_stdc = yes; then
+ # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI.
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <stdlib.h>
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ $EGREP "free" >/dev/null 2>&1; then
+ :
+else
+ ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+fi
+
+if test $ac_cv_header_stdc = yes; then
+ # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi.
+ if test "$cross_compiling" = yes; then
+ :
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <ctype.h>
+#include <stdlib.h>
+#if ((' ' & 0x0FF) == 0x020)
+# define ISLOWER(c) ('a' <= (c) && (c) <= 'z')
+# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c))
+#else
+# define ISLOWER(c) \
+ (('a' <= (c) && (c) <= 'i') \
+ || ('j' <= (c) && (c) <= 'r') \
+ || ('s' <= (c) && (c) <= 'z'))
+# define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c))
+#endif
+
+#define XOR(e, f) (((e) && !(f)) || (!(e) && (f)))
+int
+main ()
+{
+ int i;
+ for (i = 0; i < 256; i++)
+ if (XOR (islower (i), ISLOWER (i))
+ || toupper (i) != TOUPPER (i))
+ return 2;
+ return 0;
+}
+_ACEOF
+rm -f conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && { ac_try='./conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ :
+else
+ echo "$as_me: program exited with status $ac_status" >&5
+echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+( exit $ac_status )
+ac_cv_header_stdc=no
+fi
+rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext
+fi
+
+
+fi
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_header_stdc" >&5
+echo "${ECHO_T}$ac_cv_header_stdc" >&6; }
+if test $ac_cv_header_stdc = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define STDC_HEADERS 1
+_ACEOF
+
+fi
+
+# On IRIX 5.3, sys/types.h and inttypes.h conflict with each other.
+
+
+
+
+
+
+
+
+
+for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \
+ inttypes.h stdint.h unistd.h
+do
+as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh`
+{ echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+
+#include <$ac_header>
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
+ eval "$as_ac_Header=yes"
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ eval "$as_ac_Header=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+ac_res=`eval echo '${'$as_ac_Header'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+if test `eval echo '${'$as_ac_Header'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+
+done
+
+
+
+
+
+
+for ac_header in $ac_header_list
+do
+as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh`
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ { echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+fi
+ac_res=`eval echo '${'$as_ac_Header'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+else
+ # Is the header compilable?
+{ echo "$as_me:$LINENO: checking $ac_header usability" >&5
+echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+#include <$ac_header>
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
+ ac_header_compiler=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_header_compiler=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5
+echo "${ECHO_T}$ac_header_compiler" >&6; }
+
+# Is the header present?
+{ echo "$as_me:$LINENO: checking $ac_header presence" >&5
+echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <$ac_header>
+_ACEOF
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null && {
+ test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ }; then
+ ac_header_preproc=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_header_preproc=no
+fi
+
+rm -f conftest.err conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5
+echo "${ECHO_T}$ac_header_preproc" >&6; }
+
+# So? What about this header?
+case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in
+ yes:no: )
+ { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5
+echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;}
+ ac_header_preproc=yes
+ ;;
+ no:yes:* )
+ { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5
+echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5
+echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5
+echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5
+echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5
+echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;}
+
+ ;;
+esac
+{ echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ eval "$as_ac_Header=\$ac_header_preproc"
+fi
+ac_res=`eval echo '${'$as_ac_Header'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+
+fi
+if test `eval echo '${'$as_ac_Header'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+
+done
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+# Checks for header file macros
+# -----------------------------
+
+
+ { echo "$as_me:$LINENO: checking if L_tmpnam is defined in stdio.h" >&5
+echo $ECHO_N "checking if L_tmpnam is defined in stdio.h... $ECHO_C" >&6; }
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <stdio.h>
+#ifdef L_tmpnam
+ yes
+#endif
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ $EGREP "yes" >/dev/null 2>&1; then
+
+ { echo "$as_me:$LINENO: result: yes" >&5
+echo "${ECHO_T}yes" >&6; }
+
+
+else
+
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+ cat >>confdefs.h <<\_ACEOF
+#define L_tmpnam 20
+_ACEOF
+
+
+fi
+rm -f conftest*
+
+
+
+ { echo "$as_me:$LINENO: checking if INT_MAX is defined in limits.h" >&5
+echo $ECHO_N "checking if INT_MAX is defined in limits.h... $ECHO_C" >&6; }
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <limits.h>
+#ifdef INT_MAX
+ yes
+#endif
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ $EGREP "yes" >/dev/null 2>&1; then
+
+ { echo "$as_me:$LINENO: result: yes" >&5
+echo "${ECHO_T}yes" >&6; }
+
+
+else
+
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+
+ { echo "$as_me:$LINENO: checking if MAXINT is defined in limits.h" >&5
+echo $ECHO_N "checking if MAXINT is defined in limits.h... $ECHO_C" >&6; }
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <limits.h>
+#ifdef MAXINT
+ yes
+#endif
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ $EGREP "yes" >/dev/null 2>&1; then
+
+ { echo "$as_me:$LINENO: result: yes" >&5
+echo "${ECHO_T}yes" >&6; }
+ cat >>confdefs.h <<\_ACEOF
+#define INT_MAX MAXINT
+_ACEOF
+
+
+else
+
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+ cat >>confdefs.h <<\_ACEOF
+#define INT_MAX 32767
+_ACEOF
+
+
+fi
+rm -f conftest*
+
+
+fi
+rm -f conftest*
+
+
+
+# Checks for typedefs
+# -------------------
+
+{ echo "$as_me:$LINENO: checking for size_t" >&5
+echo $ECHO_N "checking for size_t... $ECHO_C" >&6; }
+if test "${ac_cv_type_size_t+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+typedef size_t ac__type_new_;
+int
+main ()
+{
+if ((ac__type_new_ *) 0)
+ return 0;
+if (sizeof (ac__type_new_))
+ return 0;
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
+ ac_cv_type_size_t=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_type_size_t=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_type_size_t" >&5
+echo "${ECHO_T}$ac_cv_type_size_t" >&6; }
+if test $ac_cv_type_size_t = yes; then
+ :
+else
+
+cat >>confdefs.h <<_ACEOF
+#define size_t unsigned int
+_ACEOF
+
+fi
+
+{ echo "$as_me:$LINENO: checking for off_t" >&5
+echo $ECHO_N "checking for off_t... $ECHO_C" >&6; }
+if test "${ac_cv_type_off_t+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+typedef off_t ac__type_new_;
+int
+main ()
+{
+if ((ac__type_new_ *) 0)
+ return 0;
+if (sizeof (ac__type_new_))
+ return 0;
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
+ ac_cv_type_off_t=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_type_off_t=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_type_off_t" >&5
+echo "${ECHO_T}$ac_cv_type_off_t" >&6; }
+if test $ac_cv_type_off_t = yes; then
+ :
+else
+
+cat >>confdefs.h <<_ACEOF
+#define off_t long int
+_ACEOF
+
+fi
+
+
+{ echo "$as_me:$LINENO: checking for fpos_t" >&5
+echo $ECHO_N "checking for fpos_t... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <stdio.h>
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ $EGREP "fpos_t" >/dev/null 2>&1; then
+ { echo "$as_me:$LINENO: result: yes" >&5
+echo "${ECHO_T}yes" >&6; }
+else
+
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+ cat >>confdefs.h <<\_ACEOF
+#define fpos_t long
+_ACEOF
+
+
+fi
+rm -f conftest*
+
+
+{ echo "$as_me:$LINENO: checking for clock_t" >&5
+echo $ECHO_N "checking for clock_t... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <time.h>
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ $EGREP "clock_t" >/dev/null 2>&1; then
+ { echo "$as_me:$LINENO: result: yes" >&5
+echo "${ECHO_T}yes" >&6; }
+else
+
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+ cat >>confdefs.h <<\_ACEOF
+#define clock_t long
+_ACEOF
+
+
+fi
+rm -f conftest*
+
+
+
+# Checks for compiler characteristics
+# -----------------------------------
+
+# AC_CYGWIN
+# AC_MINGW32
+{ echo "$as_me:$LINENO: checking for an ANSI C-conforming const" >&5
+echo $ECHO_N "checking for an ANSI C-conforming const... $ECHO_C" >&6; }
+if test "${ac_cv_c_const+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+int
+main ()
+{
+/* FIXME: Include the comments suggested by Paul. */
+#ifndef __cplusplus
+ /* Ultrix mips cc rejects this. */
+ typedef int charset[2];
+ const charset cs;
+ /* SunOS 4.1.1 cc rejects this. */
+ char const *const *pcpcc;
+ char **ppc;
+ /* NEC SVR4.0.2 mips cc rejects this. */
+ struct point {int x, y;};
+ static struct point const zero = {0,0};
+ /* AIX XL C 1.02.0.0 rejects this.
+ It does not let you subtract one const X* pointer from another in
+ an arm of an if-expression whose if-part is not a constant
+ expression */
+ const char *g = "string";
+ pcpcc = &g + (g ? g-g : 0);
+ /* HPUX 7.0 cc rejects these. */
+ ++pcpcc;
+ ppc = (char**) pcpcc;
+ pcpcc = (char const *const *) ppc;
+ { /* SCO 3.2v4 cc rejects this. */
+ char *t;
+ char const *s = 0 ? (char *) 0 : (char const *) 0;
+
+ *t++ = 0;
+ if (s) return 0;
+ }
+ { /* Someone thinks the Sun supposedly-ANSI compiler will reject this. */
+ int x[] = {25, 17};
+ const int *foo = &x[0];
+ ++foo;
+ }
+ { /* Sun SC1.0 ANSI compiler rejects this -- but not the above. */
+ typedef const int *iptr;
+ iptr p = 0;
+ ++p;
+ }
+ { /* AIX XL C 1.02.0.0 rejects this saying
+ "k.c", line 2.27: 1506-025 (S) Operand must be a modifiable lvalue. */
+ struct s { int j; const int *ap[3]; };
+ struct s *b; b->j = 5;
+ }
+ { /* ULTRIX-32 V3.1 (Rev 9) vcc rejects this */
+ const int foo = 10;
+ if (!foo) return 0;
+ }
+ return !cs[0] && !zero.x;
+#endif
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
+ ac_cv_c_const=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_c_const=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_c_const" >&5
+echo "${ECHO_T}$ac_cv_c_const" >&6; }
+if test $ac_cv_c_const = no; then
+
+cat >>confdefs.h <<\_ACEOF
+#define const
+_ACEOF
+
+fi
+
+
+# Compile-only probe: does struct stat (from <sys/stat.h>) have an st_ino member?
+# The probe returns from main() instead of calling exit(), which is undeclared here.
+{ echo "$as_me:$LINENO: checking if struct stat contains st_ino" >&5
+echo $ECHO_N "checking if struct stat contains st_ino... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <sys/stat.h>
+int
+main ()
+{
+
+ struct stat st;
+ stat(".", &st);
+ if (st.st_ino > 0)
+ return 0;
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
+ have_st_ino=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ have_st_ino=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $have_st_ino" >&5
+echo "${ECHO_T}$have_st_ino" >&6; }
+if test yes = "$have_st_ino"; then
+ cat >>confdefs.h <<\_ACEOF
+#define HAVE_STAT_ST_INO 1
+_ACEOF
+
+fi
+
+
+# Checks for library functions
+# ----------------------------
+
+
+# Probe for fnmatch().  A stub C program that references the function is
+# linked with $ac_link; the yes/no outcome is stored in the variable named
+# by $as_ac_var and, on "yes", a HAVE_ macro is appended to confdefs.h.
+for ac_func in fnmatch
+do
+# Result variable name is derived from "ac_cv_func_<name>" through $as_tr_sh
+# (presumably sanitised into a valid shell identifier -- defined elsewhere).
+as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
+{ echo "$as_me:$LINENO: checking for $ac_func" >&5
+echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; }
+# If the result variable is already set, reuse it and skip the link test.
+if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
+ For example, HP-UX 11i <limits.h> declares gettimeofday. */
+#define $ac_func innocuous_$ac_func
+
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char $ac_func (); below.
+ Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ <limits.h> exists even on freestanding compilers. */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $ac_func
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $ac_func ();
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined __stub_$ac_func || defined __stub___$ac_func
+choke me
+#endif
+
+int
+main ()
+{
+return $ac_func ();
+ ;
+ return 0;
+}
+_ACEOF
+# Link the stub.  Success needs a zero exit status, an empty conftest.err
+# unless $ac_c_werror_flag is empty, and a non-empty conftest$ac_exeext
+# that passes $as_test_x.
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest$ac_exeext &&
+ $as_test_x conftest$ac_exeext; then
+ eval "$as_ac_var=yes"
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ eval "$as_ac_var=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+# Report the result on descriptors 5 and 6, then record the HAVE_ macro
+# (name produced by $as_tr_cpp) when the function was found.
+ac_res=`eval echo '${'$as_ac_var'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+if test `eval echo '${'$as_ac_var'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+done
+
+
+# Probe for strstr() by linking a stub that references it.  The yes/no
+# outcome lands in the variable named by $as_ac_var; "yes" appends a HAVE_
+# macro to confdefs.h.
+for ac_func in strstr
+do
+as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
+{ echo "$as_me:$LINENO: checking for $ac_func" >&5
+echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; }
+# A result variable that is already set short-circuits the link test.
+if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
+ For example, HP-UX 11i <limits.h> declares gettimeofday. */
+#define $ac_func innocuous_$ac_func
+
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char $ac_func (); below.
+ Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ <limits.h> exists even on freestanding compilers. */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $ac_func
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $ac_func ();
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined __stub_$ac_func || defined __stub___$ac_func
+choke me
+#endif
+
+int
+main ()
+{
+return $ac_func ();
+ ;
+ return 0;
+}
+_ACEOF
+# Run the link command; stderr is captured in conftest.er1, filtered into
+# conftest.err (lines matching '^ *+' dropped) and copied to descriptor 5.
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest$ac_exeext &&
+ $as_test_x conftest$ac_exeext; then
+ eval "$as_ac_var=yes"
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ eval "$as_ac_var=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+# Print the outcome and define the HAVE_ macro when it is "yes".
+ac_res=`eval echo '${'$as_ac_var'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+if test `eval echo '${'$as_ac_var'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+done
+
+
+
+# Probe for a case-insensitive string compare: strcasecmp first, then
+# stricmp.  The loop breaks on the first function that links, so at most
+# one HAVE_ macro of the pair is written to confdefs.h.
+for ac_func in strcasecmp stricmp
+do
+as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
+{ echo "$as_me:$LINENO: checking for $ac_func" >&5
+echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; }
+# Reuse an already-set result variable instead of relinking.
+if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
+ For example, HP-UX 11i <limits.h> declares gettimeofday. */
+#define $ac_func innocuous_$ac_func
+
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char $ac_func (); below.
+ Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ <limits.h> exists even on freestanding compilers. */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $ac_func
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $ac_func ();
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined __stub_$ac_func || defined __stub___$ac_func
+choke me
+#endif
+
+int
+main ()
+{
+return $ac_func ();
+ ;
+ return 0;
+}
+_ACEOF
+# Link attempt: the function counts as present only when the link command
+# succeeds and leaves a non-empty conftest$ac_exeext accepted by $as_test_x.
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest$ac_exeext &&
+ $as_test_x conftest$ac_exeext; then
+ eval "$as_ac_var=yes"
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ eval "$as_ac_var=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+# On "yes": define the HAVE_ macro and stop trying further candidates.
+ac_res=`eval echo '${'$as_ac_var'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+if test `eval echo '${'$as_ac_var'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+ break
+fi
+done
+
+
+
+# Probe for a length-limited case-insensitive compare: strncasecmp, then
+# strnicmp.  The first candidate that links gets its HAVE_ macro and ends
+# the loop via "break".
+for ac_func in strncasecmp strnicmp
+do
+as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
+{ echo "$as_me:$LINENO: checking for $ac_func" >&5
+echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; }
+# Skip the link test if the result variable was set beforehand.
+if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
+ For example, HP-UX 11i <limits.h> declares gettimeofday. */
+#define $ac_func innocuous_$ac_func
+
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char $ac_func (); below.
+ Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ <limits.h> exists even on freestanding compilers. */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $ac_func
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $ac_func ();
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined __stub_$ac_func || defined __stub___$ac_func
+choke me
+#endif
+
+int
+main ()
+{
+return $ac_func ();
+ ;
+ return 0;
+}
+_ACEOF
+# Try to link the stub; the exit status and filtered stderr are sent to
+# descriptor 5 before the success conditions are evaluated.
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest$ac_exeext &&
+ $as_test_x conftest$ac_exeext; then
+ eval "$as_ac_var=yes"
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ eval "$as_ac_var=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+# Report; a "yes" defines the HAVE_ macro and leaves the loop early.
+ac_res=`eval echo '${'$as_ac_var'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+if test `eval echo '${'$as_ac_var'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+ break
+fi
+done
+
+
+# Probe for fgetpos().  Besides the HAVE_ macro, a successful probe sets the
+# shell flag have_fgetpos=yes.  NOTE(review): the flag is not initialised
+# in this block, so a value inherited from elsewhere would be kept.
+for ac_func in fgetpos
+do
+as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
+{ echo "$as_me:$LINENO: checking for $ac_func" >&5
+echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; }
+# An already-set result variable is reported as "(cached)" and reused.
+if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
+ For example, HP-UX 11i <limits.h> declares gettimeofday. */
+#define $ac_func innocuous_$ac_func
+
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char $ac_func (); below.
+ Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ <limits.h> exists even on freestanding compilers. */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $ac_func
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $ac_func ();
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined __stub_$ac_func || defined __stub___$ac_func
+choke me
+#endif
+
+int
+main ()
+{
+return $ac_func ();
+ ;
+ return 0;
+}
+_ACEOF
+# Link the stub program and store yes/no in the result variable.
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest$ac_exeext &&
+ $as_test_x conftest$ac_exeext; then
+ eval "$as_ac_var=yes"
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ eval "$as_ac_var=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+# On "yes": define the HAVE_ macro and raise have_fgetpos for later tests.
+ac_res=`eval echo '${'$as_ac_var'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+if test `eval echo '${'$as_ac_var'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+ have_fgetpos=yes
+fi
+done
+
+
+# SEEK_SET should be in stdio.h, but may be in unistd.h on SunOS 4.1.x
+#
+# Runs only when have_fgetpos is not "yes".  Each header is preprocessed
+# with $ac_cpp and the output searched for the word "yes", which the test
+# source emits when SEEK_SET is a defined macro.  If neither stdio.h nor
+# unistd.h provides it, "#define SEEK_SET 0" is appended to confdefs.h.
+# NOTE(review): the pattern is the bare string "yes", so any other "yes"
+# in the preprocessed header would also count as a match.
+if test "$have_fgetpos" != yes ; then
+
+ { echo "$as_me:$LINENO: checking if SEEK_SET is defined in stdio.h" >&5
+echo $ECHO_N "checking if SEEK_SET is defined in stdio.h... $ECHO_C" >&6; }
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <stdio.h>
+#ifdef SEEK_SET
+ yes
+#endif
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ $EGREP "yes" >/dev/null 2>&1; then
+
+ { echo "$as_me:$LINENO: result: yes" >&5
+echo "${ECHO_T}yes" >&6; }
+
+
+else
+
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+
+# Second attempt: look for the macro in unistd.h instead.
+ { echo "$as_me:$LINENO: checking if SEEK_SET is defined in unistd.h" >&5
+echo $ECHO_N "checking if SEEK_SET is defined in unistd.h... $ECHO_C" >&6; }
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <unistd.h>
+#ifdef SEEK_SET
+ yes
+#endif
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ $EGREP "yes" >/dev/null 2>&1; then
+
+ { echo "$as_me:$LINENO: result: yes" >&5
+echo "${ECHO_T}yes" >&6; }
+
+
+else
+
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+# Neither header defines it: supply the fallback value 0.
+ cat >>confdefs.h <<\_ACEOF
+#define SEEK_SET 0
+_ACEOF
+
+
+fi
+rm -f conftest*
+
+
+fi
+rm -f conftest*
+
+fi
+
+
+# Probe for mkstemp().  A successful link defines the HAVE_ macro and sets
+# have_mkstemp=yes, which later tests read.  NOTE(review): have_mkstemp is
+# not initialised in this block.
+for ac_func in mkstemp
+do
+as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
+{ echo "$as_me:$LINENO: checking for $ac_func" >&5
+echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; }
+# Use the existing value when the result variable is already set.
+if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
+ For example, HP-UX 11i <limits.h> declares gettimeofday. */
+#define $ac_func innocuous_$ac_func
+
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char $ac_func (); below.
+ Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ <limits.h> exists even on freestanding compilers. */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $ac_func
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $ac_func ();
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined __stub_$ac_func || defined __stub___$ac_func
+choke me
+#endif
+
+int
+main ()
+{
+return $ac_func ();
+ ;
+ return 0;
+}
+_ACEOF
+# Link the stub; on failure the test source is dumped to descriptor 5.
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest$ac_exeext &&
+ $as_test_x conftest$ac_exeext; then
+ eval "$as_ac_var=yes"
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ eval "$as_ac_var=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+# On "yes": define the HAVE_ macro and set have_mkstemp.
+ac_res=`eval echo '${'$as_ac_var'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+if test `eval echo '${'$as_ac_var'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+ have_mkstemp=yes
+fi
+done
+
+# Fallback probe: tempnam() is only looked for when have_mkstemp is not
+# "yes".  A successful link defines the HAVE_ macro and sets
+# have_tempnam=yes.
+if test "$have_mkstemp" != yes ; then
+
+for ac_func in tempnam
+do
+as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
+{ echo "$as_me:$LINENO: checking for $ac_func" >&5
+echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; }
+# Already-set result variable: report "(cached)" and skip the link test.
+if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
+ For example, HP-UX 11i <limits.h> declares gettimeofday. */
+#define $ac_func innocuous_$ac_func
+
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char $ac_func (); below.
+ Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ <limits.h> exists even on freestanding compilers. */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $ac_func
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $ac_func ();
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined __stub_$ac_func || defined __stub___$ac_func
+choke me
+#endif
+
+int
+main ()
+{
+return $ac_func ();
+ ;
+ return 0;
+}
+_ACEOF
+# Link the stub and record yes/no in the result variable.
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest$ac_exeext &&
+ $as_test_x conftest$ac_exeext; then
+ eval "$as_ac_var=yes"
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ eval "$as_ac_var=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+# On "yes": define the HAVE_ macro and set have_tempnam.
+ac_res=`eval echo '${'$as_ac_var'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+if test `eval echo '${'$as_ac_var'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+ have_tempnam=yes
+fi
+done
+
+fi
+if test "$have_mkstemp" != yes -a "$have_tempnam" != yes; then
+
+for ac_func in chmod
+do
+as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
+{ echo "$as_me:$LINENO: checking for $ac_func" >&5
+echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; }
+if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
+ For example, HP-UX 11i <limits.h> declares gettimeofday. */
+#define $ac_func innocuous_$ac_func
+
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char $ac_func (); below.
+ Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ <limits.h> exists even on freestanding compilers. */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $ac_func
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $ac_func ();
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined __stub_$ac_func || defined __stub___$ac_func
+choke me
+#endif
+
+int
+main ()
+{
+return $ac_func ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest$ac_exeext &&
+ $as_test_x conftest$ac_exeext; then
+ eval "$as_ac_var=yes"
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ eval "$as_ac_var=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+ac_res=`eval echo '${'$as_ac_var'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+if test `eval echo '${'$as_ac_var'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+done
+
+ if test "$tmpdir_specified" = yes ; then
+ { echo "$as_me:$LINENO: result: use of tmpnam overrides temporary directory selection" >&5
+echo "${ECHO_T}use of tmpnam overrides temporary directory selection" >&6; }
+ fi
+fi
+
+
+
+
+# Probe for a directory-scanning function, trying opendir, findfirst and
+# _findfirst in that order.  The loop breaks on the first one that links,
+# so only that candidate's HAVE_ macro is written to confdefs.h.
+for ac_func in opendir findfirst _findfirst
+do
+as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
+{ echo "$as_me:$LINENO: checking for $ac_func" >&5
+echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; }
+# Reuse an already-set result variable instead of relinking.
+if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
+ For example, HP-UX 11i <limits.h> declares gettimeofday. */
+#define $ac_func innocuous_$ac_func
+
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char $ac_func (); below.
+ Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ <limits.h> exists even on freestanding compilers. */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $ac_func
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $ac_func ();
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined __stub_$ac_func || defined __stub___$ac_func
+choke me
+#endif
+
+int
+main ()
+{
+return $ac_func ();
+ ;
+ return 0;
+}
+_ACEOF
+# Link the stub and record yes/no in the result variable.
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest$ac_exeext &&
+ $as_test_x conftest$ac_exeext; then
+ eval "$as_ac_var=yes"
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ eval "$as_ac_var=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+# On "yes": define the HAVE_ macro and stop trying further candidates.
+ac_res=`eval echo '${'$as_ac_var'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+if test `eval echo '${'$as_ac_var'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+ break
+fi
+done
+
+
+# Probe for strerror() by linking a stub that references it; "yes" appends
+# the HAVE_ macro to confdefs.h.
+for ac_func in strerror
+do
+as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
+{ echo "$as_me:$LINENO: checking for $ac_func" >&5
+echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; }
+# A result variable that is already set short-circuits the link test.
+if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
+ For example, HP-UX 11i <limits.h> declares gettimeofday. */
+#define $ac_func innocuous_$ac_func
+
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char $ac_func (); below.
+ Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ <limits.h> exists even on freestanding compilers. */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $ac_func
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $ac_func ();
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined __stub_$ac_func || defined __stub___$ac_func
+choke me
+#endif
+
+int
+main ()
+{
+return $ac_func ();
+ ;
+ return 0;
+}
+_ACEOF
+# Link the stub and record yes/no in the result variable.
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest$ac_exeext &&
+ $as_test_x conftest$ac_exeext; then
+ eval "$as_ac_var=yes"
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ eval "$as_ac_var=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+# Print the outcome and define the HAVE_ macro when it is "yes".
+ac_res=`eval echo '${'$as_ac_var'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+if test `eval echo '${'$as_ac_var'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+done
+
+
+
+# Probe for a timing function: clock first, then times.  The first one
+# that links gets its HAVE_ macro and the loop stops via "break".
+for ac_func in clock times
+do
+as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
+{ echo "$as_me:$LINENO: checking for $ac_func" >&5
+echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; }
+# Skip the link test if the result variable was set beforehand.
+if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
+ For example, HP-UX 11i <limits.h> declares gettimeofday. */
+#define $ac_func innocuous_$ac_func
+
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char $ac_func (); below.
+ Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ <limits.h> exists even on freestanding compilers. */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $ac_func
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $ac_func ();
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined __stub_$ac_func || defined __stub___$ac_func
+choke me
+#endif
+
+int
+main ()
+{
+return $ac_func ();
+ ;
+ return 0;
+}
+_ACEOF
+# Link the stub and record yes/no in the result variable.
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest$ac_exeext &&
+ $as_test_x conftest$ac_exeext; then
+ eval "$as_ac_var=yes"
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ eval "$as_ac_var=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+# Report; a "yes" defines the HAVE_ macro and leaves the loop early.
+ac_res=`eval echo '${'$as_ac_var'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+if test `eval echo '${'$as_ac_var'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+ break
+fi
+done
+
+
+# Probe for remove().  When the link test succeeds the HAVE_ macro is
+# defined and have_remove=yes is set.  Otherwise unistd.h is preprocessed
+# to see whether "remove" is a macro there; if it is not, confdefs.h gets
+# "#define remove unlink" so callers of remove() end up calling unlink().
+for ac_func in remove
+do
+as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
+{ echo "$as_me:$LINENO: checking for $ac_func" >&5
+echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; }
+# Already-set result variable: report "(cached)" and skip the link test.
+if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
+ For example, HP-UX 11i <limits.h> declares gettimeofday. */
+#define $ac_func innocuous_$ac_func
+
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char $ac_func (); below.
+ Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ <limits.h> exists even on freestanding compilers. */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $ac_func
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $ac_func ();
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined __stub_$ac_func || defined __stub___$ac_func
+choke me
+#endif
+
+int
+main ()
+{
+return $ac_func ();
+ ;
+ return 0;
+}
+_ACEOF
+# Link the stub and record yes/no in the result variable.
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest$ac_exeext &&
+ $as_test_x conftest$ac_exeext; then
+ eval "$as_ac_var=yes"
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ eval "$as_ac_var=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+# "yes" branch: HAVE_ macro plus have_remove flag.  "no" branch: header
+# check that searches the preprocessed unistd.h output for the word "yes".
+# NOTE(review): the bare "yes" pattern would also match an unrelated "yes"
+# in the header output.
+ac_res=`eval echo '${'$as_ac_var'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+if test `eval echo '${'$as_ac_var'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+ have_remove=yes
+else
+
+ { echo "$as_me:$LINENO: checking if remove is defined in unistd.h" >&5
+echo $ECHO_N "checking if remove is defined in unistd.h... $ECHO_C" >&6; }
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <unistd.h>
+#ifdef remove
+ yes
+#endif
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ $EGREP "yes" >/dev/null 2>&1; then
+
+ { echo "$as_me:$LINENO: result: yes" >&5
+echo "${ECHO_T}yes" >&6; }
+
+
+else
+
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+ cat >>confdefs.h <<\_ACEOF
+#define remove unlink
+_ACEOF
+
+
+fi
+rm -f conftest*
+
+fi
+done
+
+
+
+# Probe for truncate() by linking a small program that references the symbol.
+# The verdict lives in the variable named by $as_ac_var (ac_cv_func_truncate
+# passed through $as_tr_sh); if it is already set, it is reused as "(cached)".
+for ac_func in truncate
+do
+as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
+{ echo "$as_me:$LINENO: checking for $ac_func" >&5
+echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; }
+if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
+ For example, HP-UX 11i <limits.h> declares gettimeofday. */
+#define $ac_func innocuous_$ac_func
+
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char $ac_func (); below.
+ Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ <limits.h> exists even on freestanding compilers. */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $ac_func
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $ac_func ();
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined __stub_$ac_func || defined __stub___$ac_func
+choke me
+#endif
+
+int
+main ()
+{
+return $ac_func ();
+ ;
+ return 0;
+}
+_ACEOF
+# Link the probe. It counts as a success only if the link exits 0, stderr is
+# empty whenever $ac_c_werror_flag is set, and the resulting executable is a
+# non-empty file that passes the $as_test_x check.
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest$ac_exeext &&
+ $as_test_x conftest$ac_exeext; then
+ eval "$as_ac_var=yes"
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ eval "$as_ac_var=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+# Report the verdict. On "yes", append a HAVE_<func> define (name mapped through
+# $as_tr_cpp) to confdefs.h and set have_truncate, which later checks consult.
+ac_res=`eval echo '${'$as_ac_var'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+if test `eval echo '${'$as_ac_var'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+ have_truncate=yes
+fi
+done
+
+# === Cannot nest AC_CHECK_FUNCS() calls
+if test "$have_truncate" != yes ; then
+
+# Fallback probe for ftruncate(); this code is only reached when have_truncate
+# is not "yes". Uses the same link test, with the verdict kept in the variable
+# named by $as_ac_var and reused as "(cached)" when already set.
+for ac_func in ftruncate
+do
+as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
+{ echo "$as_me:$LINENO: checking for $ac_func" >&5
+echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; }
+if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
+ For example, HP-UX 11i <limits.h> declares gettimeofday. */
+#define $ac_func innocuous_$ac_func
+
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char $ac_func (); below.
+ Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ <limits.h> exists even on freestanding compilers. */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $ac_func
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $ac_func ();
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined __stub_$ac_func || defined __stub___$ac_func
+choke me
+#endif
+
+int
+main ()
+{
+return $ac_func ();
+ ;
+ return 0;
+}
+_ACEOF
+# Link the probe. Success requires exit status 0, empty stderr whenever
+# $ac_c_werror_flag is set, and a non-empty executable passing $as_test_x.
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest$ac_exeext &&
+ $as_test_x conftest$ac_exeext; then
+ eval "$as_ac_var=yes"
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ eval "$as_ac_var=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+# Report the verdict. On "yes", append the HAVE_<func> define to confdefs.h and
+# set have_ftruncate, which gates the chsize fallback and a later prototype check.
+ac_res=`eval echo '${'$as_ac_var'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+if test `eval echo '${'$as_ac_var'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+ have_ftruncate=yes
+fi
+done
+
+# Last-resort probe: only when ftruncate was not found, look for chsize().
+# A positive result only adds the HAVE_<func> define; no have_* shell flag is set.
+ if test "$have_ftruncate" != yes ; then
+
+for ac_func in chsize
+do
+as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
+{ echo "$as_me:$LINENO: checking for $ac_func" >&5
+echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; }
+if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
+ For example, HP-UX 11i <limits.h> declares gettimeofday. */
+#define $ac_func innocuous_$ac_func
+
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char $ac_func (); below.
+ Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ <limits.h> exists even on freestanding compilers. */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $ac_func
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $ac_func ();
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined __stub_$ac_func || defined __stub___$ac_func
+choke me
+#endif
+
+int
+main ()
+{
+return $ac_func ();
+ ;
+ return 0;
+}
+_ACEOF
+# Link the probe. Success requires exit status 0, empty stderr whenever
+# $ac_c_werror_flag is set, and a non-empty executable passing $as_test_x.
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest$ac_exeext &&
+ $as_test_x conftest$ac_exeext; then
+ eval "$as_ac_var=yes"
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ eval "$as_ac_var=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+# Report the verdict and, on "yes", append the HAVE_<func> define to confdefs.h.
+ac_res=`eval echo '${'$as_ac_var'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+if test `eval echo '${'$as_ac_var'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+done
+
+ fi
+fi
+
+
+# Probe for setenv() by linking a small program that references the symbol.
+# The verdict is kept in the variable named by $as_ac_var and reused as
+# "(cached)" when that variable is already set.
+for ac_func in setenv
+do
+as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
+{ echo "$as_me:$LINENO: checking for $ac_func" >&5
+echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; }
+if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
+ For example, HP-UX 11i <limits.h> declares gettimeofday. */
+#define $ac_func innocuous_$ac_func
+
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char $ac_func (); below.
+ Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ <limits.h> exists even on freestanding compilers. */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $ac_func
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $ac_func ();
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined __stub_$ac_func || defined __stub___$ac_func
+choke me
+#endif
+
+int
+main ()
+{
+return $ac_func ();
+ ;
+ return 0;
+}
+_ACEOF
+# Link the probe. Success requires exit status 0, empty stderr whenever
+# $ac_c_werror_flag is set, and a non-empty executable passing $as_test_x.
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest$ac_exeext &&
+ $as_test_x conftest$ac_exeext; then
+ eval "$as_ac_var=yes"
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ eval "$as_ac_var=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+# Report the verdict. On "yes", append the HAVE_<func> define to confdefs.h and
+# set have_setenv, which decides whether the putenv fallback is probed.
+ac_res=`eval echo '${'$as_ac_var'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+if test `eval echo '${'$as_ac_var'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+ have_setenv=yes
+fi
+done
+
+# === Cannot nest AC_CHECK_FUNCS() calls
+if test "$have_setenv" != yes ; then
+
+# Fallback probe for putenv(); this code is only reached when have_setenv is
+# not "yes". Same link test, with the verdict kept in the variable named by
+# $as_ac_var and reused as "(cached)" when already set.
+for ac_func in putenv
+do
+as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
+{ echo "$as_me:$LINENO: checking for $ac_func" >&5
+echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; }
+if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
+ For example, HP-UX 11i <limits.h> declares gettimeofday. */
+#define $ac_func innocuous_$ac_func
+
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char $ac_func (); below.
+ Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ <limits.h> exists even on freestanding compilers. */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $ac_func
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $ac_func ();
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined __stub_$ac_func || defined __stub___$ac_func
+choke me
+#endif
+
+int
+main ()
+{
+return $ac_func ();
+ ;
+ return 0;
+}
+_ACEOF
+# Link the probe. Success requires exit status 0, empty stderr whenever
+# $ac_c_werror_flag is set, and a non-empty executable passing $as_test_x.
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest$ac_exeext &&
+ $as_test_x conftest$ac_exeext; then
+ eval "$as_ac_var=yes"
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ eval "$as_ac_var=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+# Report the verdict. On "yes", append the HAVE_<func> define to confdefs.h and
+# set have_putenv, which enables the putenv prototype inspection that follows.
+ac_res=`eval echo '${'$as_ac_var'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+if test `eval echo '${'$as_ac_var'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+ have_putenv=yes
+fi
+done
+
+ if test "$have_putenv" = yes ; then
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <stdlib.h>
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ $EGREP "putenv" >/dev/null 2>&1; then
+ have_putenv_prototype=yes
+fi
+rm -f conftest*
+
+ if test "$have_putenv_prototype" = yes ; then
+ { echo "$as_me:$LINENO: checking putenv prototype" >&5
+echo $ECHO_N "checking putenv prototype... $ECHO_C" >&6; }
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <stdlib.h>
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ $EGREP "[^A-Za-zo-9_]putenv[ ]*\(.*const.*\)[ ]*;" >/dev/null 2>&1; then
+ { echo "$as_me:$LINENO: result: correct" >&5
+echo "${ECHO_T}correct" >&6; }
+else
+
+ { echo "$as_me:$LINENO: result: no const" >&5
+echo "${ECHO_T}no const" >&6; }
+ cat >>confdefs.h <<\_ACEOF
+#define NON_CONST_PUTENV_PROTOTYPE 1
+_ACEOF
+
+
+fi
+rm -f conftest*
+
+ fi
+ fi
+fi
+
+#
+# if test yes = "$CYGWIN"; then with_posix_regex=no; fi
+if test no != "$with_posix_regex"; then
+
+# Probe for regcomp() by linking a small program that references the symbol.
+# This code is only reached when $with_posix_regex is not "no". The verdict is
+# kept in the variable named by $as_ac_var and reused as "(cached)" when set.
+for ac_func in regcomp
+do
+as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
+{ echo "$as_me:$LINENO: checking for $ac_func" >&5
+echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; }
+if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
+ For example, HP-UX 11i <limits.h> declares gettimeofday. */
+#define $ac_func innocuous_$ac_func
+
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char $ac_func (); below.
+ Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ <limits.h> exists even on freestanding compilers. */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $ac_func
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $ac_func ();
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined __stub_$ac_func || defined __stub___$ac_func
+choke me
+#endif
+
+int
+main ()
+{
+return $ac_func ();
+ ;
+ return 0;
+}
+_ACEOF
+# Link the probe. Success requires exit status 0, empty stderr whenever
+# $ac_c_werror_flag is set, and a non-empty executable passing $as_test_x.
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest$ac_exeext &&
+ $as_test_x conftest$ac_exeext; then
+ eval "$as_ac_var=yes"
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ eval "$as_ac_var=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+# Report the verdict and, on "yes", append the HAVE_<func> define to confdefs.h.
+ac_res=`eval echo '${'$as_ac_var'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+if test `eval echo '${'$as_ac_var'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+done
+
+ { echo "$as_me:$LINENO: checking if regcomp works" >&5
+echo $ECHO_N "checking if regcomp works... $ECHO_C" >&6; }
+ if test "$cross_compiling" = yes; then
+ cat >>confdefs.h <<\_ACEOF
+#define CHECK_REGCOMP 1
+_ACEOF
+
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+#include <sys/types.h>
+#include <regex.h>
+main() {
+ regex_t patbuf;
+ exit (regcomp (&patbuf, "/hello/", 0) != 0);
+}
+_ACEOF
+rm -f conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && { ac_try='./conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ regcomp_works=yes
+else
+ echo "$as_me: program exited with status $ac_status" >&5
+echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+( exit $ac_status )
+regcomp_works=no
+fi
+rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext
+fi
+
+
+ { echo "$as_me:$LINENO: result: $regcomp_works" >&5
+echo "${ECHO_T}$regcomp_works" >&6; }
+ if test yes != "$regcomp_works"; then
+ cat >>confdefs.h <<\_ACEOF
+#define REGCOMP_BROKEN 1
+_ACEOF
+
+ fi
+fi
+
+# if test yes = "$with_perl_regex"; then
+# AC_MSG_CHECKING(for Perl regex library)
+# pcre_candidates="$with_perl_regex $HOME/local/lib* /usr*/local/lib* /usr/lib*"
+# for lib in $pcre_candidates; do
+# if test -f $lib/libpcreposix.so; then
+# pcre_lib="-L$lib -lpcreposix"
+# break
+# elif test -f $lib/libpcreposix.a; then
+# pcre_lib="$lib/libpcreposix.a"
+# break
+# fi
+# done
+# if test -z "$pcre_lib"; then
+# AC_MSG_RESULT(not found)
+# else
+# AC_MSG_RESULT($lib)
+# AC_DEFINE(HAVE_REGCOMP)
+# LDFLAGS="$LDFLAGS $pcre_lib"
+# have_regex=yes
+# fi
+# fi
+
+
+# Checks for missing prototypes
+# -----------------------------
+{ echo "$as_me:$LINENO: checking for new missing prototypes..." >&5
+echo "$as_me: checking for new missing prototypes..." >&6;}
+
+
+
+# Prototype check for the file-deletion function in use.
+# If remove() was found, search the preprocessed <stdio.h> for a remove
+# declaration; otherwise search <unistd.h> for unlink. When no declaration
+# matches, define NEED_PROTO_REMOVE or NEED_PROTO_UNLINK respectively.
+# The pattern looks for the name, not preceded by an identifier character,
+# optionally followed by blanks and another identifier, then "(".
+if test "$have_remove" = yes ; then
+
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <stdio.h>
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ $EGREP "[^A-Za-z0-9_]remove([ ]+[A-Za-z0-9_]*)?[ ]*\(" >/dev/null 2>&1; then
+ :
+else
+
+ { echo "$as_me:$LINENO: result: adding prototype for remove" >&5
+echo "${ECHO_T}adding prototype for remove" >&6; }
+ cat >>confdefs.h <<\_ACEOF
+#define NEED_PROTO_REMOVE 1
+_ACEOF
+
+
+fi
+rm -f conftest*
+
+else
+
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <unistd.h>
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ $EGREP "[^A-Za-z0-9_]unlink([ ]+[A-Za-z0-9_]*)?[ ]*\(" >/dev/null 2>&1; then
+ :
+else
+
+ { echo "$as_me:$LINENO: result: adding prototype for unlink" >&5
+echo "${ECHO_T}adding prototype for unlink" >&6; }
+ cat >>confdefs.h <<\_ACEOF
+#define NEED_PROTO_UNLINK 1
+_ACEOF
+
+
+fi
+rm -f conftest*
+
+fi
+
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <stdlib.h>
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ $EGREP "[^A-Za-z0-9_]malloc([ ]+[A-Za-z0-9_]*)?[ ]*\(" >/dev/null 2>&1; then
+ :
+else
+
+ { echo "$as_me:$LINENO: result: adding prototype for malloc" >&5
+echo "${ECHO_T}adding prototype for malloc" >&6; }
+ cat >>confdefs.h <<\_ACEOF
+#define NEED_PROTO_MALLOC 1
+_ACEOF
+
+
+fi
+rm -f conftest*
+
+
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <stdlib.h>
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ $EGREP "[^A-Za-z0-9_]getenv([ ]+[A-Za-z0-9_]*)?[ ]*\(" >/dev/null 2>&1; then
+ :
+else
+
+ { echo "$as_me:$LINENO: result: adding prototype for getenv" >&5
+echo "${ECHO_T}adding prototype for getenv" >&6; }
+ cat >>confdefs.h <<\_ACEOF
+#define NEED_PROTO_GETENV 1
+_ACEOF
+
+
+fi
+rm -f conftest*
+
+
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <sys/stat.h>
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ $EGREP "[^A-Za-z0-9_]stat([ ]+[A-Za-z0-9_]*)?[ ]*\(" >/dev/null 2>&1; then
+ :
+else
+
+ { echo "$as_me:$LINENO: result: adding prototype for stat" >&5
+echo "${ECHO_T}adding prototype for stat" >&6; }
+ cat >>confdefs.h <<\_ACEOF
+#define NEED_PROTO_STAT 1
+_ACEOF
+
+
+fi
+rm -f conftest*
+
+
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <sys/stat.h>
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ $EGREP "[^A-Za-z0-9_]lstat([ ]+[A-Za-z0-9_]*)?[ ]*\(" >/dev/null 2>&1; then
+ :
+else
+
+ { echo "$as_me:$LINENO: result: adding prototype for lstat" >&5
+echo "${ECHO_T}adding prototype for lstat" >&6; }
+ cat >>confdefs.h <<\_ACEOF
+#define NEED_PROTO_LSTAT 1
+_ACEOF
+
+
+fi
+rm -f conftest*
+
+# Only when have_fgetpos is "yes": search the preprocessed <stdio.h> for an
+# fgetpos declaration; if none matches, define NEED_PROTO_FGETPOS.
+if test "$have_fgetpos" = yes ; then
+
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <stdio.h>
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ $EGREP "[^A-Za-z0-9_]fgetpos([ ]+[A-Za-z0-9_]*)?[ ]*\(" >/dev/null 2>&1; then
+ :
+else
+
+ { echo "$as_me:$LINENO: result: adding prototype for fgetpos" >&5
+echo "${ECHO_T}adding prototype for fgetpos" >&6; }
+ cat >>confdefs.h <<\_ACEOF
+#define NEED_PROTO_FGETPOS 1
+_ACEOF
+
+
+fi
+rm -f conftest*
+
+fi
+# Only when have_truncate is "yes": search the preprocessed <unistd.h> for a
+# truncate declaration; if none matches, define NEED_PROTO_TRUNCATE.
+if test "$have_truncate" = yes ; then
+
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <unistd.h>
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ $EGREP "[^A-Za-z0-9_]truncate([ ]+[A-Za-z0-9_]*)?[ ]*\(" >/dev/null 2>&1; then
+ :
+else
+
+ { echo "$as_me:$LINENO: result: adding prototype for truncate" >&5
+echo "${ECHO_T}adding prototype for truncate" >&6; }
+ cat >>confdefs.h <<\_ACEOF
+#define NEED_PROTO_TRUNCATE 1
+_ACEOF
+
+
+fi
+rm -f conftest*
+
+fi
+# Only when have_ftruncate is "yes": search the preprocessed <unistd.h> for an
+# ftruncate declaration; if none matches, define NEED_PROTO_FTRUNCATE.
+if test "$have_ftruncate" = yes ; then
+
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <unistd.h>
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ $EGREP "[^A-Za-z0-9_]ftruncate([ ]+[A-Za-z0-9_]*)?[ ]*\(" >/dev/null 2>&1; then
+ :
+else
+
+ { echo "$as_me:$LINENO: result: adding prototype for ftruncate" >&5
+echo "${ECHO_T}adding prototype for ftruncate" >&6; }
+ cat >>confdefs.h <<\_ACEOF
+#define NEED_PROTO_FTRUNCATE 1
+_ACEOF
+
+
+fi
+rm -f conftest*
+
+fi
+
+
+# Output files
+# ------------
+
+
+# Choose how Makefile is produced. Any existing Makefile is removed first.
+# In maintainer mode Makefile becomes a symlink to maintainer.mak and nothing
+# is added to ac_config_files; otherwise "Makefile" is appended to it.
+rm -f Makefile
+if test "$enable_maintainer_mode" = yes ; then
+ { echo "$as_me:$LINENO: result: creating maintainer Makefile" >&5
+echo "${ECHO_T}creating maintainer Makefile" >&6; }
+ ln -s maintainer.mak Makefile
+ makefile_out=
+else
+ makefile_out=Makefile
+fi
+ac_config_files="$ac_config_files $makefile_out"
+
+cat >confcache <<\_ACEOF
+# This file is a shell script that caches the results of configure
+# tests run on this system so they can be shared between configure
+# scripts and configure runs, see configure's option --config-cache.
+# It is not useful on other systems. If it contains results you don't
+# want to keep, you may remove or edit it.
+#
+# config.status only pays attention to the cache file if you give it
+# the --recheck option to rerun configure.
+#
+# `ac_cv_env_foo' variables (set or unset) will be overridden when
+# loading this file, other *unset* `ac_cv_foo' will be assigned the
+# following values.
+
+_ACEOF
+
+# The following way of writing the cache mishandles newlines in values,
+# but we know of no workaround that is simple, portable, and efficient.
+# So, we kill variables containing newlines.
+# Ultrix sh set writes to stderr and can't be redirected directly,
+# and sets the high bit in the cache file unless we assign to the vars.
+(
+ for ac_var in `(set) 2>&1 | sed -n 's/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'`; do
+ eval ac_val=\$$ac_var
+ case $ac_val in #(
+ *${as_nl}*)
+ case $ac_var in #(
+ *_cv_*) { echo "$as_me:$LINENO: WARNING: Cache variable $ac_var contains a newline." >&5
+echo "$as_me: WARNING: Cache variable $ac_var contains a newline." >&2;} ;;
+ esac
+ case $ac_var in #(
+ _ | IFS | as_nl) ;; #(
+ *) $as_unset $ac_var ;;
+ esac ;;
+ esac
+ done
+
+ (set) 2>&1 |
+ case $as_nl`(ac_space=' '; set) 2>&1` in #(
+ *${as_nl}ac_space=\ *)
+ # `set' does not quote correctly, so add quotes (double-quote
+ # substitution turns \\\\ into \\, and sed turns \\ into \).
+ sed -n \
+ "s/'/'\\\\''/g;
+ s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p"
+ ;; #(
+ *)
+ # `set' quotes correctly as required by POSIX, so do not add quotes.
+ sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p"
+ ;;
+ esac |
+ sort
+) |
+ sed '
+ /^ac_cv_env_/b end
+ t clear
+ :clear
+ s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/
+ t end
+ s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/
+ :end' >>confcache
+# Install the new cache only if it differs from $cache_file. A writable cache
+# file is overwritten (the "updating" message is skipped for /dev/null); an
+# unwritable one is left alone with a notice. confcache is always removed.
+if diff "$cache_file" confcache >/dev/null 2>&1; then :; else
+ if test -w "$cache_file"; then
+ test "x$cache_file" != "x/dev/null" &&
+ { echo "$as_me:$LINENO: updating cache $cache_file" >&5
+echo "$as_me: updating cache $cache_file" >&6;}
+ cat confcache >$cache_file
+ else
+ { echo "$as_me:$LINENO: not updating unwritable cache $cache_file" >&5
+echo "$as_me: not updating unwritable cache $cache_file" >&6;}
+ fi
+fi
+rm -f confcache
+
+# Apply defaults for prefix/exec_prefix when they still hold the placeholder
+# NONE, set DEFS, and rewrite $LIBOBJS into the LIBOBJS / LTLIBOBJS lists.
+test "x$prefix" = xNONE && prefix=$ac_default_prefix
+# Let make expand exec_prefix.
+test "x$exec_prefix" = xNONE && exec_prefix='${prefix}'
+
+DEFS=-DHAVE_CONFIG_H
+
+ac_libobjs=
+ac_ltlibobjs=
+# The leading ":" keeps the word list non-empty; it is skipped in the loop body.
+for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue
+ # 1. Remove the extension, and $U if already installed.
+ ac_script='s/\$U\././;s/\.o$//;s/\.obj$//'
+ ac_i=`echo "$ac_i" | sed "$ac_script"`
+ # 2. Prepend LIBOBJDIR. When used with automake>=1.10 LIBOBJDIR
+ # will be set to the directory where LIBOBJS objects are built.
+ ac_libobjs="$ac_libobjs \${LIBOBJDIR}$ac_i\$U.$ac_objext"
+ ac_ltlibobjs="$ac_ltlibobjs \${LIBOBJDIR}$ac_i"'$U.lo'
+done
+LIBOBJS=$ac_libobjs
+
+LTLIBOBJS=$ac_ltlibobjs
+
+
+
+# Start writing the config.status script (path overridable via CONFIG_STATUS,
+# default ./config.status) and register it in ac_clean_files. This first
+# here-document is unquoted, so $SHELL and $as_me are expanded now, while
+# \${CONFIG_SHELL-...} is left for config.status to evaluate when it runs.
+: ${CONFIG_STATUS=./config.status}
+ac_clean_files_save=$ac_clean_files
+ac_clean_files="$ac_clean_files $CONFIG_STATUS"
+{ echo "$as_me:$LINENO: creating $CONFIG_STATUS" >&5
+echo "$as_me: creating $CONFIG_STATUS" >&6;}
+cat >$CONFIG_STATUS <<_ACEOF
+#! $SHELL
+# Generated by $as_me.
+# Run this file to recreate the current configuration.
+# Compiler output produced by configure, useful for debugging
+# configure, is in config.log if it exists.
+
+debug=false
+ac_cs_recheck=false
+ac_cs_silent=false
+SHELL=\${CONFIG_SHELL-$SHELL}
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF
+## --------------------- ##
+## M4sh Initialization. ##
+## --------------------- ##
+
+# Be more Bourne compatible
+DUALCASE=1; export DUALCASE # for MKS sh
+if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
+ emulate sh
+ NULLCMD=:
+ # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which
+ # is contrary to our usage. Disable this feature.
+ alias -g '${1+"$@"}'='"$@"'
+ setopt NO_GLOB_SUBST
+else
+ case `(set -o) 2>/dev/null` in
+ *posix*) set -o posix ;;
+esac
+
+fi
+
+
+
+
+# PATH needs CR
+# Avoid depending upon Character Ranges.
+as_cr_letters='abcdefghijklmnopqrstuvwxyz'
+as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+as_cr_Letters=$as_cr_letters$as_cr_LETTERS
+as_cr_digits='0123456789'
+as_cr_alnum=$as_cr_Letters$as_cr_digits
+
+# The user is always right.
+if test "${PATH_SEPARATOR+set}" != set; then
+ echo "#! /bin/sh" >conf$$.sh
+ echo "exit 0" >>conf$$.sh
+ chmod +x conf$$.sh
+ if (PATH="/nonexistent;."; conf$$.sh) >/dev/null 2>&1; then
+ PATH_SEPARATOR=';'
+ else
+ PATH_SEPARATOR=:
+ fi
+ rm -f conf$$.sh
+fi
+
+# Support unset when possible.
+if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then
+ as_unset=unset
+else
+ as_unset=false
+fi
+
+
+# IFS
+# We need space, tab and new line, in precisely that order. Quoting is
+# there to prevent editors from complaining about space-tab.
+# (If _AS_PATH_WALK were called with IFS unset, it would disable word
+# splitting by setting IFS to empty value.)
+as_nl='
+'
+IFS=" "" $as_nl"
+
+# Find who we are. Look in the path if we contain no directory separator.
+case $0 in
+ *[\\/]* ) as_myself=$0 ;;
+ *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break
+done
+IFS=$as_save_IFS
+
+ ;;
+esac
+# We did not find ourselves, most probably we were run as `sh COMMAND'
+# in which case we are not to be found in the path.
+if test "x$as_myself" = x; then
+ as_myself=$0
+fi
+if test ! -f "$as_myself"; then
+ echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2
+ { (exit 1); exit 1; }
+fi
+
+# Work around bugs in pre-3.0 UWIN ksh.
+for as_var in ENV MAIL MAILPATH
+do ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var
+done
+PS1='$ '
+PS2='> '
+PS4='+ '
+
+# NLS nuisances.
+for as_var in \
+ LANG LANGUAGE LC_ADDRESS LC_ALL LC_COLLATE LC_CTYPE LC_IDENTIFICATION \
+ LC_MEASUREMENT LC_MESSAGES LC_MONETARY LC_NAME LC_NUMERIC LC_PAPER \
+ LC_TELEPHONE LC_TIME
+do
+ if (set +x; test -z "`(eval $as_var=C; export $as_var) 2>&1`"); then
+ eval $as_var=C; export $as_var
+ else
+ ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var
+ fi
+done
+
+# Required to use basename.
+if expr a : '\(a\)' >/dev/null 2>&1 &&
+ test "X`expr 00001 : '.*\(...\)'`" = X001; then
+ as_expr=expr
+else
+ as_expr=false
+fi
+
+if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then
+ as_basename=basename
+else
+ as_basename=false
+fi
+
+
+# Name of the executable.
+as_me=`$as_basename -- "$0" ||
+$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \
+ X"$0" : 'X\(//\)$' \| \
+ X"$0" : 'X\(/\)' \| . 2>/dev/null ||
+echo X/"$0" |
+ sed '/^.*\/\([^/][^/]*\)\/*$/{
+ s//\1/
+ q
+ }
+ /^X\/\(\/\/\)$/{
+ s//\1/
+ q
+ }
+ /^X\/\(\/\).*/{
+ s//\1/
+ q
+ }
+ s/.*/./; q'`
+
+# CDPATH.
+$as_unset CDPATH
+
+
+
+ as_lineno_1=$LINENO
+ as_lineno_2=$LINENO
+ test "x$as_lineno_1" != "x$as_lineno_2" &&
+ test "x`expr $as_lineno_1 + 1`" = "x$as_lineno_2" || {
+
+ # Create $as_me.lineno as a copy of $as_myself, but with $LINENO
+ # uniformly replaced by the line number. The first 'sed' inserts a
+ # line-number line after each line using $LINENO; the second 'sed'
+ # does the real work. The second script uses 'N' to pair each
+ # line-number line with the line containing $LINENO, and appends
+ # trailing '-' during substitution so that $LINENO is not a special
+ # case at line end.
+ # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the
+ # scripts with optimization help from Paolo Bonzini. Blame Lee
+ # E. McMahon (1931-1989) for sed's syntax. :-)
+ sed -n '
+ p
+ /[$]LINENO/=
+ ' <$as_myself |
+ sed '
+ s/[$]LINENO.*/&-/
+ t lineno
+ b
+ :lineno
+ N
+ :loop
+ s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/
+ t loop
+ s/-\n.*//
+ ' >$as_me.lineno &&
+ chmod +x "$as_me.lineno" ||
+ { echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2
+ { (exit 1); exit 1; }; }
+
+ # Don't try to exec as it changes $0, causing all sorts of problems
+ # (the dirname of $0 is not the place where we might find the
+ # original, and so on; Autoconf is especially sensitive to this).
+ . "./$as_me.lineno"
+ # Exit status is that of the last command.
+ exit
+}
+
+
+if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then
+ as_dirname=dirname
+else
+ as_dirname=false
+fi
+
+ECHO_C= ECHO_N= ECHO_T=
+case `echo -n x` in
+-n*)
+ case `echo 'x\c'` in
+ *c*) ECHO_T=' ';; # ECHO_T is single tab character.
+ *) ECHO_C='\c';;
+ esac;;
+*)
+ ECHO_N='-n';;
+esac
+
+if expr a : '\(a\)' >/dev/null 2>&1 &&
+ test "X`expr 00001 : '.*\(...\)'`" = X001; then
+ as_expr=expr
+else
+ as_expr=false
+fi
+
+rm -f conf$$ conf$$.exe conf$$.file
+if test -d conf$$.dir; then
+ rm -f conf$$.dir/conf$$.file
+else
+ rm -f conf$$.dir
+ mkdir conf$$.dir
+fi
+echo >conf$$.file
+if ln -s conf$$.file conf$$ 2>/dev/null; then
+ as_ln_s='ln -s'
+ # ... but there are two gotchas:
+ # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail.
+ # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable.
+ # In both cases, we have to default to `cp -p'.
+ ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe ||
+ as_ln_s='cp -p'
+elif ln conf$$.file conf$$ 2>/dev/null; then
+ as_ln_s=ln
+else
+ as_ln_s='cp -p'
+fi
+rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file
+rmdir conf$$.dir 2>/dev/null
+
+if mkdir -p . 2>/dev/null; then # probe: does this mkdir accept -p?
+ as_mkdir_p=:
+else
+ test -d ./-p && rmdir ./-p # remove a stray ./-p directory if the probe left one
+ as_mkdir_p=false
+fi
+
+if test -x / >/dev/null 2>&1; then
+ as_test_x='test -x'
+else
+ if ls -dL / >/dev/null 2>&1; then
+ as_ls_L_option=L
+ else
+ as_ls_L_option=
+ fi
+ as_test_x='
+ eval sh -c '\''
+ if test -d "$1"; then
+ test -d "$1/.";
+ else
+ case $1 in
+ -*)set "./$1";;
+ esac;
+ case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in
+ ???[sx]*):;;*)false;;esac;fi
+ '\'' sh
+ '
+fi
+as_executable_p=$as_test_x
+
+# Sed expression to map a string onto a valid CPP name.
+as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'"
+
+# Sed expression to map a string onto a valid variable name.
+as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'"
+
+
+exec 6>&1
+
+# Save the log message, to keep $0 and so on meaningful, and to
+# report actual input values of CONFIG_FILES etc. instead of their
+# values after options handling.
+ac_log="
+This file was extended by $as_me, which was
+generated by GNU Autoconf 2.61. Invocation command line was
+
+ CONFIG_FILES = $CONFIG_FILES
+ CONFIG_HEADERS = $CONFIG_HEADERS
+ CONFIG_LINKS = $CONFIG_LINKS
+ CONFIG_COMMANDS = $CONFIG_COMMANDS
+ $ $0 $@
+
+on `(hostname || uname -n) 2>/dev/null | sed 1q`
+"
+
+_ACEOF
+
+cat >>$CONFIG_STATUS <<_ACEOF
+# Files that config.status was made for.
+config_files="$ac_config_files"
+config_headers="$ac_config_headers"
+
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF
+ac_cs_usage="\
+\`$as_me' instantiates files from templates according to the
+current configuration.
+
+Usage: $0 [OPTIONS] [FILE]...
+
+ -h, --help print this help, then exit
+ -V, --version print version number and configuration settings, then exit
+ -q, --quiet do not print progress messages
+ -d, --debug don't remove temporary files
+ --recheck update $as_me by reconfiguring in the same conditions
+ --file=FILE[:TEMPLATE]
+ instantiate the configuration file FILE
+ --header=FILE[:TEMPLATE]
+ instantiate the configuration header FILE
+
+Configuration files:
+$config_files
+
+Configuration headers:
+$config_headers
+
+Report bugs to <bug-autoconf@gnu.org>."
+
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF
+ac_cs_version="\\
+config.status
+configured by $0, generated by GNU Autoconf 2.61,
+ with options \\"`echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\"
+
+Copyright (C) 2006 Free Software Foundation, Inc.
+This config.status script is free software; the Free Software Foundation
+gives unlimited permission to copy, distribute and modify it."
+
+ac_pwd='$ac_pwd'
+srcdir='$srcdir'
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF
+# If no files are specified by the user, then we need to provide default
+# values. But we need to know if files were specified by the user.
+ac_need_defaults=:
+while test $# != 0
+do
+ case $1 in
+ --*=*)
+ ac_option=`expr "X$1" : 'X\([^=]*\)='`
+ ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'`
+ ac_shift=:
+ ;;
+ *)
+ ac_option=$1
+ ac_optarg=$2
+ ac_shift=shift
+ ;;
+ esac
+
+ case $ac_option in
+ # Handling of the options.
+ -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r)
+ ac_cs_recheck=: ;;
+ --version | --versio | --versi | --vers | --ver | --ve | --v | -V )
+ echo "$ac_cs_version"; exit ;;
+ --debug | --debu | --deb | --de | --d | -d )
+ debug=: ;;
+ --file | --fil | --fi | --f )
+ $ac_shift
+ CONFIG_FILES="$CONFIG_FILES $ac_optarg"
+ ac_need_defaults=false;;
+ --header | --heade | --head | --hea )
+ $ac_shift
+ CONFIG_HEADERS="$CONFIG_HEADERS $ac_optarg"
+ ac_need_defaults=false;;
+ --he | --h)
+ # Conflict between --help and --header
+ { echo "$as_me: error: ambiguous option: $1
+Try \`$0 --help' for more information." >&2
+ { (exit 1); exit 1; }; };;
+ --help | --hel | -h )
+ echo "$ac_cs_usage"; exit ;;
+ -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+ | -silent | --silent | --silen | --sile | --sil | --si | --s)
+ ac_cs_silent=: ;;
+
+ # This is an error.
+ -*) { echo "$as_me: error: unrecognized option: $1
+Try \`$0 --help' for more information." >&2
+ { (exit 1); exit 1; }; } ;;
+
+ *) ac_config_targets="$ac_config_targets $1"
+ ac_need_defaults=false ;;
+
+ esac
+ shift
+done
+
+ac_configure_extra_args=
+
+if $ac_cs_silent; then
+ exec 6>/dev/null
+ ac_configure_extra_args="$ac_configure_extra_args --silent"
+fi
+
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF
+if \$ac_cs_recheck; then
+ echo "running CONFIG_SHELL=$SHELL $SHELL $0 "$ac_configure_args \$ac_configure_extra_args " --no-create --no-recursion" >&6
+ CONFIG_SHELL=$SHELL
+ export CONFIG_SHELL
+ exec $SHELL "$0"$ac_configure_args \$ac_configure_extra_args --no-create --no-recursion
+fi
+
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF
+exec 5>>config.log
+{
+ echo
+ sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX
+## Running $as_me. ##
+_ASBOX
+ echo "$ac_log"
+} >&5
+
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF
+
+# Handling of arguments.
+for ac_config_target in $ac_config_targets
+do
+ case $ac_config_target in
+ "config.h") CONFIG_HEADERS="$CONFIG_HEADERS config.h" ;;
+ "$makefile_out") CONFIG_FILES="$CONFIG_FILES $makefile_out" ;;
+
+ *) { { echo "$as_me:$LINENO: error: invalid argument: $ac_config_target" >&5
+echo "$as_me: error: invalid argument: $ac_config_target" >&2;}
+ { (exit 1); exit 1; }; };;
+ esac
+done
+
+
+# If the user did not use the arguments to specify the items to instantiate,
+# then the envvar interface is used. Set only those that are not.
+# We use the long form for the default assignment because of an extremely
+# bizarre bug on SunOS 4.1.3.
+if $ac_need_defaults; then
+ test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files
+ test "${CONFIG_HEADERS+set}" = set || CONFIG_HEADERS=$config_headers
+fi
+
+# Have a temporary directory for convenience. Make it in the build tree
+# simply because there is no reason against having it here, and in addition,
+# creating and moving files from /tmp can sometimes cause problems.
+# Hook for its removal unless debugging.
+# Note that there is a small window in which the directory will not be cleaned:
+# after its creation but before its name has been assigned to `$tmp'.
+$debug ||
+{
+ tmp=
+ trap 'exit_status=$?
+ { test -z "$tmp" || test ! -d "$tmp" || rm -fr "$tmp"; } && exit $exit_status
+' 0
+ trap '{ (exit 1); exit 1; }' 1 2 13 15
+}
+# Create a (secure) tmp directory for tmp files: try mktemp -d first, then
+# fall back to mkdir under umask 077; give up if neither one succeeds.
+{
+ tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` &&
+ test -n "$tmp" && test -d "$tmp"
+} ||
+{
+ tmp=./conf$$-$RANDOM
+ (umask 077 && mkdir "$tmp")
+} ||
+{
+ echo "$as_me: cannot create a temporary directory in ." >&2
+ { (exit 1); exit 1; }
+}
+
+#
+# Set up the sed scripts for CONFIG_FILES section.
+#
+
+# No need to generate the scripts if there are no CONFIG_FILES.
+# This happens for instance when ./config.status config.h
+if test -n "$CONFIG_FILES"; then
+
+_ACEOF
+
+
+
+ac_delim='%!_!# '
+for ac_last_try in false false false false false :; do
+ cat >conf$$subs.sed <<_ACEOF
+SHELL!$SHELL$ac_delim
+PATH_SEPARATOR!$PATH_SEPARATOR$ac_delim
+PACKAGE_NAME!$PACKAGE_NAME$ac_delim
+PACKAGE_TARNAME!$PACKAGE_TARNAME$ac_delim
+PACKAGE_VERSION!$PACKAGE_VERSION$ac_delim
+PACKAGE_STRING!$PACKAGE_STRING$ac_delim
+PACKAGE_BUGREPORT!$PACKAGE_BUGREPORT$ac_delim
+exec_prefix!$exec_prefix$ac_delim
+prefix!$prefix$ac_delim
+program_transform_name!$program_transform_name$ac_delim
+bindir!$bindir$ac_delim
+sbindir!$sbindir$ac_delim
+libexecdir!$libexecdir$ac_delim
+datarootdir!$datarootdir$ac_delim
+datadir!$datadir$ac_delim
+sysconfdir!$sysconfdir$ac_delim
+sharedstatedir!$sharedstatedir$ac_delim
+localstatedir!$localstatedir$ac_delim
+includedir!$includedir$ac_delim
+oldincludedir!$oldincludedir$ac_delim
+docdir!$docdir$ac_delim
+infodir!$infodir$ac_delim
+htmldir!$htmldir$ac_delim
+dvidir!$dvidir$ac_delim
+pdfdir!$pdfdir$ac_delim
+psdir!$psdir$ac_delim
+libdir!$libdir$ac_delim
+localedir!$localedir$ac_delim
+mandir!$mandir$ac_delim
+DEFS!$DEFS$ac_delim
+ECHO_C!$ECHO_C$ac_delim
+ECHO_N!$ECHO_N$ac_delim
+ECHO_T!$ECHO_T$ac_delim
+LIBS!$LIBS$ac_delim
+build_alias!$build_alias$ac_delim
+host_alias!$host_alias$ac_delim
+target_alias!$target_alias$ac_delim
+install_targets!$install_targets$ac_delim
+CC!$CC$ac_delim
+CFLAGS!$CFLAGS$ac_delim
+LDFLAGS!$LDFLAGS$ac_delim
+CPPFLAGS!$CPPFLAGS$ac_delim
+ac_ct_CC!$ac_ct_CC$ac_delim
+EXEEXT!$EXEEXT$ac_delim
+OBJEXT!$OBJEXT$ac_delim
+LN_S!$LN_S$ac_delim
+STRIP!$STRIP$ac_delim
+sort_found!$sort_found$ac_delim
+CPP!$CPP$ac_delim
+GREP!$GREP$ac_delim
+EGREP!$EGREP$ac_delim
+LIBOBJS!$LIBOBJS$ac_delim
+LTLIBOBJS!$LTLIBOBJS$ac_delim
+_ACEOF
+
+ if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 53; then
+ break
+ elif $ac_last_try; then
+ { { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5
+echo "$as_me: error: could not make $CONFIG_STATUS" >&2;}
+ { (exit 1); exit 1; }; }
+ else
+ ac_delim="$ac_delim!$ac_delim _$ac_delim!! "
+ fi
+done
+
+ac_eof=`sed -n '/^CEOF[0-9]*$/s/CEOF/0/p' conf$$subs.sed`
+if test -n "$ac_eof"; then
+ ac_eof=`echo "$ac_eof" | sort -nru | sed 1q`
+ ac_eof=`expr $ac_eof + 1`
+fi
+
+cat >>$CONFIG_STATUS <<_ACEOF
+cat >"\$tmp/subs-1.sed" <<\CEOF$ac_eof
+/@[a-zA-Z_][a-zA-Z_0-9]*@/!b end
+_ACEOF
+sed '
+s/[,\\&]/\\&/g; s/@/@|#_!!_#|/g
+s/^/s,@/; s/!/@,|#_!!_#|/
+:n
+t n
+s/'"$ac_delim"'$/,g/; t
+s/$/\\/; p
+N; s/^.*\n//; s/[,\\&]/\\&/g; s/@/@|#_!!_#|/g; b n
+' >>$CONFIG_STATUS <conf$$subs.sed
+rm -f conf$$subs.sed
+cat >>$CONFIG_STATUS <<_ACEOF
+:end
+s/|#_!!_#|//g
+CEOF$ac_eof
+_ACEOF
+
+
+# VPATH may cause trouble with some makes, so we remove $(srcdir),
+# ${srcdir} and @srcdir@ from VPATH if srcdir is ".", strip leading and
+# trailing colons and then remove the whole line if VPATH becomes empty
+# (actually we leave an empty line to preserve line numbers).
+if test "x$srcdir" = x.; then
+ ac_vpsub='/^[ ]*VPATH[ ]*=/{
+s/:*\$(srcdir):*/:/
+s/:*\${srcdir}:*/:/
+s/:*@srcdir@:*/:/
+s/^\([^=]*=[ ]*\):*/\1/
+s/:*$//
+s/^[^=]*=[ ]*$//
+}'
+fi
+
+cat >>$CONFIG_STATUS <<\_ACEOF
+fi # test -n "$CONFIG_FILES"
+
+
+for ac_tag in :F $CONFIG_FILES :H $CONFIG_HEADERS
+do
+ case $ac_tag in
+ :[FHLC]) ac_mode=$ac_tag; continue;;
+ esac
+ case $ac_mode$ac_tag in
+ :[FHL]*:*);;
+ :L* | :C*:*) { { echo "$as_me:$LINENO: error: Invalid tag $ac_tag." >&5
+echo "$as_me: error: Invalid tag $ac_tag." >&2;}
+ { (exit 1); exit 1; }; };;
+ :[FH]-) ac_tag=-:-;;
+ :[FH]*) ac_tag=$ac_tag:$ac_tag.in;;
+ esac
+ ac_save_IFS=$IFS
+ IFS=:
+ set x $ac_tag
+ IFS=$ac_save_IFS
+ shift
+ ac_file=$1
+ shift
+
+ case $ac_mode in
+ :L) ac_source=$1;;
+ :[FH])
+ ac_file_inputs=
+ for ac_f
+ do
+ case $ac_f in
+ -) ac_f="$tmp/stdin";;
+ *) # Look for the file first in the build tree, then in the source tree
+ # (if the path is not absolute). The absolute path cannot be DOS-style,
+ # because $ac_f cannot contain `:'.
+ test -f "$ac_f" ||
+ case $ac_f in
+ [\\/$]*) false;;
+ *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";;
+ esac ||
+ { { echo "$as_me:$LINENO: error: cannot find input file: $ac_f" >&5
+echo "$as_me: error: cannot find input file: $ac_f" >&2;}
+ { (exit 1); exit 1; }; };;
+ esac
+ ac_file_inputs="$ac_file_inputs $ac_f"
+ done
+
+ # Let's still pretend it is `configure' which instantiates (i.e., don't
+ # use $as_me), people would be surprised to read:
+ # /* config.h. Generated by config.status. */
+ configure_input="Generated from "`IFS=:
+ echo $* | sed 's|^[^:]*/||;s|:[^:]*/|, |g'`" by configure."
+ if test x"$ac_file" != x-; then
+ configure_input="$ac_file. $configure_input"
+ { echo "$as_me:$LINENO: creating $ac_file" >&5
+echo "$as_me: creating $ac_file" >&6;}
+ fi
+
+ case $ac_tag in
+ *:-:* | *:-) cat >"$tmp/stdin";;
+ esac
+ ;;
+ esac
+
+ ac_dir=`$as_dirname -- "$ac_file" ||
+$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+ X"$ac_file" : 'X\(//\)[^/]' \| \
+ X"$ac_file" : 'X\(//\)$' \| \
+ X"$ac_file" : 'X\(/\)' \| . 2>/dev/null ||
+echo X"$ac_file" |
+ sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+ s//\1/
+ q
+ }
+ /^X\(\/\/\)[^/].*/{
+ s//\1/
+ q
+ }
+ /^X\(\/\/\)$/{
+ s//\1/
+ q
+ }
+ /^X\(\/\).*/{
+ s//\1/
+ q
+ }
+ s/.*/./; q'`
+ { as_dir="$ac_dir"
+ case $as_dir in #(
+ -*) as_dir=./$as_dir;;
+ esac
+ test -d "$as_dir" || { $as_mkdir_p && mkdir -p "$as_dir"; } || {
+ as_dirs=
+ while :; do
+ case $as_dir in #(
+ *\'*) as_qdir=`echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #(
+ *) as_qdir=$as_dir;;
+ esac
+ as_dirs="'$as_qdir' $as_dirs"
+ as_dir=`$as_dirname -- "$as_dir" ||
+$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+ X"$as_dir" : 'X\(//\)[^/]' \| \
+ X"$as_dir" : 'X\(//\)$' \| \
+ X"$as_dir" : 'X\(/\)' \| . 2>/dev/null ||
+echo X"$as_dir" |
+ sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+ s//\1/
+ q
+ }
+ /^X\(\/\/\)[^/].*/{
+ s//\1/
+ q
+ }
+ /^X\(\/\/\)$/{
+ s//\1/
+ q
+ }
+ /^X\(\/\).*/{
+ s//\1/
+ q
+ }
+ s/.*/./; q'`
+ test -d "$as_dir" && break
+ done
+ test -z "$as_dirs" || eval "mkdir $as_dirs"
+ } || test -d "$as_dir" || { { echo "$as_me:$LINENO: error: cannot create directory $as_dir" >&5
+echo "$as_me: error: cannot create directory $as_dir" >&2;}
+ { (exit 1); exit 1; }; }; }
+ ac_builddir=.
+
+case "$ac_dir" in
+.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;;
+*)
+ ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'`
+ # A ".." for each directory in $ac_dir_suffix.
+ ac_top_builddir_sub=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,/..,g;s,/,,'`
+ case $ac_top_builddir_sub in
+ "") ac_top_builddir_sub=. ac_top_build_prefix= ;;
+ *) ac_top_build_prefix=$ac_top_builddir_sub/ ;;
+ esac ;;
+esac
+ac_abs_top_builddir=$ac_pwd
+ac_abs_builddir=$ac_pwd$ac_dir_suffix
+# for backward compatibility:
+ac_top_builddir=$ac_top_build_prefix
+
+case $srcdir in
+ .) # We are building in place.
+ ac_srcdir=.
+ ac_top_srcdir=$ac_top_builddir_sub
+ ac_abs_top_srcdir=$ac_pwd ;;
+ [\\/]* | ?:[\\/]* ) # Absolute name.
+ ac_srcdir=$srcdir$ac_dir_suffix;
+ ac_top_srcdir=$srcdir
+ ac_abs_top_srcdir=$srcdir ;;
+ *) # Relative name.
+ ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix
+ ac_top_srcdir=$ac_top_build_prefix$srcdir
+ ac_abs_top_srcdir=$ac_pwd/$srcdir ;;
+esac
+ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix
+
+
+ case $ac_mode in
+ :F)
+ #
+ # CONFIG_FILE
+ #
+
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF
+# If the template does not know about datarootdir, expand it.
+# FIXME: This hack should be removed a few years after 2.60.
+ac_datarootdir_hack=; ac_datarootdir_seen=
+
+case `sed -n '/datarootdir/ {
+ p
+ q
+}
+/@datadir@/p
+/@docdir@/p
+/@infodir@/p
+/@localedir@/p
+/@mandir@/p
+' $ac_file_inputs` in
+*datarootdir*) ac_datarootdir_seen=yes;;
+*@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*)
+ { echo "$as_me:$LINENO: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5
+echo "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;}
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF
+ ac_datarootdir_hack='
+ s&@datadir@&$datadir&g
+ s&@docdir@&$docdir&g
+ s&@infodir@&$infodir&g
+ s&@localedir@&$localedir&g
+ s&@mandir@&$mandir&g
+ s&\\\${datarootdir}&$datarootdir&g' ;;
+esac
+_ACEOF
+
+# Neutralize VPATH when `$srcdir' = `.'.
+# Shell code in configure.ac might set extrasub.
+# FIXME: do we really want to maintain this feature?
+cat >>$CONFIG_STATUS <<_ACEOF
+ sed "$ac_vpsub
+$extrasub
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF
+:t
+/@[a-zA-Z_][a-zA-Z_0-9]*@/!b
+s&@configure_input@&$configure_input&;t t
+s&@top_builddir@&$ac_top_builddir_sub&;t t
+s&@srcdir@&$ac_srcdir&;t t
+s&@abs_srcdir@&$ac_abs_srcdir&;t t
+s&@top_srcdir@&$ac_top_srcdir&;t t
+s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t
+s&@builddir@&$ac_builddir&;t t
+s&@abs_builddir@&$ac_abs_builddir&;t t
+s&@abs_top_builddir@&$ac_abs_top_builddir&;t t
+$ac_datarootdir_hack
+" $ac_file_inputs | sed -f "$tmp/subs-1.sed" >$tmp/out
+
+test -z "$ac_datarootdir_hack$ac_datarootdir_seen" &&
+ { ac_out=`sed -n '/\${datarootdir}/p' "$tmp/out"`; test -n "$ac_out"; } &&
+ { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' "$tmp/out"`; test -z "$ac_out"; } &&
+ { echo "$as_me:$LINENO: WARNING: $ac_file contains a reference to the variable \`datarootdir'
+which seems to be undefined. Please make sure it is defined." >&5
+echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir'
+which seems to be undefined. Please make sure it is defined." >&2;}
+
+ rm -f "$tmp/stdin" # drop the saved copy of standard input, if any
+ case $ac_file in
+ -) cat "$tmp/out"; rm -f "$tmp/out";; # a file name of - selects standard output
+ *) rm -f "$ac_file"; mv "$tmp/out" "$ac_file";; # destination quoted like the rm
+ esac
+ ;;
+ :H)
+ #
+ # CONFIG_HEADER
+ #
+_ACEOF
+
+# Transform confdefs.h into a sed script `conftest.defines', that
+# substitutes the proper values into config.h.in to produce config.h.
+rm -f conftest.defines conftest.tail
+# First, append a space to every undef/define line, to ease matching.
+echo 's/$/ /' >conftest.defines
+# Then, protect against being on the right side of a sed subst, or in
+# an unquoted here document, in config.status. If some macros were
+# called several times there might be several #defines for the same
+# symbol, which is useless. But do not sort them, since the last
+# AC_DEFINE must be honored.
+ac_word_re=[_$as_cr_Letters][_$as_cr_alnum]*
+# These sed commands are passed to sed as "A NAME B PARAMS C VALUE D", where
+# NAME is the cpp macro being defined, VALUE is the value it is being given.
+# PARAMS is the parameter list in the macro definition--in most cases, it's
+# just an empty string.
+ac_dA='s,^\\([ #]*\\)[^ ]*\\([ ]*'
+ac_dB='\\)[ (].*,\\1define\\2'
+ac_dC=' '
+ac_dD=' ,'
+
+uniq confdefs.h |
+ sed -n '
+ t rset
+ :rset
+ s/^[ ]*#[ ]*define[ ][ ]*//
+ t ok
+ d
+ :ok
+ s/[\\&,]/\\&/g
+ s/^\('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/ '"$ac_dA"'\1'"$ac_dB"'\2'"${ac_dC}"'\3'"$ac_dD"'/p
+ s/^\('"$ac_word_re"'\)[ ]*\(.*\)/'"$ac_dA"'\1'"$ac_dB$ac_dC"'\2'"$ac_dD"'/p
+ ' >>conftest.defines
+
+# Remove the space that was appended to ease matching.
+# Then replace #undef with comments. This is necessary, for
+# example, in the case of _POSIX_SOURCE, which is predefined and required
+# on some systems where configure will not decide to define it.
+# (The regexp can be short, since the line contains either #define or #undef.)
+echo 's/ $//
+s,^[ #]*u.*,/* & */,' >>conftest.defines
+
+# Break up conftest.defines:
+ac_max_sed_lines=50
+
+# First sed command is: sed -f defines.sed $ac_file_inputs >"$tmp/out1"
+# Second one is: sed -f defines.sed "$tmp/out1" >"$tmp/out2"
+# Third one will be: sed -f defines.sed "$tmp/out2" >"$tmp/out1"
+# et cetera.
+ac_in='$ac_file_inputs'
+ac_out='"$tmp/out1"'
+ac_nxt='"$tmp/out2"'
+
+while :
+do
+ # Write a here document:
+ cat >>$CONFIG_STATUS <<_ACEOF
+ # First, check the format of the line:
+ cat >"\$tmp/defines.sed" <<\\CEOF
+/^[ ]*#[ ]*undef[ ][ ]*$ac_word_re[ ]*\$/b def
+/^[ ]*#[ ]*define[ ][ ]*$ac_word_re[( ]/b def
+b
+:def
+_ACEOF
+ sed ${ac_max_sed_lines}q conftest.defines >>$CONFIG_STATUS
+ echo 'CEOF
+ sed -f "$tmp/defines.sed"' "$ac_in >$ac_out" >>$CONFIG_STATUS
+ ac_in=$ac_out; ac_out=$ac_nxt; ac_nxt=$ac_in
+ sed 1,${ac_max_sed_lines}d conftest.defines >conftest.tail
+ grep . conftest.tail >/dev/null || break
+ rm -f conftest.defines
+ mv conftest.tail conftest.defines
+done
+rm -f conftest.defines conftest.tail
+
+echo "ac_result=$ac_in" >>$CONFIG_STATUS
+cat >>$CONFIG_STATUS <<\_ACEOF
+ if test x"$ac_file" != x-; then # a file name of - selects standard output
+ echo "/* $configure_input */" >"$tmp/config.h"
+ cat "$ac_result" >>"$tmp/config.h"
+ if diff "$ac_file" "$tmp/config.h" >/dev/null 2>&1; then # identical: leave it untouched
+ { echo "$as_me:$LINENO: $ac_file is unchanged" >&5
+echo "$as_me: $ac_file is unchanged" >&6;}
+ else
+ rm -f "$ac_file"
+ mv "$tmp/config.h" "$ac_file"
+ fi
+ else
+ echo "/* $configure_input */"
+ cat "$ac_result"
+ fi
+ rm -f "$tmp/out1" "$tmp/out2" # intermediate outputs of the defines.sed passes
+ ;;
+
+
+ esac
+
+done # for ac_tag
+
+
+{ (exit 0); exit 0; }
+_ACEOF
+chmod +x $CONFIG_STATUS
+ac_clean_files=$ac_clean_files_save
+
+
+# configure is writing to config.log, and then calls config.status.
+# config.status does its own redirection, appending to config.log.
+# Unfortunately, on DOS this fails, as config.log is still kept open
+# by configure, so config.status won't be able to write to it; its
+# output is simply discarded. So we exec the FD to /dev/null,
+# effectively closing config.log, so it can be properly (re)opened and
+# appended to by config.status. When coming back to configure, we
+# need to make the FD available again.
+if test "$no_create" != yes; then
+ ac_cs_success=:
+ ac_config_status_args=
+ test "$silent" = yes &&
+ ac_config_status_args="$ac_config_status_args --quiet"
+ exec 5>/dev/null
+ $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false
+ exec 5>>config.log
+ # Use ||, not &&, to avoid exiting from the if with $? = 1, which
+ # would make configure fail if this is the last instruction.
+ $ac_cs_success || { (exit 1); exit 1; }
+fi
+
+
+# vim:ts=4:sw=4:
diff --git a/configure.ac b/configure.ac
new file mode 100644
index 0000000..7fc81d4
--- /dev/null
+++ b/configure.ac
@@ -0,0 +1,532 @@
+# $Id$
+
+# Copyright (c) 2009, Darren Hiebert
+#
+# This source code is released for free distribution under the terms
+# of the GNU General Public License.
+
+# Process this file with autoconf to produce a configure script.
+
+AC_PREREQ([2.61])
+AC_INIT([ctags.h])
+AC_CONFIG_HEADERS([config.h])
+
+AH_TEMPLATE([PACKAGE], [Package name.])
+AH_TEMPLATE([VERSION], [Package version.])
+AH_TEMPLATE([clock_t],
+ [Define to the appropriate type if <time.h> does not define this.])
+AH_TEMPLATE([fpos_t],
+ [Define to long if <stdio.h> does not define this.])
+AH_TEMPLATE([L_tmpnam],
+ [Define to the appropriate size for tmpnam() if <stdio.h> does not define
+ this.])
+AH_TEMPLATE([HAVE_STAT_ST_INO],
+ [Define this macro if the field "st_ino" exists in struct stat in
+ <sys/stat.h>.])
+AH_TEMPLATE([remove],
+ [Define remove to unlink if you have unlink(), but not remove().])
+AH_TEMPLATE([SEEK_SET],
+ [Define this value used by fseek() appropriately if <stdio.h>
+ (or <unistd.h> on SunOS 4.1.x) does not define them.])
+AH_TEMPLATE([INT_MAX],
+ [Define as the maximum integer on your system if not defined in <limits.h>.])
+AH_TEMPLATE([CUSTOM_CONFIGURATION_FILE],
+ [You can define this label to be a string containing the name of a
+ site-specific configuration file containing site-wide default options. The
+ files /etc/ctags.conf and /usr/local/etc/ctags.conf are already checked,
+ so only define one here if you need a file somewhere else.])
+AH_TEMPLATE([MACROS_USE_PATTERNS],
+ [Define this label if you want macro tags (defined labels) to use patterns
+ in the EX command by default (original ctags behavior is to use line
+ numbers).])
+AH_VERBATIM([DEFAULT_FILE_FORMAT], [
+/* Define this as desired.
+ * 1: Original ctags format
+ * 2: Extended ctags format with extension flags in EX-style comment.
+ */
+#define DEFAULT_FILE_FORMAT 2
+])
+AH_TEMPLATE([SYS_INTERPRETER],
+ [Define this label if your system supports starting scripts with a line of
+ the form "#! /bin/sh" to select the interpreter to use for the script.])
+AH_TEMPLATE([CASE_INSENSITIVE_FILENAMES],
+ [Define this label if your system uses case-insensitive file names])
+AH_VERBATIM([EXTERNAL_SORT], [
+/* Define this label to use the system sort utility (which is probably more
+* efficient) over the internal sorting algorithm.
+*/
+#ifndef INTERNAL_SORT
+# undef EXTERNAL_SORT
+#endif
+])
+AH_TEMPLATE([TMPDIR],
+ [If you wish to change the directory in which temporary files are stored,
+ define this label to the directory desired.])
+AH_TEMPLATE([REGCOMP_BROKEN],
+ [Define this label if regcomp() is broken.])
+AH_TEMPLATE([CHECK_REGCOMP],
+ [Define this label if you wish to check the regcomp() function at run time
+ for correct behavior. This function is currently broken on Cygwin.])
+AH_TEMPLATE([__USE_FIXED_PROTOTYPES__],
+ [This corrects the problem of missing prototypes for certain functions
+ in some GNU installations (e.g. SunOS 4.1.x).])
+AH_TEMPLATE([NON_CONST_PUTENV_PROTOTYPE],
+ [Define this if you have a prototype for putenv() in <stdlib.h>, but it
+ doesn't declare its argument as "const char *".])
+AH_TEMPLATE([NEED_PROTO_REMOVE],
+ [If you receive error or warning messages indicating that you are missing
+ a prototype for, or a type mismatch using, the following function, define
+ this label and remake.])
+AH_TEMPLATE([NEED_PROTO_UNLINK],
+ [If you receive error or warning messages indicating that you are missing
+ a prototype for, or a type mismatch using, the following function, define
+ this label and remake.])
+AH_TEMPLATE([NEED_PROTO_MALLOC],
+ [If you receive error or warning messages indicating that you are missing
+ a prototype for, or a type mismatch using, the following function, define
+ this label and remake.])
+AH_TEMPLATE([NEED_PROTO_GETENV],
+ [If you receive error or warning messages indicating that you are missing
+ a prototype for, or a type mismatch using, the following function, define
+ this label and remake.])
+AH_TEMPLATE([NEED_PROTO_FGETPOS],
+ [If you receive error or warning messages indicating that you are missing
+ a prototype for, or a type mismatch using, the following function, define
+ this label and remake.])
+AH_TEMPLATE([NEED_PROTO_STAT],
+ [If you receive error or warning messages indicating that you are missing
+ a prototype for, or a type mismatch using, the following function, define
+ this label and remake.])
+AH_TEMPLATE([NEED_PROTO_LSTAT],
+ [If you receive error or warning messages indicating that you are missing
+ a prototype for, or a type mismatch using, the following function, define
+ this label and remake.])
+AH_TEMPLATE([NEED_PROTO_TRUNCATE],
+ [If you receive error or warning messages indicating that you are missing
+ a prototype for, or a type mismatch using, the following function, define
+ this label and remake.])
+AH_TEMPLATE([NEED_PROTO_FTRUNCATE],
+ [If you receive error or warning messages indicating that you are missing
+ a prototype for, or a type mismatch using, the following function, define
+ this label and remake.])
+
+
+
+# Report system info
+# ------------------
+program_name=[`grep 'PROGRAM_NAME *"' ctags.h | sed -e 's/.*"\([^"]*\)".*/\1/'`]
+program_version=[`grep 'PROGRAM_VERSION *"' ctags.h | sed -e 's/.*"\([^"]*\)".*/\1/'`]
+echo "$program_name, version $program_version"
+uname -mrsv 2>/dev/null
+
+# Define convenience macros
+# -------------------------
+# CHECK_HEADER_DEFINE(LABEL, HEADER [,ACTION-IF-FOUND [, ACTION-IF-NOT-FOUND ] ])
+AC_DEFUN([CHECK_HEADER_DEFINE], [
+ AC_MSG_CHECKING([if $1 is defined in $2])
+ AC_EGREP_CPP(yes,
+[#include <$2>
+#ifdef $1
+ yes
+#endif
+], [
+ AC_MSG_RESULT(yes)
+ [$3]
+], [
+ AC_MSG_RESULT(no)
+ [$4]
+]) ])
+
+# Checks for configuration options
+# --------------------------------
+
+AC_ARG_WITH(posix-regex,
+[ --with-posix-regex use Posix regex interface, if available])
+
+AC_ARG_WITH(readlib,
+[ --with-readlib include readtags library object during install])
+
+# AC_ARG_WITH(perl-regex,
+# [ --with-perl-regex use Perl pcre interface, if available])
+
+AC_ARG_ENABLE(etags,
+[ --enable-etags enable the installation of links for etags])
+
+AC_ARG_ENABLE(extended-format,
+[ --disable-extended-format
+ disable extension flags; use original ctags file
+ format only],
+ AC_DEFINE(DEFAULT_FILE_FORMAT, 1), AC_DEFINE(DEFAULT_FILE_FORMAT, 2))
+
+AC_ARG_ENABLE(external-sort,
+[ --disable-external-sort use internal sort algorithm instead of sort program])
+
+AC_ARG_ENABLE(custom-config,
+[ --enable-custom-config=FILE
+ enable custom config file for site-wide defaults])
+
+AC_ARG_ENABLE(macro-patterns,
+[ --enable-macro-patterns use patterns as default method to locate macros
+ instead of line numbers])
+
+AC_ARG_ENABLE(maintainer-mode,
+[ --enable-maintainer-mode
+ use maintainer makefile])
+
+AC_ARG_ENABLE(shell-globbing,
+[ --enable-shell-globbing=DIR
+ does shell expand wildcards (yes|no)? [yes]])
+
+AC_ARG_ENABLE(tmpdir,
+[ --enable-tmpdir=DIR default directory for temporary files [ARG=/tmp]],
+ tmpdir_specified=yes)
+
+
+# Process configuration options
+# -----------------------------
+
+if test "$enable_maintainer_mode" = yes ; then
+ AC_MSG_RESULT(enabling maintainer mode)
+fi
+
+install_targets="install-ctags"
+AC_MSG_CHECKING(whether to install link to etags)
+if test yes = "$enable_etags"; then
+ AC_MSG_RESULT(yes)
+ install_targets="$install_targets install-etags"
+else
+ AC_MSG_RESULT(no)
+fi
+AC_MSG_CHECKING(whether to install readtags object file)
+if test yes = "$with_readlib"; then
+ AC_MSG_RESULT(yes)
+ install_targets="$install_targets install-lib"
+else
+ AC_MSG_RESULT(no)
+fi
+AC_SUBST(install_targets)
+
+if test "$enable_custom_config" = no -o "$enable_custom_config" = yes ; then
+ AC_MSG_RESULT(no name supplied for custom configuration file)
+elif test -n "$enable_custom_config" ; then
+ AC_DEFINE_UNQUOTED(CUSTOM_CONFIGURATION_FILE, "$enable_custom_config")
+ AC_MSG_RESULT($enable_custom_config will be used as custom configuration file)
+fi
+
+if test "$enable_macro_patterns" = yes ; then
+ AC_DEFINE(MACROS_USE_PATTERNS)
+ AC_MSG_RESULT(tag file will use patterns for macros by default)
+fi
+
+# Checks for programs
+# -------------------
+
+AC_PROG_CC
+
+case `uname` in
+ HP-UX)
+ AC_MSG_CHECKING(HP-UX native compiler)
+ if test "$CC" = "cc"; then
+ AC_MSG_RESULT(yes; adding compiler options for ANSI support)
+ CFLAGS="$CFLAGS -Aa -D_HPUX_SOURCE"
+ else
+ AC_MSG_RESULT(no)
+ fi
+ ;;
+ SunOS)
+ if uname -r | grep '5\.' >/dev/null 2>&1; then
+ AC_MSG_CHECKING(Solaris native compiler)
+ if test "$CC" = "cc" -a "`which cc`" = "/usr/ucb/cc"; then
+ AC_MSG_RESULT(yes; adding compiler option for ANSI support)
+ CC="$CC -Xa"
+ else
+ AC_MSG_RESULT(no)
+ fi
+ fi
+ ;;
+esac
+
+AC_PROG_LN_S
+AC_CHECK_PROG(STRIP, strip, strip, :)
+
+
+# Checks for operating environment
+# --------------------------------
+
+# Check for temporary directory.
+# Precedence: --enable-tmpdir value, then TMPDIR, TMP, TEMP from the
+# environment, then "c:/" if that directory exists, and finally "/tmp".
+AC_MSG_CHECKING(directory to use for temporary files)
+if test -n "$enable_tmpdir"; then
+ tmpdir="$enable_tmpdir"
+elif test -n "$TMPDIR"; then
+ tmpdir="$TMPDIR"
+elif test -n "$TMP"; then
+ tmpdir="$TMP"
+elif test -n "$TEMP"; then
+ tmpdir="$TEMP"
+elif test -d "c:/"; then
+ tmpdir="c:/"
+else
+ tmpdir="/tmp"
+fi
+# Quote the path so a directory name containing spaces is tested as one word.
+if test -d "$tmpdir" ; then
+ AC_MSG_RESULT($tmpdir)
+ AC_DEFINE_UNQUOTED(TMPDIR, "$tmpdir")
+else
+ AC_MSG_ERROR($tmpdir does not exist)
+fi
+
+# Check whether system supports #! scripts
+AC_SYS_INTERPRETER
+if test yes = "$interpval"; then
+ AC_DEFINE(SYS_INTERPRETER)
+fi
+
+# Test for case-insensitive filenames
+AC_MSG_CHECKING(for case-insensitive filenames)
+touch conftest.cif
+if test -f CONFTEST.CIF; then
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(CASE_INSENSITIVE_FILENAMES)
+else
+ AC_MSG_RESULT(no)
+fi
+rm -f conftest.cif
+
+# Select the sort method.  Unless --disable-external-sort was given, look
+# for a "sort" program and confirm it accepts the -u -f -o options by
+# sorting an empty scratch file in the temporary directory onto itself.
+# EXTERNAL_SORT is defined only when that trial run succeeds.
+AC_MSG_CHECKING(selected sort method)
+if test no = "$enable_external_sort"; then
+ AC_MSG_RESULT(simple internal algorithm)
+else
+ AC_MSG_RESULT(external sort utility)
+ enable_external_sort=no
+ AC_CHECK_PROG(sort_found, sort, yes, no)
+ if test "$sort_found" = yes ; then
+ AC_MSG_CHECKING(if sort accepts our command line)
+ # The scratch path is quoted so a temporary directory containing spaces
+ # does not split into several arguments and fail the probe.
+ touch "${tmpdir}/sort.test"
+ sort -u -f -o "${tmpdir}/sort.test" "${tmpdir}/sort.test" 1>/dev/null 2>&1
+ if test $? -ne 0 ; then
+ AC_MSG_RESULT(no)
+ else
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(EXTERNAL_SORT)
+ enable_external_sort=yes
+ fi
+ rm -f "${tmpdir}/sort.test"
+ fi
+fi
+if test "$enable_external_sort" != yes ; then
+ AC_MSG_RESULT(using internal sort algorithm as fallback)
+fi
+
+
+# Checks for header files
+# -----------------------
+
+AC_CHECK_HEADERS_ONCE([dirent.h fcntl.h fnmatch.h stat.h stdlib.h string.h])
+AC_CHECK_HEADERS_ONCE([time.h types.h unistd.h])
+AC_CHECK_HEADERS_ONCE([sys/dir.h sys/stat.h sys/times.h sys/types.h])
+
+
+# Checks for header file macros
+# -----------------------------
+
+CHECK_HEADER_DEFINE(L_tmpnam, [stdio.h],, AC_DEFINE(L_tmpnam, 20))
+
+CHECK_HEADER_DEFINE(INT_MAX, [limits.h],,
+ CHECK_HEADER_DEFINE(MAXINT, [limits.h],
+ AC_DEFINE(INT_MAX, MAXINT), AC_DEFINE(INT_MAX, 32767)))
+
+
+# Checks for typedefs
+# -------------------
+
+AC_TYPE_SIZE_T
+AC_TYPE_OFF_T
+
+AC_MSG_CHECKING(for fpos_t)
+AC_EGREP_HEADER(fpos_t, stdio.h, AC_MSG_RESULT(yes),
+[
+ AC_MSG_RESULT(no)
+ AC_DEFINE(fpos_t, long)
+])
+
+AC_MSG_CHECKING(for clock_t)
+AC_EGREP_HEADER(clock_t, time.h, AC_MSG_RESULT(yes),
+[
+ AC_MSG_RESULT(no)
+ AC_DEFINE(clock_t, long)
+])
+
+
+# Checks for compiler characteristics
+# -----------------------------------
+
+# AC_CYGWIN
+# AC_MINGW32
+AC_C_CONST
+AC_OBJEXT
+AC_EXEEXT
+
+# Compile a probe that reads st.st_ino; HAVE_STAT_ST_INO is defined when it
+# builds.  The probe body returns instead of calling exit(), which would be
+# undeclared here (only <sys/stat.h> is included) and makes compilers that
+# reject implicit function declarations fail the probe for the wrong reason.
+AC_MSG_CHECKING(if struct stat contains st_ino)
+AC_TRY_COMPILE([#include <sys/stat.h>], [
+ struct stat st;
+ stat(".", &st);
+ if (st.st_ino > 0)
+ return 0;
+], have_st_ino=yes, have_st_ino=no)
+AC_MSG_RESULT($have_st_ino)
+if test yes = "$have_st_ino"; then
+ AC_DEFINE(HAVE_STAT_ST_INO)
+fi
+
+
+# Checks for library functions
+# ----------------------------
+
+AC_CHECK_FUNCS(fnmatch)
+AC_CHECK_FUNCS(strstr)
+AC_CHECK_FUNCS(strcasecmp stricmp, break)
+AC_CHECK_FUNCS(strncasecmp strnicmp, break)
+AC_CHECK_FUNCS(fgetpos, have_fgetpos=yes)
+
+# SEEK_SET should be in stdio.h, but may be in unistd.h on SunOS 4.1.x
+if test "$have_fgetpos" != yes ; then
+ CHECK_HEADER_DEFINE(SEEK_SET, stdio.h,,
+ CHECK_HEADER_DEFINE(SEEK_SET, unistd.h,,
+ AC_DEFINE(SEEK_SET, 0)))
+fi
+
+AC_CHECK_FUNCS(mkstemp, have_mkstemp=yes)
+if test "$have_mkstemp" != yes ; then
+ AC_CHECK_FUNCS(tempnam, have_tempnam=yes)
+fi
+if test "$have_mkstemp" != yes -a "$have_tempnam" != yes; then
+ AC_CHECK_FUNCS(chmod)
+ if test "$tmpdir_specified" = yes ; then
+ AC_MSG_RESULT(use of tmpnam overrides temporary directory selection)
+ fi
+fi
+
+AC_CHECK_FUNCS(opendir findfirst _findfirst, break)
+AC_CHECK_FUNCS(strerror)
+AC_CHECK_FUNCS(clock times, break)
+AC_CHECK_FUNCS(remove, have_remove=yes,
+ CHECK_HEADER_DEFINE(remove, unistd.h,, AC_DEFINE(remove, unlink)))
+
+AC_CHECK_FUNCS(truncate, have_truncate=yes)
+# === Cannot nest AC_CHECK_FUNCS() calls
+if test "$have_truncate" != yes ; then
+ AC_CHECK_FUNCS(ftruncate, have_ftruncate=yes)
+ if test "$have_ftruncate" != yes ; then
+ AC_CHECK_FUNCS(chsize)
+ fi
+fi
+
+# Look for setenv(); if it is missing, fall back to putenv().  When putenv()
+# exists and <stdlib.h> mentions it, check whether the prototype takes a
+# const argument; NON_CONST_PUTENV_PROTOTYPE is defined when it does not.
+# The character class before "putenv" must use the range 0-9: a reversed
+# range is rejected by egrep, which would make the match always fail.
+AC_CHECK_FUNCS(setenv, have_setenv=yes)
+# === Cannot nest AC_CHECK_FUNCS() calls
+if test "$have_setenv" != yes ; then
+ AC_CHECK_FUNCS(putenv, have_putenv=yes)
+ if test "$have_putenv" = yes ; then
+ AC_EGREP_HEADER(putenv, stdlib.h, have_putenv_prototype=yes)
+ if test "$have_putenv_prototype" = yes ; then
+ AC_MSG_CHECKING(putenv prototype)
+ AC_EGREP_HEADER([[^A-Za-z0-9_]putenv[ ]*\(.*const.*\)[ ]*;],
+ stdlib.h, AC_MSG_RESULT(correct),
+ [
+ AC_MSG_RESULT(no const)
+ AC_DEFINE(NON_CONST_PUTENV_PROTOTYPE)
+ ])
+ fi
+ fi
+fi
+
+# Unless --without-posix-regex was given, check for regcomp() and run a tiny
+# program that compiles a trivial pattern; REGCOMP_BROKEN is defined unless
+# that program exits successfully.  When cross-compiling the program cannot
+# be run, so CHECK_REGCOMP is defined instead.
+# The probe declares "int main(void)" and returns its status rather than
+# calling exit(), so it builds on compilers that reject implicit int and
+# implicit function declarations (no <stdlib.h> is included).
+#
+# if test yes = "$CYGWIN"; then with_posix_regex=no; fi
+if test no != "$with_posix_regex"; then
+ AC_CHECK_FUNCS(regcomp)
+ AC_MSG_CHECKING(if regcomp works)
+ AC_TRY_RUN([
+#include <sys/types.h>
+#include <regex.h>
+int main(void) {
+ regex_t patbuf;
+ return regcomp (&patbuf, "/hello/", 0) != 0;
+}],regcomp_works=yes,regcomp_works=no,AC_DEFINE(CHECK_REGCOMP))
+ AC_MSG_RESULT($regcomp_works)
+ if test yes != "$regcomp_works"; then
+ AC_DEFINE(REGCOMP_BROKEN)
+ fi
+fi
+
+# if test yes = "$with_perl_regex"; then
+# AC_MSG_CHECKING(for Perl regex library)
+# pcre_candidates="$with_perl_regex $HOME/local/lib* /usr*/local/lib* /usr/lib*"
+# for lib in $pcre_candidates; do
+# if test -f $lib/libpcreposix.so; then
+# pcre_lib="-L$lib -lpcreposix"
+# break
+# elif test -f $lib/libpcreposix.a; then
+# pcre_lib="$lib/libpcreposix.a"
+# break
+# fi
+# done
+# if test -z "$pcre_lib"; then
+# AC_MSG_RESULT(not found)
+# else
+# AC_MSG_RESULT($lib)
+# AC_DEFINE(HAVE_REGCOMP)
+# LDFLAGS="$LDFLAGS $pcre_lib"
+# have_regex=yes
+# fi
+# fi
+
+
+# Checks for missing prototypes
+# -----------------------------
+AC_CHECKING(for new missing prototypes)
+
+AC_DEFUN(CHECK_PROTO, [
+ AC_EGREP_HEADER([[^A-Za-z0-9_]$1([ ]+[A-Za-z0-9_]*)?[ ]*\(],
+ $2,
+ ,
+ [
+ AC_MSG_RESULT([adding prototype for $1])
+ AC_DEFINE(patsubst([NEED_PROTO_NAME], [NAME], translit([$1], [[a-z]], [[A-Z]])))
+ ])])
+
+if test "$have_remove" = yes ; then
+ CHECK_PROTO(remove, stdio.h)
+else
+ CHECK_PROTO(unlink, unistd.h)
+fi
+CHECK_PROTO(malloc, stdlib.h)
+CHECK_PROTO(getenv, stdlib.h)
+CHECK_PROTO(stat, sys/stat.h)
+CHECK_PROTO(lstat, sys/stat.h)
+if test "$have_fgetpos" = yes ; then
+ CHECK_PROTO(fgetpos, stdio.h)
+fi
+if test "$have_truncate" = yes ; then
+ CHECK_PROTO(truncate, unistd.h)
+fi
+if test "$have_ftruncate" = yes ; then
+ CHECK_PROTO(ftruncate, unistd.h)
+fi
+
+
+# Output files
+# ------------
+
+
+rm -f Makefile
+if test "$enable_maintainer_mode" = yes ; then
+ AC_MSG_RESULT(creating maintainer Makefile)
+ ln -s maintainer.mak Makefile
+ makefile_out=
+else
+ makefile_out=Makefile
+fi
+AC_OUTPUT($makefile_out)
+
+# vim:ts=4:sw=4:
diff --git a/ctags.1 b/ctags.1
new file mode 100644
index 0000000..2d89006
--- /dev/null
+++ b/ctags.1
@@ -0,0 +1,1186 @@
+.TH CTAGS 1 "Version 5.8" "Darren Hiebert" "Exuberant Ctags"
+
+
+.SH "NAME"
+ctags \- Generate tag files for source code
+
+
+.SH SYNOPSIS
+.TP 6
+\fBctags\fP [\fBoptions\fP] [\fIfile(s)\fP]
+.TP 6
+\fBetags\fP [\fBoptions\fP] [\fIfile(s)\fP]
+
+
+.SH "DESCRIPTION"
+The \fBctags\fP and \fBetags\fP programs (hereinafter collectively referred to
+as \fBctags\fP, except where distinguished) generate an index (or "tag") file
+for a variety of language objects found in \fIfile(s)\fP.
+This tag file allows these items to be quickly and easily located by a text
+editor or other utility. A "tag" signifies a language object for which an
+index entry is available (or, alternatively, the index entry created for that
+object).
+
+Alternatively, \fBctags\fP can generate a cross reference file which lists, in
+human readable form, information about the various source objects found in a
+set of language files.
+
+Tag index files are supported by numerous editors, which allow the user to
+locate the object associated with a name appearing in a source file and jump
+to the file and line which defines the name. Those known about at the time of
+this release are:
+
+.RS 4
+\fBVi\fP(1) and its derivatives (e.g. Elvis, Vim, Vile, Lemmy),
+\fBCRiSP\fP,
+\fBEmacs\fP,
+\fBFTE\fP (Folding Text Editor),
+\fBJED\fP,
+\fBjEdit\fP,
+\fBMined\fP,
+\fBNEdit\fP (Nirvana Edit),
+\fBTSE\fP (The SemWare Editor),
+\fBUltraEdit\fP,
+\fBWorkSpace\fP,
+\fBX2\fP,
+\fBZeus\fP
+.RE
+
+\fBCtags\fP is capable of generating different kinds of tags for each of many
+different languages. For a complete list of supported languages, the names
+by which they are recognized, and the kinds of tags which are generated for
+each, see the \fB\-\-list\-languages\fP and \fB\-\-list\-kinds\fP options.
+
+
+.SH "SOURCE FILES"
+
+Unless the \fB\-\-language\-force\fP option is specified, the language of each
+source file is automatically selected based upon a mapping of file names to
+languages. The mappings in effect for each language may be displayed using the
+\fB\-\-list\-maps\fP option and may be changed using the \fB\-\-langmap\fP option.
+On platforms which support it, if the name of a file is not mapped
+to a language and the file is executable, the first line of the file is
+checked to see if the file is a "#!" script for a recognized language.
+
+By default, all other file names are ignored. This permits running
+\fBctags\fP on all files in either a single directory (e.g. "ctags *"), or on
+all files in an entire source directory tree (e.g. "ctags \-R"), since only
+those files whose names are mapped to languages will be scanned.
+
+[The reason that .h extensions are mapped to C++ files rather than C files
+is because it is common to use .h extensions in C++, and no harm results in
+treating them as C++ files.]
+
+
+.SH "OPTIONS"
+
+Despite the wealth of available options, defaults are set so that \fBctags\fP
+is most commonly executed without any options (e.g. "ctags *", or "ctags \-R"),
+which will create a tag file in the current directory for all recognized
+source files. The options described below are provided merely to allow custom
+tailoring to meet special needs.
+
+Note that spaces separating the single-letter options from their parameters
+are optional.
+
+Note also that the boolean parameters to the long form options (those
+beginning with "\-\-" and that take a "\fI[=yes\fP|\fIno]\fP" parameter) may
+be omitted, in which case "\fB=\fP\fIyes\fP" is implied. (e.g. \fB\-\-sort\fP
+is equivalent to \fB\-\-sort\fP=\fIyes\fP). Note further that "=\fI1\fP" and
+"=\fIon\fP" are considered synonyms for "=\fIyes\fP", and that "=\fI0\fP"
+and "=\fIoff\fP" are considered synonyms for "=\fIno\fP".
+
+Some options are either ignored or useful only when used while running in
+etags mode (see \fB\-e\fP option). Such options will be noted.
+
+Most options may appear anywhere on the command line, affecting only those
+files which follow the option. A few options, however, must appear before the
+first file name and will be noted as such.
+
+Options taking language names will accept those names in either upper or lower
+case. See the \fB\-\-list\-languages\fP option for a complete list of the
+built-in language names.
+
+.TP 5
+.B \-a
+Equivalent to \fB\-\-append\fP.
+
+.TP 5
+.B \-B
+Use backward searching patterns (e.g. ?pattern?). [Ignored in etags mode]
+
+.TP 5
+.B \-e
+Enable etags mode, which will create a tag file for use with the Emacs editor.
+Alternatively, if \fBctags\fP is invoked by a name containing the string
+"etags" (either by renaming, or creating a link to, the executable), etags
+mode will be enabled. This option must appear before the first file name.
+
+.TP 5
+.BI \-f " tagfile"
+Use the name specified by \fItagfile\fP for the tag file (default is "tags",
+or "TAGS" when running in etags mode). If \fItagfile\fP is specified as
+"\-", then the tag file is written to standard output instead. \fBCtags\fP
+will stubbornly refuse to take orders if \fItagfile\fP exists and its first
+line contains something other than a valid tags line. This will save your neck
+if you mistakenly type "ctags \-f *.c", which would otherwise overwrite your
+first C file with the tags generated by the rest! It will also refuse to
+accept a multi-character file name which begins with a '\-' (dash) character,
+since this most likely means that you left out the tag file name and this
+option tried to grab the next option as the file name. If you really want to
+name your output tag file "\-ugly", specify it as "./\-ugly". This option must
+appear before the first file name. If this option is specified more than once,
+only the last will apply.
+
+.TP 5
+.B \-F
+Use forward searching patterns (e.g. /pattern/) (default).
+[Ignored in etags mode]
+
+.TP 5
+.BI \-h " list"
+Specifies a list of file extensions, separated by periods, which are to be
+interpreted as include (or header) files. To indicate files having no
+extension, use a period not followed by a non-period character (e.g. ".",
+"..x", ".x."). This option only affects how the scoping of particular kinds
+of tags is interpreted (i.e. whether or not they are considered as globally
+visible or visible only within the file in which they are defined); it does
+not map the extension to any particular language. Any tag which is located in
+a non-include file and cannot be seen (e.g. linked to) from another file is
+considered to have file-limited (e.g. static) scope. No kind of tag appearing
+in an include file will be considered to have file-limited scope. If the first
+character in the list is a plus sign, then the extensions in the list will be
+appended to the current list; otherwise, the list will replace the current
+list. See, also, the \fB\-\-file\-scope\fP option. The default list is
+".h.H.hh.hpp.hxx.h++.inc.def". To restore the default list, specify \fB\-h\fP
+\fIdefault\fP. Note that if an extension supplied to this option is not
+already mapped to a particular language (see \fBSOURCE FILES\fP, above), you
+will also need to use either the \fB\-\-langmap\fP or \fB\-\-language\-force\fP
+option.
+
+.TP 5
+.BI \-I " identifier\-list"
+Specifies a list of identifiers which are to be specially handled while
+parsing C and C++ source files. This option is specifically provided to handle
+special cases arising through the use of preprocessor macros. When the
+identifiers listed are simple identifiers, these identifiers will be ignored
+during parsing of the source files. If an identifier is suffixed with a '+'
+character, \fBctags\fP will also ignore any parenthesis-enclosed argument list
+which may immediately follow the identifier in the source files. If two
+identifiers are separated with the '=' character, the first identifier is
+replaced by the second identifier for parsing purposes. The list of
+identifiers may be supplied directly on the command line or read in from a
+separate file. If the first character of \fIidentifier\-list\fP is '@', '.' or
+a pathname separator ('/' or '\\'), or the first two characters specify a
+drive letter (e.g. "C:"), the parameter \fIidentifier\-list\fP will be
+interpreted as a filename from which to read a list of identifiers, one per
+input line. Otherwise, \fIidentifier\-list\fP is a list of identifiers (or
+identifier pairs) to be specially handled, each delimited by either a comma
+or by white space (in which case the list should be quoted to keep the entire
+list as one command line argument). Multiple \fB\-I\fP options may be supplied.
+To clear the list of ignore identifiers, supply a single dash ("\-") for
+\fIidentifier\-list\fP.
+
+This feature is useful when preprocessor macros are used in such a way that
+they cause syntactic confusion due to their presence. Indeed, this is the best
+way of working around a number of problems caused by the presence of
+syntax-busting macros in source files (see \fBCAVEATS\fP, below). Some
+examples will illustrate this point.
+
+.RS 8
+int foo ARGDECL4(void *, ptr, long int, nbytes)
+.RE
+
+.IP
+In the above example, the macro "ARGDECL4" would be mistakenly interpreted to
+be the name of the function instead of the correct name of "foo". Specifying
+\fB\-I\fP \fIARGDECL4\fP results in the correct behavior.
+
+.RS 8
+/* creates an RCS version string in module */
+.br
+MODULE_VERSION("$Revision: 690 $")
+.RE
+
+.IP
+In the above example the macro invocation looks too much like a function
+definition because it is not followed by a semicolon (indeed, it could even be
+followed by a global variable definition that would look much like a K&R style
+function parameter declaration). In fact, this seeming function definition
+could possibly even cause the rest of the file to be skipped over while trying
+to complete the definition. Specifying \fB\-I\fP \fIMODULE_VERSION+\fP would
+avoid such a problem.
+
+.RS 8
+CLASS Example {
+.br
+ // your content here
+.br
+};
+.RE
+
+.IP
+The example above uses "CLASS" as a preprocessor macro which expands to
+something different for each platform. For instance CLASS may be defined as
+"class __declspec(dllexport)" on Win32 platforms and simply "class" on UNIX.
+Normally, the absence of the C++ keyword "class" would cause the source file
+to be incorrectly parsed. Correct behavior can be restored by specifying
+\fB\-I\fP \fICLASS=class\fP.
+
+.TP 5
+.BI \-L " file"
+Read from \fIfile\fP a list of file names for which tags should be generated.
+If \fIfile\fP is specified as "\-", then file names are read from standard
+input. File names read using this option are processed following file names
+appearing on the command line. Options are also accepted in this input. If
+this option is specified more than once, only the last will apply. \fBNote:\fP
+\fIfile\fP is read in line-oriented mode, where a new line is the only
+delimiter and non-trailing white space is considered significant, in order
+that file names containing spaces may be supplied (however, trailing white
+space is stripped from lines); this can affect how options are parsed if
+included in the input.
+
+.TP 5
+.B \-n
+Equivalent to \fB\-\-excmd\fP=\fInumber\fP.
+
+.TP 5
+.B \-N
+Equivalent to \fB\-\-excmd\fP=\fIpattern\fP.
+
+.TP 5
+.BI \-o " tagfile"
+Equivalent to \fB\-f\fP \fItagfile\fP.
+
+.TP 5
+.B \-R
+Equivalent to \fB\-\-recurse\fP.
+
+.TP 5
+.B \-u
+Equivalent to \fB\-\-sort\fP=\fIno\fP (i.e. "unsorted").
+
+.TP 5
+.B \-V
+Equivalent to \fB\-\-verbose\fP.
+
+.TP 5
+.B \-w
+This option is silently ignored for backward-compatibility with the ctags
+of SVR4 Unix.
+
+.TP 5
+.B \-x
+Print a tabular, human-readable cross reference (xref) file to standard output
+instead of generating a tag file. The information contained in the output
+includes: the tag name; the kind of tag; the line number, file name, and
+source line (with extra white space condensed) of the file which defines the
+tag. No tag file is written and all options affecting tag file output will be
+ignored. Example applications for this feature are generating a listing of all
+functions located in a source file (e.g. \fBctags \-x \-\-c\-kinds\fP=\fIf\fP
+\fIfile\fP), or generating a list of all externally visible global variables
+located in a source file (e.g. \fBctags \-x \-\-c\-kinds\fP=\fIv\fP
+\fB\-\-file\-scope\fP=\fIno file\fP). This option must appear before the first
+file name.
+
+.TP 5
+\fB\-\-append\fP[=\fIyes\fP|\fIno\fP]
+Indicates whether tags generated from the specified files should be appended
+to those already present in the tag file or should replace them. This option
+is off by default. This option must appear before the first file name.
+
+.TP 5
+\fB\-\-etags\-include\fP=\fIfile\fP
+Include a reference to \fIfile\fP in the tag file. This option may be
+specified as many times as desired. This supports Emacs' capability to use a
+tag file which "includes" other tag files. [Available only in etags mode]
+
+.TP 5
+\fB\-\-exclude\fP=[\fIpattern\fP]
+Add \fIpattern\fP to a list of excluded files and directories. This option
+may be specified as many times as desired. For each file name considered by
+\fBctags\fP, each \fIpattern\fP specified using this option will be compared
+against both the complete path (e.g. some/path/base.ext) and the base name
+(e.g. base.ext) of the file, thus allowing patterns which match a given file
+name irrespective of its path, or match only a specific path. If appropriate
+support is available from the runtime library of your C compiler, then
+\fIpattern\fP may contain the usual shell wildcards (not regular expressions)
+common on Unix (be sure to quote the option parameter to protect the wildcards
+from being expanded by the shell before being passed to \fBctags\fP; also be
+aware that wildcards can match the slash character, '/'). You can determine if
+shell wildcards are available on your platform by examining the output of the
+\fB\-\-version\fP option, which will include "+wildcards" in the compiled
+feature list; otherwise, \fIpattern\fP is matched against file names using a
+simple textual comparison.
+
+If \fIpattern\fP begins with the character '@', then the rest of the string
+is interpreted as a file name from which to read exclusion patterns, one per
+line. If \fIpattern\fP is empty, the list of excluded patterns is cleared.
+Note that at program startup, the default exclude list contains "EIFGEN",
+"SCCS", "RCS", and "CVS", which are names of directories for which it is
+generally not desirable to descend while processing the \fB\-\-recurse\fP
+option.
+
+.TP 5
+\fB\-\-excmd\fP=\fItype\fP
+Determines the type of EX command used to locate tags in the source file.
+[Ignored in etags mode]
+
+The valid values for \fItype\fP (either the entire word or the first letter is
+accepted) are:
+
+.RS 5
+.TP 9
+.I number
+Use only line numbers in the tag file for locating tags. This has four
+advantages:
+.PD 0
+.RS 9
+.TP 4
+1.
+Significantly reduces the size of the resulting tag file.
+.TP 4
+2.
+Eliminates failures to find tags because the line defining the tag has
+changed, causing the pattern match to fail (note that some editors, such as
+\fBvim\fP, are able to recover in many such instances).
+.TP 4
+3.
+Eliminates finding identical matching, but incorrect, source lines (see
+\fBBUGS\fP, below).
+.TP 4
+4.
+Retains separate entries in the tag file for lines which are identical in
+content. In \fIpattern\fP mode, duplicate entries are dropped because the
+search patterns they generate are identical, making the duplicate entries
+useless.
+.RE
+.PD 1
+
+.IP
+However, this option has one significant drawback: changes to the source files
+can cause the line numbers recorded in the tag file to no longer correspond
+to the lines in the source file, causing jumps to some tags to miss the target
+definition by one or more lines. Basically, this option is best used when the
+source code to which it is applied is not subject to change. Selecting this
+option type causes the following options to be ignored: \fB\-BF\fP.
+
+.TP 9
+.I pattern
+Use only search patterns for all tags, rather than the line numbers usually
+used for macro definitions. This has the advantage of not referencing obsolete
+line numbers when lines have been added or removed since the tag file was
+generated.
+
+.TP 9
+.I mixed
+In this mode, patterns are generally used with a few exceptions. For C, line
+numbers are used for macro definition tags. This was the default format
+generated by the original \fBctags\fP and is, therefore, retained as the
+default for this option. For Fortran, line numbers are used for common blocks
+because their corresponding source lines are generally identical, making
+pattern searches useless for finding all matches.
+.RE
+
+.TP 5
+\fB\-\-extra\fP=\fI[+|\-]flags\fP
+Specifies whether to include extra tag entries for certain kinds of
+information. The parameter \fIflags\fP is a set of one-letter flags, each
+representing one kind of extra tag entry to include in the tag file. If
+\fIflags\fP is preceded by either the '+' or '\-' character, the effect of
+each flag is added to, or removed from, those currently enabled; otherwise the
+flags replace any current settings. The meaning of each flag is as follows:
+
+.PP
+.RS 8
+.TP 4
+.I f
+Include an entry for the base file name of every source file (e.g.
+"example.c"), which addresses the first line of the file.
+.TP 4
+.I q
+Include an extra class-qualified tag entry for each tag which is a member
+of a class (for languages for which this information is extracted; currently
+C++, Eiffel, and Java). The actual form of the qualified tag depends upon the
+language from which the tag was derived (using a form that is most natural for
+how qualified calls are specified in the language). For C++, it is in the form
+"class::member"; for Eiffel and Java, it is in the form "class.member". This
+may allow easier location of a specific tag when multiple occurrences of a
+tag name occur in the tag file. Note, however, that this could potentially
+more than double the size of the tag file.
+.RE
+
+.TP 5
+\fB\-\-fields\fP=\fI[+|\-]flags\fP
+Specifies the available extension fields which are to be included in the
+entries of the tag file (see \fBTAG FILE FORMAT\fP, below, for more
+information). The parameter \fIflags\fP is a set of one-letter flags, each
+representing one type of extension field to include, with the following
+meanings (disabled by default unless indicated):
+
+.PP
+.PD 0
+.RS 8
+.TP 4
+.I a
+Access (or export) of class members
+.TP 4
+.I f
+File-restricted scoping [enabled]
+.TP 4
+.I i
+Inheritance information
+.TP 4
+.I k
+Kind of tag as a single letter [enabled]
+.TP 4
+.I K
+Kind of tag as full name
+.TP 4
+.I l
+Language of source file containing tag
+.TP 4
+.I m
+Implementation information
+.TP 4
+.I n
+Line number of tag definition
+.TP 4
+.I s
+Scope of tag definition [enabled]
+.TP 4
+.I S
+Signature of routine (e.g. prototype or parameter list)
+.TP 4
+.I z
+Include the "kind:" key in kind field
+.TP 4
+.I t
+Type and name of a variable or typedef as "typeref:" field [enabled]
+.PD 1
+.RE
+
+.RS 5
+Each letter or group of letters may be preceded by either '+' to add it to the
+default set, or '\-' to exclude it. In the absence of any preceding '+' or '\-'
+sign, only those fields explicitly listed in \fIflags\fP will be included in
+the output (i.e. overriding the default set). This option is ignored if the
+option \fB\-\-format\fP=\fI1\fP has been specified. The default value
+of this option is \fIfkst\fP.
+.RE
+
+.TP 5
+\fB\-\-file\-scope\fP[=\fIyes\fP|\fIno\fP]
+Indicates whether tags scoped only for a single file (i.e. tags which cannot
+be seen outside of the file in which they are defined, such as "static" tags)
+should be included in the output. See, also, the \fB\-h\fP option. This option
+is enabled by default.
+
+.TP 5
+\fB\-\-filter\fP[=\fIyes\fP|\fIno\fP]
+Causes \fBctags\fP to behave as a filter, reading source file names from
+standard input and printing their tags to standard output on a file-by-file
+basis. If \fB\-\-sorted\fP is enabled, tags are sorted only within the source
+file in which they are defined. File names are read from standard input in
+line-oriented input mode (see note for \fB\-L\fP option) and only after file
+names listed on the command line or from any file supplied using the \fB\-L\fP
+option. When this option is enabled, the options \fB\-f\fP, \fB\-o\fP,
+and \fB\-\-totals\fP are ignored. This option is quite esoteric and is disabled
+by default. This option must appear before the first file name.
+
+.TP 5
+\fB\-\-filter\-terminator\fP=\fIstring\fP
+Specifies a string to print to standard output following the tags for each
+file name parsed when the \fB\-\-filter\fP option is enabled. This may permit an
+application reading the output of ctags to determine when the output for each
+file is finished. Note that if the file name read is a directory and
+\fB\-\-recurse\fP is enabled, this string will be printed only once at the
+end of all tags found by descending the directory. This string will always
+be separated from the last tag line for the file by its terminating newline.
+This option is quite esoteric and is empty by default. This option must appear
+before the first file name.
+
+.TP 5
+\fB\-\-format\fP=\fIlevel\fP
+Change the format of the output tag file. Currently the only valid values for
+\fIlevel\fP are \fI1\fP or \fI2\fP. Level 1 specifies the original tag file
+format and level 2 specifies a new extended format containing extension fields
+(but in a manner which retains backward-compatibility with original
+\fBvi\fP(1) implementations). The default level is 2. This option must appear
+before the first file name. [Ignored in etags mode]
+
+.TP 5
+.B \-\-help
+Prints to standard output a detailed usage description, and then exits.
+
+.TP 5
+\fB\-\-if0\fP[=\fIyes\fP|\fIno\fP]
+Indicates a preference as to whether code within an "#if 0" branch of a
+preprocessor conditional should be examined for non-macro tags (macro tags are
+always included). Because the intent of this construct is to disable code, the
+default value of this option is \fIno\fP. Note that this indicates a
+preference only and does not guarantee skipping code within an "#if 0" branch,
+since the fall-back algorithm used to generate tags when preprocessor
+conditionals are too complex follows all branches of a conditional. This
+option is disabled by default.
+
+.TP 5
+\fB\-\-<LANG>\-kinds\fP=\fI[+|\-]kinds\fP
+Specifies a list of language-specific kinds of tags (or kinds) to include in
+the output file for a particular language, where \fB<LANG>\fP is
+case-insensitive and is one of the built-in language names (see the
+\fB\-\-list\-languages\fP option for a complete list). The parameter \fIkinds\fP
+is a group of one-letter flags designating kinds of tags (particular to the
+language) to either include or exclude from the output. The specific sets of
+flags recognized for each language, their meanings and defaults may be listed
+using the \fB\-\-list\-kinds\fP option. Each letter or group of letters may be
+preceded by either '+' to add it to, or '\-' to remove it from, the default
+set. In the absence of any preceding '+' or '\-' sign, only those kinds
+explicitly listed in \fIkinds\fP will be included in the output (i.e.
+overriding the default for the specified language).
+
+As an example for the C language, in order to add prototypes and external
+variable declarations to the default set of tag kinds, but exclude macros,
+use \fB\-\-c\-kinds\fP=\fI+px\-d\fP; to include only tags for functions, use
+\fB\-\-c\-kinds\fP=\fIf\fP.
+
+.TP 5
+\fB\-\-langdef\fP=\fIname\fP
+Defines a new user-defined language, \fIname\fP, to be parsed with regular
+expressions. Once defined, \fIname\fP may be used in other options taking
+language names. The typical use of this option is to first define the
+language, then map file names to it using \fI\-\-langmap\fP, then specify
+regular expressions using \fI\-\-regex\-<LANG>\fP to define how its tags are
+found.
+
+.TP 5
+\fB\-\-langmap\fP=\fImap[,map[...]]\fP
+Controls how file names are mapped to languages (see the \fB\-\-list\-maps\fP
+option). Each comma-separated \fImap\fP consists of the language name (either
+a built-in or user-defined language), a colon, and a list of file extensions
+and/or file name patterns. A file extension is specified by preceding the
+extension with a period (e.g. ".c"). A file name pattern is specified by
+enclosing the pattern in parentheses (e.g. "([Mm]akefile)"). If appropriate
+support is available from the runtime library of your C compiler, then the
+file name pattern may contain the usual shell wildcards common on Unix (be
+sure to quote the option parameter to protect the wildcards from being
+expanded by the shell before being passed to \fBctags\fP). You can determine
+if shell wildcards are available on your platform by examining the output of
+the \fB\-\-version\fP option, which will include "+wildcards" in the compiled
+feature list; otherwise, the file name patterns are matched against file names
+using a simple textual comparison. When mapping a file extension, it will
+first be unmapped from any other languages.
+
+If the first character in a map is a plus sign, then the extensions and file
+name patterns in that map will be appended to the current map for that
+language; otherwise, the map will replace the current map. For example, to
+specify that only files with extensions of .c and .x are to be treated as C
+language files, use "\fB\-\-langmap\fP=\fIc:.c.x\fP"; to also add files with
+extensions of .j as Java language files, specify
+"\fB\-\-langmap\fP=\fIc:.c.x,java:+.j\fP". To map makefiles (e.g. files
+named either "Makefile", "makefile", or having the extension ".mak") to a
+language called "make", specify "\fB\-\-langmap\fP=\fImake:([Mm]akefile).mak\fP".
+To map files having no extension, specify a period not followed by a
+non-period character (e.g. ".", "..x", ".x."). To clear the mapping for a
+particular language (thus inhibiting automatic generation of tags for that
+language), specify an empty extension list (e.g.
+"\fB\-\-langmap\fP=\fIfortran:\fP"). To restore the default language mappings
+for a particular language, supply the keyword "default" for the mapping.
+To restore the default language mappings for all languages, specify
+"\fB\-\-langmap\fP=\fIdefault\fP". Note that file extensions are tested before
+file name patterns when inferring the language of a file.
+
+.TP 5
+\fB\-\-language\-force\fP=\fIlanguage\fP
+By default, \fBctags\fP automatically selects the language of a source file,
+ignoring those files whose language cannot be determined (see
+\fBSOURCE FILES\fP, above). This option forces the specified \fIlanguage\fP
+(case-insensitive; either built-in or user-defined) to be used for every
+supplied file instead of automatically selecting the language based upon its
+extension. In addition, the special value \fIauto\fP indicates that the
+language should be automatically selected (which effectively disables this
+option).
+
+.TP 5
+\fB\-\-languages\fP=\fI[+|\-]list\fP
+Specifies the languages for which tag generation is enabled, with \fIlist\fP
+containing a comma-separated list of language names (case-insensitive; either
+built-in or user-defined). If the first language of \fIlist\fP is not preceded
+by either a '+' or '\-', the current list will be cleared before adding or
+removing the languages in \fIlist\fP. Until a '\-' is encountered, each
+language in the list will be added to the current list. As either the '+' or
+\&'\-' is encountered in the list, the languages following it are added or
+removed from the current list, respectively. Thus, it becomes simple to
+replace the current list with a new one, or to add or remove languages from
+the current list. The actual list of files for which tags will be generated
+depends upon the language extension mapping in effect (see the \fB\-\-langmap\fP
+option). Note that all languages, including user-defined languages are enabled
+unless explicitly disabled using this option. Language names included in
+\fIlist\fP may be any built-in language or one previously defined with
+\fB\-\-langdef\fP. The default is "all", which is also accepted as a valid
+argument. See the \fB\-\-list\-languages\fP option for a complete list of the
+built-in language names.
+
+.TP 5
+\fB\-\-license\fP
+Prints a summary of the software license to standard output, and then exits.
+
+.TP 5
+\fB\-\-line\-directives\fP[=\fIyes\fP|\fIno\fP]
+Specifies whether "#line" directives should be recognized. These are present
+in the output of preprocessors and contain the line number, and possibly the
+file name, of the original source file(s) from which the preprocessor output
+file was generated. When enabled, this option will cause \fBctags\fP to
+generate tag entries marked with the file names and line numbers of their
+locations in the original source file(s), instead of their actual locations in the
+preprocessor output. The actual file names placed into the tag file will have
+the same leading path components as the preprocessor output file, since it is
+assumed that the original source files are located relative to the
+preprocessor output file (unless, of course, the #line directive specifies an
+absolute path). This option is off by default. \fBNote:\fP This option is
+generally only useful when used together with the \fB\-\-excmd\fP=\fInumber\fP
+(\fB\-n\fP) option. Also, you may have to use either the \fB\-\-langmap\fP or
+\fB\-\-language\-force\fP option if the extension of the preprocessor output file
+is not known to \fBctags\fP.
+
+.TP 5
+\fB\-\-links\fP[=\fIyes\fP|\fIno\fP]
+Indicates whether symbolic links (if supported) should be followed. When
+disabled, symbolic links are ignored. This option is on by default.
+
+.TP 5
+\fB\-\-list\-kinds\fP[=\fIlanguage\fP|\fIall\fP]
+Lists the tag kinds recognized for either the specified language or all
+languages, and then exits. Each kind of tag recorded in the tag file is
+represented by a one-letter flag, which is also used to filter the tags placed
+into the output through use of the \fB\-\-<LANG>\-kinds\fP option. Note that some
+languages and/or tag kinds may be implemented using regular expressions and
+may not be available if regex support is not compiled into \fBctags\fP (see
+the \fB\-\-regex\-<LANG>\fP option). Each kind listed is enabled unless followed
+by "[off]".
+
+.TP 5
+\fB\-\-list\-maps\fP[=\fIlanguage\fP|\fIall\fP]
+Lists the file extensions and file name patterns which associate a file name
+with a language for either the specified language or all languages, and then
+exits. See the \fB\-\-langmap\fP option, and \fBSOURCE FILES\fP, above.
+
+.TP 5
+\fB\-\-list\-languages\fP
+Lists the names of the languages understood by \fBctags\fP, and then exits.
+These language names are case insensitive and may be used in the
+\fB\-\-language\-force\fP, \fB\-\-languages\fP, \fB\-\-<LANG>\-kinds\fP, and
+\fB\-\-regex\-<LANG>\fP options.
+
+.TP 5
+\fB\-\-options\fP=\fIfile\fP
+Read additional options from \fIfile\fP. The file should contain one option
+per line. As a special case, if
+\fB\-\-options\fP=\fINONE\fP is specified as the first option on the command
+line, it will disable the automatic reading of any configuration options from
+either a file or the environment (see \fBFILES\fP).
+
+.TP 5
+\fB\-\-recurse\fP[=\fIyes\fP|\fIno\fP]
+Recurse into directories encountered in the list of supplied files. If the
+list of supplied files is empty and no file list is specified with the
+\fB\-L\fP option, then the current directory (i.e. ".") is assumed. Symbolic
+links are followed. If you don't like these behaviors, either explicitly
+specify the files or pipe the output of \fBfind\fP(1) into \fBctags \-L\-\fP
+instead. \fBNote:\fP This option is not supported on all platforms at present.
+It is available if the output of the \fB\-\-help\fP option includes this option.
+See, also, the \fB\-\-exclude\fP option to limit recursion.
+
+.TP 5
+\fB\-\-regex\-<LANG>\fP=\fI/regexp/replacement/[kind\-spec/][flags]\fP
+The \fI/regexp/replacement/\fP pair define a regular expression replacement
+pattern, similar in style to \fBsed\fP substitution commands, with which to
+generate tags from source files mapped to the named language, \fB<LANG>\fP,
+(case-insensitive; either a built-in or user-defined language). The regular
+expression, \fIregexp\fP, defines an extended regular expression (roughly that
+used by \fBegrep\fP(1)), which is used to locate a single source line
+containing a tag and may specify tab characters using \\t. When a matching
+line is found, a tag will be generated for the name defined by
+\fIreplacement\fP, which generally will contain the special back-references
+\\1 through \\9 to refer to matching sub-expression groups within
+\fIregexp\fP. The '/' separator characters shown in the parameter to the
+option can actually be replaced by any character. Note that whichever
+separator character is used will have to be escaped with a backslash ('\\')
+character wherever it is used in the parameter as something other than a
+separator. The regular expression defined by this option is added to the
+current list of regular expressions for the specified language unless the
+parameter is omitted, in which case the current list is cleared.
+
+Unless modified by \fIflags\fP, \fIregexp\fP is interpreted as a Posix
+extended regular expression. The \fIreplacement\fP should expand for all
+matching lines to a non-empty string of characters, or a warning message will
+be reported. An optional kind specifier for tags matching \fIregexp\fP may
+follow \fIreplacement\fP, which will determine what kind of tag is reported in
+the "kind" extension field (see \fBTAG FILE FORMAT\fP, below). The full form
+of \fIkind\-spec\fP is in the form of a single letter, a comma, a name (without
+spaces), a comma, a description, followed by a separator, which specify
+the short and long forms of the kind value and its textual description
+(displayed using \fB\-\-list\-kinds\fP). Either the kind name and/or the
+description may be omitted. If \fIkind\-spec\fP is omitted, it defaults to
+"\fIr,regex\fP". Finally, \fIflags\fP are one or more single-letter characters
+having the following effect upon the interpretation of \fIregexp\fP:
+
+.PP
+.RS 8
+.TP 4
+.I b
+The pattern is interpreted as a Posix basic regular expression.
+.TP 4
+.I e
+The pattern is interpreted as a Posix extended regular expression (default).
+.TP 4
+.I i
+The regular expression is to be applied in a case-insensitive manner.
+.RE
+
+.RS 5
+Note that this option is available only if \fBctags\fP was compiled with
+support for regular expressions, which depends upon your platform. You can
+determine if support for regular expressions is compiled in by examining the
+output of the \fB\-\-version\fP option, which will include "+regex" in the
+compiled feature list.
+
+For more information on the regular expressions used by \fBctags\fP, see
+either the \fBregex(5,7)\fP man page, or the GNU info documentation for regex
+(e.g. "info regex").
+.RE
+
+.TP 5
+\fB\-\-sort\fP[=\fIyes\fP|\fIno\fP|\fIfoldcase\fP]
+Indicates whether the tag file should be sorted on the tag name (default is
+\fIyes\fP). Note that the original \fBvi\fP(1) required sorted tags.
+The \fIfoldcase\fP value specifies case insensitive (or case-folded) sorting.
+Fast binary searches of tag files sorted with case-folding will require
+special support from tools using tag files, such as that found in the ctags
+readtags library, or Vim version 6.2 or higher (using "set ignorecase"). This
+option must appear before the first file name. [Ignored in etags mode]
+
+.TP 5
+\fB\-\-tag\-relative\fP[=\fIyes\fP|\fIno\fP]
+Indicates that the file paths recorded in the tag file should be relative to
+the directory containing the tag file, rather than relative to the current
+directory, unless the files supplied on the command line are specified with
+absolute paths. This option must appear before the first file name. The
+default is \fIyes\fP when running in etags mode (see the \fB\-e\fP
+option), \fIno\fP otherwise.
+
+.TP 5
+\fB\-\-totals\fP[=\fIyes\fP|\fIno\fP]
+Prints statistics about the source files read and the tag file written during
+the current invocation of \fBctags\fP. This option is off by default.
+This option must appear before the first file name.
+
+.TP 5
+\fB\-\-verbose\fP[=\fIyes\fP|\fIno\fP]
+Enable verbose mode. This prints out information on option processing and a
+brief message describing what action is being taken for each file considered
+by \fBctags\fP. Normally, \fBctags\fP does not read command line arguments
+until after options are read from the configuration files (see \fBFILES\fP,
+below) and the \fBCTAGS\fP environment variable. However, if this option is
+the first argument on the command line, it will take effect before any options
+are read from these sources. The default is \fIno\fP.
+
+.TP 5
+\fB\-\-version\fP
+Prints a version identifier for \fBctags\fP to standard output, and then
+exits. This is guaranteed to always contain the string "Exuberant Ctags".
+
+
+.SH "OPERATIONAL DETAILS"
+
+As \fBctags\fP considers each file name in turn, it tries to determine the
+language of the file by applying the following three tests in order: if the
+file extension has been mapped to a language, if the file name matches a shell
+pattern mapped to a language, and finally if the file is executable and its
+first line specifies an interpreter using the Unix-style "#!" specification
+(if supported on the platform). If a language was identified, the file is
+opened and then the appropriate language parser is called to operate on the
+currently open file. The parser parses through the file and adds an entry to
+the tag file for each language object it is written to handle. See
+\fBTAG FILE FORMAT\fP, below, for details on these entries.
+
+This implementation of \fBctags\fP imposes no formatting requirements on C
+code as do legacy implementations. Older implementations of ctags tended to
+rely upon certain formatting assumptions in order to help it resolve coding
+dilemmas caused by preprocessor conditionals.
+
+In general, \fBctags\fP tries to be smart about conditional preprocessor
+directives. If a preprocessor conditional is encountered within a statement
+which defines a tag, \fBctags\fP follows only the first branch of that
+conditional (except in the special case of "#if 0", in which case it follows
+only the last branch). The reason for this is that failing to pursue only one
+branch can result in ambiguous syntax, as in the following example:
+
+.RS
+#ifdef TWO_ALTERNATIVES
+.br
+struct {
+.br
+#else
+.br
+union {
+.br
+#endif
+.RS 4
+short a;
+.br
+long b;
+.RE
+}
+.RE
+
+Both branches cannot be followed, or braces become unbalanced and \fBctags\fP
+would be unable to make sense of the syntax.
+
+If the application of this heuristic fails to properly parse a file,
+generally due to complicated and inconsistent pairing within the conditionals,
+\fBctags\fP will retry the file using a different heuristic which does not
+selectively follow conditional preprocessor branches, but instead falls back
+to relying upon a closing brace ("}") in column 1 as indicating the end of a
+block once any brace imbalance results from following a #if conditional branch.
+
+\fBCtags\fP will also try to specially handle argument lists enclosed in
+double sets of parentheses in order to accept the following conditional
+construct:
+
+.RS
+extern void foo __ARGS((int one, char two));
+.RE
+
+Any name immediately preceding the "((" will be automatically ignored and
+the previous name will be used.
+
+C++ operator definitions are specially handled. In order for consistency with
+all types of operators (overloaded and conversion), the operator name in the
+tag file will always be preceded by the string "operator " (i.e. even if the
+actual operator definition was written as "operator<<").
+
+After creating or appending to the tag file, it is sorted by the tag name,
+removing identical tag lines.
+
+
+.SH "TAG FILE FORMAT"
+
+When not running in etags mode, each entry in the tag file consists of a
+separate line, each looking like this in the most general case:
+
+.RS 1
+tag_name<TAB>file_name<TAB>ex_cmd;"<TAB>extension_fields
+.RE
+
+The fields and separators of these lines are specified as follows:
+
+.PD 0
+.RS 4
+.TP 4
+1.
+tag name
+.TP 4
+2.
+single tab character
+.TP 4
+3.
+name of the file in which the object associated with the tag is located
+.TP 4
+4.
+single tab character
+.TP 4
+5.
+EX command used to locate the tag within the file; generally a search pattern
+(either /pattern/ or ?pattern?) or line number (see \fB\-\-excmd\fP). Tag file
+format 2 (see \fB\-\-format\fP) extends this EX command under certain
+circumstances to include a set of extension fields (described below) embedded
+in an EX comment immediately appended to the EX command, which leaves it
+backward-compatible with original \fBvi\fP(1) implementations.
+.RE
+.PD 1
+
+A few special tags are written into the tag file for internal purposes. These
+tags are composed in such a way that they always sort to the top of the file.
+Therefore, the first two characters of these tags are used as a magic number to
+detect a tag file for purposes of determining whether a valid tag file is
+being overwritten rather than a source file.
+
+Note that the name of each source file will be recorded in the tag file
+exactly as it appears on the command line. Therefore, if the path you
+specified on the command line was relative to the current directory, then it
+will be recorded in that same manner in the tag file. See, however, the
+\fB\-\-tag\-relative\fP option for how this behavior can be modified.
+
+Extension fields are tab-separated key-value pairs appended to the end of the
+EX command as a comment, as described above. These key value pairs appear in
+the general form "\fIkey\fP:\fIvalue\fP". Their presence in the lines of the
+tag file is controlled by the \fB\-\-fields\fP option. The possible keys and
+the meaning of their values are as follows:
+
+.TP 12
+.I access
+Indicates the visibility of this class member, where \fIvalue\fP is specific
+to the language.
+
+.TP 12
+.I file
+Indicates that the tag has file-limited visibility. This key has no
+corresponding value.
+
+.TP 12
+.I kind
+Indicates the type, or kind, of tag. Its value is either one of the
+corresponding one-letter flags described under the various
+\fB\-\-<LANG>\-kinds\fP options above, or a full name. It is permitted (and is,
+in fact, the default) for the key portion of this field to be omitted. The
+optional behaviors are controlled with the \fB\-\-fields\fP option.
+
+.TP 12
+.I implementation
+When present, this indicates a limited implementation (abstract vs. concrete)
+of a routine or class, where \fIvalue\fP is specific to the language
+("virtual" or "pure virtual" for C++; "abstract" for Java).
+
+.TP 12
+.I inherits
+When present, \fIvalue\fP is a comma-separated list of classes from which
+this class is derived (i.e. inherits from).
+
+.TP 12
+.I signature
+When present, \fIvalue\fP is a language-dependent representation of the
+signature of a routine. A routine signature in its complete form specifies the
+return type of a routine and its formal argument list. This extension field is
+presently supported only for C-based languages and does not include the return
+type.
+
+.PP
+In addition, information on the scope of the tag definition may be available,
+with the key portion equal to some language-dependent construct name and its
+value the name declared for that construct in the program. This scope entry
+indicates the scope in which the tag was found. For example, a tag generated
+for a C structure member would have a scope looking like "struct:myStruct".
+
+
+.SH "HOW TO USE WITH VI"
+Vi will, by default, expect a tag file by the name "tags" in the current
+directory. Once the tag file is built, the following commands exercise the tag
+indexing feature:
+.TP 12
+.B vi \-t tag
+Start vi and position the cursor at the file and line where "tag" is defined.
+.TP 12
+.B :ta tag
+Find a tag.
+.TP 12
+.B Ctrl-]
+Find the tag under the cursor.
+.TP 12
+.B Ctrl-T
+Return to previous location before jump to tag (not widely implemented).
+
+
+.SH "HOW TO USE WITH GNU EMACS"
+Emacs will, by default, expect a tag file by the name "TAGS" in the current
+directory. Once the tag file is built, the following commands exercise the
+tag indexing feature:
+.TP 10
+.B "M-x visit\-tags\-table <RET> FILE <RET>"
+Select the tag file, "FILE", to use.
+.TP 10
+.B "M-. [TAG] <RET>"
+Find the first definition of TAG. The default tag is the identifier under the
+cursor.
+.TP 10
+.B "M-*"
+Pop back to where you previously invoked "M-.".
+.TP 10
+.B "C-u M-."
+Find the next definition for the last tag.
+
+.PP
+For more commands, see the \fITags\fP topic in the Emacs info document.
+
+
+.SH "HOW TO USE WITH NEDIT"
+NEdit version 5.1 and later can handle the new extended tag file format (see
+\fB\-\-format\fP). To make NEdit use the tag file, select "File\->Load Tags
+File". To jump to the definition for a tag, highlight the word, then press
+Ctrl-D. NEdit 5.1 can read multiple tag files from different directories.
+Setting the X resource nedit.tagFile to the name of a tag file instructs NEdit
+to automatically load that tag file at startup time.
+
+
+.SH "CAVEATS"
+Because \fBctags\fP is neither a preprocessor nor a compiler, use of
+preprocessor macros can fool \fBctags\fP into either missing tags or
+improperly generating inappropriate tags. Although \fBctags\fP has been
+designed to handle certain common cases, this is the single biggest cause of
+reported problems. In particular, the use of preprocessor constructs which
+alter the textual syntax of C can fool \fBctags\fP. You can work around many
+such problems by using the \fB\-I\fP option.
+
+Note that since \fBctags\fP generates patterns for locating tags (see
+the \fB\-\-excmd\fP option), it is entirely possible that the wrong line may be
+found by your editor if there exists another source line which is identical to
+the line containing the tag. The following example demonstrates this condition:
+
+.RS
+int variable;
+
+/* ... */
+.br
+void foo(variable)
+.br
+int variable;
+.br
+{
+.RS 4
+/* ... */
+.RE
+}
+.RE
+
+Depending upon which editor you use and where in the code you happen to be, it
+is possible that the search pattern may locate the local parameter declaration
+in foo() before it finds the actual global variable definition, since the
+lines (and therefore their search patterns) are identical. This can be avoided
+by use of the \fB\-\-excmd\fP=\fIn\fP option.
+
+
+.SH "BUGS"
+\fBCtags\fP has more options than \fBls\fP(1).
+
+When parsing a C++ member function definition (e.g. "className::function"),
+\fBctags\fP cannot determine whether the scope specifier is a class name or a
+namespace specifier and always lists it as a class name in the scope portion
+of the extension fields. Also, if a C++ function is defined outside of the
+class declaration (the usual case), the access specification (i.e. public,
+protected, or private) and implementation information (e.g. virtual, pure
+virtual) contained in the function declaration are not known when the tag is
+generated for the function definition. It will, however, be available for
+prototypes (e.g. \fB\-\-c++\-kinds\fP=\fI+p\fP).
+
+No qualified tags are generated for language objects inherited into a class.
+
+
+.SH "ENVIRONMENT VARIABLES"
+
+.TP 8
+.B CTAGS
+If this environment variable exists, it will be expected to contain a set of
+default options which are read when \fBctags\fP starts, after the
+configuration files listed in \fBFILES\fP, below, are read, but before any
+command line options are read. Options appearing on the command line will
+override options specified in this variable. Only options will be read from
+this variable. Note that all white space in this variable is considered a
+separator, making it impossible to pass an option parameter containing an
+embedded space. If this is a problem, use a configuration file instead.
+
+.TP 8
+.B ETAGS
+Similar to the \fBCTAGS\fP variable above, this variable, if found, will be
+read when \fBetags\fP starts. If this variable is not found, \fBetags\fP will
+try to use \fBCTAGS\fP instead.
+
+.TP 8
+.B TMPDIR
+On Unix-like hosts where mkstemp() is available, the value of this variable
+specifies the directory in which to place temporary files. This can be useful
+if the size of a temporary file becomes too large to fit on the partition
+holding the default temporary directory defined at compilation time.
+\fBctags\fP creates temporary files only if either (1) an emacs-style tag file
+is being generated, (2) the tag file is being sent to standard output, or (3)
+the program was compiled to use an internal sort algorithm to sort the tag
+files instead of the sort utility of the operating system. If the sort
+utility of the operating system is being used, it will generally observe this
+variable also. Note that if \fBctags\fP is setuid, the value of TMPDIR will be
+ignored.
+
+
+.SH "FILES"
+.PD 0
+.I /ctags.cnf (on MSDOS, MSWindows only)
+.br
+.I /etc/ctags.conf
+.br
+.I /usr/local/etc/ctags.conf
+.br
+.I $HOME/.ctags
+.br
+.I $HOME/ctags.cnf (on MSDOS, MSWindows only)
+.br
+.I .ctags
+.br
+.I ctags.cnf (on MSDOS, MSWindows only)
+.IP
+If any of these configuration files exist, each will be expected to contain a
+set of default options which are read in the order listed when \fBctags\fP
+starts, but before the \fBCTAGS\fP environment variable is read or any command
+line options are read. This makes it possible to set up site-wide, personal
+or project-level defaults. It is possible to compile \fBctags\fP to read an
+additional configuration file before any of those shown above, which will be
+indicated if the output produced by the \fB\-\-version\fP option lists the
+"custom-conf" feature. Options appearing in the \fBCTAGS\fP environment
+variable or on the command line will override options specified in these
+files. Only options will be read from these files. Note that the option files
+are read in line-oriented mode in which spaces are significant (since
+shell quoting is not possible). Each line of the file is read as one command
+line parameter (as if it were quoted with single quotes). Therefore, use new
+lines to indicate separate command-line arguments.
+.PD 1
+
+.TP
+.I tags
+The default tag file created by \fBctags\fP.
+.TP
+.I TAGS
+The default tag file created by \fBetags\fP.
+
+.SH "SEE ALSO"
+The official Exuberant Ctags web site at:
+
+.RS
+http://ctags.sourceforge.net
+.RE
+
+Also \fBex\fP(1), \fBvi\fP(1), \fBelvis\fP, or, better yet, \fBvim\fP, the
+official editor of \fBctags\fP. For more information on \fBvim\fP, see the VIM
+Pages web site at:
+
+.RS
+http://www.vim.org/
+.RE
+
+
+.SH "AUTHOR"
+Darren Hiebert <dhiebert at users.sourceforge.net>
+.br
+http://DarrenHiebert.com/
+
+
+.SH "MOTIVATION"
+"Think ye at all times of rendering some service to every member of the human
+race."
+
+"All effort and exertion put forth by man from the fullness of his heart is
+worship, if it is prompted by the highest motives and the will to do service
+to humanity."
+
+.RS
+\-\- From the Baha'i Writings
+.RE
+
+
+.SH "CREDITS"
+This version of \fBctags\fP was originally derived from and inspired by the
+ctags program by Steve Kirkendall <kirkenda@cs.pdx.edu> that comes with the
+Elvis vi clone (though virtually none of the original code remains).
+
+Credit is also due Bram Moolenaar <Bram@vim.org>, the author of \fBvim\fP, who
+has devoted so much of his time and energy both to developing the editor as a
+service to others, and to helping the orphans of Uganda.
+
+The section entitled "HOW TO USE WITH GNU EMACS" was shamelessly stolen from
+the info page for GNU \fBetags\fP.
diff --git a/ctags.h b/ctags.h
new file mode 100644
index 0000000..f8884af
--- /dev/null
+++ b/ctags.h
@@ -0,0 +1,28 @@
+/*
+* $Id: ctags.h 702 2009-03-14 03:52:21Z dhiebert $
+*
+* Copyright (c) 1996-2002, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* Program identity definitions: version, name, URL, copyright and author.
+*/
+#ifndef _CTAGS_H
+#define _CTAGS_H
+
+/*
+* MACROS (PROGRAM_VERSION is only defined here if not already defined)
+*/
+#ifndef PROGRAM_VERSION
+# define PROGRAM_VERSION "5.8"
+#endif
+#define PROGRAM_NAME "Exuberant Ctags"
+#define PROGRAM_URL "http://ctags.sourceforge.net"
+#define PROGRAM_COPYRIGHT "Copyright (C) 1996-2009"
+#define AUTHOR_NAME "Darren Hiebert"
+#define AUTHOR_EMAIL "dhiebert@users.sourceforge.net"
+
+#endif /* _CTAGS_H */
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/ctags.html b/ctags.html
new file mode 100644
index 0000000..6ff0fdd
--- /dev/null
+++ b/ctags.html
@@ -0,0 +1,2087 @@
+<!-- Creator : groff version 1.19.2 -->
+<!-- CreationDate: Thu Jul 9 17:03:58 2009 -->
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
+"http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<meta name="generator" content="groff -Thtml, see www.gnu.org">
+<meta http-equiv="Content-Type" content="text/html; charset=US-ASCII">
+<meta name="Content-Style" content="text/css">
+<style type="text/css">
+ p { margin-top: 0; margin-bottom: 0; }
+ pre { margin-top: 0; margin-bottom: 0; }
+ table { margin-top: 0; margin-bottom: 0; }
+</style>
+<title>CTAGS</title>
+
+</head>
+<body>
+
+<h1 align=center>CTAGS</h1>
+
+<a href="#NAME">NAME</a><br>
+<a href="#SYNOPSIS">SYNOPSIS</a><br>
+<a href="#DESCRIPTION">DESCRIPTION</a><br>
+<a href="#SOURCE FILES">SOURCE FILES</a><br>
+<a href="#OPTIONS">OPTIONS</a><br>
+<a href="#OPERATIONAL DETAILS">OPERATIONAL DETAILS</a><br>
+<a href="#TAG FILE FORMAT">TAG FILE FORMAT</a><br>
+<a href="#HOW TO USE WITH VI">HOW TO USE WITH VI</a><br>
+<a href="#HOW TO USE WITH GNU EMACS">HOW TO USE WITH GNU EMACS</a><br>
+<a href="#HOW TO USE WITH NEDIT">HOW TO USE WITH NEDIT</a><br>
+<a href="#CAVEATS">CAVEATS</a><br>
+<a href="#BUGS">BUGS</a><br>
+<a href="#ENVIRONMENT VARIABLES">ENVIRONMENT VARIABLES</a><br>
+<a href="#FILES">FILES</a><br>
+<a href="#SEE ALSO">SEE ALSO</a><br>
+<a href="#AUTHOR">AUTHOR</a><br>
+<a href="#MOTIVATION">MOTIVATION</a><br>
+<a href="#CREDITS">CREDITS</a><br>
+
+<hr>
+
+
+<a name="NAME"></a>
+<h2>NAME</h2>
+
+
+<p style="margin-left:11%; margin-top: 1em">ctags &minus;
+Generate tag files for source code</p>
+
+<a name="SYNOPSIS"></a>
+<h2>SYNOPSIS</h2>
+
+
+<p style="margin-left:11%; margin-top: 1em"><b>ctags</b>
+[<b>options</b>] [<i>file(s)</i>] <b><br>
+etags</b> [<b>options</b>] [<i>file(s)</i>]</p>
+
+<a name="DESCRIPTION"></a>
+<h2>DESCRIPTION</h2>
+
+
+<p style="margin-left:11%; margin-top: 1em">The
+<b>ctags</b> and <b>etags</b> programs (hereinafter
+collectively referred to as <b>ctags</b>, except where
+distinguished) generate an index (or &quot;tag&quot;) file
+for a variety of language objects found in <i>file(s)</i>.
+This tag file allows these items to be quickly and easily
+located by a text editor or other utility. A &quot;tag&quot;
+signifies a language object for which an index entry is
+available (or, alternatively, the index entry created for
+that object).</p>
+
+<p style="margin-left:11%; margin-top: 1em">Alternatively,
+<b>ctags</b> can generate a cross reference file which
+lists, in human readable form, information about the various
+source objects found in a set of language files.</p>
+
+<p style="margin-left:11%; margin-top: 1em">Tag index files
+are supported by numerous editors, which allow the user to
+locate the object associated with a name appearing in a
+source file and jump to the file and line which defines the
+name. Those known about at the time of this release are:</p>
+
+<p style="margin-left:17%; margin-top: 1em"><b>Vi</b>(1)
+and its derivatives (e.g. Elvis, Vim, Vile, Lemmy),
+<b>CRiSP</b>, <b>Emacs</b>, <b>FTE</b> (Folding Text
+Editor), <b>JED</b>, <b>jEdit</b>, <b>Mined</b>,
+<b>NEdit</b> (Nirvana Edit), <b>TSE</b> (The SemWare
+Editor), <b>UltraEdit</b>, <b>WorkSpace</b>, <b>X2</b>,
+<b>Zeus</b></p>
+
+<p style="margin-left:11%; margin-top: 1em"><b>Ctags</b> is
+capable of generating different kinds of tags for each of
+many different languages. For a complete list of supported
+languages, the names by which they are recognized, and the
+kinds of tags which are generated for each, see the
+<b>&minus;&minus;list&minus;languages</b> and
+<b>&minus;&minus;list&minus;kinds</b> options.</p>
+
+<a name="SOURCE FILES"></a>
+<h2>SOURCE FILES</h2>
+
+
+<p style="margin-left:11%; margin-top: 1em">Unless the
+<b>&minus;&minus;language&minus;force</b> option is
+specified, the language of each source file is automatically
+selected based upon a mapping of file names to languages.
+The mappings in effect for each language may be displayed
+using the <b>&minus;&minus;list&minus;maps</b> option and
+may be changed using the <b>&minus;&minus;langmap</b>
+option. On platforms which support it, if the name of a file
+is not mapped to a language and the file is executable, the
+first line of the file is checked to see if the file is a
+&quot;#!&quot; script for a recognized language.</p>
+
+<p style="margin-left:11%; margin-top: 1em">By default, all
+other file names are ignored. This permits running
+<b>ctags</b> on all files in either a single directory (e.g.
+&quot;ctags *&quot;), or on all files in an entire source
+directory tree (e.g. &quot;ctags &minus;R&quot;), since only
+those files whose names are mapped to languages will be
+scanned.</p>
+
+<p style="margin-left:11%; margin-top: 1em">[The reason
+that .h extensions are mapped to C++ files rather than C
+files is because it is common to use .h extensions in C++,
+and no harm results in treating them as C++ files.]</p>
+
+<a name="OPTIONS"></a>
+<h2>OPTIONS</h2>
+
+
+<p style="margin-left:11%; margin-top: 1em">Despite the
+wealth of available options, defaults are set so that
+<b>ctags</b> is most commonly executed without any options
+(e.g. &quot;ctags *&quot;, or &quot;ctags &minus;R&quot;),
+which will create a tag file in the current directory for
+all recognized source files. The options described below are
+provided merely to allow custom tailoring to meet special
+needs.</p>
+
+<p style="margin-left:11%; margin-top: 1em">Note that
+spaces separating the single-letter options from their
+parameters are optional.</p>
+
+<p style="margin-left:11%; margin-top: 1em">Note also that
+the boolean parameters to the long form options (those
+beginning with &quot;&minus;&minus;&quot; and that take a
+&quot;<i>[=yes</i>|<i>no]</i>&quot; parameter) may be
+omitted, in which case &quot;<b>=</b><i>yes</i>&quot; is
+implied. (e.g. <b>&minus;&minus;sort</b> is equivalent to
+<b>&minus;&minus;sort</b>=<i>yes</i>). Note further that
+&quot;=<i>1</i>&quot; and &quot;=<i>on</i>&quot; are
+considered synonyms for &quot;=<i>yes</i>&quot;, and that
+&quot;=<i>0</i>&quot; and &quot;=<i>off</i>&quot; are
+considered synonyms for &quot;=<i>no</i>&quot;.</p>
+
+<p style="margin-left:11%; margin-top: 1em">Some options
+are either ignored or useful only when used while running in
+etags mode (see <b>&minus;e</b> option). Such options will
+be noted.</p>
+
+<p style="margin-left:11%; margin-top: 1em">Most options
+may appear anywhere on the command line, affecting only
+those files which follow the option. A few options, however,
+must appear before the first file name and will be noted as
+such.</p>
+
+<p style="margin-left:11%; margin-top: 1em">Options taking
+language names will accept those names in either upper or
+lower case. See the
+<b>&minus;&minus;list&minus;languages</b> option for a
+complete list of the built-in language names.</p>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="11%"></td>
+<td width="3%">
+
+
+
+<p style="margin-top: 1em" valign="top"><b>&minus;a</b></p> </td>
+<td width="4%"></td>
+<td width="82%">
+
+
+<p style="margin-top: 1em" valign="top">Equivalent to
+<b>&minus;&minus;append</b>.</p> </td>
+<tr valign="top" align="left">
+<td width="11%"></td>
+<td width="3%">
+
+
+<p><b>&minus;B</b></p></td>
+<td width="4%"></td>
+<td width="82%">
+
+
+<p>Use backward searching patterns (e.g. ?pattern?).
+[Ignored in etags mode]</p></td>
+<tr valign="top" align="left">
+<td width="11%"></td>
+<td width="3%">
+
+
+<p><b>&minus;e</b></p></td>
+<td width="4%"></td>
+<td width="82%">
+
+
+<p>Enable etags mode, which will create a tag file for use
+with the Emacs editor. Alternatively, if <b>ctags</b> is
+invoked by a name containing the string &quot;etags&quot;
+(either by renaming, or creating a link to, the executable),
+etags mode will be enabled. This option must appear before
+the first file name.</p></td>
+</table>
+
+<p style="margin-left:11%;"><b>&minus;f</b>
+<i>tagfile</i></p>
+
+<p style="margin-left:18%;">Use the name specified by
+<i>tagfile</i> for the tag file (default is
+&quot;tags&quot;, or &quot;TAGS&quot; when running in etags
+mode). If <i>tagfile</i> is specified as
+&quot;&minus;&quot;, then the tag file is written to
+standard output instead. <b>Ctags</b> will stubbornly refuse
+to take orders if <i>tagfile</i> exists and its first line
+contains something other than a valid tags line. This will
+save your neck if you mistakenly type &quot;ctags &minus;f
+*.c&quot;, which would otherwise overwrite your first C file
+with the tags generated by the rest! It will also refuse to
+accept a multi-character file name which begins with a
+&rsquo;&minus;&rsquo; (dash) character, since this most
+likely means that you left out the tag file name and this
+option tried to grab the next option as the file name. If
+you really want to name your output tag file
+&quot;&minus;ugly&quot;, specify it as
+&quot;./&minus;ugly&quot;. This option must appear before
+the first file name. If this option is specified more than
+once, only the last will apply.</p>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="11%"></td>
+<td width="3%">
+
+
+
+<p style="margin-top: 1em" valign="top"><b>&minus;F</b></p> </td>
+<td width="4%"></td>
+<td width="82%">
+
+
+<p style="margin-top: 1em" valign="top">Use forward
+searching patterns (e.g. /pattern/) (default). [Ignored in
+etags mode]</p></td>
+</table>
+
+<p style="margin-left:11%;"><b>&minus;h</b> <i>list</i></p>
+
+<p style="margin-left:18%;">Specifies a list of file
+extensions, separated by periods, which are to be
+interpreted as include (or header) files. To indicate files
+having no extension, use a period not followed by a
+non-period character (e.g. &quot;.&quot;, &quot;..x&quot;,
+&quot;.x.&quot;). This option only affects how the scoping
+of particular kinds of tags is interpreted (i.e. whether
+or not they are considered as globally visible or visible
+only within the file in which they are defined); it does not
+map the extension to any particular language. Any tag which
+is located in a non-include file and cannot be seen (e.g.
+linked to) from another file is considered to have
+file-limited (e.g. static) scope. No kind of tag appearing
+in an include file will be considered to have file-limited
+scope. If the first character in the list is a plus sign,
+then the extensions in the list will be appended to the
+current list; otherwise, the list will replace the current
+list. See, also, the <b>&minus;&minus;file&minus;scope</b>
+option. The default list is
+&quot;.h.H.hh.hpp.hxx.h++.inc.def&quot;. To restore the
+default list, specify <b>&minus;h</b> <i>default</i>. Note
+that if an extension supplied to this option is not already
+mapped to a particular language (see <b>SOURCE FILES</b>,
+above), you will also need to use either the
+<b>&minus;&minus;langmap</b> or
+<b>&minus;&minus;language&minus;force</b> option.</p>
+
+<p style="margin-left:11%;"><b>&minus;I</b>
+<i>identifier&minus;list</i></p>
+
+<p style="margin-left:18%;">Specifies a list of identifiers
+which are to be specially handled while parsing C and C++
+source files. This option is specifically provided to handle
+special cases arising through the use of preprocessor
+macros. When the identifiers listed are simple identifiers,
+these identifiers will be ignored during parsing of the
+source files. If an identifier is suffixed with a
+&rsquo;+&rsquo; character, <b>ctags</b> will also ignore any
+parenthesis-enclosed argument list which may immediately
+follow the identifier in the source files. If two
+identifiers are separated with the &rsquo;=&rsquo;
+character, the first identifier is replaced by the second
+identifier for parsing purposes. The list of identifiers
+may be supplied directly on the command line or read in from
+a separate file. If the first character of
+<i>identifier&minus;list</i> is &rsquo;@&rsquo;,
+&rsquo;.&rsquo; or a pathname separator (&rsquo;/&rsquo; or
+&rsquo;\&rsquo;), or the first two characters specify a
+drive letter (e.g. &quot;C:&quot;), the parameter
+<i>identifier&minus;list</i> will be interpreted as a
+filename from which to read a list of identifiers, one per
+input line. Otherwise, <i>identifier&minus;list</i> is a
+list of identifiers (or identifier pairs) to be specially
+handled, each delimited by either a comma or by white
+space (in which case the list should be quoted to keep the
+entire list as one command line argument). Multiple
+<b>&minus;I</b> options may be supplied. To clear the list
+of ignore identifiers, supply a single dash
+(&quot;&minus;&quot;) for <i>identifier&minus;list</i>.</p>
+
+<p style="margin-left:18%; margin-top: 1em">This feature is
+useful when preprocessor macros are used in such a way that
+they cause syntactic confusion due to their presence.
+Indeed, this is the best way of working around a number of
+problems caused by the presence of syntax-busting macros in
+source files (see <b>CAVEATS</b>, below). Some examples will
+illustrate this point.</p>
+
+<p style="margin-left:23%; margin-top: 1em">int foo
+ARGDECL4(void *, ptr, long int, nbytes)</p>
+
+<p style="margin-left:18%; margin-top: 1em">In the above
+example, the macro &quot;ARGDECL4&quot; would be mistakenly
+interpreted to be the name of the function instead of the
+correct name of &quot;foo&quot;. Specifying <b>&minus;I</b>
+<i>ARGDECL4</i> results in the correct behavior.</p>
+
+<p style="margin-left:23%; margin-top: 1em">/* creates an
+RCS version string in module */ <br>
+MODULE_VERSION(&quot;$Revision: 690 $&quot;)</p>
+
+<p style="margin-left:18%; margin-top: 1em">In the above
+example the macro invocation looks too much like a function
+definition because it is not followed by a semicolon
+(indeed, it could even be followed by a global variable
+definition that would look much like a K&amp;R style
+function parameter declaration). In fact, this seeming
+function definition could possibly even cause the rest of
+the file to be skipped over while trying to complete the
+definition. Specifying <b>&minus;I</b>
+<i>MODULE_VERSION+</i> would avoid such a problem.</p>
+
+<p style="margin-left:23%; margin-top: 1em">CLASS Example {
+<br>
+// your content here <br>
+};</p>
+
+<p style="margin-left:18%; margin-top: 1em">The example
+above uses &quot;CLASS&quot; as a preprocessor macro which
+expands to something different for each platform. For
+instance CLASS may be defined as &quot;class
+__declspec(dllexport)&quot; on Win32 platforms and simply
+&quot;class&quot; on UNIX. Normally, the absence of the C++
+keyword &quot;class&quot; would cause the source file to be
+incorrectly parsed. Correct behavior can be restored by
+specifying <b>&minus;I</b> <i>CLASS=class</i>.</p>
+
+<p style="margin-left:11%;"><b>&minus;L</b> <i>file</i></p>
+
+<p style="margin-left:18%;">Read from <i>file</i> a list of
+file names for which tags should be generated. If
+<i>file</i> is specified as &quot;&minus;&quot;, then file
+names are read from standard input. File names read using
+this option are processed following file names appearing on
+the command line. Options are also accepted in this input.
+If this option is specified more than once, only the last
+will apply. <b>Note:</b> <i>file</i> is read in
+line-oriented mode, where a new line is the only delimiter
+and non-trailing white space is considered significant, in
+order that file names containing spaces may be supplied
+(however, trailing white space is stripped from lines); this
+can affect how options are parsed if included in the
+input.</p>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="11%"></td>
+<td width="3%">
+
+
+
+<p style="margin-top: 1em" valign="top"><b>&minus;n</b></p> </td>
+<td width="4%"></td>
+<td width="47%">
+
+
+<p style="margin-top: 1em" valign="top">Equivalent to
+<b>&minus;&minus;excmd</b>=<i>number</i>.</p> </td>
+<td width="35%">
+</td>
+<tr valign="top" align="left">
+<td width="11%"></td>
+<td width="3%">
+
+
+<p><b>&minus;N</b></p></td>
+<td width="4%"></td>
+<td width="47%">
+
+
+<p>Equivalent to
+<b>&minus;&minus;excmd</b>=<i>pattern</i>.</p> </td>
+<td width="35%">
+</td>
+</table>
+
+<p style="margin-left:11%;"><b>&minus;o</b>
+<i>tagfile</i></p>
+
+<p style="margin-left:18%;">Equivalent to <b>&minus;f</b>
+<i>tagfile</i>.</p>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="11%"></td>
+<td width="3%">
+
+
+
+<p style="margin-top: 1em" valign="top"><b>&minus;R</b></p> </td>
+<td width="4%"></td>
+<td width="82%">
+
+
+<p style="margin-top: 1em" valign="top">Equivalent to
+<b>&minus;&minus;recurse</b>.</p> </td>
+<tr valign="top" align="left">
+<td width="11%"></td>
+<td width="3%">
+
+
+<p><b>&minus;u</b></p></td>
+<td width="4%"></td>
+<td width="82%">
+
+
+<p>Equivalent to <b>&minus;&minus;sort</b>=<i>no</i> (i.e.
+&quot;unsorted&quot;).</p> </td>
+<tr valign="top" align="left">
+<td width="11%"></td>
+<td width="3%">
+
+
+<p><b>&minus;V</b></p></td>
+<td width="4%"></td>
+<td width="82%">
+
+
+<p>Equivalent to <b>&minus;&minus;verbose</b>.</p></td>
+<tr valign="top" align="left">
+<td width="11%"></td>
+<td width="3%">
+
+
+<p><b>&minus;w</b></p></td>
+<td width="4%"></td>
+<td width="82%">
+
+
+<p>This option is silently ignored for
+backward-compatibility with the ctags of SVR4 Unix.</p></td>
+<tr valign="top" align="left">
+<td width="11%"></td>
+<td width="3%">
+
+
+<p><b>&minus;x</b></p></td>
+<td width="4%"></td>
+<td width="82%">
+
+
+<p>Print a tabular, human-readable cross reference (xref)
+file to standard output instead of generating a tag file.
+The information contained in the output includes: the tag
+name; the kind of tag; the line number, file name, and
+source line (with extra white space condensed) of the file
+which defines the tag. No tag file is written and all
+options affecting tag file output will be ignored. Example
+applications for this feature are generating a listing of
+all functions located in a source file (e.g. <b>ctags
+&minus;x &minus;&minus;c&minus;kinds</b>=<i>f file</i>), or
+generating a list of all externally visible global variables
+located in a source file (e.g. <b>ctags &minus;x
+&minus;&minus;c&minus;kinds</b>=<i>v</i>
+<b>&minus;&minus;file&minus;scope</b>=<i>no file</i>). This
+option must appear before the first file name.</p></td>
+</table>
+
+
+<p style="margin-left:11%;"><b>&minus;&minus;append</b>[=<i>yes</i>|<i>no</i>]</p>
+
+<p style="margin-left:18%;">Indicates whether tags
+generated from the specified files should be appended to
+those already present in the tag file or should replace
+them. This option is off by default. This option must appear
+before the first file name.</p>
+
+
+<p style="margin-left:11%;"><b>&minus;&minus;etags&minus;include</b>=<i>file</i></p>
+
+<p style="margin-left:18%;">Include a reference to
+<i>file</i> in the tag file. This option may be specified as
+many times as desired. This supports Emacs&rsquo; capability
+to use a tag file which &quot;includes&quot; other tag
+files. [Available only in etags mode]</p>
+
+
+<p style="margin-left:11%;"><b>&minus;&minus;exclude</b>=[<i>pattern</i>]</p>
+
+<p style="margin-left:18%;">Add <i>pattern</i> to a list of
+excluded files and directories. This option may be specified
+as many times as desired. For each file name considered by
+<b>ctags</b>, each <i>pattern</i> specified using this
+option will be compared against both the complete path (e.g.
+some/path/base.ext) and the base name (e.g. base.ext) of the
+file, thus allowing patterns which match a given file name
+irrespective of its path, or match only a specific path. If
+appropriate support is available from the runtime library of
+your C compiler, then <i>pattern</i> may contain the usual
+shell wildcards (not regular expressions) common on Unix (be
+sure to quote the option parameter to protect the wildcards
+from being expanded by the shell before being passed to
+<b>ctags</b>; also be aware that wildcards can match the
+slash character, &rsquo;/&rsquo;). You can determine if
+shell wildcards are available on your platform by examining
+the output of the <b>&minus;&minus;version</b> option, which
+will include &quot;+wildcards&quot; in the compiled feature
+list; otherwise, <i>pattern</i> is matched against file
+names using a simple textual comparison.</p>
+
+<p style="margin-left:18%; margin-top: 1em">If
+<i>pattern</i> begins with the character &rsquo;@&rsquo;,
+then the rest of the string is interpreted as a file name
+from which to read exclusion patterns, one per line. If
+<i>pattern</i> is empty, the list of excluded patterns is
+cleared. Note that at program startup, the default exclude
+list contains &quot;EIFGEN&quot;, &quot;SCCS&quot;,
+&quot;RCS&quot;, and &quot;CVS&quot;, which are names of
+directories for which it is generally not desirable to
+descend while processing the <b>&minus;&minus;recurse</b>
+option.</p>
+
+
+<p style="margin-left:11%;"><b>&minus;&minus;excmd</b>=<i>type</i></p>
+
+<p style="margin-left:18%;">Determines the type of EX
+command used to locate tags in the source file. [Ignored in
+etags mode]</p>
+
+<p style="margin-left:18%; margin-top: 1em">The valid
+values for <i>type</i> (either the entire word or the first
+letter is accepted) are:</p>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="18%"></td>
+<td width="10%">
+
+
+<p style="margin-top: 1em" valign="top"><i>number</i></p></td>
+<td width="4%"></td>
+<td width="68%">
+
+
+<p style="margin-top: 1em" valign="top">Use only line
+numbers in the tag file for locating tags. This has four
+advantages:</p> </td>
+</table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="32%"></td>
+<td width="3%">
+
+
+<p valign="top">1.</p></td>
+<td width="3%"></td>
+<td width="62%">
+
+
+<p valign="top">Significantly reduces the size of the
+resulting tag file.</p></td>
+<tr valign="top" align="left">
+<td width="32%"></td>
+<td width="3%">
+
+
+<p valign="top">2.</p></td>
+<td width="3%"></td>
+<td width="62%">
+
+
+<p valign="top">Eliminates failures to find tags because
+the line defining the tag has changed, causing the pattern
+match to fail (note that some editors, such as <b>vim</b>,
+are able to recover in many such instances).</p></td>
+<tr valign="top" align="left">
+<td width="32%"></td>
+<td width="3%">
+
+
+<p valign="top">3.</p></td>
+<td width="3%"></td>
+<td width="62%">
+
+
+<p valign="top">Eliminates finding identical matching, but
+incorrect, source lines (see <b>CAVEATS</b>, below).</p></td>
+<tr valign="top" align="left">
+<td width="32%"></td>
+<td width="3%">
+
+
+<p valign="top">4.</p></td>
+<td width="3%"></td>
+<td width="62%">
+
+
+<p valign="top">Retains separate entries in the tag file
+for lines which are identical in content. In <i>pattern</i>
+mode, duplicate entries are dropped because the search
+patterns they generate are identical, making the duplicate
+entries useless.</p></td>
+</table>
+
+<p style="margin-left:32%; margin-top: 1em">However, this
+option has one significant drawback: changes to the source
+files can cause the line numbers recorded in the tag file to
+no longer correspond to the lines in the source file,
+causing jumps to some tags to miss the target definition by
+one or more lines. Basically, this option is best used when
+the source code to which it is applied is not subject to
+change. Selecting this option type causes the following
+options to be ignored: <b>&minus;BF</b>.</p>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="18%"></td>
+<td width="11%">
+
+
+<p style="margin-top: 1em" valign="top"><i>pattern</i></p></td>
+<td width="3%"></td>
+<td width="68%">
+
+
+<p style="margin-top: 1em" valign="top">Use only search
+patterns for all tags, rather than the line numbers usually
+used for macro definitions. This has the advantage of not
+referencing obsolete line numbers when lines have been added
+or removed since the tag file was generated.</p></td>
+<tr valign="top" align="left">
+<td width="18%"></td>
+<td width="11%">
+
+
+<p><i>mixed</i></p></td>
+<td width="3%"></td>
+<td width="68%">
+
+
+<p>In this mode, patterns are generally used with a few
+exceptions. For C, line numbers are used for macro
+definition tags. This was the default format generated by
+the original <b>ctags</b> and is, therefore, retained as the
+default for this option. For Fortran, line numbers are used
+for common blocks because their corresponding source lines
+are generally identical, making pattern searches useless for
+finding all matches.</p></td>
+</table>
+
+
+<p style="margin-left:11%;"><b>&minus;&minus;extra</b>=<i>[+|&minus;]flags</i></p>
+
+<p style="margin-left:18%;">Specifies whether to include
+extra tag entries for certain kinds of information. The
+parameter <i>flags</i> is a set of one-letter flags, each
+representing one kind of extra tag entry to include in the
+tag file. If <i>flags</i> is preceded by either the
+&rsquo;+&rsquo; or &rsquo;&minus;&rsquo; character, the
+effect of each flag is added to, or removed from, those
+currently enabled; otherwise the flags replace any current
+settings. The meaning of each flag is as follows:</p>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="23%"></td>
+<td width="2%">
+
+
+<p style="margin-top: 1em" valign="top"><i>f</i></p></td>
+<td width="4%"></td>
+<td width="71%">
+
+
+<p style="margin-top: 1em" valign="top">Include an entry
+for the base file name of every source file (e.g.
+&quot;example.c&quot;), which addresses the first line of
+the file.</p></td>
+<tr valign="top" align="left">
+<td width="23%"></td>
+<td width="2%">
+
+
+<p style="margin-top: 1em" valign="top"><i>q</i></p></td>
+<td width="4%"></td>
+<td width="71%">
+
+
+<p style="margin-top: 1em" valign="top">Include an extra
+class-qualified tag entry for each tag which is a member of
+a class (for languages for which this information is
+extracted; currently C++, Eiffel, and Java). The actual form
+of the qualified tag depends upon the language from which
+the tag was derived (using a form that is most natural for
+how qualified calls are specified in the language). For C++,
+it is in the form &quot;class::member&quot;; for Eiffel and
+Java, it is in the form &quot;class.member&quot;. This may
+allow easier location of specific tags when multiple
+occurrences of a tag name occur in the tag file. Note,
+however, that this could potentially more than double the
+size of the tag file.</p></td>
+</table>
+
+
+<p style="margin-left:11%;"><b>&minus;&minus;fields</b>=<i>[+|&minus;]flags</i></p>
+
+<p style="margin-left:18%;">Specifies the available
+extension fields which are to be included in the entries of
+the tag file (see <b>TAG FILE FORMAT</b>, below, for more
+information). The parameter <i>flags</i> is a set of
+one-letter flags, each representing one type of extension
+field to include, with the following meanings (disabled by
+default unless indicated):</p>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="23%"></td>
+<td width="2%">
+
+
+<p style="margin-top: 1em" valign="top"><i>a</i></p></td>
+<td width="4%"></td>
+<td width="71%">
+
+
+<p style="margin-top: 1em" valign="top">Access (or export)
+of class members</p></td>
+<tr valign="top" align="left">
+<td width="23%"></td>
+<td width="2%">
+
+
+<p valign="top"><i>f</i></p></td>
+<td width="4%"></td>
+<td width="71%">
+
+
+<p valign="top">File-restricted scoping [enabled]</p></td>
+<tr valign="top" align="left">
+<td width="23%"></td>
+<td width="2%">
+
+
+<p valign="top"><i>i</i></p></td>
+<td width="4%"></td>
+<td width="71%">
+
+
+<p valign="top">Inheritance information</p></td>
+<tr valign="top" align="left">
+<td width="23%"></td>
+<td width="2%">
+
+
+<p valign="top"><i>k</i></p></td>
+<td width="4%"></td>
+<td width="71%">
+
+
+<p valign="top">Kind of tag as a single letter
+[enabled]</p> </td>
+<tr valign="top" align="left">
+<td width="23%"></td>
+<td width="2%">
+
+
+<p valign="top"><i>K</i></p></td>
+<td width="4%"></td>
+<td width="71%">
+
+
+<p valign="top">Kind of tag as full name</p></td>
+<tr valign="top" align="left">
+<td width="23%"></td>
+<td width="2%">
+
+
+<p valign="top"><i>l</i></p></td>
+<td width="4%"></td>
+<td width="71%">
+
+
+<p valign="top">Language of source file containing tag</p></td>
+<tr valign="top" align="left">
+<td width="23%"></td>
+<td width="2%">
+
+
+<p valign="top"><i>m</i></p></td>
+<td width="4%"></td>
+<td width="71%">
+
+
+<p valign="top">Implementation information</p></td>
+<tr valign="top" align="left">
+<td width="23%"></td>
+<td width="2%">
+
+
+<p valign="top"><i>n</i></p></td>
+<td width="4%"></td>
+<td width="71%">
+
+
+<p valign="top">Line number of tag definition</p></td>
+<tr valign="top" align="left">
+<td width="23%"></td>
+<td width="2%">
+
+
+<p valign="top"><i>s</i></p></td>
+<td width="4%"></td>
+<td width="71%">
+
+
+<p valign="top">Scope of tag definition [enabled]</p></td>
+<tr valign="top" align="left">
+<td width="23%"></td>
+<td width="2%">
+
+
+<p valign="top"><i>S</i></p></td>
+<td width="4%"></td>
+<td width="71%">
+
+
+<p valign="top">Signature of routine (e.g. prototype or
+parameter list)</p></td>
+<tr valign="top" align="left">
+<td width="23%"></td>
+<td width="2%">
+
+
+<p valign="top"><i>z</i></p></td>
+<td width="4%"></td>
+<td width="71%">
+
+
+<p valign="top">Include the &quot;kind:&quot; key in kind
+field</p> </td>
+<tr valign="top" align="left">
+<td width="23%"></td>
+<td width="2%">
+
+
+<p valign="top"><i>t</i></p></td>
+<td width="4%"></td>
+<td width="71%">
+
+
+<p valign="top">Type and name of a variable or typedef as
+&quot;typeref:&quot; field [enabled]</p></td>
+</table>
+
+<p style="margin-left:18%; margin-top: 1em">Each letter or
+group of letters may be preceded by either &rsquo;+&rsquo;
+to add it to the default set, or &rsquo;&minus;&rsquo; to
+exclude it. In the absence of any preceding &rsquo;+&rsquo;
+or &rsquo;&minus;&rsquo; sign, only those fields explicitly
+listed in <i>flags</i> will be included in the output (i.e.
+overriding the default set). This option is ignored if the
+option <b>&minus;&minus;format</b>=<i>1</i> has been
+specified. The default value of this option is
+<i>fkst</i>.</p>
+
+
+<p style="margin-left:11%;"><b>&minus;&minus;file&minus;scope</b>[=<i>yes</i>|<i>no</i>]</p>
+
+<p style="margin-left:18%;">Indicates whether tags scoped
+only for a single file (i.e. tags which cannot be seen
+outside of the file in which they are defined, such as
+&quot;static&quot; tags) should be included in the output.
+See, also, the <b>&minus;h</b> option. This option is
+enabled by default.</p>
+
+
+<p style="margin-left:11%;"><b>&minus;&minus;filter</b>[=<i>yes</i>|<i>no</i>]</p>
+
+<p style="margin-left:18%;">Causes <b>ctags</b> to behave
+as a filter, reading source file names from standard input
+and printing their tags to standard output on a file-by-file
+basis. If <b>&minus;&minus;sorted</b> is enabled, tags are
+sorted only within the source file in which they are
+defined. File names are read from standard input in
+line-oriented input mode (see note for <b>&minus;L</b>
+option) and only after file names listed on the command line
+or from any file supplied using the <b>&minus;L</b> option.
+When this option is enabled, the options <b>&minus;f</b>,
+<b>&minus;o</b>, and <b>&minus;&minus;totals</b> are
+ignored. This option is quite esoteric and is disabled by
+default. This option must appear before the first file
+name.</p>
+
+
+<p style="margin-left:11%;"><b>&minus;&minus;filter&minus;terminator</b>=<i>string</i></p>
+
+<p style="margin-left:18%;">Specifies a string to print to
+standard output following the tags for each file name parsed
+when the <b>&minus;&minus;filter</b> option is enabled. This
+may permit an application reading the output of ctags to
+determine when the output for each file is finished. Note
+that if the file name read is a directory and
+<b>&minus;&minus;recurse</b> is enabled, this string will be
+printed only once at the end of all tags found by
+descending the directory. This string will always be
+separated from the last tag line for the file by its
+terminating newline. This option is quite esoteric and is
+empty by default. This option must appear before the first
+file name.</p>
+
+
+<p style="margin-left:11%;"><b>&minus;&minus;format</b>=<i>level</i></p>
+
+<p style="margin-left:18%;">Change the format of the output
+tag file. Currently the only valid values for <i>level</i>
+are <i>1</i> or <i>2</i>. Level 1 specifies the original tag
+file format and level 2 specifies a new extended format
+containing extension fields (but in a manner which retains
+backward-compatibility with original <b>vi</b>(1)
+implementations). The default level is 2. This option must
+appear before the first file name. [Ignored in etags
+mode]</p>
+
+<p style="margin-left:11%;"><b>&minus;&minus;help</b></p>
+
+<p style="margin-left:18%;">Prints to standard output a
+detailed usage description, and then exits.</p>
+
+
+<p style="margin-left:11%;"><b>&minus;&minus;if0</b>[=<i>yes</i>|<i>no</i>]</p>
+
+<p style="margin-left:18%;">Indicates a preference as to
+whether code within an &quot;#if 0&quot; branch of a
+preprocessor conditional should be examined for non-macro
+tags (macro tags are always included). Because the intent of
+this construct is to disable code, the default value of this
+option is <i>no</i>. Note that this indicates a preference
+only and does not guarantee skipping code within an
+&quot;#if 0&quot; branch, since the fall-back algorithm used
+to generate tags when preprocessor conditionals are too
+complex follows all branches of a conditional. This option
+is disabled by default.</p>
+
+
+<p style="margin-left:11%;"><b>&minus;&minus;&lt;LANG&gt;&minus;kinds</b>=<i>[+|&minus;]kinds</i></p>
+
+<p style="margin-left:18%;">Specifies a list of
+language-specific kinds of tags (or kinds) to include in the
+output file for a particular language, where
+<b>&lt;LANG&gt;</b> is case-insensitive and is one of the
+built-in language names (see the
+<b>&minus;&minus;list&minus;languages</b> option for a
+complete list). The parameter <i>kinds</i> is a group of
+one-letter flags designating kinds of tags (particular to
+the language) to either include or exclude from the output.
+The specific sets of flags recognized for each language,
+their meanings and defaults may be listed using the
+<b>&minus;&minus;list&minus;kinds</b> option. Each letter or
+group of letters may be preceded by either &rsquo;+&rsquo;
+to add it to, or &rsquo;&minus;&rsquo; to remove it from,
+the default set. In the absence of any preceding
+&rsquo;+&rsquo; or &rsquo;&minus;&rsquo; sign, only those
+kinds explicitly listed in <i>kinds</i> will be included in
+the output (i.e. overriding the default for the specified
+language).</p>
+
+<p style="margin-left:18%; margin-top: 1em">As an example
+for the C language, in order to add prototypes and external
+variable declarations to the default set of tag kinds, but
+exclude macros, use
+<b>&minus;&minus;c&minus;kinds</b>=<i>+px&minus;d</i>; to
+include only tags for functions, use
+<b>&minus;&minus;c&minus;kinds</b>=<i>f</i>.</p>
+
+
+<p style="margin-left:11%;"><b>&minus;&minus;langdef</b>=<i>name</i></p>
+
+<p style="margin-left:18%;">Defines a new user-defined
+language, <i>name</i>, to be parsed with regular
+expressions. Once defined, <i>name</i> may be used in other
+options taking language names. The typical use of this
+option is to first define the language, then map file names
+to it using <i>&minus;&minus;langmap</i>, then specify
+regular expressions using
+<i>&minus;&minus;regex&minus;&lt;LANG&gt;</i> to define how
+its tags are found.</p>
+
+
+<p style="margin-left:11%;"><b>&minus;&minus;langmap</b>=<i>map[,map[...]]</i></p>
+
+<p style="margin-left:18%;">Controls how file names are
+mapped to languages (see the
+<b>&minus;&minus;list&minus;maps</b> option). Each
+comma-separated <i>map</i> consists of the language name
+(either a built-in or user-defined language), a colon, and a
+list of file extensions and/or file name patterns. A file
+extension is specified by preceding the extension with a
+period (e.g. &quot;.c&quot;). A file name pattern is
+specified by enclosing the pattern in parentheses (e.g.
+&quot;([Mm]akefile)&quot;). If appropriate support is
+available from the runtime library of your C compiler, then
+the file name pattern may contain the usual shell wildcards
+common on Unix (be sure to quote the option parameter to
+protect the wildcards from being expanded by the shell
+before being passed to <b>ctags</b>). You can determine if
+shell wildcards are available on your platform by examining
+the output of the <b>&minus;&minus;version</b> option, which
+will include &quot;+wildcards&quot; in the compiled feature
+list; otherwise, the file name patterns are matched against
+file names using a simple textual comparison. When mapping a
+file extension, it will first be unmapped from any other
+languages.</p>
+
+<p style="margin-left:18%; margin-top: 1em">If the first
+character in a map is a plus sign, then the extensions and
+file name patterns in that map will be appended to the
+current map for that language; otherwise, the map will
+replace the current map. For example, to specify that only
+files with extensions of .c and .x are to be treated as C
+language files, use
+&quot;<b>&minus;&minus;langmap</b>=<i>c:.c.x</i>&quot;; to
+also add files with extensions of .j as Java language files,
+specify
+&quot;<b>&minus;&minus;langmap</b>=<i>c:.c.x,java:+.j</i>&quot;.
+To map makefiles (e.g. files named either
+&quot;Makefile&quot;, &quot;makefile&quot;, or having the
+extension &quot;.mak&quot;) to a language called
+&quot;make&quot;, specify
+&quot;<b>&minus;&minus;langmap</b>=<i>make:([Mm]akefile).mak</i>&quot;.
+To map files having no extension, specify a period not
+followed by a non-period character (e.g. &quot;.&quot;,
+&quot;..x&quot;, &quot;.x.&quot;). To clear the mapping for
+a particular language (thus inhibiting automatic generation
+of tags for that language), specify an empty extension list
+(e.g.
+&quot;<b>&minus;&minus;langmap</b>=<i>fortran:</i>&quot;).
+To restore the default language mappings for a
+particular language, supply the keyword &quot;default&quot;
+for the mapping. To restore the default language
+mappings for all languages, specify
+&quot;<b>&minus;&minus;langmap</b>=<i>default</i>&quot;.
+Note that file extensions are tested before file name
+patterns when inferring the language of a file.</p>
+
+
+<p style="margin-left:11%;"><b>&minus;&minus;language&minus;force</b>=<i>language</i></p>
+
+<p style="margin-left:18%;">By default, <b>ctags</b>
+automatically selects the language of a source file,
+ignoring those files whose language cannot be determined
+(see <b>SOURCE FILES</b>, above). This option forces the
+specified <i>language</i> (case-insensitive; either built-in
+or user-defined) to be used for every supplied file instead
+of automatically selecting the language based upon its
+extension. In addition, the special value <i>auto</i>
+indicates that the language should be automatically selected
+(which effectively disables this option).</p>
+
+
+<p style="margin-left:11%;"><b>&minus;&minus;languages</b>=<i>[+|&minus;]list</i></p>
+
+<p style="margin-left:18%;">Specifies the languages for
+which tag generation is enabled, with <i>list</i> containing
+a comma-separated list of language names (case-insensitive;
+either built-in or user-defined). If the first language of
+<i>list</i> is not preceded by either a &rsquo;+&rsquo; or
+&rsquo;&minus;&rsquo;, the current list will be cleared
+before adding or removing the languages in <i>list</i>.
+Until a &rsquo;&minus;&rsquo; is encountered, each language
+in the list will be added to the current list. As either the
+&rsquo;+&rsquo; or &rsquo;&minus;&rsquo; is encountered in
+the list, the languages following it are added or removed
+from the current list, respectively. Thus, it becomes simple
+to replace the current list with a new one, or to add or
+remove languages from the current list. The actual list of
+files for which tags will be generated depends upon the
+language extension mapping in effect (see the
+<b>&minus;&minus;langmap</b> option). Note that all
+languages, including user-defined languages, are enabled
+unless explicitly disabled using this option. Language names
+included in <i>list</i> may be any built-in language or one
+previously defined with <b>&minus;&minus;langdef</b>. The
+default is &quot;all&quot;, which is also accepted as a
+valid argument. See the
+<b>&minus;&minus;list&minus;languages</b> option for a
+complete list of the built-in language names.</p>
+
+
+<p style="margin-left:11%;"><b>&minus;&minus;license</b></p>
+
+<p style="margin-left:18%;">Prints a summary of the
+software license to standard output, and then exits.</p>
+
+
+<p style="margin-left:11%;"><b>&minus;&minus;line&minus;directives</b>[=<i>yes</i>|<i>no</i>]</p>
+
+<p style="margin-left:18%;">Specifies whether
+&quot;#line&quot; directives should be recognized. These are
+present in the output of preprocessors and contain the line
+number, and possibly the file name, of the original source
+file(s) from which the preprocessor output file was
+generated. When enabled, this option will cause <b>ctags</b>
+to generate tag entries marked with the file names and line
+numbers of their locations in the original source file(s), instead
+of their actual locations in the preprocessor output. The
+actual file names placed into the tag file will have the
+same leading path components as the preprocessor output
+file, since it is assumed that the original source files are
+located relative to the preprocessor output file (unless, of
+course, the #line directive specifies an absolute path).
+This option is off by default. <b>Note:</b> This option is
+generally only useful when used together with the
+<b>&minus;&minus;excmd</b>=<i>number</i> (<b>&minus;n</b>)
+option. Also, you may have to use either the
+<b>&minus;&minus;langmap</b> or
+<b>&minus;&minus;language&minus;force</b> option if the
+extension of the preprocessor output file is not known to
+<b>ctags</b>.</p>
+
+
+<p style="margin-left:11%;"><b>&minus;&minus;links</b>[=<i>yes</i>|<i>no</i>]</p>
+
+<p style="margin-left:18%;">Indicates whether symbolic
+links (if supported) should be followed. When disabled,
+symbolic links are ignored. This option is on by
+default.</p>
+
+
+<p style="margin-left:11%;"><b>&minus;&minus;list&minus;kinds</b>[=<i>language</i>|<i>all</i>]</p>
+
+<p style="margin-left:18%;">Lists the tag kinds recognized
+for either the specified language or all languages, and then
+exits. Each kind of tag recorded in the tag file is
+represented by a one-letter flag, which is also used to
+filter the tags placed into the output through use of the
+<b>&minus;&minus;&lt;LANG&gt;&minus;kinds</b> option. Note
+that some languages and/or tag kinds may be implemented
+using regular expressions and may not be available if regex
+support is not compiled into <b>ctags</b> (see the
+<b>&minus;&minus;regex&minus;&lt;LANG&gt;</b> option). Each
+kind listed is enabled unless followed by
+&quot;[off]&quot;.</p>
+
+
+<p style="margin-left:11%;"><b>&minus;&minus;list&minus;maps</b>[=<i>language</i>|<i>all</i>]</p>
+
+<p style="margin-left:18%;">Lists the file extensions and
+file name patterns which associate a file name with a
+language for either the specified language or all languages,
+and then exits. See the <b>&minus;&minus;langmap</b> option,
+and <b>SOURCE FILES</b>, above.</p>
+
+
+<p style="margin-left:11%;"><b>&minus;&minus;list&minus;languages</b></p>
+
+<p style="margin-left:18%;">Lists the names of the
+languages understood by <b>ctags</b>, and then exits. These
+language names are case insensitive and may be used in the
+<b>&minus;&minus;language&minus;force</b>,
+<b>&minus;&minus;languages</b>,
+<b>&minus;&minus;&lt;LANG&gt;&minus;kinds</b>, and
+<b>&minus;&minus;regex&minus;&lt;LANG&gt;</b> options.</p>
+
+
+<p style="margin-left:11%;"><b>&minus;&minus;options</b>=<i>file</i></p>
+
+<p style="margin-left:18%;">Read additional options from
+<i>file</i>. The file should contain one option per line. As
+a special case, if <b>&minus;&minus;options</b>=<i>NONE</i>
+is specified as the first option on the command line, it
+will disable the automatic reading of any configuration
+options from either a file or the environment (see
+<b>FILES</b>).</p>
+
+
+<p style="margin-left:11%;"><b>&minus;&minus;recurse</b>[=<i>yes</i>|<i>no</i>]</p>
+
+<p style="margin-left:18%;">Recurse into directories
+encountered in the list of supplied files. If the list of
+supplied files is empty and no file list is specified with
+the <b>&minus;L</b> option, then the current directory (i.e.
+&quot;.&quot;) is assumed. Symbolic links are followed. If
+you don&rsquo;t like these behaviors, either explicitly
+specify the files or pipe the output of <b>find</b>(1) into
+<b>ctags &minus;L&minus;</b> instead. <b>Note:</b> This
+option is not supported on all platforms at present. It is
+available if the output of the <b>&minus;&minus;help</b>
+option includes this option. See, also, the
+<b>&minus;&minus;exclude</b> option to limit recursion.</p>
+
+
+<p style="margin-left:11%;"><b>&minus;&minus;regex&minus;&lt;LANG&gt;</b>=<i>/regexp/replacement/[kind&minus;spec/][flags]</i></p>
+
+<p style="margin-left:18%;">The <i>/regexp/replacement/</i>
+pair defines a regular expression replacement pattern,
+similar in style to <b>sed</b> substitution commands, with
+which to generate tags from source files mapped to the named
+language, <b>&lt;LANG&gt;</b>, (case-insensitive; either a
+built-in or user-defined language). The regular expression,
+<i>regexp</i>, defines an extended regular expression
+(roughly that used by <b>egrep</b>(1)), which is used to
+locate a single source line containing a tag and may specify
+tab characters using \t. When a matching line is found, a
+tag will be generated for the name defined by
+<i>replacement</i>, which generally will contain the special
+back-references \1 through \9 to refer to matching
+sub-expression groups within <i>regexp</i>. The
+&rsquo;/&rsquo; separator characters shown in the parameter
+to the option can actually be replaced by any character.
+Note that whichever separator character is used will have to
+be escaped with a backslash (&rsquo;\&rsquo;) character
+wherever it is used in the parameter as something other than
+a separator. The regular expression defined by this option
+is added to the current list of regular expressions for the
+specified language unless the parameter is omitted, in which
+case the current list is cleared.</p>
+
+<p style="margin-left:18%; margin-top: 1em">Unless modified
+by <i>flags</i>, <i>regexp</i> is interpreted as a Posix
+extended regular expression. The <i>replacement</i> should
+expand for all matching lines to a non-empty string of
+characters, or a warning message will be reported. An
+optional kind specifier for tags matching <i>regexp</i> may
+follow <i>replacement</i>, which will determine what kind of
+tag is reported in the &quot;kind&quot; extension field (see
+<b>TAG FILE FORMAT</b>, below). The full form of
+<i>kind&minus;spec</i> is in the form of a single letter, a
+comma, a name (without spaces), a comma, a description,
+followed by a separator, which specify the short and long
+forms of the kind value and its textual description
+(displayed using <b>&minus;&minus;list&minus;kinds</b>).
+Either the kind name and/or the description may be omitted.
+If <i>kind&minus;spec</i> is omitted, it defaults to
+&quot;<i>r,regex</i>&quot;. Finally, <i>flags</i> are one or
+more single-letter characters having the following effect
+upon the interpretation of <i>regexp</i>:</p>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="23%"></td>
+<td width="2%">
+
+
+<p style="margin-top: 1em" valign="top"><i>b</i></p></td>
+<td width="4%"></td>
+<td width="71%">
+
+
+<p style="margin-top: 1em" valign="top">The pattern is
+interpreted as a Posix basic regular expression.</p></td>
+<tr valign="top" align="left">
+<td width="23%"></td>
+<td width="2%">
+
+
+<p style="margin-top: 1em" valign="top"><i>e</i></p></td>
+<td width="4%"></td>
+<td width="71%">
+
+
+<p style="margin-top: 1em" valign="top">The pattern is
+interpreted as a Posix extended regular expression
+(default).</p> </td>
+<tr valign="top" align="left">
+<td width="23%"></td>
+<td width="2%">
+
+
+<p style="margin-top: 1em" valign="top"><i>i</i></p></td>
+<td width="4%"></td>
+<td width="71%">
+
+
+<p style="margin-top: 1em" valign="top">The regular
+expression is to be applied in a case-insensitive
+manner.</p> </td>
+</table>
+
+<p style="margin-left:18%; margin-top: 1em">Note that this
+option is available only if <b>ctags</b> was compiled with
+support for regular expressions, which depends upon your
+platform. You can determine if support for regular
+expressions is compiled in by examining the output of the
+<b>&minus;&minus;version</b> option, which will include
+&quot;+regex&quot; in the compiled feature list.</p>
+
+<p style="margin-left:18%; margin-top: 1em">For more
+information on the regular expressions used by <b>ctags</b>,
+see either the <b>regex(5,7)</b> man page, or the GNU info
+documentation for regex (e.g. &quot;info regex&quot;).</p>
+
+
+<p style="margin-left:11%;"><b>&minus;&minus;sort</b>[=<i>yes</i>|<i>no</i>|<i>foldcase</i>]</p>
+
+<p style="margin-left:18%;">Indicates whether the tag file
+should be sorted on the tag name (default is <i>yes</i>).
+Note that the original <b>vi</b>(1) required sorted tags.
+The <i>foldcase</i> value specifies case insensitive (or
+case-folded) sorting. Fast binary searches of tag files
+sorted with case-folding will require special support from
+tools using tag files, such as that found in the ctags
+readtags library, or Vim version 6.2 or higher (using
+&quot;set ignorecase&quot;). This option must appear before
+the first file name. [Ignored in etags mode]</p>
+
+
+<p style="margin-left:11%;"><b>&minus;&minus;tag&minus;relative</b>[=<i>yes</i>|<i>no</i>]</p>
+
+<p style="margin-left:18%;">Indicates that the file paths
+recorded in the tag file should be relative to the directory
+containing the tag file, rather than relative to the current
+directory, unless the files supplied on the command line are
+specified with absolute paths. This option must appear
+before the first file name. The default is <i>yes</i> when
+running in etags mode (see the <b>&minus;e</b> option),
+<i>no</i> otherwise.</p>
+
+
+<p style="margin-left:11%;"><b>&minus;&minus;totals</b>[=<i>yes</i>|<i>no</i>]</p>
+
+<p style="margin-left:18%;">Prints statistics about the
+source files read and the tag file written during the
+current invocation of <b>ctags</b>. This option is off by
+default. This option must appear before the first file
+name.</p>
+
+
+<p style="margin-left:11%;"><b>&minus;&minus;verbose</b>[=<i>yes</i>|<i>no</i>]</p>
+
+<p style="margin-left:18%;">Enable verbose mode. This
+prints out information on option processing and a brief
+message describing what action is being taken for each file
+considered by <b>ctags</b>. Normally, <b>ctags</b> does not
+read command line arguments until after options are read
+from the configuration files (see <b>FILES</b>, below) and
+the <b>CTAGS</b> environment variable. However, if this
+option is the first argument on the command line, it will
+take effect before any options are read from these sources.
+The default is <i>no</i>.</p>
+
+
+<p style="margin-left:11%;"><b>&minus;&minus;version</b></p>
+
+<p style="margin-left:18%;">Prints a version identifier for
+<b>ctags</b> to standard output, and then exits. This is
+guaranteed to always contain the string &quot;Exuberant
+Ctags&quot;.</p>
+
+<a name="OPERATIONAL DETAILS"></a>
+<h2>OPERATIONAL DETAILS</h2>
+
+
+<p style="margin-left:11%; margin-top: 1em">As <b>ctags</b>
+considers each file name in turn, it tries to determine the
+language of the file by applying the following three tests
+in order: if the file extension has been mapped to a
+language, if the file name matches a shell pattern mapped to
+a language, and finally if the file is executable and its
+first line specifies an interpreter using the Unix-style
+&quot;#!&quot; specification (if supported on the platform).
+If a language was identified, the file is opened and then
+the appropriate language parser is called to operate on the
+currently open file. The parser parses through the file and
+adds an entry to the tag file for each language object it is
+written to handle. See <b>TAG FILE FORMAT</b>, below, for
+details on these entries.</p>
+
+<p style="margin-left:11%; margin-top: 1em">This
+implementation of <b>ctags</b> imposes no formatting
+requirements on C code as do legacy implementations. Older
+implementations of ctags tended to rely upon certain
+formatting assumptions in order to help it resolve coding
+dilemmas caused by preprocessor conditionals.</p>
+
+<p style="margin-left:11%; margin-top: 1em">In general,
+<b>ctags</b> tries to be smart about conditional
+preprocessor directives. If a preprocessor conditional is
+encountered within a statement which defines a tag,
+<b>ctags</b> follows only the first branch of that
+conditional (except in the special case of &quot;#if
+0&quot;, in which case it follows only the last branch). The
+reason for this is that failing to pursue only one branch
+can result in ambiguous syntax, as in the following
+example:</p>
+
+<p style="margin-left:22%; margin-top: 1em">#ifdef
+TWO_ALTERNATIVES <br>
+struct { <br>
+#else <br>
+union { <br>
+#endif</p>
+
+<p style="margin-left:28%;">short a; <br>
+long b;</p>
+
+<p style="margin-left:22%;">}</p>
+
+<p style="margin-left:11%; margin-top: 1em">Both branches
+cannot be followed, or braces become unbalanced and
+<b>ctags</b> would be unable to make sense of the
+syntax.</p>
+
+<p style="margin-left:11%; margin-top: 1em">If the
+application of this heuristic fails to properly parse a
+file, generally due to complicated and inconsistent pairing
+within the conditionals, <b>ctags</b> will retry the file
+using a different heuristic which does not selectively
+follow conditional preprocessor branches, but instead falls
+back to relying upon a closing brace (&quot;}&quot;) in
+column 1 as indicating the end of a block once any brace
+imbalance results from following a #if conditional
+branch.</p>
+
+<p style="margin-left:11%; margin-top: 1em"><b>Ctags</b>
+will also try to specially handle argument lists enclosed
+in double sets of parentheses in order to accept the
+following conditional construct:</p>
+
+<p style="margin-left:22%; margin-top: 1em">extern void foo
+__ARGS((int one, char two));</p>
+
+<p style="margin-left:11%; margin-top: 1em">Any name
+immediately preceding the &quot;((&quot; will be
+automatically ignored and the previous name will be
+used.</p>
+
+<p style="margin-left:11%; margin-top: 1em">C++ operator
+definitions are specially handled. To maintain consistency
+with all types of operators (overloaded and conversion), the
+operator name in the tag file will always be preceded by the
+string &quot;operator &quot; (i.e. even if the actual
+operator definition was written as
+&quot;operator&lt;&lt;&quot;).</p>
+
+<p style="margin-left:11%; margin-top: 1em">After creating
+or appending to the tag file, it is sorted by the tag name,
+removing identical tag lines.</p>
+
+<a name="TAG FILE FORMAT"></a>
+<h2>TAG FILE FORMAT</h2>
+
+
+<p style="margin-left:11%; margin-top: 1em">When not
+running in etags mode, each entry in the tag file consists
+of a separate line, each looking like this in the most
+general case:</p>
+
+
+<p style="margin-left:12%; margin-top: 1em">tag_name&lt;TAB&gt;file_name&lt;TAB&gt;ex_cmd;&quot;&lt;TAB&gt;extension_fields</p>
+
+<p style="margin-left:11%; margin-top: 1em">The fields and
+separators of these lines are specified as follows:</p>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="17%"></td>
+<td width="3%">
+
+
+<p style="margin-top: 1em" valign="top">1.</p></td>
+<td width="3%"></td>
+<td width="77%">
+
+
+<p style="margin-top: 1em" valign="top">tag name</p></td>
+<tr valign="top" align="left">
+<td width="17%"></td>
+<td width="3%">
+
+
+<p valign="top">2.</p></td>
+<td width="3%"></td>
+<td width="77%">
+
+
+<p valign="top">single tab character</p></td>
+<tr valign="top" align="left">
+<td width="17%"></td>
+<td width="3%">
+
+
+<p valign="top">3.</p></td>
+<td width="3%"></td>
+<td width="77%">
+
+
+<p valign="top">name of the file in which the object
+associated with the tag is located</p></td>
+<tr valign="top" align="left">
+<td width="17%"></td>
+<td width="3%">
+
+
+<p valign="top">4.</p></td>
+<td width="3%"></td>
+<td width="77%">
+
+
+<p valign="top">single tab character</p></td>
+<tr valign="top" align="left">
+<td width="17%"></td>
+<td width="3%">
+
+
+<p valign="top">5.</p></td>
+<td width="3%"></td>
+<td width="77%">
+
+
+<p valign="top">EX command used to locate the tag within
+the file; generally a search pattern (either /pattern/ or
+?pattern?) or line number (see <b>&minus;&minus;excmd</b>).
+Tag file format 2 (see <b>&minus;&minus;format</b>) extends
+this EX command under certain circumstances to include a set
+of extension fields (described below) embedded in an EX
+comment immediately appended to the EX command, which leaves
+it backward-compatible with original <b>vi</b>(1)
+implementations.</p> </td>
+</table>
+
+<p style="margin-left:11%; margin-top: 1em">A few special
+tags are written into the tag file for internal purposes.
+These tags are composed in such a way that they always sort
+to the top of the file. Therefore, the first two characters
+of these tags are used as a magic number to detect a tag file
+for purposes of determining whether a valid tag file is
+being overwritten rather than a source file.</p>
+
+<p style="margin-left:11%; margin-top: 1em">Note that the
+name of each source file will be recorded in the tag file
+exactly as it appears on the command line. Therefore, if the
+path you specified on the command line was relative to the
+current directory, then it will be recorded in that same
+manner in the tag file. See, however, the
+<b>&minus;&minus;tag&minus;relative</b> option for how this
+behavior can be modified.</p>
+
+<p style="margin-left:11%; margin-top: 1em">Extension
+fields are tab-separated key-value pairs appended to the end
+of the EX command as a comment, as described above. These
+key value pairs appear in the general form
+&quot;<i>key</i>:<i>value</i>&quot;. Their presence in the
+lines of the tag file is controlled by the
+<b>&minus;&minus;fields</b> option. The possible keys and
+the meaning of their values are as follows:</p>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="11%"></td>
+<td width="9%">
+
+
+<p style="margin-top: 1em" valign="top"><i>access</i></p></td>
+<td width="9%"></td>
+<td width="71%">
+
+
+<p style="margin-top: 1em" valign="top">Indicates the
+visibility of this class member, where <i>value</i> is
+specific to the language.</p></td>
+<tr valign="top" align="left">
+<td width="11%"></td>
+<td width="9%">
+
+
+<p><i>file</i></p></td>
+<td width="9%"></td>
+<td width="71%">
+
+
+<p>Indicates that the tag has file-limited visibility. This
+key has no corresponding value.</p></td>
+<tr valign="top" align="left">
+<td width="11%"></td>
+<td width="9%">
+
+
+<p><i>kind</i></p></td>
+<td width="9%"></td>
+<td width="71%">
+
+
+<p>Indicates the type, or kind, of tag. Its value is either
+one of the corresponding one-letter flags described under
+the various <b>&minus;&minus;&lt;LANG&gt;&minus;kinds</b>
+options above, or a full name. It is permitted (and is, in
+fact, the default) for the key portion of this field to be
+omitted. The optional behaviors are controlled with the
+<b>&minus;&minus;fields</b> option.</p></td>
+</table>
+
+<p style="margin-left:11%;"><i>implementation</i></p>
+
+<p style="margin-left:29%;">When present, this indicates a
+limited implementation (abstract vs. concrete) of a routine
+or class, where <i>value</i> is specific to the language
+(&quot;virtual&quot; or &quot;pure virtual&quot; for C++;
+&quot;abstract&quot; for Java).</p>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="11%"></td>
+<td width="14%">
+
+
+
+<p style="margin-top: 1em" valign="top"><i>inherits</i></p> </td>
+<td width="4%"></td>
+<td width="71%">
+
+
+<p style="margin-top: 1em" valign="top">When present,
+<i>value</i> is a comma-separated list of classes from
+which this class is derived (i.e. inherits from).</p></td>
+<tr valign="top" align="left">
+<td width="11%"></td>
+<td width="14%">
+
+
+<p><i>signature</i></p></td>
+<td width="4%"></td>
+<td width="71%">
+
+
+<p>When present, <i>value</i> is a language-dependent
+representation of the signature of a routine. A routine
+signature in its complete form specifies the return type of
+a routine and its formal argument list. This extension field
+is presently supported only for C-based languages and does
+not include the return type.</p></td>
+</table>
+
+<p style="margin-left:11%; margin-top: 1em">In addition,
+information on the scope of the tag definition may be
+available, with the key portion equal to some
+language-dependent construct name and its value the name
+declared for that construct in the program. This scope entry
+indicates the scope in which the tag was found. For example,
+a tag generated for a C structure member would have a scope
+looking like &quot;struct:myStruct&quot;.</p>
+
+<a name="HOW TO USE WITH VI"></a>
+<h2>HOW TO USE WITH VI</h2>
+
+
+<p style="margin-left:11%; margin-top: 1em">Vi will, by
+default, expect a tag file by the name &quot;tags&quot; in
+the current directory. Once the tag file is built, the
+following commands exercise the tag indexing feature:</p>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="11%"></td>
+<td width="14%">
+
+
+<p style="margin-top: 1em" valign="top"><b>vi &minus;t
+tag</b></p> </td>
+<td width="4%"></td>
+<td width="71%">
+
+
+<p style="margin-top: 1em" valign="top">Start vi and
+position the cursor at the file and line where
+&quot;tag&quot; is defined.</p></td>
+<tr valign="top" align="left">
+<td width="11%"></td>
+<td width="14%">
+
+
+<p style="margin-top: 1em" valign="top"><b>:ta tag</b></p></td>
+<td width="4%"></td>
+<td width="71%">
+
+
+<p style="margin-top: 1em" valign="top">Find a tag.</p></td>
+<tr valign="top" align="left">
+<td width="11%"></td>
+<td width="14%">
+
+
+<p style="margin-top: 1em" valign="top"><b>Ctrl-]</b></p></td>
+<td width="4%"></td>
+<td width="71%">
+
+
+<p style="margin-top: 1em" valign="top">Find the tag under
+the cursor.</p></td>
+<tr valign="top" align="left">
+<td width="11%"></td>
+<td width="14%">
+
+
+<p style="margin-top: 1em" valign="top"><b>Ctrl-T</b></p></td>
+<td width="4%"></td>
+<td width="71%">
+
+
+<p style="margin-top: 1em" valign="top">Return to previous
+location before jump to tag (not widely implemented).</p></td>
+</table>
+
+<a name="HOW TO USE WITH GNU EMACS"></a>
+<h2>HOW TO USE WITH GNU EMACS</h2>
+
+
+<p style="margin-left:11%; margin-top: 1em">Emacs will, by
+default, expect a tag file by the name &quot;TAGS&quot; in
+the current directory. Once the tag file is built, the
+following commands exercise the tag indexing feature:
+<b><br>
+M-x visit&minus;tags&minus;table &lt;RET&gt; FILE
+&lt;RET&gt;</b></p>
+
+<p style="margin-left:26%;">Select the tag file,
+&quot;FILE&quot;, to use.</p>
+
+<p style="margin-left:11%;"><b>M-. [TAG]
+&lt;RET&gt;</b></p>
+
+<p style="margin-left:26%;">Find the first definition of
+TAG. The default tag is the identifier under the cursor.</p>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="11%"></td>
+<td width="11%">
+
+
+<p style="margin-top: 1em" valign="top"><b>M-*</b></p></td>
+<td width="4%"></td>
+<td width="72%">
+
+
+<p style="margin-top: 1em" valign="top">Pop back to where
+you previously invoked &quot;M-.&quot;.</p></td>
+<td width="2%">
+</td>
+<tr valign="top" align="left">
+<td width="11%"></td>
+<td width="11%">
+
+
+<p style="margin-top: 1em" valign="top"><b>C-u M-.</b></p></td>
+<td width="4%"></td>
+<td width="72%">
+
+
+<p style="margin-top: 1em" valign="top">Find the next
+definition for the last tag.</p></td>
+<td width="2%">
+</td>
+</table>
+
+<p style="margin-left:11%; margin-top: 1em">For more
+commands, see the <i>Tags</i> topic in the Emacs info
+document.</p>
+
+<a name="HOW TO USE WITH NEDIT"></a>
+<h2>HOW TO USE WITH NEDIT</h2>
+
+
+<p style="margin-left:11%; margin-top: 1em">NEdit version
+5.1 and later can handle the new extended tag file format
+(see <b>&minus;&minus;format</b>). To make NEdit use the tag
+file, select &quot;File&minus;&gt;Load Tags File&quot;. To
+jump to the definition for a tag, highlight the word, then
+press Ctrl-D. NEdit 5.1 can read multiple tag files from
+different directories. Setting the X resource nedit.tagFile
+to the name of a tag file instructs NEdit to automatically
+load that tag file at startup time.</p>
+
+<a name="CAVEATS"></a>
+<h2>CAVEATS</h2>
+
+
+<p style="margin-left:11%; margin-top: 1em">Because
+<b>ctags</b> is neither a preprocessor nor a compiler, use
+of preprocessor macros can fool <b>ctags</b> into either
+missing tags or improperly generating inappropriate tags.
+Although <b>ctags</b> has been designed to handle certain
+common cases, this is the single biggest cause of reported
+problems. In particular, the use of preprocessor constructs
+which alter the textual syntax of C can fool <b>ctags</b>.
+You can work around many such problems by using the
+<b>&minus;I</b> option.</p>
+
+<p style="margin-left:11%; margin-top: 1em">Note that since
+<b>ctags</b> generates patterns for locating tags (see the
+<b>&minus;&minus;excmd</b> option), it is entirely possible
+that the wrong line may be found by your editor if there
+exists another source line which is identical to the line
+containing the tag. The following example demonstrates this
+condition:</p>
+
+<p style="margin-left:22%; margin-top: 1em">int
+variable;</p>
+
+<p style="margin-left:22%; margin-top: 1em">/* ... */ <br>
+void foo(variable) <br>
+int variable; <br>
+{</p>
+
+<p style="margin-left:28%;">/* ... */</p>
+
+<p style="margin-left:22%;">}</p>
+
+<p style="margin-left:11%; margin-top: 1em">Depending upon
+which editor you use and where in the code you happen to be,
+it is possible that the search pattern may locate the local
+parameter declaration in foo() before it finds the actual
+global variable definition, since the lines (and therefore
+their search patterns) are identical. This can be avoided by
+use of the <b>&minus;&minus;excmd</b>=<i>n</i> option.</p>
+
+<a name="BUGS"></a>
+<h2>BUGS</h2>
+
+
+<p style="margin-left:11%; margin-top: 1em"><b>Ctags</b>
+has more options than <b>ls</b>(1).</p>
+
+<p style="margin-left:11%; margin-top: 1em">When parsing a
+C++ member function definition (e.g.
+&quot;className::function&quot;), <b>ctags</b> cannot
+determine whether the scope specifier is a class name or a
+namespace specifier and always lists it as a class name in
+the scope portion of the extension fields. Also, if a C++
+function is defined outside of the class declaration (the
+usual case), the access specification (i.e. public,
+protected, or private) and implementation information (e.g.
+virtual, pure virtual) contained in the function declaration
+are not known when the tag is generated for the function
+definition. It will, however, be available for prototypes
+(e.g. <b>&minus;&minus;c++&minus;kinds</b>=<i>+p</i>).</p>
+
+<p style="margin-left:11%; margin-top: 1em">No qualified
+tags are generated for language objects inherited into a
+class.</p>
+
+<a name="ENVIRONMENT VARIABLES"></a>
+<h2>ENVIRONMENT VARIABLES</h2>
+
+
+<table width="100%" border=0 rules="none" frame="void"
+ cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="11%"></td>
+<td width="9%">
+
+
+<p style="margin-top: 1em" valign="top"><b>CTAGS</b></p></td>
+<td width="3%"></td>
+<td width="77%">
+
+
+<p style="margin-top: 1em" valign="top">If this environment
+variable exists, it will be expected to contain a set of
+default options which are read when <b>ctags</b> starts,
+after the configuration files listed in <b>FILES</b>, below,
+are read, but before any command line options are read.
+Options appearing on the command line will override options
+specified in this variable. Only options will be read from
+this variable. Note that all white space in this variable is
+considered a separator, making it impossible to pass an
+option parameter containing an embedded space. If this is a
+problem, use a configuration file instead.</p></td>
+<tr valign="top" align="left">
+<td width="11%"></td>
+<td width="9%">
+
+
+<p><b>ETAGS</b></p></td>
+<td width="3%"></td>
+<td width="77%">
+
+
+<p>Similar to the <b>CTAGS</b> variable above, this
+variable, if found, will be read when <b>etags</b> starts.
+If this variable is not found, <b>etags</b> will try to use
+<b>CTAGS</b> instead.</p></td>
+<tr valign="top" align="left">
+<td width="11%"></td>
+<td width="9%">
+
+
+<p><b>TMPDIR</b></p></td>
+<td width="3%"></td>
+<td width="77%">
+
+
+<p>On Unix-like hosts where mkstemp() is available, the
+value of this variable specifies the directory in which to
+place temporary files. This can be useful if the size of a
+temporary file becomes too large to fit on the partition
+holding the default temporary directory defined at
+compilation time. <b>ctags</b> creates temporary files only
+if either (1) an emacs-style tag file is being generated,
+(2) the tag file is being sent to standard output, or (3)
+the program was compiled to use an internal sort algorithm
+to sort the tag files instead of the sort utility of the
+operating system. If the sort utility of the operating
+system is being used, it will generally observe this
+variable also. Note that if <b>ctags</b> is setuid, the
+value of TMPDIR will be ignored.</p></td>
+</table>
+
+<a name="FILES"></a>
+<h2>FILES</h2>
+
+
+<p style="margin-left:11%; margin-top: 1em"><i>/ctags.cnf
+(on MSDOS, MSWindows only) <br>
+/etc/ctags.conf <br>
+/usr/local/etc/ctags.conf <br>
+$HOME/.ctags <br>
+$HOME/ctags.cnf (on MSDOS, MSWindows only) <br>
+.ctags <br>
+ctags.cnf (on MSDOS, MSWindows only)</i></p>
+
+<p style="margin-left:22%;">If any of these configuration
+files exist, each will be expected to contain a set of
+default options which are read in the order listed when
+<b>ctags</b> starts, but before the <b>CTAGS</b> environment
+variable is read or any command line options are read. This
+makes it possible to set up site-wide, personal or
+project-level defaults. It is possible to compile
+<b>ctags</b> to read an additional configuration file before
+any of those shown above, which will be indicated if the
+output produced by the <b>&minus;&minus;version</b> option
+lists the &quot;custom-conf&quot; feature. Options appearing
+in the <b>CTAGS</b> environment variable or on the command
+line will override options specified in these files. Only
+options will be read from these files. Note that the option
+files are read in line-oriented mode in which spaces are
+significant (since shell quoting is not possible). Each line
+of the file is read as one command line parameter (as if it
+were quoted with single quotes). Therefore, use new lines to
+indicate separate command-line arguments.</p>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="11%"></td>
+<td width="6%">
+
+
+<p style="margin-top: 1em" valign="top"><i>tags</i></p></td>
+<td width="5%"></td>
+<td width="58%">
+
+
+<p style="margin-top: 1em" valign="top">The default tag
+file created by <b>ctags</b>.</p></td>
+<td width="20%">
+</td>
+<tr valign="top" align="left">
+<td width="11%"></td>
+<td width="6%">
+
+
+<p style="margin-top: 1em" valign="top"><i>TAGS</i></p></td>
+<td width="5%"></td>
+<td width="58%">
+
+
+<p style="margin-top: 1em" valign="top">The default tag
+file created by <b>etags</b>.</p></td>
+<td width="20%">
+</td>
+</table>
+
+<a name="SEE ALSO"></a>
+<h2>SEE ALSO</h2>
+
+
+<p style="margin-left:11%; margin-top: 1em">The official
+Exuberant Ctags web site at:</p>
+
+
+<p style="margin-left:22%; margin-top: 1em">http://ctags.sourceforge.net</p>
+
+<p style="margin-left:11%; margin-top: 1em">Also
+<b>ex</b>(1), <b>vi</b>(1), <b>elvis</b>, or, better yet,
+<b>vim</b>, the official editor of <b>ctags</b>. For more
+information on <b>vim</b>, see the VIM Pages web site
+at:</p>
+
+
+<p style="margin-left:22%; margin-top: 1em">http://www.vim.org/</p>
+
+<a name="AUTHOR"></a>
+<h2>AUTHOR</h2>
+
+
+<p style="margin-left:11%; margin-top: 1em">Darren Hiebert
+&lt;dhiebert at users.sourceforge.net&gt; <br>
+http://DarrenHiebert.com/</p>
+
+<a name="MOTIVATION"></a>
+<h2>MOTIVATION</h2>
+
+
+<p style="margin-left:11%; margin-top: 1em">&quot;Think ye
+at all times of rendering some service to every member of
+the human race.&quot;</p>
+
+<p style="margin-left:11%; margin-top: 1em">&quot;All
+effort and exertion put forth by man from the fullness of
+his heart is worship, if it is prompted by the highest
+motives and the will to do service to humanity.&quot;</p>
+
+<p style="margin-left:22%; margin-top: 1em">&minus;&minus;
+From the Baha&rsquo;i Writings</p>
+
+<a name="CREDITS"></a>
+<h2>CREDITS</h2>
+
+
+<p style="margin-left:11%; margin-top: 1em">This version of
+<b>ctags</b> was originally derived from and inspired by the
+ctags program by Steve Kirkendall
+&lt;kirkenda@cs.pdx.edu&gt; that comes with the Elvis vi
+clone (though virtually none of the original code
+remains).</p>
+
+<p style="margin-left:11%; margin-top: 1em">Credit is also
+due Bram Moolenaar &lt;Bram@vim.org&gt;, the author of
+<b>vim</b>, who has devoted so much of his time and energy
+both to developing the editor as a service to others, and to
+helping the orphans of Uganda.</p>
+
+<p style="margin-left:11%; margin-top: 1em">The section
+entitled &quot;HOW TO USE WITH GNU EMACS&quot; was
+shamelessly stolen from the info page for GNU
+<b>etags</b>.</p>
+<hr>
+</body>
+</html>
diff --git a/ctags.spec b/ctags.spec
new file mode 100644
index 0000000..88939fe
--- /dev/null
+++ b/ctags.spec
@@ -0,0 +1,40 @@
+Summary: Exuberant Ctags - a multi-language source code indexing tool
+Name: ctags
+Version: @VERSION@
+Release: 1
+License: GPL
+Group: Development/Tools
+Source: http://prdownloads.sourceforge.net/ctags/ctags-%{version}.tar.gz
+URL: http://ctags.sourceforge.net
+Buildroot: %{_tmppath}/%{name}-%{version}-root
+
+%description
+Exuberant Ctags generates an index (or tag) file of language objects
+found in source files for many popular programming languages. This index
+makes it easy for text editors and other tools to locate the indexed
+items. Exuberant Ctags improves on traditional ctags because of its
+multilanguage support, its ability for the user to define new languages
+searched by regular expressions, and its ability to generate emacs-style
+TAGS files.
+
+Install ctags if you are going to use your system for programming.
+
+%prep
+%setup -q
+
+%build
+%configure
+make
+
+%install
+rm -rf $RPM_BUILD_ROOT
+%makeinstall
+
+%clean
+rm -rf $RPM_BUILD_ROOT
+
+%files
+%defattr(-,root,root)
+%doc COPYING EXTENDING.html FAQ NEWS README ctags.html
+%{_bindir}/ctags
+%{_mandir}/man1/ctags*
diff --git a/debug.c b/debug.c
new file mode 100644
index 0000000..6d44ad5
--- /dev/null
+++ b/debug.c
@@ -0,0 +1,113 @@
+/*
+* $Id: debug.c 558 2007-06-15 19:17:02Z elliotth $
+*
+* Copyright (c) 1996-2002, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* This module contains debugging functions.
+*/
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#include <ctype.h>
+#include <stdarg.h>
+
+#include "debug.h"
+#include "options.h"
+#include "read.h"
+
+/*
+* FUNCTION DEFINITIONS
+*/
+
+#ifdef DEBUG
+
+extern void lineBreak (void) {} /* intentionally empty; provides a line-specified break point */
+
+/*
+* printf-style debug output. The formatted text is written to stdout only
+* when debug (level) is true; stdout is flushed on every call, whether or
+* not anything was printed.
+*/
+extern void debugPrintf (
+		const enum eDebugLevels level, const char *const format, ... )
+{
+	va_list args;
+
+	va_start (args, format);
+	if (debug (level))
+	{
+		vprintf (format, args);
+	}
+	fflush (stdout);
+	va_end (args);
+}
+
+/*
+* Echoes a single character to stdout when debug (level) is true and c is
+* not EOF. The placeholder values STRING_SYMBOL and CHAR_SYMBOL are shown
+* as the literal text "string" (in double quotes) and 'c' respectively; any
+* other value is written with putchar(). stdout is flushed after the write.
+*/
+extern void debugPutc (const int level, const int c)
+{
+	/* nothing to echo when the level is disabled or input is exhausted */
+	if (! debug (level) || c == EOF)
+		return;
+
+	if (c == STRING_SYMBOL)
+		printf ("\"string\"");
+	else if (c == CHAR_SYMBOL)
+		printf ("'c'");
+	else
+		putchar (c);
+
+	fflush (stdout);
+}
+
+/*
+* Reports a change in parse nesting via debugPrintf() at DEBUG_PARSE level.
+* "increase" selects the "++" or "--" marker; "level" is the nesting depth
+* to display.
+*/
+extern void debugParseNest (const boolean increase, const unsigned int level)
+{
+	/* level is unsigned, so it must be formatted with %u rather than %d */
+	debugPrintf (DEBUG_PARSE, "<*%snesting:%u*>", increase ? "++" : "--", level);
+}
+
+/*
+* Reports the start or end of a preprocessor conditional nesting level via
+* debugPrintf() at DEBUG_CPP level. "begin" selects the "begin" or "end"
+* label; "level" is the nesting depth to display.
+*/
+extern void debugCppNest (const boolean begin, const unsigned int level)
+{
+	/* level is unsigned, so it must be formatted with %u rather than %d */
+	debugPrintf (DEBUG_CPP, "<*cpp:%s level %u*>", begin ? "begin":"end", level);
+}
+
+/*
+* Reports, via debugPrintf() at DEBUG_CPP level, that an ignored
+* preprocessor region begins (ignore is true) or ends (ignore is false).
+*/
+extern void debugCppIgnore (const boolean ignore)
+{
+	const char *const state = ignore ? "begin" : "end";
+
+	debugPrintf (DEBUG_CPP, "<*cpp:%s ignore*>", state);
+}
+
+/*
+* Prints a one-line summary of a tag entry to stdout when DEBUG_PARSE
+* debugging is enabled. The output starts with "<#", an optional "{fs}"
+* marker for file-scope tags, then "kind:name", followed by a bracketed
+* item for each extension field that is set on the tag (most of them only
+* when the matching Option.extensionFields flag is on), and ends with "#>".
+* stdout is flushed afterwards.
+*/
+extern void debugEntry (const tagEntryInfo *const tag)
+{
+	if (! debug (DEBUG_PARSE))
+		return;
+
+	printf ("<#%s%s:%s", tag->isFileScope ? "{fs}" : "",
+			tag->kindName, tag->name);
+
+	/* the scope pair is shown whenever both halves are set; no option gates it */
+	if (tag->extensionFields.scope [0] != NULL  &&
+		tag->extensionFields.scope [1] != NULL)
+	{
+		printf (" [%s:%s]", tag->extensionFields.scope [0],
+				tag->extensionFields.scope [1]);
+	}
+
+	if (Option.extensionFields.inheritance  &&
+		tag->extensionFields.inheritance != NULL)
+	{
+		printf (" [inherits:%s]", tag->extensionFields.inheritance);
+	}
+
+	/* the file marker is suppressed while reading a header file */
+	if (Option.extensionFields.fileScope  &&
+		tag->isFileScope  &&  ! isHeaderFile ())
+	{
+		printf (" [file:]");
+	}
+
+	if (Option.extensionFields.access  &&
+		tag->extensionFields.access != NULL)
+	{
+		printf (" [access:%s]", tag->extensionFields.access);
+	}
+
+	if (Option.extensionFields.implementation  &&
+		tag->extensionFields.implementation != NULL)
+	{
+		printf (" [imp:%s]", tag->extensionFields.implementation);
+	}
+
+	if (Option.extensionFields.typeRef  &&
+		tag->extensionFields.typeRef [0] != NULL  &&
+		tag->extensionFields.typeRef [1] != NULL)
+	{
+		printf (" [%s:%s]", tag->extensionFields.typeRef [0],
+				tag->extensionFields.typeRef [1]);
+	}
+
+	printf ("#>");
+	fflush (stdout);
+}
+
+#endif
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/debug.h b/debug.h
new file mode 100644
index 0000000..41a6881
--- /dev/null
+++ b/debug.h
@@ -0,0 +1,70 @@
+/*
+* $Id: debug.h 558 2007-06-15 19:17:02Z elliotth $
+*
+* Copyright (c) 1998-2002, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* External interface to debug.c
+*/
+#ifndef _DEBUG_H
+#define _DEBUG_H
+
+/*
+* Include files
+*/
+#include "general.h" /* must always come first */
+
+#ifdef DEBUG
+# include <assert.h>
+#endif
+#include "entry.h"
+
+/*
+* Macros
+*/
+
+#ifdef DEBUG
+# define debug(level) ((Option.debugLevel & (long)(level)) != 0) /* true when any bit of level is set in Option.debugLevel */
+# define DebugStatement(x) x /* debug build: x is emitted unchanged */
+# define PrintStatus(x) if (debug(DEBUG_STATUS)) printf x; /* x must be a parenthesized printf argument list; expands to a bare if statement with its own ';' */
+# define Assert(c) assert(c) /* debug build: forwards to assert() */
+#else
+# define DebugStatement(x) /* non-debug build: x is discarded */
+# define PrintStatus(x) /* non-debug build: expands to nothing */
+# define Assert(c) /* non-debug build: the condition is not evaluated */
+# ifndef NDEBUG
+# define NDEBUG /* defined here only if the build has not already defined it */
+# endif
+#endif
+
+/*
+* Data declarations
+*/
+
+/* Defines the debugging levels. Each value is a distinct bit; the debug()
+ * macro tests a level with a bitwise AND against Option.debugLevel. */
+enum eDebugLevels {
+ DEBUG_READ = 0x01, /* echo raw (filtered) characters */
+ DEBUG_PARSE = 0x02, /* echo parsing results */
+ DEBUG_STATUS = 0x04, /* echo file status information */
+ DEBUG_OPTION = 0x08, /* echo option parsing */
+ DEBUG_CPP = 0x10, /* echo characters out of pre-processor */
+ DEBUG_RAW = 0x20 /* echo raw (filtered) characters -- NOTE(review): same text as DEBUG_READ; confirm the intended difference */
+};
+
+/*
+* Function prototypes
+*/
+extern void lineBreak (void); /* empty function that provides a break point */
+extern void debugPrintf (const enum eDebugLevels level, const char *const format, ...) __printf__ (2, 3); /* printf-style output to stdout, printed only when level is enabled */
+extern void debugPutc (const int level, const int c); /* echo one character to stdout when level is enabled and c is not EOF */
+extern void debugParseNest (const boolean increase, const unsigned int level); /* report a parse nesting change at DEBUG_PARSE level */
+extern void debugCppNest (const boolean begin, const unsigned int level); /* report begin/end of a preprocessor nesting level at DEBUG_CPP level */
+extern void debugCppIgnore (const boolean ignore); /* report begin/end of an ignored preprocessor region at DEBUG_CPP level */
+extern void debugEntry (const tagEntryInfo *const tag); /* print a summary of a tag entry when DEBUG_PARSE is enabled */
+
+#endif /* _DEBUG_H */
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/descrip.mms b/descrip.mms
new file mode 100644
index 0000000..4a83e5c
--- /dev/null
+++ b/descrip.mms
@@ -0,0 +1,68 @@
+# $Id: descrip.mms 2 2001-11-02 04:53:43Z darren $
+#
+# Makefile for building CTAGS under OpenVMS
+#
+# Maintained by Zoltan Arpadffy <arpadffy@altavista.net>
+#
+# Edit the lines in the Configuration section below to select build options.
+
+######################################################################
+# Configuration section.
+######################################################################
+# Compiler selection.
+# Comment out if you use the VAXC compiler
+######################################################################
+DECC = YES
+
+######################################################################
+# Uncomment if you want a debug version. Resulting executable is DCTAGS.EXE
+######################################################################
+# DEBUG = YES
+
+######################################################################
+# End of configuration section.
+#
+# Please, do not change anything below without programming experience.
+######################################################################
+
+CC = cc
+
+.IFDEF DECC
+CC_DEF = $(CC)/decc/prefix=all
+.ELSE
+CC_DEF = $(CC)
+.ENDIF
+
+LD_DEF = link
+
+.IFDEF DEBUG
+TARGET = dctags.exe
+CFLAGS = /debug/noopt/list/cross_reference/include=[]
+LDFLAGS = /debug
+.ELSE
+TARGET = ctags.exe
+CFLAGS = /include=[]
+LDFLAGS =
+.ENDIF
+
+OBJEXT = obj
+
+.SUFFIXES : .obj .c
+
+.INCLUDE source.mak
+
+EXTRA_OBJS = argproc.obj
+
+all : $(TARGET)
+ ! $@
+
+.c.obj :
+ $(CC_DEF) $(CFLAGS) $<
+
+$(TARGET) : $(OBJECTS) $(EXTRA_OBJS)
+ $(LD_DEF) $(LDFLAGS) /exe=$(TARGET) $+
+
+clean :
+ -@ if F$SEARCH("*.obj") .NES. "" then delete/noconfirm/nolog *.obj.*
+ -@ if F$SEARCH("*.exe") .NES. "" then delete/noconfirm/nolog *.exe.*
+ -@ if F$SEARCH("config.h") .NES. "" then delete/noconfirm/nolog config.h.*
diff --git a/dosbatch.c b/dosbatch.c
new file mode 100644
index 0000000..c165183
--- /dev/null
+++ b/dosbatch.c
@@ -0,0 +1,42 @@
+/*
+* $Id$
+*
+* Copyright (c) 2009, David Fishburn
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* This module contains functions for generating tags for DOS Batch language files.
+*/
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#include <string.h>
+#include "parse.h"
+
+/*
+* FUNCTION DEFINITIONS
+*/
+
+/*
+* Registers the two tag regular expressions for the given language: one
+* producing "label" tags from lines that start with ":name", and one
+* producing "variable" tags from "set name =" assignments. Both pass "\1" as
+* the tag name (presumably the first capture group -- confirm against
+* addTagRegex()).
+*/
+static void installDosBatchRegex (const langType language)
+{
+	const char *const tagName = "\\1";
+	const char *const labelRegex = "^:([A-Za-z_0-9]+)";
+	const char *const variableRegex = "set[ \t]+([A-Za-z_0-9]+)[ \t]*=";
+
+	addTagRegex (language, labelRegex, tagName, "l,label,labels", NULL);
+	addTagRegex (language, variableRegex, tagName, "v,variable,variables", NULL);
+}
+
+/*
+* Builds the parser definition for the "DosBatch" language. The definition
+* claims the "bat" and "cmd" file extensions, installs the tag regular
+* expressions through installDosBatchRegex(), and is flagged as regex-based.
+* Returns the definition obtained from parserNew().
+*/
+extern parserDefinition* DosBatchParser (void)
+{
+	/* static: the definition keeps a pointer to this NULL-terminated list */
+	static const char *const extensions [] = { "bat", "cmd", NULL };
+	parserDefinition* const def = parserNew ("DosBatch");
+	def->extensions = extensions;
+	def->initialize = installDosBatchRegex;
+	def->regex = TRUE;
+	return def;
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/e_amiga.h b/e_amiga.h
new file mode 100644
index 0000000..511a78f
--- /dev/null
+++ b/e_amiga.h
@@ -0,0 +1,24 @@
+/*
+* $Id: e_amiga.h 136 2002-03-08 22:35:19Z darren $
+*
+* Copyright (c) 2002, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* Configures ctags for Amiga environment.
+*/
+#ifndef E_AMIGA_H
+#define E_AMIGA_H
+
+#define HAVE_STDLIB_H 1
+#define HAVE_SYS_STAT_H 1
+#define HAVE_SYS_TYPES_H 1
+#define HAVE_TIME_H 1
+#define HAVE_CLOCK 1
+#define HAVE_FGETPOS 1
+#define HAVE_STRERROR 1
+#define HAVE_STRICMP 1
+#define HAVE_STRNICMP 1
+
+#endif
diff --git a/e_djgpp.h b/e_djgpp.h
new file mode 100644
index 0000000..5cd5190
--- /dev/null
+++ b/e_djgpp.h
@@ -0,0 +1,47 @@
+/*
+* $Id: e_djgpp.h 375 2003-10-31 04:15:35Z darren $
+*
+* Copyright (c) 2002-2003, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* Configures ctags for DJGPP environment.
+*/
+#ifndef E_DJGPP_H
+#define E_DJGPP_H
+
+#define CASE_INSENSITIVE_FILENAMES 1
+#define MSDOS_STYLE_PATH 1
+
+#define HAVE_DIR_H 1
+#define HAVE_DIRENT_H 1
+#define HAVE_FCNTL_H 1
+#define HAVE_FNMATCH_H 1
+#define HAVE_STDLIB_H 1
+#define HAVE_STRING_H 1
+#define HAVE_SYS_DIR_H 1
+#define HAVE_SYS_STAT_H 1
+#define HAVE_SYS_TIMES_H 1
+#define HAVE_SYS_TYPES_H 1
+#define HAVE_TIME_H 1
+#define HAVE_UNISTD_H 1
+
+#define HAVE_CLOCK 1
+#define HAVE_FGETPOS 1
+#define HAVE_FNMATCH 1
+#define HAVE_MKSTEMP 1
+#define HAVE_OPENDIR 1
+#define HAVE_REGCOMP 1
+#define HAVE_REMOVE 1
+#define HAVE_SETENV 1
+#define HAVE_STAT_ST_INO 1
+#define HAVE_STRCASECMP 1
+#define HAVE_STRERROR 1
+#define HAVE_STRNCASECMP 1
+#define HAVE_STRSTR 1
+#define HAVE_TRUNCATE 1
+#define NEED_PROTO_LSTAT 1
+#define STDC_HEADERS 1
+
+#endif
diff --git a/e_mac.h b/e_mac.h
new file mode 100644
index 0000000..053eab0
--- /dev/null
+++ b/e_mac.h
@@ -0,0 +1,143 @@
+/*
+* $Id: e_mac.h 443 2006-05-30 04:37:13Z darren $
+*
+* Copyright (c) 2001, Maarten L. Hekkelman
+*
+* Author: Maarten L. Hekkelman <maarten@hekkelman.com>
+* http://www.hekkelman.com
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License. It is provided on an as-is basis and no
+* responsibility is accepted for its failure to perform as expected.
+*
+* Configures ctags for Macintosh environment.
+*/
+#ifndef E_MAC_H
+#define E_MAC_H
+
+#define BUILD_MPW_TOOL 1
+
+#define MACROS_USE_PATTERNS 1
+#define DEFAULT_FILE_FORMAT 2
+#define INTERNAL_SORT 1
+#define TMPDIR "/tmp"
+#define NEED_PROTO_TRUNCATE 1
+#define STDC_HEADERS 1
+#define HAVE_CLOCK 1
+#define HAVE_FGETPOS 1
+#define HAVE_OPENDIR 1
+#define HAVE_REMOVE 1
+#define HAVE_SETENV 1
+#define HAVE_STRERROR 1
+#define HAVE_STRSTR 1
+#define HAVE_FCNTL_H 1
+#define HAVE_STDLIB_H 1
+#define HAVE_STRING_H 1
+#define HAVE_SYS_DIR_H 1
+#define HAVE_SYS_TIMES_H 1
+#define HAVE_TIME_H 1
+#define HAVE_TYPES_H 1
+#define HAVE_STDLIB_H 1
+
+#include <time.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <stdio.h>
+
+#include <Files.h>
+
+#if BUILD_MPW_TOOL
+
+/*
+ The following defines are collected from various header files from some
+ Linux distribution
+*/
+
+typedef unsigned long mode_t;
+typedef unsigned long ino_t;
+typedef unsigned long dev_t;
+typedef short nlink_t;
+typedef unsigned long uid_t;
+typedef unsigned long gid_t;
+
+/* Encoding of the file mode. */
+#define S_IFMT 0170000 /* These bits determine file type. */
+
+/* File types. */
+#define S_IFDIR 0040000 /* Directory. */
+#define S_IFCHR 0020000 /* Character device. */
+#define S_IFBLK 0060000 /* Block device. */
+#define S_IFREG 0100000 /* Regular file. */
+
+#define S_ISTYPE(mode, mask) (((mode) & S_IFMT) == (mask))
+
+#define S_ISDIR(mode) S_ISTYPE((mode), S_IFDIR)
+#define S_ISCHR(mode) S_ISTYPE((mode), S_IFCHR)
+#define S_ISBLK(mode) S_ISTYPE((mode), S_IFBLK)
+#define S_ISREG(mode) S_ISTYPE((mode), S_IFREG)
+
+struct stat {
+ dev_t st_dev; /* Device. */
+ unsigned short int __pad1;
+ ino_t st_ino; /* File serial number. */
+ mode_t st_mode; /* File mode. */
+ nlink_t st_nlink; /* Link count. */
+ uid_t st_uid; /* User ID of the file's owner. */
+ gid_t st_gid; /* Group ID of the file's group.*/
+ off_t st_size; /* Size of file, in bytes. */
+ unsigned long int st_blksize; /* Optimal block size for I/O. */
+ long st_blocks; /* Number 512-byte blocks allocated. */
+ time_t st_atime; /* Time of last access. */
+ time_t st_mtime; /* Time of last modification. */
+ time_t st_ctime; /* Time of last status change. */
+};
+
+int fstat(int fildes, struct stat *buf);
+
+#else
+#include <console.h>
+#include <stat.mac.h>
+#endif
+
+#ifndef PATH_MAX
+#define PATH_MAX 1024
+#endif
+
+/*
+ Our own stat, accepts unix like paths.
+*/
+int mstat(const char *path, struct stat *buf);
+
+struct dirent {
+ char d_name[64];
+};
+
+typedef struct {
+ FSSpec file;
+ int index;
+ struct dirent ent;
+} DIR;
+
+extern DIR* opendir(const char *dirname);
+extern struct dirent* readdir(DIR* dirp);
+extern int closedir(DIR* dirp);
+extern void rewinddir(DIR* dirp);
+extern char* getcwd(char*, int);
+
+/*
+ Our own fopen, accepts unix like paths.
+*/
+extern FILE* mfopen(const char* file, const char* mode);
+
+/*
+ Dirty, define the standard functions fopen, stat and lstat to map to our
+ own routines.
+*/
+#define fopen mfopen
+#define stat(a,b) mstat(a,b)
+#define lstat(a,b) mstat(a,b)
+
+#endif
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/e_msoft.h b/e_msoft.h
new file mode 100644
index 0000000..cc40015
--- /dev/null
+++ b/e_msoft.h
@@ -0,0 +1,76 @@
+/*
+* $Id: e_msoft.h 577 2007-06-30 15:30:16Z dhiebert $
+*
+* Copyright (c) 2002-2003, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* Configures ctags for Microsoft environment.
+*/
+#ifndef E_MSOFT_H
+#define E_MSOFT_H
+
+/* MS-DOS/Windows doesn't allow manipulation of standard error,
+ * so we send it to stdout instead.
+ */
+#define errout stdout
+
+#define CASE_INSENSITIVE_FILENAMES 1
+#define MANUAL_GLOBBING 1
+#define MSDOS_STYLE_PATH 1
+#define HAVE_DOS_H 1
+#define HAVE_FCNTL_H 1
+#define HAVE_IO_H 1
+#define HAVE_STDLIB_H 1
+#define HAVE_SYS_STAT_H 1
+#define HAVE_SYS_TYPES_H 1
+#define HAVE_TIME_H 1
+#define HAVE_CLOCK 1
+#define HAVE_CHSIZE 1
+#define HAVE_FGETPOS 1
+#define HAVE_STRICMP 1
+#define HAVE_STRNICMP 1
+#define HAVE_STRSTR 1
+#define HAVE_STRERROR 1
+#define HAVE_FINDNEXT 1
+#define HAVE_TEMPNAM 1
+#define tempnam(dir,pfx) _tempnam(dir,pfx)
+#define TMPDIR "\\"
+
+#ifdef __BORLANDC__
+
+# define HAVE_DIR_H 1
+# define HAVE_DIRENT_H 1
+# define HAVE_FINDFIRST 1
+
+#elif defined (_MSC_VER)
+
+# define HAVE__FINDFIRST 1
+# define HAVE_DIRECT_H 1
+
+# if _MSC_VER >= 1300
+# define findfirst_t intptr_t /* Visual Studio 7 */
+# else
+# define findfirst_t long /* Visual Studio 6 or earlier */
+# endif
+
+#elif defined (__MINGW32__)
+
+# include <_mingw.h>
+# if defined (__MSVCRT__) && __MINGW32_MAJOR_VERSION == 1 && __MINGW32_MINOR_VERSION < 2
+/* Work-around for broken implementation of fgetpos()/fsetpos() on Mingw32 */
+# undef HAVE_FGETPOS
+# define NEED_PROTO_FGETPOS 1
+# endif
+# define HAVE_DIR_H 1
+# define HAVE_DIRENT_H 1
+# define HAVE__FINDFIRST 1
+# define findfirst_t long
+# define ffblk _finddata_t
+# define FA_DIREC _A_SUBDIR
+# define ff_name name
+
+#endif
+
+#endif
diff --git a/e_os2.h b/e_os2.h
new file mode 100644
index 0000000..53b5f19
--- /dev/null
+++ b/e_os2.h
@@ -0,0 +1,37 @@
+/*
+* $Id: e_os2.h 136 2002-03-08 22:35:19Z darren $
+*
+* Copyright (c) 2002, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* Configures ctags for OS/2 environment.
+*/
+#ifndef E_OS2_H
+#define E_OS2_H
+
+#define UNIX_PATH_SEPARATOR 1
+#define CASE_INSENSITIVE_FILENAMES 1
+#define HAVE_DIRENT_H 1
+#define HAVE_FCNTL_H 1
+#define HAVE_IO_H 1
+#define HAVE_STDLIB_H 1
+#define HAVE_SYS_STAT_H 1
+#define HAVE_SYS_TYPES_H 1
+#define HAVE_TIME_H 1
+#define HAVE_UNISTD_H 1
+#define HAVE_CLOCK 1
+#define HAVE_CHSIZE 1
+#define HAVE_FGETPOS 1
+#define HAVE_FTRUNCATE 1
+#define HAVE_OPENDIR 1
+#define HAVE_REGCOMP 1
+#define HAVE_REMOVE 1
+#define HAVE_STRERROR 1
+#define HAVE_STRICMP 1
+#define HAVE_STRNICMP 1
+#define HAVE_STRSTR 1
+#define HAVE_TRUNCATE 1
+
+#endif
diff --git a/e_qdos.h b/e_qdos.h
new file mode 100644
index 0000000..52f2500
--- /dev/null
+++ b/e_qdos.h
@@ -0,0 +1,34 @@
+/*
+* $Id: e_qdos.h 136 2002-03-08 22:35:19Z darren $
+*
+* Copyright (c) 2002, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* Configures ctags for QDOS environment.
+*/
+#ifndef E_QDOS_H
+#define E_QDOS_H
+
+#define HAVE_DIRENT_H 1
+#define HAVE_STDLIB_H 1
+#define HAVE_SYS_STAT_H 1
+#define HAVE_SYS_TIMES_H 1
+#define HAVE_SYS_TYPES_H 1
+#define HAVE_TIME_H 1
+#define HAVE_UNISTD_H 1
+#define STDC_HEADERS 1
+#define HAVE_CLOCK 1
+#define HAVE_FGETPOS 1
+#define HAVE_FTRUNCATE 1
+#define HAVE_OPENDIR 1
+#define HAVE_PUTENV 1
+#define HAVE_REMOVE 1
+#define HAVE_STRERROR 1
+#define HAVE_STRSTR 1
+#define HAVE_TIMES 1
+#define HAVE_TRUNCATE 1
+#define NON_CONST_PUTENV_PROTOTYPE 1
+
+#endif
diff --git a/e_riscos.h b/e_riscos.h
new file mode 100644
index 0000000..a7a3ecc
--- /dev/null
+++ b/e_riscos.h
@@ -0,0 +1,58 @@
+/*
+* $Id: e_riscos.h 136 2002-03-08 22:35:19Z darren $
+*
+* Copyright (c) 2002, Andrew Wingate
+*
+* Author: Andrew Wingate <andy@sparse.net>
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License. It is provided on an as-is basis and no
+* responsibility is accepted for its failure to perform as expected.
+*
+* Configures ctags for RISC OS environment.
+*
+* We currently use UnixLib. This file is subject to change if
+* we move to using SharedCLibrary and libGNU.
+*/
+#ifndef E_RISCOS_H
+#define E_RISCOS_H
+
+#define MACROS_USE_PATTERNS 1
+#define DEFAULT_FILE_FORMAT 2
+#define INTERNAL_SORT 1 /* Not all systems will have installed sort(1) */
+#define TMPDIR "<Wimp$ScrapDir>"
+
+/* Various definitions for UnixLib */
+#define STDC_HEADERS 1
+#define HAVE_CHMOD 1
+#define HAVE_CHSIZE 1
+#define HAVE_CLOCK 1
+#define HAVE_FGETPOS 1
+#define HAVE_FNMATCH 1
+#define HAVE_FTRUNCATE 1
+#define HAVE_MKSTEMP 1
+#define HAVE_OPENDIR 1
+#define HAVE_PUTENV 1
+#define HAVE_REGCOMP 1 /* Requires RegEx library */
+#define HAVE_REMOVE 1
+#define HAVE_SETENV 1
+#define HAVE_STRERROR 1
+#define HAVE_STRICMP 1
+#define HAVE_STRNICMP 1
+#define HAVE_STRSTR 1
+#define HAVE_TIMES 1
+#define HAVE_TRUNCATE 1
+#define HAVE_DIRENT_H 1
+#define HAVE_FCNTL_H 1
+#define HAVE_FNMATCH_H 1
+#define HAVE_STAT_H 1
+#define HAVE_STDLIB_H 1
+#define HAVE_STRING_H 1
+#define HAVE_SYS_DIR_H 1
+#define HAVE_SYS_STAT_H 1
+#define HAVE_SYS_TIMES_H 1
+#define HAVE_SYS_TYPES_H 1
+#define HAVE_TIME_H 1
+#define HAVE_UNISTD_H 1
+
+#endif
diff --git a/e_vms.h b/e_vms.h
new file mode 100644
index 0000000..b5cfa36
--- /dev/null
+++ b/e_vms.h
@@ -0,0 +1,31 @@
+/*
+* $Id: e_vms.h 136 2002-03-08 22:35:19Z darren $
+*
+* Copyright (c) 2002, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* Configures ctags for VMS environment.
+*/
+#ifndef E_VMS_H
+#define E_VMS_H
+
+#define CASE_INSENSITIVE_FILENAMES 1
+#define HAVE_STDLIB_H 1
+#define HAVE_TIME_H 1
+#ifdef VAXC
+# define HAVE_STAT_H 1
+# define HAVE_TYPES_H 1
+#else
+# define HAVE_FCNTL_H 1
+# define HAVE_SYS_STAT_H 1
+# define HAVE_SYS_TYPES_H 1
+#endif
+#define HAVE_CLOCK 1
+#define HAVE_FGETPOS 1
+#define HAVE_STRERROR 1
+#define HAVE_STRSTR 1
+#define HAVE_UNISTD_H 1
+
+#endif
diff --git a/eiffel.c b/eiffel.c
new file mode 100644
index 0000000..b504ac3
--- /dev/null
+++ b/eiffel.c
@@ -0,0 +1,1352 @@
+/*
+* $Id: eiffel.c 706 2009-06-28 23:09:30Z dhiebert $
+*
+* Copyright (c) 1998-2002, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* This module contains functions for generating tags for Eiffel language
+* files.
+*/
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#ifdef TYPE_REFERENCE_TOOL
+#include <stdio.h>
+#endif
+#include <string.h>
+#include <limits.h>
+#include <ctype.h> /* to define tolower () */
+#include <setjmp.h>
+
+#include "debug.h"
+#include "keyword.h"
+#include "routines.h"
+#include "vstring.h"
+#ifndef TYPE_REFERENCE_TOOL
+#include "entry.h"
+#include "options.h"
+#include "parse.h"
+#include "read.h"
+#endif
+
+/*
+* MACROS
+*/
+#define isident(c) (isalnum(c) || (c) == '_')
+#define isFreeOperatorChar(c) ((c) == '@' || (c) == '#' || \
+ (c) == '|' || (c) == '&')
+#define isType(token,t) (boolean) ((token)->type == (t))
+#define isKeyword(token,k) (boolean) ((token)->keyword == (k))
+
+/*
+* DATA DECLARATIONS
+*/
+
+typedef enum eException { ExceptionNone, ExceptionEOF } exception_t;
+
+/* Used to specify type of keyword.
+ */
+typedef enum eKeywordId { /* One ID per Eiffel keyword listed in EiffelKeywordTable. */
+ KEYWORD_NONE = -1, /* "not a keyword"; readToken () resets token->keyword to this value */
+ KEYWORD_alias, KEYWORD_all, KEYWORD_and, KEYWORD_as, KEYWORD_assign,
+ KEYWORD_check, KEYWORD_class, KEYWORD_convert, KEYWORD_create,
+ KEYWORD_creation, KEYWORD_Current,
+ KEYWORD_debug, KEYWORD_deferred, KEYWORD_do, KEYWORD_else,
+ KEYWORD_elseif, KEYWORD_end, KEYWORD_ensure, KEYWORD_expanded,
+ KEYWORD_export, KEYWORD_external, KEYWORD_false, KEYWORD_feature,
+ KEYWORD_from, KEYWORD_frozen, KEYWORD_if, KEYWORD_implies,
+ KEYWORD_indexing, KEYWORD_infix, KEYWORD_inherit, KEYWORD_inspect,
+ KEYWORD_invariant, KEYWORD_is, KEYWORD_like, KEYWORD_local,
+ KEYWORD_loop, KEYWORD_not, KEYWORD_obsolete, KEYWORD_old, KEYWORD_once,
+ KEYWORD_or, KEYWORD_prefix, KEYWORD_redefine, KEYWORD_rename,
+ KEYWORD_require, KEYWORD_rescue, KEYWORD_Result, KEYWORD_retry,
+ KEYWORD_select, KEYWORD_separate, KEYWORD_strip, KEYWORD_then,
+ KEYWORD_true, KEYWORD_undefine, KEYWORD_unique, KEYWORD_until,
+ KEYWORD_variant, KEYWORD_when, KEYWORD_xor
+} keywordId;
+
+/* Used to determine whether keyword is valid for the token language and
+ * what its ID is.
+ */
+typedef struct sKeywordDesc { /* One row of EiffelKeywordTable. */
+ const char *name; /* keyword spelling handed to addKeyword () */
+ keywordId id; /* ID registered for that spelling */
+} keywordDesc;
+
+typedef enum eTokenType { /* Lexical classes assigned by readToken (). */
+ TOKEN_UNDEFINED, /* state at the start of readToken (); also left in place for unrecognised characters */
+ TOKEN_BANG, /* '!' */
+ TOKEN_CHARACTER, /* character constant opened by a single quote */
+ TOKEN_CLOSE_BRACE, /* '}' */
+ TOKEN_CLOSE_BRACKET, /* ']' */
+ TOKEN_CLOSE_PAREN, /* ')' */
+ TOKEN_COLON, /* ':' not followed by '=' */
+ TOKEN_COMMA, /* ',' */
+ TOKEN_CONSTRAINT, /* "->" */
+ TOKEN_DOT, /* '.' */
+ TOKEN_DOLLAR, /* '$' */
+ TOKEN_IDENTIFIER, /* alphabetic-led word that is not a keyword */
+ TOKEN_KEYWORD, /* alphabetic-led word found by analyzeToken () */
+ TOKEN_NUMERIC, /* digit-led literal */
+ TOKEN_OPEN_BRACE, /* '{' */
+ TOKEN_OPEN_BRACKET, /* '[' */
+ TOKEN_OPEN_PAREN, /* '(' */
+ TOKEN_OPERATOR, /* built-in or free operator */
+ TOKEN_OTHER, /* not assigned by readToken () in this file */
+ TOKEN_QUESTION, /* '?' not followed by '=' */
+ TOKEN_SEMICOLON, /* ';' */
+ TOKEN_SEPARATOR, /* not assigned by readToken () in this file */
+ TOKEN_STRING, /* manifest string opened by a double quote */
+ TOKEN_TILDE /* '~' */
+} tokenType;
+
+typedef struct sTokenInfo { /* Current token plus the class/feature context carried along with it. */
+ tokenType type; /* lexical class set by readToken () */
+ keywordId keyword; /* KEYWORD_NONE unless readToken () recognised a keyword */
+ boolean isExported; /* TRUE by default (newToken); updated by parseExport () */
+ vString* string; /* token text; cleared at the start of every readToken () */
+ vString* className; /* class name recorded by parseClass ()/makeEiffelClassTag () */
+ vString* featureName; /* feature name recorded by makeEiffelFeatureTag () */
+} tokenInfo;
+
+/*
+* DATA DEFINITIONS
+*/
+
+static langType Lang_eiffel;
+
+#ifdef TYPE_REFERENCE_TOOL
+
+static const char *FileName;
+static FILE *File;
+static int PrintClass;
+static int PrintReferences;
+static int SelfReferences;
+static int Debug;
+static stringList *GenericNames;
+static stringList *ReferencedTypes;
+
+#else
+
+typedef enum { /* Indexes into EiffelKinds []. */
+ EKIND_CLASS, EKIND_FEATURE, EKIND_LOCAL, EKIND_QUALIFIED_TAGS /* NOTE(review): EiffelKinds has only three rows, none for EKIND_QUALIFIED_TAGS */
+} eiffelKind;
+
+static kindOption EiffelKinds [] = { /* Tag kinds, indexed by eiffelKind; the tag makers read .enabled, .letter and .name. */
+ { TRUE, 'c', "class", "classes"}, /* EKIND_CLASS */
+ { TRUE, 'f', "feature", "features"}, /* EKIND_FEATURE */
+ { FALSE, 'l', "local", "local entities"} /* EKIND_LOCAL; the leading FALSE is presumably "disabled by default"; confirm in kindOption */
+};
+
+#endif
+
+static jmp_buf Exception;
+
+static const keywordDesc EiffelKeywordTable [] = { /* Spelling-to-ID map registered by buildEiffelKeywordHash (). */
+ /* keyword keyword ID */
+ { "alias", KEYWORD_alias },
+ { "all", KEYWORD_all },
+ { "and", KEYWORD_and },
+ { "as", KEYWORD_as },
+ { "assign", KEYWORD_assign },
+ { "check", KEYWORD_check },
+ { "class", KEYWORD_class },
+ { "convert", KEYWORD_convert },
+ { "create", KEYWORD_create },
+ { "creation", KEYWORD_creation },
+ { "current", KEYWORD_Current }, /* NOTE(review): lower-case spelling for a capitalised ID; presumably lookup ignores case; confirm in analyzeToken () */
+ { "debug", KEYWORD_debug },
+ { "deferred", KEYWORD_deferred },
+ { "do", KEYWORD_do },
+ { "else", KEYWORD_else },
+ { "elseif", KEYWORD_elseif },
+ { "end", KEYWORD_end },
+ { "ensure", KEYWORD_ensure },
+ { "expanded", KEYWORD_expanded },
+ { "export", KEYWORD_export },
+ { "external", KEYWORD_external },
+ { "false", KEYWORD_false },
+ { "feature", KEYWORD_feature },
+ { "from", KEYWORD_from },
+ { "frozen", KEYWORD_frozen },
+ { "if", KEYWORD_if },
+ { "implies", KEYWORD_implies },
+ { "indexing", KEYWORD_indexing },
+ { "infix", KEYWORD_infix },
+ { "inherit", KEYWORD_inherit },
+ { "inspect", KEYWORD_inspect },
+ { "invariant", KEYWORD_invariant },
+ { "is", KEYWORD_is },
+ { "like", KEYWORD_like },
+ { "local", KEYWORD_local },
+ { "loop", KEYWORD_loop },
+ { "not", KEYWORD_not },
+ { "obsolete", KEYWORD_obsolete },
+ { "old", KEYWORD_old },
+ { "once", KEYWORD_once },
+ { "or", KEYWORD_or },
+ { "prefix", KEYWORD_prefix },
+ { "redefine", KEYWORD_redefine },
+ { "rename", KEYWORD_rename },
+ { "require", KEYWORD_require },
+ { "rescue", KEYWORD_rescue },
+ { "result", KEYWORD_Result }, /* same lower-case spelling remark as for "current" */
+ { "retry", KEYWORD_retry },
+ { "select", KEYWORD_select },
+ { "separate", KEYWORD_separate },
+ { "strip", KEYWORD_strip },
+ { "then", KEYWORD_then },
+ { "true", KEYWORD_true },
+ { "undefine", KEYWORD_undefine },
+ { "unique", KEYWORD_unique },
+ { "until", KEYWORD_until },
+ { "variant", KEYWORD_variant },
+ { "when", KEYWORD_when },
+ { "xor", KEYWORD_xor }
+};
+
+/*
+* FUNCTION DEFINITIONS
+*/
+
+static void buildEiffelKeywordHash (void) /* Registers every EiffelKeywordTable row with addKeyword () for Lang_eiffel. */
+{
+ const size_t count = sizeof (EiffelKeywordTable) /
+ sizeof (EiffelKeywordTable [0]); /* number of rows in the table */
+ size_t i;
+ for (i = 0 ; i < count ; ++i)
+ {
+ const keywordDesc* const p = &EiffelKeywordTable [i];
+ addKeyword (p->name, Lang_eiffel, (int) p->id); /* Lang_eiffel must already be set; see initialize () */
+ }
+}
+
+#ifdef TYPE_REFERENCE_TOOL
+
+static void addGenericName (tokenInfo *const token) /* Records the token text as a formal generic name. */
+{
+ vStringUpper (token->string); /* the token itself is upper-cased in place */
+ if (vStringLength (token->string) > 0)
+ stringListAdd (GenericNames, vStringNewCopy (token->string)); /* the list receives its own copy */
+}
+
+static boolean isGeneric (tokenInfo *const token) /* Returns whether stringListHas () finds the token text in GenericNames. */
+{
+ return (boolean) stringListHas (GenericNames, vStringValue (token->string));
+}
+
+static void reportType (tokenInfo *const token) /* Prints a referenced type name on stdout the first time it is seen. */
+{
+ vStringUpper (token->string); /* upper-cased in place before any comparison */
+ if (vStringLength (token->string) > 0 && ! isGeneric (token) && /* skip empty names and formal generics */
+ (SelfReferences || strcmp (vStringValue ( /* skip the class's own name unless -s was given */
+ token->string), vStringValue (token->className)) != 0) &&
+ ! stringListHas (ReferencedTypes, vStringValue (token->string))) /* skip names already reported */
+ {
+ printf ("%s\n", vStringValue (token->string));
+ stringListAdd (ReferencedTypes, vStringNewCopy (token->string)); /* remember it so it is printed only once */
+ }
+}
+
+static int fileGetc (void) /* Reads one character from File, turning "\r\n" and a lone "\r" into "\n"; returns EOF at end of input. */
+{
+ int c = getc (File);
+ if (c == '\r')
+ {
+ c = getc (File);
+ if (c != '\n')
+ {
+ ungetc (c, File); /* lone CR: keep the character after it for the next call */
+ c = '\n';
+ }
+ }
+ if (Debug > 0 && c != EOF)
+ putc (c, errout); /* echo the input when Debug is set (main's -d option) */
+ return c;
+}
+
+static int fileUngetc (int c) /* Pushes 'c' back onto File and returns the result of ungetc (). The parameter is now declared int explicitly: implicit int is not valid since C99. */
+{
+ return ungetc (c, File);
+}
+
+extern char *readLine (vString *const vLine, FILE *const fp) /* Stub for the TYPE_REFERENCE_TOOL build: ignores both arguments. */
+{
+ return NULL; /* presumably only present to satisfy a link-time reference; confirm */
+}
+
+#else
+
+/*
+* Tag generation functions
+*/
+
+static void makeEiffelClassTag (tokenInfo *const token) /* Emits a class tag for the token text and records it as the current class name. */
+{
+ if (EiffelKinds [EKIND_CLASS].enabled)
+ {
+ const char *const name = vStringValue (token->string);
+ tagEntryInfo e;
+
+ initTagEntry (&e, name);
+
+ e.kindName = EiffelKinds [EKIND_CLASS].name;
+ e.kind = EiffelKinds [EKIND_CLASS].letter;
+
+ makeTagEntry (&e);
+ }
+ vStringCopy (token->className, token->string); /* done even when class tags are disabled: later tags use it as scope */
+}
+
+static void makeEiffelFeatureTag (tokenInfo *const token) /* Emits a feature tag scoped to the current class and records the feature name. */
+{
+ if (EiffelKinds [EKIND_FEATURE].enabled &&
+ (token->isExported || Option.include.fileScope)) /* non-exported features are tagged only when file-scope tags are requested */
+ {
+ const char *const name = vStringValue (token->string);
+ tagEntryInfo e;
+
+ initTagEntry (&e, name);
+
+ e.isFileScope = (boolean) (! token->isExported);
+ e.kindName = EiffelKinds [EKIND_FEATURE].name;
+ e.kind = EiffelKinds [EKIND_FEATURE].letter;
+ e.extensionFields.scope [0] = EiffelKinds [EKIND_CLASS].name; /* scope kind: "class" */
+ e.extensionFields.scope [1] = vStringValue (token->className); /* scope name: enclosing class */
+
+ makeTagEntry (&e);
+
+ if (Option.include.qualifiedTags)
+ {
+ vString* qualified = vStringNewInit (vStringValue (token->className)); /* builds "<class>.<feature>" */
+ vStringPut (qualified, '.');
+ vStringCat (qualified, token->string);
+ e.name = vStringValue (qualified); /* same entry emitted a second time under the qualified name */
+ makeTagEntry (&e);
+ vStringDelete (qualified);
+ }
+ }
+ vStringCopy (token->featureName, token->string); /* recorded unconditionally; makeEiffelLocalTag () reads it */
+}
+
+static void makeEiffelLocalTag (tokenInfo *const token) /* Emits a file-scope tag for a local entity, scoped to "<class>.<feature>". */
+{
+ if (EiffelKinds [EKIND_LOCAL].enabled && Option.include.fileScope) /* locals need both the kind and file-scope tags enabled */
+ {
+ const char *const name = vStringValue (token->string);
+ vString* scope = vStringNew ();
+ tagEntryInfo e;
+
+ initTagEntry (&e, name);
+
+ e.isFileScope = TRUE;
+ e.kindName = EiffelKinds [EKIND_LOCAL].name;
+ e.kind = EiffelKinds [EKIND_LOCAL].letter;
+
+ vStringCopy (scope, token->className); /* scope = className '.' featureName */
+ vStringPut (scope, '.');
+ vStringCat (scope, token->featureName);
+
+ e.extensionFields.scope [0] = EiffelKinds [EKIND_FEATURE].name;
+ e.extensionFields.scope [1] = vStringValue (scope);
+
+ makeTagEntry (&e);
+ vStringDelete (scope); /* released only after makeTagEntry () has used it */
+ }
+}
+
+#endif
+
+/*
+* Parsing functions
+*/
+
+static int skipToCharacter (const int c) /* Discards input up to and including the first 'c'; returns 'c', or EOF if input ends first. */
+{
+ int d;
+
+ do
+ {
+ d = fileGetc ();
+ } while (d != EOF && d != c);
+
+ return d;
+}
+
+/* If a numeric is passed in 'c', this is used as the first digit of the
+ * numeric being parsed.
+ */
+static vString *parseInteger (int c) /* Returns a new vString holding an optional '-' plus digits and underscores; callers in this file free it with vStringDelete (). */
+{
+ vString *string = vStringNew ();
+
+ if (c == '\0') /* no first character supplied: read one */
+ c = fileGetc ();
+ if (c == '-')
+ {
+ vStringPut (string, c); /* the sign is kept in the result */
+ c = fileGetc ();
+ }
+ else if (! isdigit (c)) /* a single non-digit lead character is skipped */
+ c = fileGetc ();
+ while (c != EOF && (isdigit (c) || c == '_')) /* underscores are copied verbatim, not stripped */
+ {
+ vStringPut (string, c);
+ c = fileGetc ();
+ }
+ vStringTerminate (string);
+ fileUngetc (c); /* the character that ended the run is pushed back */
+
+ return string;
+}
+
+static vString *parseNumeric (int c) /* Returns a new vString with the numeric literal starting at 'c': integer part, optional ".digits", optional exponent. */
+{
+ vString *string = vStringNew ();
+ vString *integer = parseInteger (c); /* integer part */
+ vStringCopy (string, integer);
+ vStringDelete (integer);
+
+ c = fileGetc ();
+ if (c == '.')
+ {
+ integer = parseInteger ('\0'); /* fractional digits */
+ vStringPut (string, c);
+ vStringCat (string, integer);
+ vStringDelete (integer);
+ c = fileGetc ();
+ }
+ if (tolower (c) == 'e')
+ {
+ integer = parseInteger ('\0'); /* exponent; parseInteger () accepts a leading '-' */
+ vStringPut (string, c);
+ vStringCat (string, integer);
+ vStringDelete (integer);
+ }
+ else if (!isspace (c))
+ fileUngetc (c); /* a whitespace terminator is consumed; anything else is pushed back */
+
+ vStringTerminate (string);
+
+ return string;
+}
+
+static int parseEscapedCharacter (void) /* Reads the code that follows a '%' and returns the character it stands for; returns '\0' for unknown codes. */
+{
+ int d = '\0'; /* result; stays '\0' unless a case below sets it */
+ int c = fileGetc ();
+
+ switch (c)
+ {
+ case 'A': d = '@'; break;
+ case 'B': d = '\b'; break;
+ case 'C': d = '^'; break;
+ case 'D': d = '$'; break;
+ case 'F': d = '\f'; break;
+ case 'H': d = '\\'; break;
+ case 'L': d = '~'; break;
+ case 'N': d = '\n'; break;
+#ifdef QDOS
+ case 'Q': d = 0x9F; break; /* QDOS builds use character code 0x9F here */
+#else
+ case 'Q': d = '`'; break;
+#endif
+ case 'R': d = '\r'; break;
+ case 'S': d = '#'; break;
+ case 'T': d = '\t'; break;
+ case 'U': d = '\0'; break;
+ case 'V': d = '|'; break;
+ case '%': d = '%'; break;
+ case '\'': d = '\''; break;
+ case '"': d = '"'; break;
+ case '(': d = '['; break;
+ case ')': d = ']'; break;
+ case '<': d = '{'; break;
+ case '>': d = '}'; break;
+
+ case '\n': skipToCharacter ('%'); break; /* '%' at end of line: skip to the next '%'; result stays '\0' */
+
+ case '/': /* numeric form: %/code/ */
+ {
+ vString *string = parseInteger ('\0');
+ const char *value = vStringValue (string);
+ const unsigned long ascii = atol (value); /* a negative value wraps to a large unsigned number and fails the range test below */
+ vStringDelete (string);
+
+ c = fileGetc ();
+ if (c == '/' && ascii < 256) /* accepted only with the closing '/' and a code below 256 */
+ d = ascii;
+ break;
+ }
+
+ default: break;
+ }
+ return d;
+}
+
+static int parseCharacter (void) /* Reads the remainder of a character constant and returns its value (readToken () ignores the result). */
+{
+ int c = fileGetc ();
+ int result = c;
+
+ if (c == '%')
+ result = parseEscapedCharacter (); /* %-escape inside the constant */
+
+ c = fileGetc ();
+ if (c != '\'')
+ skipToCharacter ('\n'); /* closing quote missing: drop the rest of the line */
+
+ return result;
+}
+
+static void parseString (vString *const string) /* Reads a manifest string (opening quote already consumed) into 'string', including the verbatim form opened by '[' at end of line. */
+{
+ boolean verbatim = FALSE; /* TRUE once a verbatim opener has been seen */
+ boolean align = FALSE; /* TRUE for the '[' opener: leading whitespace of each line is skipped */
+ boolean end = FALSE;
+ vString *verbatimCloser = vStringNew (); /* text that must precede the closing quote in verbatim mode */
+ vString *lastLine = vStringNew (); /* characters collected since the last newline (verbatim mode only) */
+ int prev = '\0'; /* last character appended to 'string' */
+ int c;
+
+ while (! end)
+ {
+ c = fileGetc ();
+ if (c == EOF)
+ end = TRUE;
+ else if (c == '"')
+ {
+ if (! verbatim)
+ end = TRUE;
+ else
+ end = (boolean) (strcmp (vStringValue (lastLine), /* in verbatim mode a quote ends the string only when the line so far equals the closer */
+ vStringValue (verbatimCloser)) == 0);
+ }
+ else if (c == '\n')
+ {
+ if (verbatim)
+ vStringClear (lastLine); /* start collecting a new line */
+ if (prev == '[' /* || prev == '{' */)
+ {
+ verbatim = TRUE;
+ vStringClear (verbatimCloser);
+ vStringClear (lastLine);
+ if (prev == '{') /* NOTE(review): cannot be true here, the enclosing test only admits '[' */
+ vStringPut (verbatimCloser, '}');
+ else
+ {
+ vStringPut (verbatimCloser, ']');
+ align = TRUE;
+ }
+ vStringNCat (verbatimCloser, string, vStringLength (string) - 1); /* closer = ']' + the text read so far minus its final '[' */
+ vStringClear (string); /* the opener text is not part of the string value */
+ }
+ if (verbatim && align)
+ {
+ do
+ c = fileGetc ();
+ while (isspace (c)); /* NOTE(review): c may be EOF or a quote after this loop and is still appended below */
+ }
+ }
+ else if (c == '%')
+ c = parseEscapedCharacter ();
+ if (! end)
+ {
+ vStringPut (string, c);
+ if (verbatim)
+ {
+ vStringPut (lastLine, c);
+ vStringTerminate (lastLine); /* keep lastLine usable by the strcmp () above */
+ }
+ prev = c;
+ }
+ }
+ vStringTerminate (string);
+ vStringDelete (lastLine);
+ vStringDelete (verbatimCloser);
+}
+
+/* Reads an identifier (letters, digits, '_') beginning with "firstChar" and
+ * appends it to "string". */
+static void parseIdentifier (vString *const string, const int firstChar)
+{
+ int c = firstChar;
+
+ do
+ {
+ vStringPut (string, c);
+ c = fileGetc ();
+ } while (isident (c));
+
+ vStringTerminate (string);
+ if (!isspace (c)) /* a whitespace terminator is simply consumed */
+ fileUngetc (c); /* unget non-identifier character */
+}
+
+static void parseFreeOperator (vString *const string, const int firstChar) /* Appends a free operator starting with "firstChar" to "string". */
+{
+ int c = firstChar;
+
+ do
+ {
+ vStringPut (string, c);
+ c = fileGetc ();
+ } while (c > ' '); /* stops at the first character not greater than a space, which includes EOF */
+
+ vStringTerminate (string);
+ if (!isspace (c)) /* a whitespace terminator is simply consumed */
+ fileUngetc (c); /* unget non-identifier character */
+}
+
+static void copyToken (tokenInfo* dst, const tokenInfo *src) /* Copies every field of 'src' into the already-allocated 'dst'. */
+{
+ dst->type = src->type;
+ dst->keyword = src->keyword;
+ dst->isExported = src->isExported;
+
+ vStringCopy (dst->string, src->string); /* string contents are copied; the vString objects are not shared */
+ vStringCopy (dst->className, src->className);
+ vStringCopy (dst->featureName, src->featureName);
+}
+
+static tokenInfo *newToken (void) /* Allocates a token with empty strings; release it with deleteToken (). */
+{
+ tokenInfo *const token = xMalloc (1, tokenInfo);
+
+ token->type = TOKEN_UNDEFINED;
+ token->keyword = KEYWORD_NONE;
+ token->isExported = TRUE; /* features count as exported until parseExport () says otherwise */
+
+ token->string = vStringNew ();
+ token->className = vStringNew ();
+ token->featureName = vStringNew ();
+
+ return token;
+}
+
+static void deleteToken (tokenInfo *const token) /* Frees a token created by newToken () together with its three strings. */
+{
+ vStringDelete (token->string);
+ vStringDelete (token->className);
+ vStringDelete (token->featureName);
+
+ eFree (token);
+}
+
+static void readToken (tokenInfo *const token) /* Reads the next token into 'token', skipping blanks and "--" comments; at end of input it does not return but longjmps to Exception. */
+{
+ int c;
+
+ token->type = TOKEN_UNDEFINED;
+ token->keyword = KEYWORD_NONE;
+ vStringClear (token->string); /* className, featureName and isExported are left untouched */
+
+getNextChar:
+
+ do
+ c = fileGetc ();
+ while (c == '\t' || c == ' ' || c == '\n'); /* skip blanks between tokens */
+
+ switch (c)
+ {
+ case EOF: longjmp (Exception, (int)ExceptionEOF); break; /* unwinds to the setjmp () in findEiffelTags () */
+ case ';': token->type = TOKEN_SEMICOLON; break;
+ case '!': token->type = TOKEN_BANG; break;
+ case '}': token->type = TOKEN_CLOSE_BRACE; break;
+ case ']': token->type = TOKEN_CLOSE_BRACKET; break;
+ case ')': token->type = TOKEN_CLOSE_PAREN; break;
+ case ',': token->type = TOKEN_COMMA; break;
+ case '$': token->type = TOKEN_DOLLAR; break;
+ case '.': token->type = TOKEN_DOT; break;
+ case '{': token->type = TOKEN_OPEN_BRACE; break;
+ case '[': token->type = TOKEN_OPEN_BRACKET; break;
+ case '(': token->type = TOKEN_OPEN_PAREN; break;
+ case '~': token->type = TOKEN_TILDE; break;
+
+
+ case '+':
+ case '*':
+ case '^':
+ case '=': token->type = TOKEN_OPERATOR; break;
+
+ case '-': /* "->", "--" comment, or minus */
+ c = fileGetc ();
+ if (c == '>')
+ token->type = TOKEN_CONSTRAINT;
+ else if (c == '-') /* is this the start of a comment? */
+ {
+ skipToCharacter ('\n'); /* drop the comment and scan again */
+ goto getNextChar;
+ }
+ else
+ {
+ if (!isspace (c))
+ fileUngetc (c);
+ token->type = TOKEN_OPERATOR;
+ }
+ break;
+
+ case '?':
+ case ':':
+ {
+ int c2 = fileGetc ();
+ if (c2 == '=') /* "?=" and ":=" are operators */
+ token->type = TOKEN_OPERATOR;
+ else
+ {
+ if (!isspace (c2))
+ fileUngetc (c2);
+ if (c == ':')
+ token->type = TOKEN_COLON;
+ else
+ token->type = TOKEN_QUESTION;
+ }
+ break;
+ }
+
+ case '<': /* "<", "<=", "<>" */
+ c = fileGetc ();
+ if (c != '=' && c != '>' && !isspace (c))
+ fileUngetc (c);
+ token->type = TOKEN_OPERATOR;
+ break;
+
+ case '>': /* ">", ">=", ">>" */
+ c = fileGetc ();
+ if (c != '=' && c != '>' && !isspace (c))
+ fileUngetc (c);
+ token->type = TOKEN_OPERATOR;
+ break;
+
+ case '/': /* "/", "//", "/=" */
+ c = fileGetc ();
+ if (c != '/' && c != '=' && !isspace (c))
+ fileUngetc (c);
+ token->type = TOKEN_OPERATOR;
+ break;
+
+ case '\\': /* "\" or "\\" */
+ c = fileGetc ();
+ if (c != '\\' && !isspace (c))
+ fileUngetc (c);
+ token->type = TOKEN_OPERATOR;
+ break;
+
+ case '"':
+ token->type = TOKEN_STRING;
+ parseString (token->string); /* string contents become the token text */
+ break;
+
+ case '\'':
+ token->type = TOKEN_CHARACTER;
+ parseCharacter (); /* value discarded; token text stays empty */
+ break;
+
+ default:
+ if (isalpha (c))
+ {
+ parseIdentifier (token->string, c);
+ token->keyword = analyzeToken (token->string, Lang_eiffel); /* keyword lookup */
+ if (isKeyword (token, KEYWORD_NONE))
+ token->type = TOKEN_IDENTIFIER;
+ else
+ token->type = TOKEN_KEYWORD;
+ }
+ else if (isdigit (c))
+ {
+ vString* numeric = parseNumeric (c);
+ vStringCat (token->string, numeric);
+ vStringDelete (numeric);
+ token->type = TOKEN_NUMERIC;
+ }
+ else if (isFreeOperatorChar (c))
+ {
+ parseFreeOperator (token->string, c);
+ token->type = TOKEN_OPERATOR;
+ }
+ else
+ {
+ token->type = TOKEN_UNDEFINED; /* unrecognised character */
+ Assert (! isType (token, TOKEN_UNDEFINED)); /* always false here; presumably traps only when Assert is active; confirm in debug.h */
+ }
+ break;
+ }
+}
+
+/*
+* Scanning functions
+*/
+
+static boolean isIdentifierMatch ( /* TRUE when 'token' is an identifier whose text equals 'name', ignoring case. */
+ const tokenInfo *const token, const char *const name)
+{
+ return (boolean) (isType (token, TOKEN_IDENTIFIER) &&
+ strcasecmp (vStringValue (token->string), name) == 0);
+}
+
+static void findToken (tokenInfo *const token, const tokenType type) /* Advances until the current token has the given type; returns at once if it already does. */
+{
+ while (! isType (token, type))
+ readToken (token); /* leaves via readToken ()'s longjmp if input ends first */
+}
+
+static void findKeyword (tokenInfo *const token, const keywordId keyword) /* Advances until the current token is the given keyword; returns at once if it already is. */
+{
+ while (! isKeyword (token, keyword))
+ readToken (token); /* leaves via readToken ()'s longjmp if input ends first */
+}
+
+static boolean parseType (tokenInfo *const token);
+
+static void parseGeneric (tokenInfo *const token, boolean declaration __unused__) /* Consumes a bracketed generic list starting at '['; on return the token is the one after the matching ']'. */
+{
+ unsigned int depth = 0; /* bracket nesting level */
+#ifdef TYPE_REFERENCE_TOOL
+ boolean constraint = FALSE; /* TRUE right after "->" at depth 1 */
+#endif
+ Assert (isType (token, TOKEN_OPEN_BRACKET));
+ do
+ {
+ if (isType (token, TOKEN_OPEN_BRACKET))
+ {
+ ++depth;
+ readToken (token);
+ }
+ else if (isType (token, TOKEN_CLOSE_BRACKET))
+ {
+ --depth;
+ readToken (token);
+ }
+#ifdef TYPE_REFERENCE_TOOL
+ else if (declaration) /* class header: collect formal generic names and report constraint types */
+ {
+ boolean advanced = FALSE; /* TRUE when parseType () already moved past the token */
+ if (depth == 1)
+ {
+ if (isType (token, TOKEN_CONSTRAINT))
+ constraint = TRUE;
+ else if (isKeyword (token, KEYWORD_create))
+ findKeyword (token, KEYWORD_end); /* skip from "create" to its "end" */
+ else if (isType (token, TOKEN_IDENTIFIER))
+ {
+ if (constraint)
+ advanced = parseType (token); /* identifier after "->" is a constraining type */
+ else
+ addGenericName (token); /* otherwise it is a formal generic name */
+ constraint = FALSE;
+ }
+ }
+ else if (isType (token, TOKEN_IDENTIFIER))
+ advanced = parseType (token);
+ if (! advanced)
+ readToken (token);
+ }
+#endif
+ else
+ parseType (token); /* always advances by at least one token */
+ } while (depth > 0);
+}
+
+static boolean parseType (tokenInfo *const token) /* Consumes one type starting at the current token and leaves 'token' on what follows it; always returns TRUE. */
+{
+ tokenInfo* const id = newToken (); /* private copy of the token that starts the type */
+ copyToken (id, token);
+ readToken (token);
+ if (isType (token, TOKEN_COLON)) /* check for "{entity: TYPE}" */
+ {
+ readToken (id); /* the token after the colon becomes the type start */
+ readToken (token);
+ }
+ if (isKeyword (id, KEYWORD_like))
+ {
+ if (isType (token, TOKEN_IDENTIFIER) ||
+ isKeyword (token, KEYWORD_Current))
+ readToken (token); /* skip the anchor after "like" */
+ }
+ else
+ {
+ if (isKeyword (id, KEYWORD_expanded))
+ {
+ copyToken (id, token); /* the type name follows "expanded" */
+ readToken (token);
+ }
+ if (isType (id, TOKEN_IDENTIFIER))
+ {
+#ifdef TYPE_REFERENCE_TOOL
+ reportType (id);
+#endif
+ if (isType (token, TOKEN_OPEN_BRACKET))
+ parseGeneric (token, FALSE); /* actual generic parameters */
+ else if ((strcmp ("BIT", vStringValue (id->string)) == 0)) /* case-sensitive compare; reportType () has upper-cased id only in the tool build */
+ readToken (token); /* read token after number of bits */
+ }
+ }
+ deleteToken (id);
+ return TRUE;
+}
+
+static void parseEntityType (tokenInfo *const token) /* Called on the ':' of an entity declaration; consumes the type that follows. */
+{
+ Assert (isType (token, TOKEN_COLON));
+ readToken (token);
+
+ if (isType (token, TOKEN_BANG) || isType (token, TOKEN_QUESTION))
+ readToken (token); /* skip over '!' or '?' */
+ parseType (token);
+}
+
+static void parseLocal (tokenInfo *const token) /* Consumes a "local" clause up to the following "do" or "once", tagging each identifier met on the way. */
+{
+ Assert (isKeyword (token, KEYWORD_local));
+ readToken (token);
+
+ /* Check keyword first in case local clause is empty
+ */
+ while (! isKeyword (token, KEYWORD_do) &&
+ ! isKeyword (token, KEYWORD_once))
+ {
+#ifndef TYPE_REFERENCE_TOOL
+ if (isType (token, TOKEN_IDENTIFIER))
+ makeEiffelLocalTag (token);
+#endif
+ readToken (token);
+ if (isType (token, TOKEN_COLON))
+ parseEntityType (token); /* consumes the type so its name is not tagged as a local */
+ }
+}
+
+static void findFeatureEnd (tokenInfo *const token) /* Skips the remainder of a feature declaration: a routine body up to its matching "end", or a manifest constant. */
+{
+ boolean isFound = isKeyword (token, KEYWORD_is);
+ if (isFound)
+ readToken (token); /* step over "is" */
+ switch (token->keyword)
+ {
+ case KEYWORD_deferred: /* any of these opens a routine body */
+ case KEYWORD_do:
+ case KEYWORD_external:
+ case KEYWORD_local:
+ case KEYWORD_obsolete:
+ case KEYWORD_once:
+ case KEYWORD_require:
+ {
+ int depth = 1; /* number of "end" keywords still expected */
+
+ while (depth > 0)
+ {
+#ifdef TYPE_REFERENCE_TOOL
+ if (isType (token, TOKEN_OPEN_BRACE)) /* "{TYPE}" inside the body */
+ {
+ readToken (token);
+ if (isType (token, TOKEN_IDENTIFIER))
+ parseType (token);
+ }
+ else if (isType (token, TOKEN_BANG)) /* "!TYPE!" inside the body */
+ {
+ readToken (token);
+ if (isType (token, TOKEN_IDENTIFIER))
+ parseType (token);
+ if (isType (token, TOKEN_BANG))
+ readToken (token);
+ }
+ else
+#endif
+ switch (token->keyword)
+ {
+ case KEYWORD_check: /* each of these constructs is closed by its own "end" */
+ case KEYWORD_debug:
+ case KEYWORD_from:
+ case KEYWORD_if:
+ case KEYWORD_inspect:
+ ++depth;
+ break;
+
+ case KEYWORD_local:
+ parseLocal (token); /* returns on "do"/"once", which the readToken () below then skips */
+ break;
+
+ case KEYWORD_end:
+ --depth;
+ break;
+
+ default:
+ break;
+ }
+ readToken (token);
+ }
+ break;
+ }
+
+ default:
+ /* is this a manifest constant? */
+ if (isFound || isType (token, TOKEN_OPERATOR)) {
+ if (isType (token, TOKEN_OPERATOR))
+ readToken (token); /* skip the operator that precedes the value */
+ readToken (token); /* skip the value itself */
+ }
+ break;
+ }
+}
+
+static boolean readFeatureName (tokenInfo *const token) /* Returns TRUE when the current token (after an optional "frozen") names a feature; may advance the token. */
+{
+ boolean isFeatureName = FALSE;
+
+ if (isKeyword (token, KEYWORD_frozen))
+ readToken (token); /* "frozen" is a prefix, not part of the name */
+ if (isType (token, TOKEN_IDENTIFIER))
+ isFeatureName = TRUE;
+ else if (isKeyword (token, KEYWORD_assign)) /* legacy code */
+ isFeatureName = TRUE;
+ else if (isKeyword (token, KEYWORD_infix) ||
+ isKeyword (token, KEYWORD_prefix))
+ {
+ readToken (token);
+ if (isType (token, TOKEN_STRING)) /* operator features are named by the string that follows */
+ isFeatureName = TRUE;
+ }
+ return isFeatureName;
+}
+
+static void parseArguments (tokenInfo *const token) /* Consumes a formal argument list and leaves 'token' just after the ')'. */
+{
+#ifndef TYPE_REFERENCE_TOOL
+ findToken (token, TOKEN_CLOSE_PAREN); /* tag generation only needs to skip the list */
+ readToken (token);
+#else
+ Assert (isType (token, TOKEN_OPEN_PAREN));
+ readToken (token);
+ do
+ {
+ if (isType (token, TOKEN_COLON))
+ parseEntityType (token); /* the tool build parses each argument type */
+ else
+ readToken (token);
+ } while (! isType (token, TOKEN_CLOSE_PAREN));
+ readToken (token);
+#endif
+}
+
+static boolean parseFeature (tokenInfo *const token) /* Parses one feature declaration (possibly several comma-separated names); returns FALSE if the token does not start one. */
+{
+ boolean found = FALSE;
+ while (readFeatureName (token)) /* one iteration per name in "a, b, c" */
+ {
+ found = TRUE;
+#ifndef TYPE_REFERENCE_TOOL
+ makeEiffelFeatureTag (token);
+#endif
+ readToken (token);
+ if (isType (token, TOKEN_COMMA))
+ readToken (token);
+ }
+ if (found)
+ {
+ if (isKeyword (token, KEYWORD_alias)) {
+ readToken (token);
+#ifndef TYPE_REFERENCE_TOOL
+ if (isType (token, TOKEN_STRING))
+ makeEiffelFeatureTag (token); /* the alias string is tagged as a feature as well */
+#endif
+ readToken (token);
+ }
+ if (isType (token, TOKEN_OPEN_PAREN)) /* arguments? */
+ parseArguments (token);
+ if (isType (token, TOKEN_COLON)) /* a query? */
+ parseEntityType (token);
+ if (isKeyword (token, KEYWORD_assign))
+ {
+ readToken (token); /* skip "assign" ... */
+ readToken (token); /* ... and the single token after it */
+ }
+ if (isKeyword (token, KEYWORD_obsolete))
+ {
+ readToken (token);
+ if (isType (token, TOKEN_STRING))
+ readToken (token); /* skip the string after "obsolete" */
+ }
+ findFeatureEnd (token);
+ }
+ return found;
+}
+
+static void parseExport (tokenInfo *const token) /* Called on "feature"; reads an optional "{...}" client list and sets token->isExported. */
+{
+ token->isExported = TRUE; /* no client list means exported */
+ readToken (token);
+ if (isType (token, TOKEN_OPEN_BRACE))
+ {
+ token->isExported = FALSE;
+ while (! isType (token, TOKEN_CLOSE_BRACE))
+ {
+ if (isType (token, TOKEN_IDENTIFIER))
+ token->isExported |= !isIdentifierMatch (token, "NONE"); /* any client other than NONE makes the clause exported */
+ readToken (token);
+ }
+ readToken (token);
+ }
+}
+
+static void parseFeatureClauses (tokenInfo *const token) /* Parses consecutive "feature" clauses until "end", "invariant" or "indexing". */
+{
+ Assert (isKeyword (token, KEYWORD_feature));
+ do
+ {
+ if (isKeyword (token, KEYWORD_feature))
+ parseExport (token); /* start of a new clause: refresh the export status */
+ if (! isKeyword (token, KEYWORD_feature) &&
+ ! isKeyword (token, KEYWORD_invariant) &&
+ ! isKeyword (token, KEYWORD_indexing))
+ {
+ if (! parseFeature (token))
+ readToken (token); /* not a feature: skip one token so the loop makes progress */
+ }
+ } while (! isKeyword (token, KEYWORD_end) &&
+ ! isKeyword (token, KEYWORD_invariant) &&
+ ! isKeyword (token, KEYWORD_indexing));
+}
+
+static void parseRename (tokenInfo *const token) /* Parses "rename old as new, ..." and tags each new name as a feature. */
+{
+ Assert (isKeyword (token, KEYWORD_rename));
+ do {
+ readToken (token); /* skips "rename" first, then each ',' */
+ if (readFeatureName (token)) /* old name */
+ {
+ readToken (token);
+ if (isKeyword (token, KEYWORD_as))
+ {
+ readToken (token);
+ if (readFeatureName (token)) /* new name */
+ {
+#ifndef TYPE_REFERENCE_TOOL
+ makeEiffelFeatureTag (token); /* renamed feature */
+#endif
+ readToken (token);
+ }
+ }
+ }
+ } while (isType (token, TOKEN_COMMA));
+}
+
+static void parseInherit (tokenInfo *const token) /* Parses an "inherit" clause: a list of parent types, each with an optional feature adaptation part. */
+{
+ Assert (isKeyword (token, KEYWORD_inherit));
+ readToken (token);
+ while (isType (token, TOKEN_IDENTIFIER)) /* one iteration per parent */
+ {
+ parseType (token);
+ if (isType (token, TOKEN_KEYWORD))
+ {
+ switch (token->keyword) /* check for feature adaptation */
+ {
+ case KEYWORD_rename:
+ parseRename (token); /* no break: execution continues into the skip-to-"end" code below */
+ case KEYWORD_export:
+ case KEYWORD_undefine:
+ case KEYWORD_redefine:
+ case KEYWORD_select:
+ findKeyword (token, KEYWORD_end); /* skip the rest of the adaptation part */
+ readToken (token);
+ break;
+
+ case KEYWORD_end:
+ readToken (token);
+ break;
+
+ default: break;
+ }
+ }
+ if (isType (token, TOKEN_SEMICOLON))
+ readToken (token); /* skip the separator between parents */
+ }
+}
+
+static void parseConvert (tokenInfo *const token) /* Intended to skip a "convert" clause. */
+{
+ Assert (isKeyword (token, KEYWORD_convert));
+ do
+ {
+ readToken (token);
+ if (! isType (token, TOKEN_IDENTIFIER))
+ break;
+ else if (isType (token, TOKEN_OPEN_PAREN)) /* NOTE(review): unreachable as written; the token is known to be an identifier here, no token is read after it */
+ {
+ while (! isType (token, TOKEN_CLOSE_PAREN))
+ readToken (token);
+ }
+ else if (isType (token, TOKEN_COLON)) /* NOTE(review): unreachable for the same reason */
+ {
+ readToken (token);
+ if (! isType (token, TOKEN_OPEN_BRACE))
+ break;
+ else while (! isType (token, TOKEN_CLOSE_BRACE))
+ readToken (token);
+ }
+ } while (isType (token, TOKEN_COMMA)); /* consequently false after an identifier: the body runs once */
+}
+
+static void parseClass (tokenInfo *const token) /* Parses one class, starting on "class" and stopping on the token that is the class's "end". */
+{
+ Assert (isKeyword (token, KEYWORD_class));
+ readToken (token);
+ if (isType (token, TOKEN_IDENTIFIER))
+ {
+#ifndef TYPE_REFERENCE_TOOL
+ makeEiffelClassTag (token);
+ readToken (token);
+#else
+ vStringCopy (token->className, token->string);
+ vStringUpper (token->className);
+ if (PrintClass)
+ puts (vStringValue (token->className));
+ if (! PrintReferences)
+ exit (0); /* only the class name was requested: nothing more to do */
+ readToken (token);
+#endif
+ }
+
+ do
+ {
+ if (isType (token, TOKEN_OPEN_BRACKET))
+ parseGeneric (token, TRUE); /* formal generic parameters of the class */
+ else if (! isType (token, TOKEN_KEYWORD))
+ readToken (token);
+ else switch (token->keyword)
+ {
+ case KEYWORD_inherit: parseInherit (token); break;
+ case KEYWORD_feature: parseFeatureClauses (token); break;
+ case KEYWORD_convert: parseConvert (token); break;
+ default: readToken (token); break; /* any other keyword is skipped */
+ }
+ } while (! isKeyword (token, KEYWORD_end));
+}
+
+static void initialize (const langType language) /* Stores the language ID and registers the Eiffel keywords under it. */
+{
+ Lang_eiffel = language; /* must be set first: buildEiffelKeywordHash () reads it */
+ buildEiffelKeywordHash ();
+}
+
+static void findEiffelTags (void) /* Parser entry point: processes every class in the input until readToken () signals end of input. */
+{
+ tokenInfo *const token = newToken ();
+ exception_t exception;
+
+ exception = (exception_t) (setjmp (Exception)); /* ExceptionNone on the direct call; ExceptionEOF when readToken () longjmps here */
+ while (exception == ExceptionNone) /* 'exception' never changes inside the loop; the loop is left only through the longjmp */
+ {
+ findKeyword (token, KEYWORD_class);
+ parseClass (token);
+ }
+ deleteToken (token);
+}
+
+#ifndef TYPE_REFERENCE_TOOL
+
+extern parserDefinition* EiffelParser (void) /* Builds and returns the parser definition for Eiffel. */
+{
+ static const char *const extensions [] = { "e", NULL }; /* file extensions handled; NULL-terminated */
+ parserDefinition* def = parserNew ("Eiffel");
+ def->kinds = EiffelKinds;
+ def->kindCount = KIND_COUNT (EiffelKinds);
+ def->extensions = extensions;
+ def->parser = findEiffelTags;
+ def->initialize = initialize;
+ return def;
+}
+
+#else
+
+static void findReferences (void) /* Tool-build driver: sets up the two name lists, runs the parser over File, then frees the lists. */
+{
+ ReferencedTypes = stringListNew ();
+ GenericNames = stringListNew ();
+ initialize (0); /* the tool build passes 0 as the language ID */
+
+ findEiffelTags ();
+
+ stringListDelete (GenericNames);
+ GenericNames = NULL;
+ stringListDelete (ReferencedTypes);
+ ReferencedTypes = NULL;
+}
+
+static const char *const Usage =
+ "Prints names of types referenced by an Eiffel language file.\n"
+ "\n"
+ "Usage: %s [-cdrs] [file_name | -]\n"
+ "\n"
+ "Options:\n"
+ " -c Print class name of current file (on first line of output).\n"
+ " -d Enable debug output.\n"
+ " -r Print types referenced by current file (default unless -c).\n"
+ " -s Include self-references.\n"
+ "\n";
+
+extern int main (int argc, char** argv) /* Tool-build entry point: parses the option letters and one input (a file name or "-" for stdin), then prints the referenced types. */
+{
+ int i;
+ for (i = 1 ; argv [i] != NULL ; ++i)
+ {
+ const char *const arg = argv [i];
+ if (arg [0] == '-')
+ {
+ int j;
+ if (arg [1] == '\0') /* a lone "-" selects standard input */
+ {
+ File = stdin;
+ FileName = "stdin";
+ }
+ else for (j = 1 ; arg [j] != '\0' ; ++j) switch (arg [j]) /* option letters may be bundled, e.g. "-cs" */
+ {
+ case 'c': PrintClass = 1; break;
+ case 'r': PrintReferences = 1; break;
+ case 's': SelfReferences = 1; break;
+ case 'd': Debug = 1; break;
+ default:
+ fprintf (errout, "%s: unknown option: %c\n", argv [0], arg [j]); /* report the offending letter, not always the first one of the bundle */
+ fprintf (errout, Usage, argv [0]);
+ exit (1);
+ break;
+ }
+ }
+ else if (File != NULL) /* a second input is a usage error */
+ {
+ fprintf (errout, Usage, argv [0]);
+ exit (1);
+ }
+ else
+ {
+ FileName = arg;
+ File = fopen (FileName, "r");
+ if (File == NULL)
+ {
+ perror (argv [0]);
+ exit (1);
+ }
+ }
+ }
+ if (! PrintClass)
+ PrintReferences = 1; /* references are the default output unless only -c was given */
+ if (File == NULL) /* no input named at all */
+ {
+ fprintf (errout, Usage, argv [0]);
+ exit (1);
+ }
+ else
+ {
+ findReferences ();
+ fclose (File);
+ }
+ return 0;
+}
+
+#endif
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/entry.c b/entry.c
new file mode 100644
index 0000000..3890e50
--- /dev/null
+++ b/entry.c
@@ -0,0 +1,847 @@
+/*
+* $Id: entry.c 443 2006-05-30 04:37:13Z darren $
+*
+* Copyright (c) 1996-2002, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* This module contains functions for creating tag entries.
+*/
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#include <string.h>
+#include <ctype.h> /* to define isspace () */
+#include <errno.h>
+
+#if defined (HAVE_SYS_TYPES_H)
+# include <sys/types.h> /* to declare off_t on some hosts */
+#endif
+#if defined (HAVE_TYPES_H)
+# include <types.h> /* to declare off_t on some hosts */
+#endif
+#if defined (HAVE_UNISTD_H)
+# include <unistd.h> /* to declare close (), ftruncate (), truncate () */
+#endif
+
+/* These header files provide for the functions necessary to do file
+ * truncation.
+ */
+#ifdef HAVE_FCNTL_H
+# include <fcntl.h>
+#endif
+#ifdef HAVE_IO_H
+# include <io.h>
+#endif
+
+#include "debug.h"
+#include "ctags.h"
+#include "entry.h"
+#include "main.h"
+#include "options.h"
+#include "read.h"
+#include "routines.h"
+#include "sort.h"
+#include "strlist.h"
+
+/*
+* MACROS
+*/
+#define PSEUDO_TAG_PREFIX "!_"
+
+#define includeExtensionFlags() (Option.tagFileFormat > 1)
+
+/*
+ * Portability defines
+ */
+#if !defined(HAVE_TRUNCATE) && !defined(HAVE_FTRUNCATE) && !defined(HAVE_CHSIZE)
+# define USE_REPLACEMENT_TRUNCATE
+#endif
+
+/* Hack for ridiculous practice of Microsoft Visual C++.
+ */
+#if defined (WIN32) && defined (_MSC_VER)
+# define chsize _chsize
+# define open _open
+# define close _close
+# define O_RDWR _O_RDWR
+#endif
+
+/*
+* DATA DEFINITIONS
+*/
+
+tagFile TagFile = { /* state of the tag file being written; members in entry.h order */
+ NULL, /* tag file name */
+ NULL, /* tag file directory (absolute) */
+ NULL, /* file pointer */
+ { 0, 0 }, /* numTags: added, prev */
+ { 0, 0, 0 }, /* max: line, tag, file */
+ { NULL, NULL, 0 }, /* etags: name, fp, byteCount */
+ NULL /* vLine: line buffer, created on first openTagFile () */
+};
+
+static boolean TagsToStdout = FALSE; /* set by openTagFile () from isDestinationStdout () */
+
+/*
+* FUNCTION PROTOTYPES
+*/
+#ifdef NEED_PROTO_TRUNCATE
+extern int truncate (const char *path, off_t length);
+#endif
+
+#ifdef NEED_PROTO_FTRUNCATE
+extern int ftruncate (int fd, off_t length);
+#endif
+
+/*
+* FUNCTION DEFINITIONS
+*/
+
+/* Releases the directory string and the line buffer held in TagFile.
+ *
+ * Both members are reset to NULL afterwards: openTagFile () allocates the
+ * line buffer only when TagFile.vLine is NULL, so a stale pointer left here
+ * would be reused after it had been freed.
+ */
+extern void freeTagFileResources (void)
+{
+    if (TagFile.directory != NULL)
+    {
+        eFree (TagFile.directory);
+        TagFile.directory = NULL;
+    }
+    vStringDelete (TagFile.vLine);
+    TagFile.vLine = NULL;
+}
+
+/* Returns the name currently stored in TagFile.name (NULL while no tag file
+ * is open, since closeTagFile () clears it).
+ */
+extern const char *tagFileName (void)
+{
+    const char *const currentName = TagFile.name;
+
+    return currentName;
+}
+
+/*
+* Pseudo tag support
+*/
+
+/* Raises the recorded maximum tag-name and line lengths in TagFile.max when
+ * the given values exceed them; smaller values leave the maxima untouched.
+ */
+static void rememberMaxLengths (const size_t nameLength, const size_t lineLength)
+{
+    TagFile.max.tag  = (nameLength > TagFile.max.tag)
+                       ? nameLength : TagFile.max.tag;
+    TagFile.max.line = (lineLength > TagFile.max.line)
+                       ? lineLength : TagFile.max.line;
+}
+
+/* Writes one pseudo-tag line of the form
+ *     !_<tagName> TAB <fileName> TAB /<pattern>/
+ * to the tag file, counts it as an added tag and records its lengths.
+ */
+static void writePseudoTag (
+        const char *const tagName,
+        const char *const fileName,
+        const char *const pattern)
+{
+    int written;
+
+    written = fprintf (TagFile.fp, "%s%s\t%s\t/%s/\n",
+                       PSEUDO_TAG_PREFIX, tagName, fileName, pattern);
+    TagFile.numTags.added += 1;
+    rememberMaxLengths (strlen (tagName), (size_t) written);
+}
+
+/* Writes the standard block of pseudo-tags (format, sort order, program
+ * identification) at the start of a new tag file. Nothing is written in
+ * xref mode.
+ */
+static void addPseudoTags (void)
+{
+    char formatText [11];
+    const char *formatNote;
+    const char *sortedText;
+
+    if (Option.xref)
+        return;
+
+    sprintf (formatText, "%u", Option.tagFileFormat);
+
+    switch (Option.tagFileFormat)
+    {
+        case 1:
+            formatNote = "original ctags format";
+            break;
+        case 2:
+            formatNote =
+                "extended format; --format=1 will not append ;\" to lines";
+            break;
+        default:
+            formatNote = "unknown format";
+            break;
+    }
+
+    if (Option.sorted == SO_FOLDSORTED)
+        sortedText = "2";
+    else if (Option.sorted == SO_SORTED)
+        sortedText = "1";
+    else
+        sortedText = "0";
+
+    writePseudoTag ("TAG_FILE_FORMAT", formatText, formatNote);
+    writePseudoTag ("TAG_FILE_SORTED", sortedText,
+                    "0=unsorted, 1=sorted, 2=foldcase");
+    writePseudoTag ("TAG_PROGRAM_AUTHOR", AUTHOR_NAME, AUTHOR_EMAIL);
+    writePseudoTag ("TAG_PROGRAM_NAME", PROGRAM_NAME, "");
+    writePseudoTag ("TAG_PROGRAM_URL", PROGRAM_URL, "official site");
+    writePseudoTag ("TAG_PROGRAM_VERSION", PROGRAM_VERSION, "");
+}
+
+/* Rewrites, in place, the flag character of an existing "sorted" pseudo-tag
+ * so that it matches the current Option.sorted.
+ *
+ * line         text of the pseudo-tag line, already read from fp
+ * fp           tag file, open for update and positioned after that line
+ * startOfLine  file position of the first character of that line
+ *
+ * The flag is the character following the first tab. It is rewritten only
+ * when it currently holds '0', '1' or '2' and differs from the wanted value.
+ * The stream is left positioned where it was on entry, if that position
+ * could be recorded; otherwise a warning is issued and nothing is changed.
+ */
+static void updateSortedFlag (
+        const char *const line, FILE *const fp, fpos_t startOfLine)
+{
+    const char *const tab = strchr (line, '\t');
+    /* Flag character describing the sort order of this run. */
+    const int wanted = Option.sorted == SO_FOLDSORTED ? '2' :
+                       (Option.sorted == SO_SORTED ? '1' : '0');
+    fpos_t nextLine;
+
+    if (tab == NULL)
+        return;
+    if (tab [1] != '0' && tab [1] != '1' && tab [1] != '2')
+        return;
+
+    if (fgetpos (fp, &nextLine) != 0 || fsetpos (fp, &startOfLine) != 0)
+        error (WARNING, "Failed to update 'sorted' pseudo-tag");
+    else
+    {
+        fpos_t flagLocation;
+        int c;
+
+        /* Re-read the line byte by byte to find the file position of the
+         * flag; stop at end of file as well so a short file cannot hang us.
+         */
+        do
+            c = fgetc (fp);
+        while (c != '\t' && c != '\n' && c != EOF);
+
+        if (c == '\t' && fgetpos (fp, &flagLocation) == 0)
+        {
+            const int d = fgetc (fp);
+
+            /* Compare characters with characters: the stored flag is a
+             * digit, not the numeric value of Option.sorted.
+             */
+            if ((d == '0' || d == '1' || d == '2') && d != wanted &&
+                fsetpos (fp, &flagLocation) == 0)
+                fputc (wanted, fp);
+        }
+        fsetpos (fp, &nextLine);
+    }
+}
+
+/* Looks through the leading lines of an existing tag file that start with
+ * '!' and updates those "!_TAG_FILE" pseudo-tags which require it (at
+ * present only the "_SORTED" flag). The rest of the file is then read to
+ * the end.
+ *
+ * Returns the total number of lines read from fp.
+ */
+static long unsigned int updatePseudoTags (FILE *const fp)
+{
+    enum { maxEntryLength = 20 };
+    char entry [maxEntryLength + 1];
+    unsigned long linesRead = 0;
+    fpos_t startOfLine;
+    size_t entryLength;
+    const char *line;
+
+    sprintf (entry, "%sTAG_FILE", PSEUDO_TAG_PREFIX);
+    entryLength = strlen (entry);
+    Assert (entryLength < maxEntryLength);
+
+    fgetpos (fp, &startOfLine);
+    line = readLine (TagFile.vLine, fp);
+    while (line != NULL && line [0] == entry [0])
+    {
+        ++linesRead;
+        if (strncmp (line, entry, entryLength) == 0)
+        {
+            char tab, classType [16];
+
+            if (sscanf (line + entryLength, "%15s%c", classType, &tab) == 2 &&
+                tab == '\t')
+            {
+                if (strcmp (classType, "_SORTED") == 0)
+                    updateSortedFlag (line, fp, startOfLine);
+            }
+        }
+        /* Record the start of the line about to be read on every pass.
+         * Doing so only after "!_TAG_FILE" lines would leave a stale
+         * position behind whenever some other '!' line comes first, and
+         * updateSortedFlag () would then patch the wrong line.
+         */
+        fgetpos (fp, &startOfLine);
+        line = readLine (TagFile.vLine, fp);
+    }
+    while (line != NULL) /* skip to end of file */
+    {
+        ++linesRead;
+        line = readLine (TagFile.vLine, fp);
+    }
+    return linesRead;
+}
+
+/*
+ * Tag file management
+ */
+
+/* Reports whether excmd looks like the address field of a tag line: either
+ * a search pattern (first character '/' or '?') or a line number, i.e. a
+ * non-empty run of digits ending at ';', a newline or the end of the string.
+ * An empty string is not a valid address.
+ */
+static boolean isValidTagAddress (const char *const excmd)
+{
+    boolean isValid = FALSE;
+
+    /* Test for the empty string first: strchr () also matches the
+     * terminating null character of its search set.
+     */
+    if (excmd [0] == '\0')
+        isValid = FALSE;
+    else if (strchr ("/?", excmd [0]) != NULL)
+        isValid = TRUE;
+    else
+    {
+        /* Length of the address proper, up to ';' or newline. */
+        const size_t addressLength = strcspn (excmd, ";\n");
+
+        if (addressLength > 0 &&
+            strspn (excmd, "0123456789") == addressLength)
+            isValid = TRUE;
+    }
+    return isValid;
+}
+
+/* Reports whether line has the shape of a ctags entry:
+ *     <tag> TAB <file> TAB <excmd>
+ *
+ * There must be exactly five scanned fields: the two tab fields must hold
+ * exactly one tab each, the tag must not begin with "#", the file name must
+ * not end with ";", and the excmd must be acceptable to
+ * isValidTagAddress (). These conditions reject tag-looking lines like:
+ *     int a;        <C-comment>
+ *     #define LABEL <C-comment>
+ */
+static boolean isCtagsLine (const char *const line)
+{
+    enum fieldList { TAG, TAB1, SRC_FILE, TAB2, EXCMD, NUM_FIELDS };
+    /* No field can be longer than the line itself, so one slot of
+     * strlen (line) + 1 bytes per field is always enough.
+     */
+    const size_t slot = strlen (line) + 1;
+    char *const storage = xMalloc (NUM_FIELDS * slot, char);
+    boolean ok = FALSE; /* we assume not unless confirmed */
+
+    if (storage == NULL)
+    {
+        error (FATAL, "Cannot analyze tag file");
+        return ok;
+    }
+    {
+        char *const tagName = storage + (size_t) TAG * slot;
+        char *const tab1    = storage + (size_t) TAB1 * slot;
+        char *const srcFile = storage + (size_t) SRC_FILE * slot;
+        char *const tab2    = storage + (size_t) TAB2 * slot;
+        char *const excmd   = storage + (size_t) EXCMD * slot;
+
+        const int matched = sscanf (
+                line, "%[^\t]%[\t]%[^\t]%[\t]%[^\r\n]",
+                tagName, tab1, srcFile, tab2, excmd);
+
+        /* The field tests rely on short-circuit order: the strings are
+         * only meaningful once all five conversions succeeded.
+         */
+        if (matched == NUM_FIELDS &&
+            strlen (tab1) == 1 &&
+            strlen (tab2) == 1 &&
+            tagName [0] != '#' &&
+            srcFile [strlen (srcFile) - 1] != ';' &&
+            isValidTagAddress (excmd))
+            ok = TRUE;
+    }
+    eFree (storage);
+    return ok;
+}
+
+/* Reports whether line opens an etags section: a form feed immediately
+ * followed by a line feed or carriage return.
+ */
+static boolean isEtagsLine (const char *const line)
+{
+    if (line [0] != '\f')
+        return FALSE;
+    return (boolean) (line [1] == '\n' || line [1] == '\r');
+}
+
+/* Decides whether filename may safely be overwritten as a tag file.
+ *
+ * A file that does not exist (ENOENT) or is empty is acceptable; otherwise
+ * its first line must look like a ctags or an etags line. Any other failure
+ * to open the file yields FALSE.
+ */
+static boolean isTagFile (const char *const filename)
+{
+    FILE *const fp = fopen (filename, "rb");
+    boolean ok;
+
+    if (fp == NULL)
+        return (boolean) (errno == ENOENT);
+
+    {
+        const char *const firstLine = readLine (TagFile.vLine, fp);
+
+        if (firstLine == NULL)
+            ok = TRUE;
+        else
+            ok = (boolean) (isCtagsLine (firstLine) || isEtagsLine (firstLine));
+    }
+    fclose (fp);
+    return ok;
+}
+
+/* Copies bytes from fromFp to toFp, starting at their current positions.
+ *
+ * size > 0   copy at most size bytes
+ * size == 0  copy nothing
+ * size < 0   copy until end of input (WHOLE_FILE)
+ *
+ * A short write is reported as a fatal error. Copying also stops at the
+ * first short read.
+ */
+extern void copyBytes (FILE* const fromFp, FILE* const toFp, const long size)
+{
+    enum { BufferSize = 1000 };
+    long toRead, numRead;
+    char* buffer;
+    long remaining = size;
+
+    /* A zero count must not fall into the "whole buffer" path below, which
+     * is meant for the unlimited (negative) case only.
+     */
+    if (size == 0)
+        return;
+
+    buffer = xMalloc (BufferSize, char);
+    do
+    {
+        toRead = (0 < remaining && remaining < BufferSize) ?
+                    remaining : (long) BufferSize;
+        numRead = (long) fread (buffer, (size_t) 1, (size_t) toRead, fromFp);
+        if (fwrite (buffer, (size_t) 1, (size_t) numRead, toFp) < (size_t) numRead)
+            error (FATAL | PERROR, "cannot complete write");
+        if (remaining > 0)
+            remaining -= numRead;
+    } while (numRead == toRead && remaining != 0);
+    eFree (buffer);
+}
+
+/* Copies the file named from over the file named to, passing size on to
+ * copyBytes (). Failure to open either file is reported as a fatal error.
+ */
+extern void copyFile (const char *const from, const char *const to, const long size)
+{
+    FILE* const source = fopen (from, "rb");
+    FILE* destination;
+
+    if (source == NULL)
+    {
+        error (FATAL | PERROR, "cannot open file to copy");
+        return;
+    }
+
+    destination = fopen (to, "wb");
+    if (destination != NULL)
+    {
+        copyBytes (source, destination, size);
+        fclose (destination);
+    }
+    else
+        error (FATAL | PERROR, "cannot open copy destination");
+
+    fclose (source);
+}
+
+/* Opens the tag file for writing and records its name and directory in
+ * TagFile.
+ *
+ * When tags go to standard output they are first collected in a temporary
+ * file. Otherwise the file named by Option.tagFileName is used: an existing
+ * file that does not look like a tag file is refused, an existing ctags
+ * file opened for appending first has its pseudo-tags updated, and a new
+ * ctags file starts with the pseudo-tag block.
+ */
+extern void openTagFile (void)
+{
+    boolean fileExists;
+    boolean appendToExisting;
+
+    setDefaultTagFileName ();
+    TagsToStdout = isDestinationStdout ();
+
+    if (TagFile.vLine == NULL)
+        TagFile.vLine = vStringNew ();
+
+    /* Standard output: collect the tags in a temporary file. */
+    if (TagsToStdout)
+    {
+        TagFile.fp = tempFile ("w", &TagFile.name);
+        TagFile.directory = eStrdup (CurrentDirectory);
+        return;
+    }
+
+    /* Open the tags file.
+     */
+    setDefaultTagFileName ();
+    TagFile.name = eStrdup (Option.tagFileName);
+    fileExists = doesFileExist (TagFile.name);
+    if (fileExists && ! isTagFile (TagFile.name))
+        error (FATAL,
+            "\"%s\" doesn't look like a tag file; I refuse to overwrite it.",
+            TagFile.name);
+
+    appendToExisting = (boolean) (Option.append && fileExists);
+
+    if (Option.etags)
+        TagFile.fp = fopen (TagFile.name, appendToExisting ? "a+b" : "w+b");
+    else if (appendToExisting)
+    {
+        /* First pass in update mode to patch the pseudo-tags, then reopen
+         * in append mode for the new entries.
+         */
+        TagFile.fp = fopen (TagFile.name, "r+");
+        if (TagFile.fp != NULL)
+        {
+            TagFile.numTags.prev = updatePseudoTags (TagFile.fp);
+            fclose (TagFile.fp);
+            TagFile.fp = fopen (TagFile.name, "a+");
+        }
+    }
+    else
+    {
+        TagFile.fp = fopen (TagFile.name, "w");
+        if (TagFile.fp != NULL)
+            addPseudoTags ();
+    }
+
+    if (TagFile.fp == NULL)
+    {
+        error (FATAL | PERROR, "cannot open tag file");
+        exit (1);
+    }
+    TagFile.directory = absoluteDirname (TagFile.name);
+}
+
+#ifdef USE_REPLACEMENT_TRUNCATE
+
+/* Replacement for a missing truncation library function: shortens the file
+ * called name to size bytes by copying that many bytes to a temporary file
+ * and then copying the temporary file back over the original. Always
+ * returns 0.
+ */
+static int replacementTruncate (const char *const name, const long size)
+{
+    char *scratchName = NULL;
+    FILE *const scratch = tempFile ("w", &scratchName);
+
+    /* Only the name is needed; copyFile () reopens the file itself. */
+    fclose (scratch);
+
+    copyFile (name, scratchName, size);
+    copyFile (scratchName, name, WHOLE_FILE);
+
+    remove (scratchName);
+    eFree (scratchName);
+    return 0;
+}
+
+#endif
+
+/* Finishes the tag file after it has been closed: sorts it when tags were
+ * added and sorting is enabled, or copies it out with catFile () when it is
+ * unsorted and bound for standard output. The temporary file used for
+ * standard output is removed in every case.
+ */
+static void sortTagFile (void)
+{
+    const boolean tagsAdded = (boolean) (TagFile.numTags.added > 0L);
+
+    if (tagsAdded && Option.sorted != SO_UNSORTED)
+    {
+        verbose ("sorting tag file\n");
+#ifdef EXTERNAL_SORT
+        externalSortTags (TagsToStdout);
+#else
+        internalSortTags (TagsToStdout);
+#endif
+    }
+    else if (tagsAdded && TagsToStdout)
+        catFile (tagFileName ());
+
+    if (TagsToStdout)
+        remove (tagFileName ()); /* remove temporary file */
+}
+
+/* Shortens the tag file to newSize bytes with whichever facility the
+ * configuration provides: the replacement routine, truncate (), or
+ * ftruncate () / chsize () on a freshly opened descriptor. A failure is
+ * reported on errout together with errno.
+ */
+static void resizeTagFile (const long newSize)
+{
+    int status;
+
+#ifdef USE_REPLACEMENT_TRUNCATE
+    status = replacementTruncate (TagFile.name, newSize);
+#else
+# ifdef HAVE_TRUNCATE
+    status = truncate (TagFile.name, (off_t) newSize);
+# else
+    const int descriptor = open (TagFile.name, O_RDWR);
+
+    if (descriptor != -1)
+    {
+# ifdef HAVE_FTRUNCATE
+        status = ftruncate (descriptor, (off_t) newSize);
+# else
+# ifdef HAVE_CHSIZE
+        status = chsize (descriptor, newSize);
+# endif
+# endif
+        close (descriptor);
+    }
+    else
+        status = -1;
+# endif
+#endif
+    if (status == -1)
+        fprintf (errout, "Cannot shorten tag file: errno = %d\n", errno);
+}
+
+/* Writes one etags "include" section to fp for every entry of
+ * Option.etagsInclude; does nothing when that list is absent.
+ */
+static void writeEtagsIncludes (FILE *const fp)
+{
+    unsigned int index = 0;
+
+    if (! Option.etagsInclude)
+        return;
+
+    while (index < stringListCount (Option.etagsInclude))
+    {
+        vString *const included = stringListItem (Option.etagsInclude, index);
+
+        fprintf (fp, "\f\n%s,include\n", vStringValue (included));
+        ++index;
+    }
+}
+
+/* Closes the tag file, optionally shrinks it, and hands it to
+ * sortTagFile ().
+ *
+ * The position reached by writing is compared with the size of the file.
+ * When resize is set and the file extends beyond that position, the file is
+ * cut back to it. TagFile.name is released and cleared afterwards.
+ */
+extern void closeTagFile (const boolean resize)
+{
+    long writtenSize, fileSize;
+
+    if (Option.etags)
+        writeEtagsIncludes (TagFile.fp);
+
+    writtenSize = ftell (TagFile.fp);   /* where writing stopped */
+    fseek (TagFile.fp, 0L, SEEK_END);
+    fileSize = ftell (TagFile.fp);      /* actual end of the file */
+    fclose (TagFile.fp);
+
+    if (resize && writtenSize < fileSize)
+    {
+        DebugStatement (
+            debugPrintf (DEBUG_STATUS, "shrinking %s from %ld to %ld bytes\n",
+                TagFile.name, fileSize, writtenSize); )
+        resizeTagFile (writtenSize);
+    }
+    sortTagFile ();
+    eFree (TagFile.name);
+    TagFile.name = NULL;
+}
+
+/* Starts collecting the etags entries of one source file: opens a temporary
+ * file for them and resets the running byte count.
+ */
+extern void beginEtagsFile (void)
+{
+    FILE *const scratch = tempFile ("w+b", &TagFile.etags.name);
+
+    TagFile.etags.fp = scratch;
+    TagFile.etags.byteCount = 0;
+}
+
+/* Ends the etags section of the source file called name: writes the section
+ * header (form feed, file name, byte count) to the tag file, appends the
+ * entries collected in the temporary file, then closes and removes that
+ * file and clears its bookkeeping in TagFile.etags.
+ */
+extern void endEtagsFile (const char *const name)
+{
+    const char *entry;
+
+    fprintf (TagFile.fp, "\f\n%s,%ld\n", name, (long) TagFile.etags.byteCount);
+    if (TagFile.etags.fp == NULL)
+        return;
+
+    rewind (TagFile.etags.fp);
+    for (entry = readLine (TagFile.vLine, TagFile.etags.fp) ;
+         entry != NULL ;
+         entry = readLine (TagFile.vLine, TagFile.etags.fp))
+        fputs (entry, TagFile.fp);
+
+    fclose (TagFile.etags.fp);
+    remove (TagFile.etags.name);
+    eFree (TagFile.etags.name);
+    TagFile.etags.fp = NULL;
+    TagFile.etags.name = NULL;
+}
+
+/*
+ * Tag entry management
+ */
+
+/* Copies line out to fp as the body of a search pattern and returns the
+ * number of characters written. Copying stops at the first line feed or
+ * carriage return, which is not written. A backslash is placed in front of
+ * every '\', of every search delimiter ('?' for backward searches,
+ * otherwise '/'), and of a '$' that is the last character before the line
+ * end.
+ */
+static size_t writeSourceLine (FILE *const fp, const char *const line)
+{
+    const int delimiter = Option.backward ? '?' : '/';
+    size_t written = 0;
+    const char *cursor = line;
+
+    /* Write everything up to, but not including, a line end character.
+     */
+    while (*cursor != '\0' && *cursor != CRETURN && *cursor != NEWLINE)
+    {
+        const int ch = *cursor;
+        const int following = *(cursor + 1);
+        const boolean atLineEnd =
+            (boolean) (following == NEWLINE || following == CRETURN);
+
+        if (ch == BACKSLASH || ch == delimiter || (ch == '$' && atLineEnd))
+        {
+            putc (BACKSLASH, fp);
+            ++written;
+        }
+        putc (ch, fp);
+        ++written;
+        ++cursor;
+    }
+    return written;
+}
+
+/* Writes "line" to fp up to, but not including, the newline, stripping
+ * leading white space and collapsing every later run of white space into a
+ * single space. A carriage return directly in front of the newline is
+ * dropped. Returns the number of characters written.
+ */
+static size_t writeCompactSourceLine (FILE *const fp, const char *const line)
+{
+    boolean lineStarted = FALSE;
+    size_t length = 0;
+    const char *p;
+
+    for (p = line ; *p != NEWLINE && *p != '\0' ; ++p)
+    {
+        /* <ctype.h> functions need an unsigned char value; a plain char
+         * may be negative.
+         */
+        int c = (unsigned char) *p;
+
+        /* Test for the CR of a CR-LF pair before white space is folded:
+         * CR is itself white space and would otherwise come out as a
+         * trailing space.
+         */
+        if (c == CRETURN && *(p + 1) == NEWLINE)
+            continue;
+
+        if (! lineStarted && isspace (c)) /* ignore leading spaces */
+            continue;
+        lineStarted = TRUE;
+
+        if (isspace (c))
+        {
+            /* Consume repeating white space.
+             */
+            while (*(p + 1) != NEWLINE && isspace ((unsigned char) *(p + 1)))
+                ++p;
+            c = ' '; /* force space character for any white space */
+        }
+        putc (c, fp);
+        ++length;
+    }
+    return length;
+}
+
+/* Writes one cross-reference line for tag: name, kind name (omitted in tag
+ * file format 1), line number and source file name in fixed-width columns,
+ * followed by the compacted source line. Returns the number of characters
+ * written, including the newline.
+ */
+static int writeXrefEntry (const tagEntryInfo *const tag)
+{
+    const char *const source =
+        readSourceLine (TagFile.vLine, tag->filePosition, NULL);
+    int written;
+
+    if (Option.tagFileFormat != 1)
+        written = fprintf (TagFile.fp, "%-16s %-10s %4lu %-16s ", tag->name,
+                           tag->kindName, tag->lineNumber, tag->sourceFileName);
+    else
+        written = fprintf (TagFile.fp, "%-16s %4lu %-16s ", tag->name,
+                           tag->lineNumber, tag->sourceFileName);
+
+    written += writeCompactSourceLine (TagFile.fp, source);
+    putc (NEWLINE, TagFile.fp);
+
+    return written + 1;
+}
+
+/* Truncates line just after the first occurrence of token. One character
+ * following the token is kept to mark the end of the tag, unless the token
+ * ends the string or that character is a newline and discardNewline is set.
+ * A line that does not contain token is left unchanged.
+ */
+static void truncateTagLine (
+        char *const line, const char *const token, const boolean discardNewline)
+{
+    char *const found = strstr (line, token);
+    char *cut;
+
+    if (found == NULL)
+        return;
+
+    cut = found + strlen (token);
+    if (*cut != '\0' && (*cut != '\n' || ! discardNewline))
+        ++cut; /* keep the character that terminates the tag */
+    *cut = '\0';
+}
+
+/* Writes the etags entry for tag to the temporary etags file and adds its
+ * length to TagFile.etags.byteCount.
+ *
+ * A file entry carries no source text. Any other entry starts with the
+ * source line, either truncated after the tag name or with its trailing
+ * newline removed, followed by the tag name, line number and file offset.
+ *
+ * Returns the value of fprintf (): the number of characters written, or a
+ * negative value on an output error.
+ */
+static int writeEtagsEntry (const tagEntryInfo *const tag)
+{
+    int length;
+
+    if (tag->isFileEntry)
+        length = fprintf (TagFile.etags.fp, "\177%s\001%lu,0\n",
+                tag->name, tag->lineNumber);
+    else
+    {
+        long seekValue;
+        char *const line =
+            readSourceLine (TagFile.vLine, tag->filePosition, &seekValue);
+
+        if (tag->truncateLine)
+            truncateTagLine (line, tag->name, TRUE);
+        else
+        {
+            /* Strip the newline only when there is one: an empty line must
+             * not be indexed at -1, and a last line without a newline must
+             * keep its final character.
+             */
+            const size_t lineLength = strlen (line);
+
+            if (lineLength > 0 && line [lineLength - 1] == '\n')
+                line [lineLength - 1] = '\0';
+        }
+
+        length = fprintf (TagFile.etags.fp, "%s\177%s\001%lu,%ld\n", line,
+                tag->name, tag->lineNumber, seekValue);
+    }
+    /* byteCount is unsigned; do not let a failed write wrap it around. */
+    if (length > 0)
+        TagFile.etags.byteCount += length;
+
+    return length;
+}
+
+/* Appends the enabled extension fields of tag to the current tag line and
+ * returns the number of characters written.
+ *
+ * The first field actually written is preceded by the ;" separator; every
+ * field starts with a tab. Which fields appear is governed by
+ * Option.extensionFields and by which members of tag are set.
+ */
+static int addExtensionFields (const tagEntryInfo *const tag)
+{
+    const char* const kindKey = Option.extensionFields.kindKey ? "kind:" : "";
+    boolean first = TRUE;
+    const char* separator = ";\"";
+    const char* const empty = "";
+    int length = 0;
+/* "sep" returns a value only the first time it is evaluated */
+#define sep (first ? (first = FALSE, separator) : empty)
+
+    /* Kind: long name when requested (or when no letter exists), otherwise
+     * the single letter.
+     */
+    if (tag->kindName != NULL && (Option.extensionFields.kindLong ||
+        (Option.extensionFields.kind && tag->kind == '\0')))
+        length += fprintf (TagFile.fp,"%s\t%s%s", sep, kindKey, tag->kindName);
+    else if (tag->kind != '\0' && (Option.extensionFields.kind ||
+        (Option.extensionFields.kindLong && tag->kindName == NULL)))
+        length += fprintf (TagFile.fp, "%s\t%s%c", sep, kindKey, tag->kind);
+
+    /* lineNumber is an unsigned long, so it must be printed with %lu. */
+    if (Option.extensionFields.lineNumber)
+        length += fprintf (TagFile.fp, "%s\tline:%lu", sep, tag->lineNumber);
+
+    if (Option.extensionFields.language && tag->language != NULL)
+        length += fprintf (TagFile.fp, "%s\tlanguage:%s", sep, tag->language);
+
+    /* scope [0] is the key, scope [1] the value. */
+    if (Option.extensionFields.scope &&
+        tag->extensionFields.scope [0] != NULL &&
+        tag->extensionFields.scope [1] != NULL)
+        length += fprintf (TagFile.fp, "%s\t%s:%s", sep,
+                tag->extensionFields.scope [0],
+                tag->extensionFields.scope [1]);
+
+    if (Option.extensionFields.typeRef &&
+        tag->extensionFields.typeRef [0] != NULL &&
+        tag->extensionFields.typeRef [1] != NULL)
+        length += fprintf (TagFile.fp, "%s\ttyperef:%s:%s", sep,
+                tag->extensionFields.typeRef [0],
+                tag->extensionFields.typeRef [1]);
+
+    if (Option.extensionFields.fileScope && tag->isFileScope)
+        length += fprintf (TagFile.fp, "%s\tfile:", sep);
+
+    if (Option.extensionFields.inheritance &&
+        tag->extensionFields.inheritance != NULL)
+        length += fprintf (TagFile.fp, "%s\tinherits:%s", sep,
+                tag->extensionFields.inheritance);
+
+    if (Option.extensionFields.access && tag->extensionFields.access != NULL)
+        length += fprintf (TagFile.fp, "%s\taccess:%s", sep,
+                tag->extensionFields.access);
+
+    if (Option.extensionFields.implementation &&
+        tag->extensionFields.implementation != NULL)
+        length += fprintf (TagFile.fp, "%s\timplementation:%s", sep,
+                tag->extensionFields.implementation);
+
+    if (Option.extensionFields.signature &&
+        tag->extensionFields.signature != NULL)
+        length += fprintf (TagFile.fp, "%s\tsignature:%s", sep,
+                tag->extensionFields.signature);
+
+    return length;
+#undef sep
+}
+
+/* Writes the search-pattern address of tag: the search delimiter ('?' for
+ * backward searches, otherwise '/'), a '^' anchor, the escaped source line,
+ * a '$' anchor when the line ends in a newline, and the closing delimiter.
+ * Returns the number of characters written.
+ */
+static int writePatternEntry (const tagEntryInfo *const tag)
+{
+    char *const line = readSourceLine (TagFile.vLine, tag->filePosition, NULL);
+    const int searchChar = Option.backward ? '?' : '/';
+    boolean newlineTerminated;
+    size_t lineLength;
+    int length = 0;
+
+    if (tag->truncateLine)
+        truncateTagLine (line, tag->name, FALSE);
+
+    /* Guard the empty line: indexing at strlen (line) - 1 would read in
+     * front of the buffer.
+     */
+    lineLength = strlen (line);
+    newlineTerminated =
+        (boolean) (lineLength > 0 && line [lineLength - 1] == '\n');
+
+    length += fprintf (TagFile.fp, "%c^", searchChar);
+    length += writeSourceLine (TagFile.fp, line);
+    length += fprintf (TagFile.fp, "%s%c", newlineTerminated ? "$":"", searchChar);
+
+    return length;
+}
+
+/* Writes the line number of tag as its address and returns the number of
+ * characters written.
+ */
+static int writeLineNumberEntry (const tagEntryInfo *const tag)
+{
+    const int written = fprintf (TagFile.fp, "%lu", tag->lineNumber);
+
+    return written;
+}
+
+/* Writes one complete ctags line for tag: name, source file, address (line
+ * number or search pattern), the extension fields when the tag file format
+ * allows them, and a newline. Returns the number of characters written.
+ */
+static int writeCtagsEntry (const tagEntryInfo *const tag)
+{
+    int total = fprintf (TagFile.fp, "%s\t%s\t",
+                         tag->name, tag->sourceFileName);
+
+    total += tag->lineNumberEntry ? writeLineNumberEntry (tag)
+                                  : writePatternEntry (tag);
+
+    if (includeExtensionFlags ())
+        total += addExtensionFields (tag);
+
+    total += fprintf (TagFile.fp, "\n");
+    return total;
+}
+
+/* Writes tag to the tag file in the active output format (xref, etags or
+ * ctags), counts it and records its lengths. A tag with an empty name is
+ * ignored with a warning. In xref mode file entries produce no output but
+ * are still counted.
+ */
+extern void makeTagEntry (const tagEntryInfo *const tag)
+{
+    int length = 0;
+
+    Assert (tag->name != NULL);
+    if (tag->name [0] == '\0')
+    {
+        error (WARNING, "ignoring null tag in %s", vStringValue (File.name));
+        return;
+    }
+
+    DebugStatement ( debugEntry (tag); )
+    if (! Option.xref)
+        length = Option.etags ? writeEtagsEntry (tag) : writeCtagsEntry (tag);
+    else if (! tag->isFileEntry)
+        length = writeXrefEntry (tag);
+
+    ++TagFile.numTags.added;
+    rememberMaxLengths (strlen (tag->name), (size_t) length);
+    DebugStatement ( fflush (TagFile.fp); )
+}
+
+/* Prepares e as a tag called name at the current position of the source
+ * file being read. Every member is zeroed first; the line number, language,
+ * file position and source file name are then taken from the reader, and
+ * the address style from Option.locate. The name pointer is stored, not
+ * copied.
+ */
+extern void initTagEntry (tagEntryInfo *const e, const char *const name)
+{
+    Assert (File.source.name != NULL);
+
+    memset (e, 0, sizeof *e);
+
+    e->lineNumberEntry = (boolean) (Option.locate == EX_LINENUM);
+    e->lineNumber      = getSourceLineNumber ();
+    e->language        = getSourceLanguageName ();
+    e->filePosition    = getInputFilePosition ();
+    e->sourceFileName  = getSourceFileTagPath ();
+    e->name            = name;
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/entry.h b/entry.h
new file mode 100644
index 0000000..2365c50
--- /dev/null
+++ b/entry.h
@@ -0,0 +1,103 @@
+/*
+* $Id: entry.h 443 2006-05-30 04:37:13Z darren $
+*
+* Copyright (c) 1998-2002, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* External interface to entry.c
+*/
+#ifndef _ENTRY_H
+#define _ENTRY_H
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#include <stdio.h>
+
+#include "vstring.h"
+
+/*
+* MACROS
+*/
+#define WHOLE_FILE -1L
+
+/*
+* DATA DECLARATIONS
+*/
+
+/* Maintains the state of the tag file.
+ */
+typedef struct eTagFile {
+ char *name; /* tag file name; a temporary file's name when tags go to stdout */
+ char *directory; /* tag file directory (absolute) */
+ FILE *fp; /* stream the tag entries are written to */
+ struct sNumTags { unsigned long added, prev; } numTags; /* added: entries written this run; prev: lines read from an existing file when appending */
+ struct sMax { size_t line, tag, file; } max; /* longest line and tag name written so far; "file" is not updated in entry.c */
+ struct sEtags {
+ char *name; /* name of the temporary per-source-file etags file */
+ FILE *fp; /* that temporary file, NULL when none is open */
+ size_t byteCount; /* bytes written to it, reported in the etags section header */
+ } etags;
+ vString *vLine; /* reusable buffer for lines read back from files */
+} tagFile;
+
+typedef struct sTagFields {
+ unsigned int count; /* number of additional extension flags */
+ const char *const *label; /* list of labels for extension flags (presumably count entries -- TODO confirm) */
+ const char *const *value; /* list of values for extension flags (presumably count entries -- TODO confirm) */
+} tagFields;
+
+/* Information about the current tag candidate.
+ */
+typedef struct sTagEntryInfo {
+ boolean lineNumberEntry; /* TRUE: address is a line number; FALSE: a search pattern */
+ unsigned long lineNumber; /* line number of tag */
+ fpos_t filePosition; /* file position of line containing tag */
+ const char* language; /* language of source file */
+ boolean isFileScope; /* is tag visible only within source file? */
+ boolean isFileEntry; /* is this just an entry for a file name? */
+ boolean truncateLine; /* truncate tag line at end of tag name? */
+ const char *sourceFileName; /* name of source file */
+ const char *name; /* name of the tag */
+ const char *kindName; /* kind of tag */
+ char kind; /* single character representation of kind */
+ struct {
+ const char* access; /* written as the "access:" field */
+ const char* fileScope; /* not read by entry.c, which tests isFileScope instead */
+ const char* implementation; /* written as the "implementation:" field */
+ const char* inheritance; /* written as the "inherits:" field */
+ const char* scope [2]; /* [0] = key (e.g. "module"), [1] = value; written as key:value */
+ const char* signature; /* written as the "signature:" field */
+
+ /* type (union/struct/etc.) and name for a variable or typedef. */
+ const char* typeRef [2]; /* e.g., "struct" and struct name */
+
+ } extensionFields; /* list of extension fields */
+} tagEntryInfo;
+
+/*
+* GLOBAL VARIABLES
+*/
+extern tagFile TagFile;
+
+/*
+* FUNCTION PROTOTYPES
+*/
+extern void freeTagFileResources (void);
+extern const char *tagFileName (void);
+extern void copyBytes (FILE* const fromFp, FILE* const toFp, const long size);
+extern void copyFile (const char *const from, const char *const to, const long size);
+extern void openTagFile (void);
+extern void closeTagFile (const boolean resize);
+extern void beginEtagsFile (void);
+extern void endEtagsFile (const char *const name);
+extern void makeTagEntry (const tagEntryInfo *const tag);
+extern void initTagEntry (tagEntryInfo *const e, const char *const name);
+
+#endif /* _ENTRY_H */
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/erlang.c b/erlang.c
new file mode 100644
index 0000000..23469aa
--- /dev/null
+++ b/erlang.c
@@ -0,0 +1,189 @@
+/*
+* $Id: erlang.c 443 2006-05-30 04:37:13Z darren $
+*
+* Copyright (c) 2003, Brent Fulgham <bfulgham@debian.org>
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* This module contains functions for generating tags for Erlang language
+* files. Some of the parsing constructs are based on the Emacs 'etags'
+* program by Francesco Potori <pot@gnu.org>
+*/
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#include <string.h>
+
+#include "entry.h"
+#include "options.h"
+#include "read.h"
+#include "routines.h"
+#include "vstring.h"
+
+/*
+* DATA DEFINITIONS
+*/
+typedef enum { /* indexes into ErlangKinds below; keep both in the same order */
+ K_MACRO, K_FUNCTION, K_MODULE, K_RECORD
+} erlangKind;
+
+static kindOption ErlangKinds[] = { /* one entry per erlangKind value */
+ {TRUE, 'd', "macro", "macro definitions"}, /* K_MACRO */
+ {TRUE, 'f', "function", "functions"}, /* K_FUNCTION */
+ {TRUE, 'm', "module", "modules"}, /* K_MODULE */
+ {TRUE, 'r', "record", "record definitions"}, /* K_RECORD */
+};
+
+/*
+* FUNCTION DEFINITIONS
+*/
+/* tagEntryInfo and vString should be preinitialized/preallocated, although
+ * this is not strictly necessary. On success the class name is found in the
+ * vString.
+ */
+
+/* Reports whether c can start an identifier, i.e. whether it is a letter.
+ * The isalpha () result is only guaranteed to be zero or non-zero, so it is
+ * normalized to FALSE or TRUE here.
+ */
+static boolean isIdentifierFirstCharacter (int c)
+{
+    return (boolean) (isalpha (c) != 0);
+}
+
+/* Reports whether c may appear inside an identifier: a letter, a digit, an
+ * underscore or a colon.
+ */
+static boolean isIdentifierCharacter (int c)
+{
+    const boolean accepted =
+        (boolean) (isalnum (c) || c == '_' || c == ':');
+
+    return accepted;
+}
+
+/* Returns a pointer to the first character at or after cp that is not
+ * white space (possibly the terminating null character).
+ */
+static const unsigned char *skipSpace (const unsigned char *cp)
+{
+    const unsigned char *scan;
+
+    for (scan = cp ; isspace ((int) *scan) ; ++scan)
+        ;
+    return scan;
+}
+
+/* Replaces the contents of identifier with the run of identifier characters
+ * found at cp and returns a pointer to the first character after that run.
+ * The result is empty when cp does not point at an identifier character.
+ */
+static const unsigned char *parseIdentifier (
+        const unsigned char *cp, vString *const identifier)
+{
+    const unsigned char *scan;
+
+    vStringClear (identifier);
+    for (scan = cp ; isIdentifierCharacter ((int) *scan) ; ++scan)
+        vStringPut (identifier, (int) *scan);
+    vStringTerminate (identifier);
+
+    return scan;
+}
+
+/* Emits a tag of the given kind for identifier, scoped to module when a
+ * non-empty module name is supplied. Nothing is emitted when the kind is
+ * disabled or the identifier is empty.
+ */
+static void makeMemberTag (
+        vString *const identifier, erlangKind kind, vString *const module)
+{
+    tagEntryInfo tag;
+
+    if (! ErlangKinds [kind].enabled || ! (vStringLength (identifier) > 0))
+        return;
+
+    initTagEntry (&tag, vStringValue (identifier));
+    tag.kindName = ErlangKinds[kind].name;
+    tag.kind = ErlangKinds[kind].letter;
+
+    if (module != NULL && vStringLength (module) > 0)
+    {
+        /* scope [0] is the scope key, scope [1] its value. */
+        tag.extensionFields.scope [0] = "module";
+        tag.extensionFields.scope [1] = vStringValue (module);
+    }
+    makeTagEntry (&tag);
+}
+
+/* Parses the module name at cp, emits a module tag for it and copies the
+ * name into module so that later tags can be scoped to it.
+ */
+static void parseModuleTag (const unsigned char *cp, vString *const module)
+{
+    vString *const moduleName = vStringNew ();
+
+    parseIdentifier (cp, moduleName);
+    makeSimpleTag (moduleName, ErlangKinds, K_MODULE);
+
+    /* All further entries go in the new module */
+    vStringCopy (module, moduleName);
+
+    vStringDelete (moduleName);
+}
+
+/* Parses the identifier at cp and emits it as an unscoped tag of the given
+ * kind.
+ */
+static void parseSimpleTag (const unsigned char *cp, erlangKind kind)
+{
+    vString *const tagName = vStringNew ();
+
+    parseIdentifier (cp, tagName);
+    makeSimpleTag (tagName, ErlangKinds, kind);
+
+    vStringDelete (tagName);
+}
+
+/* Parses the identifier at cp and emits it as a function tag scoped to
+ * module.
+ */
+static void parseFunctionTag (const unsigned char *cp, vString *const module)
+{
+    vString *const functionName = vStringNew ();
+
+    parseIdentifier (cp, functionName);
+    makeMemberTag (functionName, K_FUNCTION, module);
+
+    vStringDelete (functionName);
+}
+
+/*
+ * Directives are of the form:
+ * -module(foo)
+ * -define(foo, bar)
+ * -record(graph, {vtab = notable, cyclic = true}).
+ *
+ * cp points just past the leading '-'. The directive name is parsed, an
+ * opening parenthesis after it is skipped, and "record", "define" and
+ * "module" directives are turned into tags. Any other directive (import,
+ * export, etc.) is ignored.
+ */
+static void parseDirective (const unsigned char *cp, vString *const module)
+{
+    vString *const directive = vStringNew ();
+    const char *drtv;
+
+    cp = parseIdentifier (cp, directive);
+    cp = skipSpace (cp);
+    if (*cp == '(')
+        ++cp;
+
+    /* Take the text pointer only now that the name is complete. Appending
+     * characters may relocate the vString's storage (vstring.c is not
+     * visible from here), and a pointer fetched earlier could then dangle.
+     */
+    drtv = vStringValue (directive);
+
+    if (strcmp (drtv, "record") == 0)
+        parseSimpleTag (cp, K_RECORD);
+    else if (strcmp (drtv, "define") == 0)
+        parseSimpleTag (cp, K_MACRO);
+    else if (strcmp (drtv, "module") == 0)
+        parseModuleTag (cp, module);
+    /* Otherwise, it was an import, export, etc. */
+
+    vStringDelete (directive);
+}
+
+/* Parser entry point: reads the input line by line and dispatches on the
+ * first character of each line. Lines starting with '%' (comments) or '"'
+ * (strings that begin in column one) are skipped, '-' introduces a
+ * directive, and a letter starts a function definition. The most recent
+ * module name is kept so that functions can be scoped to it.
+ */
+static void findErlangTags (void)
+{
+    vString *const module = vStringNew ();
+    const unsigned char *line;
+
+    while ((line = fileReadLine ()) != NULL)
+    {
+        switch (*line)
+        {
+            case '%': /* skip initial comment */
+            case '"': /* strings sometimes start in column one */
+                break;
+
+            case '-':
+                parseDirective (line + 1, module); /* move off of the '-' */
+                break;
+
+            default:
+                if (isIdentifierFirstCharacter ((int) *line))
+                    parseFunctionTag (line, module);
+                break;
+        }
+    }
+    vStringDelete (module);
+}
+
+/* Builds the parser definition for Erlang: its tag kinds, the file
+ * extensions it handles and its parse routine.
+ */
+extern parserDefinition *ErlangParser (void)
+{
+    static const char *const extensions[] = { "erl", "ERL", "hrl", "HRL", NULL };
+    parserDefinition *const erlang = parserNew ("Erlang");
+
+    erlang->kinds      = ErlangKinds;
+    erlang->kindCount  = KIND_COUNT (ErlangKinds);
+    erlang->extensions = extensions;
+    erlang->parser     = findErlangTags;
+
+    return erlang;
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/flex.c b/flex.c
new file mode 100644
index 0000000..06ca243
--- /dev/null
+++ b/flex.c
@@ -0,0 +1,2243 @@
+/*
+ * $Id: flex.c 666 2008-05-15 17:47:31Z dfishburn $
+ *
+ * Copyright (c) 2008, David Fishburn
+ *
+ * This source code is released for free distribution under the terms of the
+ * GNU General Public License.
+ *
+ * This module contains functions for generating tags for Adobe languages.
+ * There are a number of different ones, but this will begin with:
+ * Flex
+ * MXML files (*.mxml, Macromedia XML)
+ * ActionScript files (*.as)
+ *
+ * Flex 3 language reference
+ * http://livedocs.adobe.com/flex/3/langref/index.html
+ */
+
+/*
+ * INCLUDE FILES
+ */
+#include "general.h" /* must always come first */
+#include <ctype.h> /* to define isalpha () */
+#include <setjmp.h>
+#ifdef DEBUG
+#include <stdio.h>
+#endif
+
+#include "debug.h"
+#include "entry.h"
+#include "keyword.h"
+#include "parse.h"
+#include "read.h"
+#include "routines.h"
+#include "vstring.h"
+
+/*
+ * MACROS
+ */
+#define isType(token,t) (boolean) ((token)->type == (t)) /* does the token have type t? */
+#define isKeyword(token,k) (boolean) ((token)->keyword == (k)) /* does the token carry keyword id k? */
+
+/*
+ * DATA DECLARATIONS
+ */
+
+typedef enum eException { ExceptionNone, ExceptionEOF } exception_t; /* ExceptionEOF is the value readToken() passes to longjmp() at end of input */
+
+/*
+ * Tracks class and function names already created.
+ * makeClassTag() and makeFunctionTag() look the fully qualified name up
+ * in these lists and only emit a tag (and record the name) when it is
+ * not yet present.
+ */
+static stringList *ClassNames;
+static stringList *FunctionNames;
+
+/* Used to specify type of keyword.
+ * KEYWORD_NONE marks a token that is not a keyword (readToken() resets
+ * every token to it); the other ids are the ones registered for the
+ * names listed in FlexKeywordTable.
+*/
+typedef enum eKeywordId {
+ KEYWORD_NONE = -1,
+ KEYWORD_function,
+ KEYWORD_capital_function,
+ KEYWORD_object,
+ KEYWORD_capital_object,
+ KEYWORD_prototype,
+ KEYWORD_var,
+ KEYWORD_new,
+ KEYWORD_this,
+ KEYWORD_for,
+ KEYWORD_while,
+ KEYWORD_do,
+ KEYWORD_if,
+ KEYWORD_else,
+ KEYWORD_switch,
+ KEYWORD_try,
+ KEYWORD_catch,
+ KEYWORD_finally,
+ KEYWORD_public,
+ KEYWORD_private,
+ KEYWORD_static,
+ KEYWORD_class,
+ KEYWORD_id,
+ KEYWORD_script,
+ KEYWORD_cdata,
+ KEYWORD_mx
+} keywordId;
+
+/* Used to determine whether keyword is valid for the token language and
+ * what its ID is. One entry of FlexKeywordTable.
+ */
+typedef struct sKeywordDesc {
+ const char *name; /* keyword spelling, as passed to addKeyword() */
+ keywordId id; /* id registered for that spelling */
+} keywordDesc;
+
+typedef enum eTokenType {
+ TOKEN_UNDEFINED, /* initial state; also any character readToken() does not recognise */
+ TOKEN_CHARACTER, /* a backslash sequence */
+ TOKEN_CLOSE_PAREN, /* ) */
+ TOKEN_SEMICOLON, /* ; */
+ TOKEN_COLON, /* : */
+ TOKEN_COMMA, /* , */
+ TOKEN_KEYWORD, /* identifier that is registered as a keyword */
+ TOKEN_OPEN_PAREN, /* ( */
+ TOKEN_OPERATOR, /* not produced by readToken() */
+ TOKEN_IDENTIFIER, /* identifier that is not a keyword */
+ TOKEN_STRING, /* single- or double-quoted string; token string holds its contents */
+ TOKEN_PERIOD, /* . */
+ TOKEN_OPEN_CURLY, /* { */
+ TOKEN_CLOSE_CURLY, /* } */
+ TOKEN_EQUAL_SIGN, /* = */
+ TOKEN_EXCLAMATION, /* ! */
+ TOKEN_FORWARD_SLASH, /* a '/' that starts neither a comment nor "/>" */
+ TOKEN_OPEN_SQUARE, /* [ */
+ TOKEN_CLOSE_SQUARE, /* ] */
+ TOKEN_OPEN_MXML, /* "<mx:" */
+ TOKEN_CLOSE_MXML, /* "</mx:" */
+ TOKEN_CLOSE_SGML, /* "/>" */
+ TOKEN_LESS_THAN, /* a '<' that starts none of the sequences above */
+ TOKEN_GREATER_THAN, /* > */
+ TOKEN_QUESTION_MARK /* ? */
+} tokenType;
+
+typedef struct sTokenInfo {
+ tokenType type; /* kind of token, set by readToken() */
+ keywordId keyword; /* keyword id, or KEYWORD_NONE */
+ vString * string; /* token text (identifier name or string contents) */
+ vString * scope; /* dot-separated enclosing scope; makeFlexTag() prefixes it to the name */
+ unsigned long lineNumber; /* source line recorded by readToken(); copied into the tag entry */
+ fpos_t filePosition; /* input position recorded by readToken(); copied into the tag entry */
+ int nestLevel; /* incremented on entry to parseBlock(), decremented on exit */
+ boolean ignoreTag; /* when set, the make*Tag() functions emit nothing for this token */
+ boolean isClass; /* when set, makeFlexTag() turns FLEXTAG_FUNCTION into FLEXTAG_METHOD */
+} tokenInfo;
+
+/*
+ * DATA DEFINITIONS
+ */
+
+static langType Lang_js; /* language id used when registering and looking up keywords */
+
+static jmp_buf Exception; /* readToken() longjmp()s here on EOF; the matching setjmp() is outside this excerpt */
+
+typedef enum { /* tag kinds; each value is used as an index into FlexKinds[] */
+ FLEXTAG_FUNCTION,
+ FLEXTAG_CLASS,
+ FLEXTAG_METHOD,
+ FLEXTAG_PROPERTY,
+ FLEXTAG_VARIABLE,
+ FLEXTAG_MXTAG,
+ FLEXTAG_COUNT /* number of kinds, not a kind itself */
+} flexKind;
+
+static kindOption FlexKinds [] = { /* indexed by flexKind, so the order must match that enum */
+ { TRUE, 'f', "function", "functions" }, /* FLEXTAG_FUNCTION */
+ { TRUE, 'c', "class", "classes" }, /* FLEXTAG_CLASS */
+ { TRUE, 'm', "method", "methods" }, /* FLEXTAG_METHOD */
+ { TRUE, 'p', "property", "properties" }, /* FLEXTAG_PROPERTY */
+ { TRUE, 'v', "variable", "global variables" }, /* FLEXTAG_VARIABLE */
+ { TRUE, 'x', "mxtag", "mxtags" } /* FLEXTAG_MXTAG */
+};
+
+static const keywordDesc FlexKeywordTable [] = {
+ /* keyword keyword ID */
+ /* every entry is registered for Lang_js by buildFlexKeywordHash() */
+ { "function", KEYWORD_function },
+ { "Function", KEYWORD_capital_function },
+ { "object", KEYWORD_object },
+ { "Object", KEYWORD_capital_object },
+ { "prototype", KEYWORD_prototype },
+ { "var", KEYWORD_var },
+ { "new", KEYWORD_new },
+ { "this", KEYWORD_this },
+ { "for", KEYWORD_for },
+ { "while", KEYWORD_while },
+ { "do", KEYWORD_do },
+ { "if", KEYWORD_if },
+ { "else", KEYWORD_else },
+ { "switch", KEYWORD_switch },
+ { "try", KEYWORD_try },
+ { "catch", KEYWORD_catch },
+ { "finally", KEYWORD_finally },
+ { "public", KEYWORD_public },
+ { "private", KEYWORD_private },
+ { "static", KEYWORD_static },
+ { "class", KEYWORD_class },
+ { "id", KEYWORD_id },
+ { "script", KEYWORD_script },
+ { "cdata", KEYWORD_cdata },
+ { "mx", KEYWORD_mx }
+};
+
+/*
+ * FUNCTION DEFINITIONS
+ */
+
+/* Recursive functions */
+static void parseFunction (tokenInfo *const token);
+static boolean parseBlock (tokenInfo *const token, tokenInfo *const parent);
+static boolean parseLine (tokenInfo *const token);
+static boolean parseActionScript (tokenInfo *const token);
+
+/*
+ * Tells whether "c" may be part of an identifier: a letter, a digit, or
+ * one of the characters '$', '@', '_' and '#'.
+ */
+static boolean isIdentChar (const int c)
+{
+	const int isExtra = (c == '$' || c == '@' || c == '_' || c == '#');
+
+	return (boolean) (isalnum (c) || isExtra);
+}
+
+/*
+ * Registers every entry of FlexKeywordTable as a keyword of Lang_js.
+ */
+static void buildFlexKeywordHash (void)
+{
+	const keywordDesc *entry = FlexKeywordTable;
+	const keywordDesc *const end = FlexKeywordTable +
+		sizeof (FlexKeywordTable) / sizeof (FlexKeywordTable [0]);
+
+	for ( ; entry != end ; ++entry)
+		addKeyword (entry->name, Lang_js, (int) entry->id);
+}
+
+/*
+ * Allocates a token in its initial state: undefined type, no keyword,
+ * empty string and scope, nesting level zero, both flags cleared, and
+ * the current source line and input position. Release with deleteToken().
+ */
+static tokenInfo *newToken (void)
+{
+	tokenInfo *const fresh = xMalloc (1, tokenInfo);
+
+	/* owned strings */
+	fresh->string = vStringNew ();
+	fresh->scope = vStringNew ();
+
+	/* classification */
+	fresh->type = TOKEN_UNDEFINED;
+	fresh->keyword = KEYWORD_NONE;
+
+	/* parser state */
+	fresh->nestLevel = 0;
+	fresh->ignoreTag = FALSE;
+	fresh->isClass = FALSE;
+
+	/* where we currently are in the input */
+	fresh->lineNumber = getSourceLineNumber ();
+	fresh->filePosition = getInputFilePosition ();
+
+	return fresh;
+}
+
+/*
+ * Releases a token obtained from newToken(), including the two strings
+ * it owns.
+ */
+static void deleteToken (tokenInfo *const token)
+{
+	vStringDelete (token->scope);
+	vStringDelete (token->string);
+	eFree (token);
+}
+
+/*
+ * Tag generation functions
+ */
+
+/*
+ * Emits a tag named after the token's string, using the line number and
+ * file position stored in the token. Nothing is emitted when the kind is
+ * disabled or the token is flagged ignoreTag.
+ */
+static void makeConstTag (tokenInfo *const token, const flexKind kind)
+{
+	tagEntryInfo entry;
+
+	if (! FlexKinds [kind].enabled || token->ignoreTag)
+		return;
+
+	initTagEntry (&entry, vStringValue (token->string));
+
+	entry.kind = FlexKinds [kind].letter;
+	entry.kindName = FlexKinds [kind].name;
+	entry.filePosition = token->filePosition;
+	entry.lineNumber = token->lineNumber;
+
+	makeTagEntry (&entry);
+}
+
+/*
+ * Emits a tag for the token, qualified by its scope.
+ *
+ * A FLEXTAG_FUNCTION is emitted as FLEXTAG_METHOD when the token is
+ * flagged isClass. The "enabled" test is made on the kind as passed in,
+ * before that substitution. When the token has a scope, the token's
+ * string itself is rewritten to "scope.name" before the tag is made.
+ */
+static void makeFlexTag (tokenInfo *const token, flexKind kind)
+{
+	if (! FlexKinds [kind].enabled || token->ignoreTag)
+		return;
+
+	DebugStatement (
+		debugPrintf (DEBUG_PARSE
+			, "\n makeFlexTag start: token isClass:%d scope:%s name:%s\n"
+			, token->isClass
+			, vStringValue(token->scope)
+			, vStringValue(token->string)
+			);
+		);
+
+	if (token->isClass && kind == FLEXTAG_FUNCTION)
+		kind = FLEXTAG_METHOD;
+
+	if (vStringLength (token->scope) > 0)
+	{
+		/* build "scope.name" aside, then store it back into the token */
+		vString *const qualified = vStringNew ();
+
+		vStringCopy (qualified, token->scope);
+		vStringCatS (qualified, ".");
+		vStringCatS (qualified, vStringValue (token->string));
+		vStringTerminate (qualified);
+		vStringCopy (token->string, qualified);
+		vStringDelete (qualified);
+	}
+	makeConstTag (token, kind);
+}
+
+/*
+ * Emits a class tag for the token unless a class with the same qualified
+ * name ("scope.name", or just "name" without a scope) was already
+ * recorded in ClassNames. Newly tagged names are added to that list.
+ */
+static void makeClassTag (tokenInfo *const token)
+{
+	vString *qualified;
+
+	if (token->ignoreTag)
+		return;
+
+	qualified = vStringNew ();
+	if (vStringLength (token->scope) == 0)
+		vStringCopy (qualified, token->string);
+	else
+	{
+		vStringCopy (qualified, token->scope);
+		vStringCatS (qualified, ".");
+		vStringCatS (qualified, vStringValue (token->string));
+	}
+	vStringTerminate (qualified);
+
+	if (! stringListHas (ClassNames, vStringValue (qualified)))
+	{
+		/* the list receives its own copy of the name */
+		stringListAdd (ClassNames, vStringNewCopy (qualified));
+		makeFlexTag (token, FLEXTAG_CLASS);
+	}
+	vStringDelete (qualified);
+}
+
+/*
+ * Emits an mxtag tag for the token (qualified by its scope inside
+ * makeFlexTag()). Nothing is emitted for tokens flagged ignoreTag.
+ *
+ * The scope-qualified copy of the name that used to be assembled here
+ * was never read, so it is no longer built.
+ */
+static void makeMXTag (tokenInfo *const token)
+{
+	if (! token->ignoreTag)
+		makeFlexTag (token, FLEXTAG_MXTAG);
+}
+
+/*
+ * Emits a function tag for the token unless a function with the same
+ * qualified name ("scope.name", or just "name" without a scope) was
+ * already recorded in FunctionNames. Newly tagged names are added to
+ * that list.
+ */
+static void makeFunctionTag (tokenInfo *const token)
+{
+	vString *qualified;
+
+	if (token->ignoreTag)
+		return;
+
+	qualified = vStringNew ();
+	if (vStringLength (token->scope) == 0)
+		vStringCopy (qualified, token->string);
+	else
+	{
+		vStringCopy (qualified, token->scope);
+		vStringCatS (qualified, ".");
+		vStringCatS (qualified, vStringValue (token->string));
+	}
+	vStringTerminate (qualified);
+
+	if (! stringListHas (FunctionNames, vStringValue (qualified)))
+	{
+		/* the list receives its own copy of the name */
+		stringListAdd (FunctionNames, vStringNewCopy (qualified));
+		makeFlexTag (token, FLEXTAG_FUNCTION);
+	}
+	vStringDelete (qualified);
+}
+
+/*
+ * Parsing functions
+ */
+
+/*
+ * Reads the body of a quoted string into "string". The opening delimiter
+ * has already been consumed; reading stops at the matching "delimiter"
+ * (which is consumed but not stored) or at end of input.
+ *
+ * A backslash is dropped and the character after it is stored as is, so
+ * an escaped delimiter does not end the string. If the input ends right
+ * after a backslash the string simply ends there; EOF is never stored as
+ * a character.
+ */
+static void parseString (vString *const string, const int delimiter)
+{
+	int c;
+
+	while ((c = fileGetc ()) != EOF && c != delimiter)
+	{
+		if (c == '\\')
+		{
+			c = fileGetc ();	/* This maybe a ' or ". */
+			if (c == EOF)
+				break;
+		}
+		vStringPut (string, c);
+	}
+	vStringTerminate (string);
+}
+
+/* Reads an identifier whose first character, "firstChar", has already
+ * been consumed, and appends it to "string". The character that ends the
+ * identifier is pushed back onto the input unless it is whitespace, in
+ * which case it is discarded.
+ */
+static void parseIdentifier (vString *const string, const int firstChar)
+{
+	int c = firstChar;
+
+	Assert (isIdentChar (c));
+	for (;;)
+	{
+		vStringPut (string, c);
+		c = fileGetc ();
+		if (! isIdentChar (c))
+			break;
+	}
+	vStringTerminate (string);
+
+	if (isspace (c))
+		return;
+	fileUngetc (c);	/* unget non-identifier character */
+}
+
+/*
+ * Reads the next token from the input into "token", setting its type,
+ * keyword, string, lineNumber and filePosition. Blanks, tabs and newlines
+ * between tokens are skipped, and so are C-style comments, "//" comments
+ * and "<!--" XML comments. At end of input the function does not return:
+ * it longjmp()s to Exception with ExceptionEOF.
+ * The scope, nestLevel, ignoreTag and isClass fields are not touched.
+ */
+static void readToken (tokenInfo *const token)
+{
+ int c;
+
+ token->type = TOKEN_UNDEFINED;
+ token->keyword = KEYWORD_NONE;
+ vStringClear (token->string);
+
+getNextChar:
+ /* skip white space; the position is re-sampled after every character read */
+ do
+ {
+ c = fileGetc ();
+ token->lineNumber = getSourceLineNumber ();
+ token->filePosition = getInputFilePosition ();
+ }
+ while (c == '\t' || c == ' ' || c == '\n');
+
+ switch (c)
+ {
+ case EOF: longjmp (Exception, (int)ExceptionEOF); break;
+ case '(': token->type = TOKEN_OPEN_PAREN; break;
+ case ')': token->type = TOKEN_CLOSE_PAREN; break;
+ case ';': token->type = TOKEN_SEMICOLON; break;
+ case ',': token->type = TOKEN_COMMA; break;
+ case '.': token->type = TOKEN_PERIOD; break;
+ case ':': token->type = TOKEN_COLON; break;
+ case '{': token->type = TOKEN_OPEN_CURLY; break;
+ case '}': token->type = TOKEN_CLOSE_CURLY; break;
+ case '=': token->type = TOKEN_EQUAL_SIGN; break;
+ case '[': token->type = TOKEN_OPEN_SQUARE; break;
+ case ']': token->type = TOKEN_CLOSE_SQUARE; break;
+ case '?': token->type = TOKEN_QUESTION_MARK; break;
+
+ case '\'':
+ case '"':
+ /* quoted string: the contents (without the quotes) become the token string */
+ token->type = TOKEN_STRING;
+ parseString (token->string, c);
+ token->lineNumber = getSourceLineNumber ();
+ token->filePosition = getInputFilePosition ();
+ break;
+
+ case '\\':
+ /* a following backslash, double quote or white space is consumed; anything else is pushed back */
+ c = fileGetc ();
+ if (c != '\\' && c != '"' && !isspace (c))
+ fileUngetc (c);
+ token->type = TOKEN_CHARACTER;
+ token->lineNumber = getSourceLineNumber ();
+ token->filePosition = getInputFilePosition ();
+ break;
+
+ case '/':
+ {
+ int d = fileGetc ();
+ if ( (d != '*') && /* is this the start of a comment? */
+ (d != '/') && /* is a one line comment? */
+ (d != '>') ) /* is this a close XML tag? */
+ {
+ fileUngetc (d);
+ token->type = TOKEN_FORWARD_SLASH;
+ token->lineNumber = getSourceLineNumber ();
+ token->filePosition = getInputFilePosition ();
+ }
+ else
+ {
+ if (d == '*')
+ {
+ /* block comment: skip to each '*' until one is followed by '/' */
+ do
+ {
+ fileSkipToCharacter ('*');
+ c = fileGetc ();
+ if (c == '/')
+ break;
+ else
+ fileUngetc (c);
+ } while (c != EOF && c != '\0');
+ goto getNextChar;
+ }
+ else if (d == '/') /* one-line comment: skip to the end of the line */
+ {
+ fileSkipToCharacter ('\n');
+ goto getNextChar;
+ }
+ else if (d == '>') /* "/>" closes an SGML/XML tag */
+ {
+ token->type = TOKEN_CLOSE_SGML;
+ token->lineNumber = getSourceLineNumber ();
+ token->filePosition = getInputFilePosition ();
+ }
+ }
+ break;
+ }
+
+ case '<':
+ {
+ /*
+ * An XML comment looks like this
+ * <!-- anything over multiple lines -->
+ *
+ * Besides comments, "<mx:" and "</mx:" are recognised here as
+ * TOKEN_OPEN_MXML and TOKEN_CLOSE_MXML. Every other '<' yields
+ * TOKEN_LESS_THAN after the look-ahead has been pushed back.
+ *
+ * NOTE(review): the fallback paths push back two or three
+ * characters in a row; confirm that fileUngetc() supports more
+ * than one character of push-back.
+ */
+ int d = fileGetc ();
+
+ if ( (d != '!' ) && /* is this the start of a comment? */
+ (d != '/' ) && /* is this the start of a closing mx tag */
+ (d != 'm' ) ) /* is this the start of a mx tag */
+ {
+ fileUngetc (d);
+ token->type = TOKEN_LESS_THAN;
+ token->lineNumber = getSourceLineNumber ();
+ token->filePosition = getInputFilePosition ();
+
+ }
+ else
+ {
+ if (d == '!')
+ {
+ int e = fileGetc ();
+ if ( e != '-' ) /* is this the start of a comment? */
+ {
+ fileUngetc (e);
+ fileUngetc (d);
+ token->type = TOKEN_LESS_THAN;
+ token->lineNumber = getSourceLineNumber ();
+ token->filePosition = getInputFilePosition ();
+ }
+ else
+ {
+ if (e == '-')
+ {
+ int f = fileGetc ();
+ if ( f != '-' ) /* is this the start of a comment? */
+ {
+ fileUngetc (f);
+ fileUngetc (e);
+ fileUngetc (d);
+ token->type = TOKEN_LESS_THAN;
+ token->lineNumber = getSourceLineNumber ();
+ token->filePosition = getInputFilePosition ();
+ }
+ else
+ {
+ if (f == '-')
+ {
+ /* inside the comment: look for the closing "-->" */
+ do
+ {
+ fileSkipToCharacter ('-');
+ c = fileGetc ();
+ if (c == '-')
+ {
+ d = fileGetc ();
+ if (d == '>')
+ break;
+ else
+ {
+ fileUngetc (d);
+ fileUngetc (c);
+ }
+ /*
+ * NOTE(review): this break is also reached when "--" was
+ * not followed by '>', so skipping stops at the first
+ * "--" inside the comment - confirm this is intended.
+ */
+ break;
+ }
+ else
+ fileUngetc (c);
+ } while (c != EOF && c != '\0');
+ goto getNextChar;
+ }
+ }
+ }
+ }
+ }
+ else if (d == 'm')
+ {
+ int e = fileGetc ();
+ if ( e != 'x' ) /* continuing an mx tag */
+ {
+ fileUngetc (e);
+ fileUngetc (d);
+ token->type = TOKEN_LESS_THAN;
+ token->lineNumber = getSourceLineNumber ();
+ token->filePosition = getInputFilePosition ();
+ }
+ else
+ {
+ if (e == 'x')
+ {
+ int f = fileGetc ();
+ if ( f != ':' ) /* "<mx" only opens an mx tag when a ':' follows */
+ {
+ fileUngetc (f);
+ fileUngetc (e);
+ fileUngetc (d);
+ token->type = TOKEN_LESS_THAN;
+ token->lineNumber = getSourceLineNumber ();
+ token->filePosition = getInputFilePosition ();
+ }
+ else
+ {
+ if (f == ':')
+ {
+ token->type = TOKEN_OPEN_MXML;
+ token->lineNumber = getSourceLineNumber ();
+ token->filePosition = getInputFilePosition ();
+ }
+ }
+ }
+ }
+ }
+ else if (d == '/')
+ {
+ int e = fileGetc ();
+ if ( e != 'm' ) /* continuing an mx tag */
+ {
+ fileUngetc (e);
+ fileUngetc (d);
+ token->type = TOKEN_LESS_THAN;
+ token->lineNumber = getSourceLineNumber ();
+ token->filePosition = getInputFilePosition ();
+ }
+ else
+ {
+ int f = fileGetc ();
+ if ( f != 'x' ) /* continuing an mx tag */
+ {
+ /* NOTE(review): unlike the branch above, the '/' held in d is not pushed back here or below */
+ fileUngetc (f);
+ fileUngetc (e);
+ token->type = TOKEN_LESS_THAN;
+ token->lineNumber = getSourceLineNumber ();
+ token->filePosition = getInputFilePosition ();
+ }
+ else
+ {
+ if (f == 'x')
+ {
+ int g = fileGetc ();
+ if ( g != ':' ) /* "</mx" only closes an mx tag when a ':' follows */
+ {
+ fileUngetc (g);
+ fileUngetc (f);
+ fileUngetc (e);
+ token->type = TOKEN_LESS_THAN;
+ token->lineNumber = getSourceLineNumber ();
+ token->filePosition = getInputFilePosition ();
+ }
+ else
+ {
+ if (g == ':')
+ {
+ token->type = TOKEN_CLOSE_MXML;
+ token->lineNumber = getSourceLineNumber ();
+ token->filePosition = getInputFilePosition ();
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ break;
+ }
+
+ case '>':
+ token->type = TOKEN_GREATER_THAN;
+ token->lineNumber = getSourceLineNumber ();
+ token->filePosition = getInputFilePosition ();
+ break;
+
+ case '!':
+ token->type = TOKEN_EXCLAMATION;
+ /*token->lineNumber = getSourceLineNumber ();
+ token->filePosition = getInputFilePosition ();*/
+ break;
+
+ default:
+ /* identifier or keyword; any other character leaves the token undefined */
+ if (! isIdentChar (c))
+ token->type = TOKEN_UNDEFINED;
+ else
+ {
+ parseIdentifier (token->string, c);
+ token->lineNumber = getSourceLineNumber ();
+ token->filePosition = getInputFilePosition ();
+ token->keyword = analyzeToken (token->string, Lang_js);
+ if (isKeyword (token, KEYWORD_NONE))
+ token->type = TOKEN_IDENTIFIER;
+ else
+ token->type = TOKEN_KEYWORD;
+ }
+ break;
+ }
+}
+
+/*
+ * Copies "src" into "dest": type, keyword, class flag, nesting level,
+ * source position, and the contents of the string and scope. The
+ * ignoreTag flag of "dest" is left as it was.
+ */
+static void copyToken (tokenInfo *const dest, tokenInfo *const src)
+{
+	/* string contents are copied, not shared */
+	vStringCopy (dest->string, src->string);
+	vStringCopy (dest->scope, src->scope);
+
+	dest->type = src->type;
+	dest->keyword = src->keyword;
+	dest->isClass = src->isClass;
+	dest->nestLevel = src->nestLevel;
+	dest->lineNumber = src->lineNumber;
+	dest->filePosition = src->filePosition;
+}
+
+/*
+ * Token parsing functions
+ */
+
+/*
+ * When "token" is an opening parenthesis, skips the whole parenthesised
+ * list, including nested parentheses such as
+ *   ( name varchar(30), text binary(10) )
+ * and leaves "token" on the first token after the matching ')'.
+ * Any other token is left untouched.
+ */
+static void skipArgumentList (tokenInfo *const token)
+{
+	int depth;
+
+	if (! isType (token, TOKEN_OPEN_PAREN))
+		return;
+
+	depth = 1;	/* the parenthesis we are standing on */
+	do
+	{
+		readToken (token);
+		if (isType (token, TOKEN_OPEN_PAREN))
+			++depth;
+		else if (isType (token, TOKEN_CLOSE_PAREN) && depth > 0)
+			--depth;
+	} while (! isType (token, TOKEN_CLOSE_PAREN) || depth != 0);
+
+	/* step past the closing parenthesis */
+	readToken (token);
+}
+
+/*
+ * When "token" is an opening square bracket, skips the whole bracketed
+ * expression, including nested brackets, e.g.
+ *   var name[1]
+ * and leaves "token" on the first token after the matching ']'.
+ * Any other token is left untouched.
+ */
+static void skipArrayList (tokenInfo *const token)
+{
+	int depth;
+
+	if (! isType (token, TOKEN_OPEN_SQUARE))
+		return;
+
+	depth = 1;	/* the bracket we are standing on */
+	do
+	{
+		readToken (token);
+		if (isType (token, TOKEN_OPEN_SQUARE))
+			++depth;
+		else if (isType (token, TOKEN_CLOSE_SQUARE) && depth > 0)
+			--depth;
+	} while (! isType (token, TOKEN_CLOSE_SQUARE) || depth != 0);
+
+	/* step past the closing bracket */
+	readToken (token);
+}
+
+/*
+ * Appends the child's string to the parent's string, separated by a '.'
+ * when the parent's string is not empty.
+ */
+static void addContext (tokenInfo* const parent, const tokenInfo* const child)
+{
+	vString *const target = parent->string;
+
+	if (vStringLength (target) != 0)
+		vStringCatS (target, ".");
+	vStringCatS (target, vStringValue (child->string));
+	vStringTerminate (target);
+}
+
+/*
+ * Appends "extra" to the token's scope, separated by a '.' when the
+ * scope is not empty.
+ */
+static void addToScope (tokenInfo* const token, vString* const extra)
+{
+	vString *const target = token->scope;
+
+	if (vStringLength (target) != 0)
+		vStringCatS (target, ".");
+	vStringCatS (target, vStringValue (extra));
+	vStringTerminate (target);
+}
+
+/*
+ * Scanning functions
+ */
+
+/*
+ * Advances "token" until it is a semicolon or a closing brace; returns
+ * at once when it already is one. Brace-delimited blocks met on the way
+ * are handed to parseBlock() and parenthesised lists are skipped as a
+ * unit.
+ */
+static void findCmdTerm (tokenInfo *const token)
+{
+	for (;;)
+	{
+		switch (token->type)
+		{
+			case TOKEN_SEMICOLON:
+			case TOKEN_CLOSE_CURLY:
+				return;		/* terminator reached */
+
+			case TOKEN_OPEN_CURLY:
+				/* Handle nested blocks */
+				parseBlock (token, token);
+				break;
+
+			case TOKEN_OPEN_PAREN:
+				skipArgumentList (token);
+				break;
+
+			default:
+				readToken (token);
+				break;
+		}
+	}
+}
+
+/*
+ * Skips a switch statement; "token" holds the "switch" keyword on entry.
+ *
+ *   switch (expression){
+ *     case value1:
+ *       statement;
+ *       break;
+ *     default : statement;
+ *   }
+ *
+ * The parenthesised expression is skipped. If a '{' follows, tokens are
+ * then consumed up to the first '}', '>', "/>" or "</mx:" token.
+ */
+static void parseSwitch (tokenInfo *const token)
+{
+	readToken (token);
+
+	if (isType (token, TOKEN_OPEN_PAREN))
+		skipArgumentList (token);
+
+	if (! isType (token, TOKEN_OPEN_CURLY))
+		return;
+
+	do
+	{
+		readToken (token);
+	} while (! isType (token, TOKEN_CLOSE_SGML) &&
+			 ! isType (token, TOKEN_CLOSE_MXML) &&
+			 ! isType (token, TOKEN_CLOSE_CURLY) &&
+			 ! isType (token, TOKEN_GREATER_THAN));
+}
+
+/*
+ * Skips a loop statement; "token" holds the keyword that starts it.
+ *
+ *   for (x=0; x<3; x++)
+ *     document.write("This text is repeated three times<br>");
+ *
+ *   while (number<5){
+ *     document.write(number+"<br>");
+ *     number++;
+ *   }
+ *
+ *   do{
+ *     document.write(number+"<br>");
+ *     number++;
+ *   }
+ *   while (number<5);
+ *
+ * The parenthesised header is skipped. The body goes to parseBlock()
+ * when it starts with '{' and to parseLine() otherwise. For any other
+ * keyword the function does nothing.
+ */
+static void parseLoop (tokenInfo *const token)
+{
+	if (isKeyword (token, KEYWORD_do))
+	{
+		/* body first ... */
+		readToken (token);
+		if (isType (token, TOKEN_OPEN_CURLY))
+			parseBlock (token, token);
+		else
+			parseLine (token);
+
+		/* ... then the trailing "while (...)", if present */
+		readToken (token);
+		if (! isKeyword (token, KEYWORD_while))
+			return;
+
+		readToken (token);
+		if (isType (token, TOKEN_OPEN_PAREN))
+			skipArgumentList (token);
+		return;
+	}
+
+	if (! isKeyword (token, KEYWORD_for) && ! isKeyword (token, KEYWORD_while))
+		return;
+
+	/* header first ... */
+	readToken (token);
+	if (isType (token, TOKEN_OPEN_PAREN))
+		skipArgumentList (token);
+
+	/* ... then the body */
+	if (isType (token, TOKEN_OPEN_CURLY))
+		parseBlock (token, token);
+	else
+		parseLine (token);
+}
+
+/*
+ * Skips a conditional statement; "token" holds the introducing keyword
+ * on entry (parseStatement() also routes else/try/catch/finally here).
+ *
+ *   if ( ... )
+ *     statement;
+ *   else
+ *     statement
+ *
+ *   if ( ... ) {
+ *     multiple;
+ *     statements;
+ *   }
+ *
+ * The last statement of a function may lack its terminator:
+ *
+ *   function a(flag){
+ *     if(flag)
+ *       test(1);
+ *     else
+ *       test(2)
+ *   }
+ *
+ * Returns FALSE when the statement ended on a closing brace that belongs
+ * to the enclosing block, meaning the caller must not read another token
+ * before examining it; returns TRUE otherwise.
+ *
+ * TODO: Deal with statements that can optionally end without a
+ * semi-colon. Currently this messes up the parsing of blocks.
+ */
+static boolean parseIf (tokenInfo *const token)
+{
+	readToken (token);
+
+	/* Check for an "else if" and consume the "if" */
+	if (isKeyword (token, KEYWORD_if))
+		readToken (token);
+
+	/* the condition, when there is one */
+	if (isType (token, TOKEN_OPEN_PAREN))
+		skipArgumentList (token);
+
+	/* braced body */
+	if (isType (token, TOKEN_OPEN_CURLY))
+	{
+		parseBlock (token, token);
+		return TRUE;
+	}
+
+	/* single statement body */
+	findCmdTerm (token);
+	if (isType (token, TOKEN_CLOSE_CURLY))
+		return FALSE;	/* This statement did not have a line terminator. */
+
+	/* look at what follows the terminator */
+	readToken (token);
+	if (isType (token, TOKEN_CLOSE_CURLY))
+		return FALSE;	/* the enclosing block ends here */
+
+	/* an ELSE part is handled like another IF */
+	if (isKeyword (token, KEYWORD_else))
+		return parseIf (token);
+
+	return TRUE;
+}
+
+/*
+ * Parses a function definition of the form
+ *   private static function ioErrorHandler( event:IOErrorEvent ):void {
+ * "token" is either the "function" keyword or already the function name.
+ *
+ * The argument list and an optional ":ReturnType" are skipped. When a
+ * body follows it is parsed with the function name as parent and a
+ * function tag is made for the name. Finally the input is advanced to
+ * the end of the statement.
+ */
+static void parseFunction (tokenInfo *const token)
+{
+	tokenInfo *const name = newToken ();
+
+	if ( isKeyword(token, KEYWORD_function) )
+	{
+		readToken (token);
+	}
+
+	/* "token" is now the function name; keep a copy for the tag */
+	copyToken (name, token);
+
+	DebugStatement (
+		debugPrintf (DEBUG_PARSE
+			, "\n parseFunction: token isClass:%d scope:%s name:%s\n"
+			, token->isClass
+			, vStringValue(token->scope)
+			, vStringValue(token->string)
+			);
+		);
+	DebugStatement (
+		debugPrintf (DEBUG_PARSE
+			, "\n parseFunction: name isClass:%d scope:%s name:%s\n"
+			, name->isClass
+			, vStringValue(name->scope)
+			, vStringValue(name->string)
+			);
+		);
+
+	readToken (token);
+
+	if ( isType (token, TOKEN_OPEN_PAREN) )
+		skipArgumentList(token);
+
+	if ( isType (token, TOKEN_COLON) )
+	{
+		/*
+		 * function fname ():ReturnType
+		 * Step over the colon and the type name.
+		 */
+		readToken (token);
+		readToken (token);
+	}
+
+	if ( isType (token, TOKEN_OPEN_CURLY) )
+	{
+		DebugStatement (
+			debugPrintf (DEBUG_PARSE
+				, "\n parseFunction end: name isClass:%d scope:%s name:%s\n"
+				, name->isClass
+				, vStringValue(name->scope)
+				, vStringValue(name->string)
+				);
+			);
+		parseBlock (token, name);
+		/* this trace used to be emitted twice in a row; once is enough */
+		DebugStatement (
+			debugPrintf (DEBUG_PARSE
+				, "\n parseFunction end2: token isClass:%d scope:%s name:%s\n"
+				, token->isClass
+				, vStringValue(token->scope)
+				, vStringValue(token->string)
+				);
+			);
+		DebugStatement (
+			debugPrintf (DEBUG_PARSE
+				, "\n parseFunction end3: name isClass:%d scope:%s name:%s\n"
+				, name->isClass
+				, vStringValue(name->scope)
+				, vStringValue(name->string)
+				);
+			);
+		makeFunctionTag (name);
+	}
+
+	findCmdTerm (token);
+
+	deleteToken (name);
+}
+
+/*
+ * Parses a brace-delimited block. "token" may be the opening brace
+ * itself or the first token inside the block; "parent" is only passed on
+ * to nested blocks. The token's nestLevel is raised for the duration of
+ * the call.
+ *
+ * Parsing stops at the closing brace, or as soon as parseLine() reports
+ * a statement without terminator. Always returns FALSE.
+ *
+ * The copy of the token's scope that used to be made on entry was never
+ * read back, and it leaked whenever readToken() left the function through
+ * longjmp() at end of input; it is no longer made.
+ */
+static boolean parseBlock (tokenInfo *const token, tokenInfo *const parent)
+{
+	boolean read_next_token = TRUE;
+
+	token->nestLevel++;
+	DebugStatement (
+		debugPrintf (DEBUG_PARSE
+			, "\n parseBlock start: token isClass:%d scope:%s name:%s\n"
+			, token->isClass
+			, vStringValue(token->scope)
+			, vStringValue(token->string)
+			);
+		);
+	/*
+	 * Make this routine a bit more forgiving.
+	 * If called on an open_curly advance it
+	 */
+	if ( isType (token, TOKEN_OPEN_CURLY) &&
+			isKeyword(token, KEYWORD_NONE) )
+		readToken(token);
+
+	if (! isType (token, TOKEN_CLOSE_CURLY))
+	{
+		/*
+		 * Read until we find the closing brace,
+		 * any nested braces will be handled within
+		 */
+		do
+		{
+			if (isType (token, TOKEN_OPEN_CURLY))
+			{
+				/* Handle nested blocks */
+				parseBlock (token, parent);
+			}
+			else
+			{
+				/*
+				 * It is possible for a line to have no terminator
+				 * if the following line is a closing brace.
+				 * parseLine will detect this case and indicate
+				 * whether we should read an additional token.
+				 */
+				read_next_token = parseLine (token);
+			}
+
+			/*
+			 * Always read a new token unless we find a statement without
+			 * a ending terminator
+			 */
+			if( read_next_token )
+				readToken(token);
+
+			/*
+			 * If we find a statement without a terminator consider the
+			 * block finished, otherwise the stack will be off by one.
+			 */
+		} while (! isType (token, TOKEN_CLOSE_CURLY) && read_next_token );
+	}
+
+	token->nestLevel--;
+
+	DebugStatement (
+		debugPrintf (DEBUG_PARSE
+			, "\n parseBlock end: token isClass:%d scope:%s name:%s\n"
+			, token->isClass
+			, vStringValue(token->scope)
+			, vStringValue(token->string)
+			);
+		);
+	return FALSE;
+}
+
+/*
+ * Parses the comma-separated members of an object literal and tags them
+ * with the string of "class" added to their scope:
+ *   validProperty : 2,
+ *   validMethod : function(a,b) {}
+ *   'validMethod2' : function(a,b) {}
+ *   container.dirtyTab = {'url': false, 'title':false, 'snapshot':false, '*': false}
+ *
+ * "name : function (...) { ... }" yields a method tag; "name : <other>"
+ * yields a property tag. Afterwards the input is advanced to the end of
+ * the statement.
+ */
+static void parseMethods (tokenInfo *const token, tokenInfo *const class)
+{
+	tokenInfo *const name = newToken ();
+
+	do
+	{
+		/* member name: a string, or anything that is not a keyword */
+		readToken (token);
+		if (! isType (token, TOKEN_STRING) && ! isKeyword (token, KEYWORD_NONE))
+			continue;
+		copyToken (name, token);
+
+		readToken (token);
+		if (! isType (token, TOKEN_COLON))
+			continue;
+
+		readToken (token);
+		if (! isKeyword (token, KEYWORD_function))
+		{
+			addToScope (name, class->string);
+			makeFlexTag (name, FLEXTAG_PROPERTY);
+
+			/* Read the next token, if a comma we must loop again */
+			readToken (token);
+			continue;
+		}
+
+		readToken (token);
+		if (isType (token, TOKEN_OPEN_PAREN))
+			skipArgumentList (token);
+
+		if (isType (token, TOKEN_OPEN_CURLY))
+		{
+			addToScope (name, class->string);
+			makeFlexTag (name, FLEXTAG_METHOD);
+			parseBlock (token, name);
+
+			/*
+			 * We are on the closing curly; check the next token,
+			 * if a comma, we must loop again
+			 */
+			readToken (token);
+		}
+	} while (isType (token, TOKEN_COMMA));
+
+	findCmdTerm (token);
+
+	deleteToken (name);
+}
+
+/*
+ * Parses a variable declaration such as
+ *   private static var lastFaultMessage:Date = new Date( 0 );
+ *   private static var webRequests:ArrayCollection = new ArrayCollection();
+ * "token" is either the "var" keyword or already the variable name.
+ *
+ * The rest of the declaration is skipped up to the terminating
+ * semicolon, and a variable tag is made when "is_public" is set.
+ * Always returns TRUE.
+ *
+ * The unused secondary token and the scope save/restore (the scope is
+ * never modified here) have been dropped; both were needless allocations
+ * that leaked when readToken() left through longjmp() at end of input.
+ */
+static boolean parseVar (tokenInfo *const token, boolean is_public)
+{
+	tokenInfo *const name = newToken ();
+
+	if ( isKeyword(token, KEYWORD_var) )
+	{
+		readToken(token);
+	}
+
+	/* Variable name */
+	copyToken (name, token);
+	readToken(token);
+
+	if ( isType (token, TOKEN_COLON) )
+	{
+		/*
+		 * var vname:DataType = new Date();
+		 * var vname:DataType;
+		 * Step over the colon and the type name.
+		 */
+		readToken (token);
+		readToken (token);
+	}
+
+	/* skip any initialiser; the loop only ends on a semicolon */
+	while (! isType (token, TOKEN_SEMICOLON) )
+	{
+		readToken (token);
+	}
+
+	if ( is_public )
+		makeFlexTag (name, FLEXTAG_VARIABLE);
+
+	deleteToken (name);
+
+	return TRUE;
+}
+
+/*
+ * Parses a class definition; "token" is either the "class" keyword or
+ * already the class name.
+ *
+ * The class name is appended to the token's scope and the token is
+ * flagged isClass while the body is parsed; both are restored before
+ * returning. The name token is copied after the scope was extended, so
+ * its scope includes the class name as well. A class tag is made only
+ * when the name is directly followed by '{'. Always returns TRUE.
+ */
+static boolean parseClass (tokenInfo *const token)
+{
+	const boolean wasClass = token->isClass;
+	vString *const outerScope = vStringNew ();
+	tokenInfo *const name = newToken ();
+
+	/* remember the scope we came in with */
+	vStringCopy (outerScope, token->scope);
+
+	if (isKeyword (token, KEYWORD_class))
+		readToken (token);
+
+	/* "token" is now the class name: enter its scope */
+	token->isClass = TRUE;
+	addToScope (token, token->string);
+	copyToken (name, token);
+	readToken (token);
+
+	DebugStatement (
+		debugPrintf (DEBUG_PARSE
+			, "\n parseClass start: token isClass:%d scope:%s name:%s\n"
+			, token->isClass
+			, vStringValue(token->scope)
+			, vStringValue(token->string)
+			);
+		);
+	if (isType (token, TOKEN_OPEN_CURLY))
+	{
+		makeClassTag (name);
+		parseBlock (token, name);
+	}
+
+	DebugStatement (
+		debugPrintf (DEBUG_PARSE
+			, "\n parseClass end: token isClass:%d scope:%s name:%s\n"
+			, token->isClass
+			, vStringValue(token->scope)
+			, vStringValue(token->string)
+			);
+		);
+
+	/* leave the class: put scope and flag back */
+	token->isClass = wasClass;
+	vStringCopy (token->scope, outerScope);
+
+	vStringDelete (outerScope);
+	deleteToken (name);
+
+	return TRUE;
+}
+
+/*
+ * Parses one statement starting at `token` and emits class, method,
+ * function and variable tags for the assignment forms listed in the
+ * comment inside the body.
+ *
+ * Returns FALSE when the statement was ended by a closing curly brace
+ * instead of a terminator of its own (or when parseIf() reports FALSE),
+ * TRUE otherwise.
+ *
+ * token->scope is restored to its value on entry on every path that goes
+ * through cleanUp. The class/function/var keyword paths hand the work to
+ * parseClass()/parseFunction()/parseVar() and then only release the
+ * locals allocated here (freeLocals); they used to return directly and
+ * leaked `name`, `secondary_name` and `saveScope`.
+ */
+static boolean parseStatement (tokenInfo *const token)
+{
+ tokenInfo *const name = newToken ();
+ tokenInfo *const secondary_name = newToken ();
+ vString * saveScope = vStringNew ();
+ boolean is_public = FALSE;
+ boolean is_class = FALSE;
+ boolean is_terminated = TRUE;
+ /*
+ * NOTE(review): is_global is never assigned TRUE in this function, so
+ * the "global variable" branches below cannot currently run - confirm
+ * whether that is intended.
+ */
+ boolean is_global = FALSE;
+ boolean is_prototype = FALSE;
+ vString * fulltag;
+
+ vStringClear(saveScope);
+ vStringCopy (saveScope, token->scope);
+ DebugStatement (
+ debugPrintf (DEBUG_PARSE
+ , "\n parseStatement: token isClass:%d scope:%s name:%s\n"
+ , token->isClass
+ , vStringValue(token->scope)
+ , vStringValue(token->string)
+ );
+ );
+ /*
+ * Functions can be named or unnamed.
+ * This deals with these formats:
+ * Function
+ * validFunctionOne = function(a,b) {}
+ * testlib.validFunctionFive = function(a,b) {}
+ * var innerThree = function(a,b) {}
+ * var innerFour = (a,b) {}
+ * var D2 = secondary_fcn_name(a,b) {}
+ * var D3 = new Function("a", "b", "return a+b;");
+ * Class
+ * testlib.extras.ValidClassOne = function(a,b) {
+ * this.a = a;
+ * }
+ * Class Methods
+ * testlib.extras.ValidClassOne.prototype = {
+ * 'validMethodOne' : function(a,b) {},
+ * 'validMethodTwo' : function(a,b) {}
+ * }
+ * ValidClassTwo = function ()
+ * {
+ * this.validMethodThree = function() {}
+ * // unnamed method
+ * this.validMethodFour = () {}
+ * }
+ * Database.prototype.validMethodThree = Database_getTodaysDate;
+ */
+
+ if ( isKeyword(token, KEYWORD_public) )
+ {
+ is_public = TRUE;
+ readToken(token);
+ }
+
+ if ( isKeyword(token, KEYWORD_private) )
+ {
+ readToken(token);
+ }
+
+ if ( isKeyword(token, KEYWORD_static) )
+ {
+ readToken(token);
+ }
+
+ if (isType(token, TOKEN_KEYWORD))
+ {
+ switch (token->keyword)
+ {
+ case KEYWORD_for:
+ case KEYWORD_while:
+ case KEYWORD_do:
+ parseLoop (token);
+ break;
+ case KEYWORD_if:
+ case KEYWORD_else:
+ case KEYWORD_try:
+ case KEYWORD_catch:
+ case KEYWORD_finally:
+ /* Common semantics */
+ is_terminated = parseIf (token);
+ break;
+ case KEYWORD_switch:
+ parseSwitch (token);
+ break;
+ /*
+ * The three cases below are fully handled by their own parser.
+ * Leave through freeLocals so the tokens and the saved scope
+ * allocated above are released without touching token->scope.
+ */
+ case KEYWORD_class:
+ parseClass (token);
+ goto freeLocals;
+ case KEYWORD_function:
+ parseFunction (token);
+ goto freeLocals;
+ case KEYWORD_var:
+ parseVar (token, is_public);
+ goto freeLocals;
+ default:
+ readToken(token);
+ break;
+ }
+ }
+
+ copyToken (name, token);
+
+ while (! isType (token, TOKEN_CLOSE_CURLY) &&
+ ! isType (token, TOKEN_SEMICOLON) &&
+ ! isType (token, TOKEN_EQUAL_SIGN) )
+ {
+ /* Potentially the name of the function */
+ readToken (token);
+ if (isType (token, TOKEN_PERIOD))
+ {
+ /*
+ * Cannot be a global variable if it has dot references in the name
+ */
+ is_global = FALSE;
+ do
+ {
+ readToken (token);
+ if ( isKeyword(token, KEYWORD_NONE) )
+ {
+ if ( is_class )
+ {
+ vStringCopy(saveScope, token->scope);
+ addToScope(token, name->string);
+ }
+ else
+ addContext (name, token);
+ }
+ else if ( isKeyword(token, KEYWORD_prototype) )
+ {
+ /*
+ * When we reach the "prototype" tag, we infer:
+ * "BindAgent" is a class
+ * "build" is a method
+ *
+ * function BindAgent( repeatableIdName, newParentIdName ) {
+ * }
+ *
+ * CASE 1
+ * Specified function name: "build"
+ * BindAgent.prototype.build = function( mode ) {
+ * ignore everything within this function
+ * }
+ *
+ * CASE 2
+ * Prototype listing
+ * ValidClassOne.prototype = {
+ * 'validMethodOne' : function(a,b) {},
+ * 'validMethodTwo' : function(a,b) {}
+ * }
+ *
+ */
+ makeClassTag (name);
+ is_class = TRUE;
+ is_prototype = TRUE;
+
+ /*
+ * There should be a ".function_name" next.
+ */
+ readToken (token);
+ if (isType (token, TOKEN_PERIOD))
+ {
+ /*
+ * Handle CASE 1
+ */
+ readToken (token);
+ if ( isKeyword(token, KEYWORD_NONE) )
+ {
+ vStringCopy(saveScope, token->scope);
+ addToScope(token, name->string);
+
+ makeFlexTag (token, FLEXTAG_METHOD);
+ /*
+ * We can read until the end of the block / statement.
+ * We need to correctly parse any nested blocks, but
+ * we do NOT want to create any tags based on what is
+ * within the blocks.
+ */
+ token->ignoreTag = TRUE;
+ /*
+ * Find to the end of the statement
+ */
+ findCmdTerm (token);
+ token->ignoreTag = FALSE;
+ is_terminated = TRUE;
+ goto cleanUp;
+ }
+ }
+ else if (isType (token, TOKEN_EQUAL_SIGN))
+ {
+ readToken (token);
+ if (isType (token, TOKEN_OPEN_CURLY))
+ {
+ /*
+ * Handle CASE 2
+ *
+ * Creates tags for each of these class methods
+ * ValidClassOne.prototype = {
+ * 'validMethodOne' : function(a,b) {},
+ * 'validMethodTwo' : function(a,b) {}
+ * }
+ */
+ parseMethods(token, name);
+ /*
+ * Find to the end of the statement
+ */
+ findCmdTerm (token);
+ token->ignoreTag = FALSE;
+ is_terminated = TRUE;
+ goto cleanUp;
+ }
+ }
+ }
+ readToken (token);
+ } while (isType (token, TOKEN_PERIOD));
+ }
+
+ if ( isType (token, TOKEN_OPEN_PAREN) )
+ skipArgumentList(token);
+
+ if ( isType (token, TOKEN_COLON) )
+ {
+ /*
+ * Functions are of this form:
+ * function fname ():ReturnType {
+ */
+ readToken (token);
+ readToken (token);
+ }
+
+ if ( isType (token, TOKEN_OPEN_SQUARE) )
+ skipArrayList(token);
+
+ }
+
+ if ( isType (token, TOKEN_CLOSE_CURLY) )
+ {
+ /*
+ * Reaching this section without having
+ * processed an open curly brace indicates
+ * the statement is most likely not terminated.
+ */
+ is_terminated = FALSE;
+ goto cleanUp;
+ }
+
+ if ( isType (token, TOKEN_SEMICOLON) )
+ {
+ /*
+ * Only create variables for global scope
+ */
+ if ( token->nestLevel == 0 && is_global )
+ {
+ /*
+ * Handles this syntax:
+ * var g_var2;
+ */
+ if (isType (token, TOKEN_SEMICOLON))
+ makeFlexTag (name, FLEXTAG_VARIABLE);
+ }
+ /*
+ * Statement has ended.
+ * This deals with calls to functions, like:
+ * alert(..);
+ */
+ goto cleanUp;
+ }
+
+ if ( isType (token, TOKEN_EQUAL_SIGN) )
+ {
+ readToken (token);
+
+ if ( isKeyword (token, KEYWORD_function) )
+ {
+ readToken (token);
+
+ if ( isKeyword (token, KEYWORD_NONE) &&
+ ! isType (token, TOKEN_OPEN_PAREN) )
+ {
+ /*
+ * Functions of this format:
+ * var D2A = function theAdd(a, b)
+ * {
+ * return a+b;
+ * }
+ * Are really two separate defined functions and
+ * can be referenced in two ways:
+ * alert( D2A(1,2) ); // produces 3
+ * alert( theAdd(1,2) ); // also produces 3
+ * So it must have two tags:
+ * D2A
+ * theAdd
+ * Save the reference to the name for later use, once
+ * we have established this is a valid function we will
+ * create the secondary reference to it.
+ */
+ copyToken (secondary_name, token);
+ readToken (token);
+ }
+
+ if ( isType (token, TOKEN_OPEN_PAREN) )
+ skipArgumentList(token);
+
+ if (isType (token, TOKEN_OPEN_CURLY))
+ {
+ /*
+ * This will be either a function or a class.
+ * We can only determine this by checking the body
+ * of the function. If we find a "this." we know
+ * it is a class, otherwise it is a function.
+ */
+ if ( token->isClass )
+ {
+ makeFlexTag (name, FLEXTAG_METHOD);
+ if ( vStringLength(secondary_name->string) > 0 )
+ makeFunctionTag (secondary_name);
+ parseBlock (token, name);
+ }
+ else
+ {
+ parseBlock (token, name);
+ makeFunctionTag (name);
+
+ if ( vStringLength(secondary_name->string) > 0 )
+ makeFunctionTag (secondary_name);
+
+ /*
+ * Find to the end of the statement
+ */
+ goto cleanUp;
+ }
+ }
+ }
+ else if (isType (token, TOKEN_OPEN_PAREN))
+ {
+ /*
+ * Handle nameless functions
+ * this.method_name = () {}
+ */
+ skipArgumentList(token);
+
+ if (isType (token, TOKEN_OPEN_CURLY))
+ {
+ /*
+ * Nameless functions are only setup as methods.
+ */
+ makeFlexTag (name, FLEXTAG_METHOD);
+ parseBlock (token, name);
+ }
+ }
+ else if (isType (token, TOKEN_OPEN_CURLY))
+ {
+ /*
+ * Creates tags for each of these class methods
+ * ValidClassOne.prototype = {
+ * 'validMethodOne' : function(a,b) {},
+ * 'validMethodTwo' : function(a,b) {}
+ * }
+ */
+ parseMethods(token, name);
+ if (isType (token, TOKEN_CLOSE_CURLY))
+ {
+ /*
+ * Assume the closing curly brace terminates
+ * this statement.
+ */
+ is_terminated = TRUE;
+ }
+ }
+ else if (isKeyword (token, KEYWORD_new))
+ {
+ readToken (token);
+ if ( isKeyword (token, KEYWORD_function) ||
+ isKeyword (token, KEYWORD_capital_function) ||
+ isKeyword (token, KEYWORD_object) ||
+ isKeyword (token, KEYWORD_capital_object) )
+ {
+ if ( isKeyword (token, KEYWORD_object) ||
+ isKeyword (token, KEYWORD_capital_object) )
+ is_class = TRUE;
+
+ readToken (token);
+ if ( isType (token, TOKEN_OPEN_PAREN) )
+ skipArgumentList(token);
+
+ if (isType (token, TOKEN_SEMICOLON))
+ {
+ if ( token->nestLevel == 0 )
+ {
+ if ( is_class )
+ {
+ makeClassTag (name);
+ } else {
+ makeFunctionTag (name);
+ }
+ }
+ }
+ }
+ }
+ else if (isKeyword (token, KEYWORD_NONE))
+ {
+ /*
+ * Only create variables for global scope
+ */
+ if ( token->nestLevel == 0 && is_global )
+ {
+ /*
+ * A pointer can be created to the function.
+ * If we recognize the function/class name ignore the variable.
+ * This format looks identical to a variable definition.
+ * A variable defined outside of a block is considered
+ * a global variable:
+ * var g_var1 = 1;
+ * var g_var2;
+ * This is not a global variable:
+ * var g_var = function;
+ * This is a global variable:
+ * var g_var = different_var_name;
+ */
+ fulltag = vStringNew ();
+ if (vStringLength (token->scope) > 0)
+ {
+ vStringCopy(fulltag, token->scope);
+ vStringCatS (fulltag, ".");
+ vStringCatS (fulltag, vStringValue(token->string));
+ }
+ else
+ {
+ vStringCopy(fulltag, token->string);
+ }
+ vStringTerminate(fulltag);
+ if ( ! stringListHas(FunctionNames, vStringValue (fulltag)) &&
+ ! stringListHas(ClassNames, vStringValue (fulltag)) )
+ {
+ findCmdTerm (token);
+ if (isType (token, TOKEN_SEMICOLON))
+ makeFlexTag (name, FLEXTAG_VARIABLE);
+ }
+ vStringDelete (fulltag);
+ }
+ }
+ }
+ findCmdTerm (token);
+
+ /*
+ * Statements can be optionally terminated in the case of
+ * statement prior to a close curly brace as in the
+ * document.write line below:
+ *
+ * function checkForUpdate() {
+ * if( 1==1 ) {
+ * document.write("hello from checkForUpdate<br>")
+ * }
+ * return 1;
+ * }
+ */
+ if ( ! is_terminated && isType (token, TOKEN_CLOSE_CURLY))
+ is_terminated = FALSE;
+
+
+cleanUp:
+ vStringCopy(token->scope, saveScope);
+freeLocals:
+ deleteToken (name);
+ deleteToken (secondary_name);
+ vStringDelete(saveScope);
+
+ return is_terminated;
+}
+
+/*
+ * Parses one line/statement beginning at `token`.
+ *
+ * The last statement within a block "{}" can be optionally terminated.
+ * The return value tells the caller whether the statement was terminated:
+ * when it is FALSE the caller must not read an additional token looking
+ * for the end of the statement.
+ */
+static boolean parseLine (tokenInfo *const token)
+{
+    boolean is_terminated = TRUE;
+
+    if (isType(token, TOKEN_KEYWORD))
+    {
+        switch (token->keyword)
+        {
+            case KEYWORD_for:
+            case KEYWORD_while:
+            case KEYWORD_do:
+                parseLoop (token);
+                break;
+            case KEYWORD_if:
+            case KEYWORD_else:
+            case KEYWORD_try:
+            case KEYWORD_catch:
+            case KEYWORD_finally:
+                /* Common semantics */
+                is_terminated = parseIf (token);
+                break;
+            case KEYWORD_switch:
+                parseSwitch (token);
+                break;
+            default:
+                /*
+                 * A statement that starts with any other keyword may be
+                 * unterminated just like a non-keyword statement, so
+                 * propagate the result instead of discarding it.
+                 */
+                is_terminated = parseStatement (token);
+                break;
+        }
+    }
+    else
+    {
+        /*
+         * Special case where single line statements may not be
+         * SEMICOLON terminated. parseBlock needs to know this
+         * so that it does not read the next token.
+         */
+        is_terminated = parseStatement (token);
+    }
+    return is_terminated;
+}
+
+/*
+ * Handles the body of a script element. When `token` is a '<' the
+ * function expects the
+ *     <![CDATA[
+ *        ...
+ *     ]]>
+ * wrapper: it reads the '!' and the CDATA keyword, requires an opening
+ * square bracket, parses the enclosed ActionScript and then consumes up
+ * to two closing square brackets. Any other starting token is handed
+ * straight to parseActionScript(). Always returns TRUE.
+ */
+static boolean parseCDATA (tokenInfo *const token)
+{
+    if (! isType (token, TOKEN_LESS_THAN))
+    {
+        parseActionScript (token);
+        return TRUE;
+    }
+
+    readToken (token);
+    if (! isType (token, TOKEN_EXCLAMATION))
+        return TRUE;
+
+    /*
+     * No separate '[' token is read here; the original author found that
+     * reading one had to be disabled.
+     */
+    readToken (token);
+    if (! isKeyword (token, KEYWORD_cdata))
+        return TRUE;
+
+    readToken (token);
+    if (! isType (token, TOKEN_OPEN_SQUARE))
+        return TRUE;
+
+    parseActionScript (token);
+    if (isType (token, TOKEN_CLOSE_SQUARE))
+    {
+        readToken (token);
+        if (isType (token, TOKEN_CLOSE_SQUARE))
+            readToken (token);
+    }
+    return TRUE;
+}
+
+/*
+ * Parses one MXML element; `token` is the element's opening token on
+ * entry and the first thing read here is the element type.
+ *
+ * Script elements are either skipped (self-closing or immediately
+ * closed) or have their embedded ActionScript parsed through
+ * parseCDATA(). For every other element the attributes and children are
+ * scanned: nested elements recurse, and each "id" attribute produces an
+ * MX tag scoped by the element type. Always returns TRUE.
+ */
+static boolean parseMXML (tokenInfo *const token)
+{
+    tokenInfo *const idName = newToken ();
+    tokenInfo *const element = newToken ();
+
+    readToken (token);
+
+    if (isKeyword (token, KEYWORD_script))
+    {
+        /*
+         * These tags can be of this form:
+         *     <mx:Script src="filename.as" />
+         * so skip ahead to whatever closes the opening tag.
+         */
+        do
+            readToken (token);
+        while (! isType (token, TOKEN_CLOSE_SGML) &&
+               ! isType (token, TOKEN_CLOSE_MXML) &&
+               ! isType (token, TOKEN_GREATER_THAN));
+
+        if (isType (token, TOKEN_CLOSE_MXML))
+        {
+            /* Found a </mx:type> tag: finish reading the "type" and ">" */
+            readToken (token);
+            readToken (token);
+        }
+        else if (! isType (token, TOKEN_CLOSE_SGML))
+        {
+            /*
+             * Beginning of an embedded script, typically:
+             *     <mx:Script>
+             *         <![CDATA[
+             *             ... ActionScript ...
+             *         ]]>
+             *     </mx:Script>
+             */
+            readToken (token);
+            parseCDATA (token);
+
+            readToken (token);
+            if (isType (token, TOKEN_CLOSE_MXML))
+            {
+                /* Finish reading the "type" and ">" of </mx:type> */
+                readToken (token);
+                readToken (token);
+            }
+        }
+        /* A TOKEN_CLOSE_SGML (<mx:Script ... />) needs nothing more. */
+    }
+    else
+    {
+        copyToken (element, token);
+
+        readToken (token);
+        do
+        {
+            if (isType (token, TOKEN_OPEN_MXML))
+                parseMXML (token);
+            else if (isKeyword (token, KEYWORD_id))
+            {
+                readToken (token);  /* = */
+                readToken (token);  /* the id value */
+
+                copyToken (idName, token);
+                addToScope (idName, element->string);
+                makeMXTag (idName);
+            }
+            readToken (token);
+        } while (! isType (token, TOKEN_CLOSE_SGML) &&
+                 ! isType (token, TOKEN_CLOSE_MXML));
+
+        if (isType (token, TOKEN_CLOSE_MXML))
+        {
+            /* Found a </mx:type> tag: finish reading the "type" and ">" */
+            readToken (token);
+            readToken (token);
+        }
+    }
+
+    deleteToken (idName);
+    deleteToken (element);
+    return TRUE;
+}
+
+/*
+ * Reads and dispatches tokens in an endless loop. The function returns
+ * TRUE only from inside the loop: after a "]]" followed by '>' (the end
+ * of a CDATA section) or after a TOKEN_CLOSE_MXML plus the two tokens
+ * that follow it. Nested MXML elements go to parseMXML(), the `function`
+ * keyword to parseFunction(), everything else to parseLine().
+ */
+static boolean parseActionScript (tokenInfo *const token)
+{
+ do
+ {
+ readToken (token);
+
+ if (isType (token, TOKEN_LESS_THAN))
+ {
+ /*
+ * Handle these tags
+ * <![CDATA[
+ * ...
+ * ]]>
+ */
+ readToken (token);
+ /*
+ * NOTE(review): no token is read between the next two checks, so
+ * unless one token can carry both types the inner branch looks
+ * unreachable. parseCDATA() tests TOKEN_EXCLAMATION at the
+ * corresponding point - confirm what was intended here.
+ */
+ if (isType (token, TOKEN_EQUAL_SIGN))
+ {
+ if (isType (token, TOKEN_OPEN_SQUARE))
+ {
+ readToken (token);
+ if (isKeyword (token, KEYWORD_cdata))
+ {
+ readToken (token);
+ }
+ }
+ }
+ }
+ /* The token read after a '<' falls through into the dispatch below. */
+ if (isType (token, TOKEN_CLOSE_SQUARE))
+ {
+ /*
+ * Handle these tags
+ * <![CDATA[
+ * ...
+ * ]]>
+ */
+ readToken (token);
+ if (isType (token, TOKEN_CLOSE_SQUARE))
+ {
+ readToken (token);
+ if (isType (token, TOKEN_GREATER_THAN))
+ {
+ return TRUE;
+ }
+ }
+ }
+ else if (isType (token, TOKEN_CLOSE_MXML))
+ {
+ /*
+ * Read the Script> tags
+ */
+ readToken (token);
+ readToken (token);
+ return TRUE;
+ }
+ else if (isType (token, TOKEN_OPEN_MXML))
+ {
+ parseMXML (token);
+ }
+ else
+ {
+ if (isType(token, TOKEN_KEYWORD))
+ {
+ switch (token->keyword)
+ {
+ case KEYWORD_function: parseFunction (token); break;
+ default: parseLine (token); break;
+ }
+ }
+ else
+ {
+ parseLine (token);
+ }
+ }
+ } while (TRUE);
+}
+
+/*
+ * Top-level token loop for a Flex source file. It never returns
+ * normally. Each iteration reads one token, lets parseMXML() handle an
+ * opening MXML element, then either skips a "<? ... ?" section or hands
+ * control to parseActionScript().
+ */
+static void parseFlexFile (tokenInfo *const token)
+{
+    for (;;)
+    {
+        readToken (token);
+
+        if (isType (token, TOKEN_OPEN_MXML))
+            parseMXML (token);
+
+        /* Deliberately not an "else": the current token is tested again. */
+        if (! isType (token, TOKEN_LESS_THAN))
+        {
+            parseActionScript (token);
+            continue;
+        }
+
+        readToken (token);
+        if (isType (token, TOKEN_QUESTION_MARK))
+        {
+            /* Skip everything up to the next question mark ... */
+            do
+                readToken (token);
+            while (! isType (token, TOKEN_QUESTION_MARK));
+            /* ... and the token that follows it. */
+            readToken (token);
+        }
+    }
+}
+
+/*
+ * Parser initialisation hook: checks that FlexKinds has exactly
+ * FLEXTAG_COUNT entries, records the language id assigned to this
+ * parser and builds the keyword hash.
+ */
+static void initialize (const langType language)
+{
+    Assert (sizeof FlexKinds / sizeof *FlexKinds == FLEXTAG_COUNT);
+
+    Lang_js = language;
+    buildFlexKeywordHash ();
+}
+
+/*
+ * Parser entry point: allocates the working token and the ClassNames /
+ * FunctionNames lists, runs parseFlexFile() until control comes back
+ * through a longjmp to Exception with a value other than ExceptionNone,
+ * then releases everything and resets the two list pointers to NULL.
+ */
+static void findFlexTags (void)
+{
+ tokenInfo *const token = newToken ();
+ exception_t exception;
+
+ ClassNames = stringListNew ();
+ FunctionNames = stringListNew ();
+
+ /* setjmp yields ExceptionNone on the direct call, so parsing starts. */
+ exception = (exception_t) (setjmp (Exception));
+ while (exception == ExceptionNone)
+ parseFlexFile (token);
+
+ stringListDelete (ClassNames);
+ stringListDelete (FunctionNames);
+ ClassNames = NULL;
+ FunctionNames = NULL;
+ deleteToken (token);
+}
+
+/*
+ * Create the parser definition structure for the "Flex" language:
+ * files ending in .as and .mxml, tag kinds from FlexKinds, parsed by
+ * findFlexTags() (a hand-written parser instead of regex definitions).
+ */
+extern parserDefinition* FlexParser (void)
+{
+    static const char *const extensions [] = { "as", "mxml", NULL };
+    parserDefinition *const parser = parserNew ("Flex");
+
+    parser->extensions = extensions;
+    parser->kinds = FlexKinds;
+    parser->kindCount = KIND_COUNT (FlexKinds);
+    parser->parser = findFlexTags;
+    parser->initialize = initialize;
+
+    return parser;
+}
+/* vi:set tabstop=4 shiftwidth=4 noexpandtab: */
diff --git a/fortran.c b/fortran.c
new file mode 100644
index 0000000..2a6f85c
--- /dev/null
+++ b/fortran.c
@@ -0,0 +1,2197 @@
+/*
+* $Id: fortran.c 660 2008-04-20 23:30:12Z elliotth $
+*
+* Copyright (c) 1998-2003, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* This module contains functions for generating tags for Fortran language
+* files.
+*/
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#include <string.h>
+#include <limits.h>
+#include <ctype.h> /* to define tolower () */
+#include <setjmp.h>
+
+#include "debug.h"
+#include "entry.h"
+#include "keyword.h"
+#include "options.h"
+#include "parse.h"
+#include "read.h"
+#include "routines.h"
+#include "vstring.h"
+
+/*
+* MACROS
+*/
+/* True for characters that may appear in an identifier. Evaluates c twice. */
+#define isident(c) (isalnum(c) || (c) == '_')
+/* True for a space or a tab. The argument is parenthesised so that an
+ * expression argument cannot change the meaning of the comparisons. */
+#define isBlank(c) (boolean) ((c) == ' ' || (c) == '\t')
+/* True when the token has the given tokenType. */
+#define isType(token,t) (boolean) ((token)->type == (t))
+/* True when the token holds the given keywordId. */
+#define isKeyword(token,k) (boolean) ((token)->keyword == (k))
+/* True when the token has a secondary token holding the given keywordId. */
+#define isSecondaryKeyword(token,k) (boolean) ((token)->secondary == NULL ? \
+ FALSE : (token)->secondary->keyword == (k))
+
+/*
+* DATA DECLARATIONS
+*/
+
+/* Values passed through longjmp (Exception, ...). ExceptionNone is 0, the
+ * value a direct setjmp call yields; ExceptionFixedFormat is raised by
+ * getFixedFormChar () when the input does not look like fixed source form.
+ */
+typedef enum eException {
+ ExceptionNone, ExceptionEOF, ExceptionFixedFormat, ExceptionLoop
+} exception_t;
+
+/* Used to designate type of line read in fixed source form.
+ * Values are produced by getLineType () from the first six columns.
+ */
+typedef enum eFortranLineType {
+ LTYPE_UNDETERMINED, /* nothing decided yet (initial value while scanning) */
+ LTYPE_INVALID, /* margin contents not valid for fixed source form */
+ LTYPE_COMMENT, /* comment marker found in column 1 */
+ LTYPE_CONTINUATION, /* column 6 marks a continuation and no label was seen */
+ LTYPE_EOF, /* end of file reached inside the margin */
+ LTYPE_INITIAL, /* blank or '0' in column 6, or a tab in the margin */
+ LTYPE_SHORT /* line ended inside columns 1 through 5 */
+} lineType;
+
+/* Used to specify type of keyword.
+ * KEYWORD_NONE is the value newToken () gives a token before any keyword
+ * has been recognised; the other values are the ids paired with the keyword
+ * spellings in FortranKeywordTable.
+ */
+typedef enum eKeywordId {
+ KEYWORD_NONE = -1,
+ KEYWORD_allocatable,
+ KEYWORD_assignment,
+ KEYWORD_automatic,
+ KEYWORD_block,
+ KEYWORD_byte,
+ KEYWORD_cexternal,
+ KEYWORD_cglobal,
+ KEYWORD_character,
+ KEYWORD_common,
+ KEYWORD_complex,
+ KEYWORD_contains,
+ KEYWORD_data,
+ KEYWORD_dimension,
+ KEYWORD_dllexport,
+ KEYWORD_dllimport,
+ KEYWORD_do,
+ KEYWORD_double,
+ KEYWORD_elemental,
+ KEYWORD_end,
+ KEYWORD_entry,
+ KEYWORD_equivalence,
+ KEYWORD_external,
+ KEYWORD_format,
+ KEYWORD_function,
+ KEYWORD_if,
+ KEYWORD_implicit,
+ KEYWORD_include,
+ KEYWORD_inline,
+ KEYWORD_integer,
+ KEYWORD_intent,
+ KEYWORD_interface,
+ KEYWORD_intrinsic,
+ KEYWORD_logical,
+ KEYWORD_map,
+ KEYWORD_module,
+ KEYWORD_namelist,
+ KEYWORD_operator,
+ KEYWORD_optional,
+ KEYWORD_parameter,
+ KEYWORD_pascal,
+ KEYWORD_pexternal,
+ KEYWORD_pglobal,
+ KEYWORD_pointer,
+ KEYWORD_precision,
+ KEYWORD_private,
+ KEYWORD_program,
+ KEYWORD_public,
+ KEYWORD_pure,
+ KEYWORD_real,
+ KEYWORD_record,
+ KEYWORD_recursive,
+ KEYWORD_save,
+ KEYWORD_select,
+ KEYWORD_sequence,
+ KEYWORD_static,
+ KEYWORD_stdcall,
+ KEYWORD_structure,
+ KEYWORD_subroutine,
+ KEYWORD_target,
+ KEYWORD_then,
+ KEYWORD_type,
+ KEYWORD_union,
+ KEYWORD_use,
+ KEYWORD_value,
+ KEYWORD_virtual,
+ KEYWORD_volatile,
+ KEYWORD_where,
+ KEYWORD_while
+} keywordId;
+
+/* Used to determine whether keyword is valid for the token language and
+ * what its ID is. One row of FortranKeywordTable.
+ */
+typedef struct sKeywordDesc {
+ const char *name; /* keyword spelling handed to addKeyword () */
+ keywordId id; /* id registered for that spelling */
+} keywordDesc;
+
+/* Lexical category of a token. TOKEN_UNDEFINED is the type newToken ()
+ * assigns; TOKEN_LABEL is the type makeLabelTag () gives the token it
+ * builds for a statement label.
+ */
+typedef enum eTokenType {
+ TOKEN_UNDEFINED,
+ TOKEN_COMMA,
+ TOKEN_DOUBLE_COLON,
+ TOKEN_IDENTIFIER,
+ TOKEN_KEYWORD,
+ TOKEN_LABEL,
+ TOKEN_NUMERIC,
+ TOKEN_OPERATOR,
+ TOKEN_PAREN_CLOSE,
+ TOKEN_PAREN_OPEN,
+ TOKEN_PERCENT,
+ TOKEN_STATEMENT_END,
+ TOKEN_STRING
+} tokenType;
+
+/* Kind of tag to generate. Every value except TAG_UNDEFINED and TAG_COUNT
+ * is used as an index into FortranKinds, so the order here must match the
+ * order of the rows in that table.
+ */
+typedef enum eTagType {
+ TAG_UNDEFINED = -1,
+ TAG_BLOCK_DATA,
+ TAG_COMMON_BLOCK,
+ TAG_ENTRY_POINT,
+ TAG_FUNCTION,
+ TAG_INTERFACE,
+ TAG_COMPONENT,
+ TAG_LABEL,
+ TAG_LOCAL,
+ TAG_MODULE,
+ TAG_NAMELIST,
+ TAG_PROGRAM,
+ TAG_SUBROUTINE,
+ TAG_DERIVED_TYPE,
+ TAG_VARIABLE,
+ TAG_COUNT /* must be last */
+} tagType;
+
+/* One lexical token together with the information needed to tag it. */
+typedef struct sTokenInfo {
+ tokenType type; /* lexical category */
+ keywordId keyword; /* KEYWORD_NONE unless a keyword was recognised */
+ tagType tag; /* set by makeFortranTag (); TAG_UNDEFINED until then */
+ vString* string; /* token text; freed by deleteToken () */
+ struct sTokenInfo *secondary; /* optional attached token; freed with this one */
+ unsigned long lineNumber; /* source line recorded for the tag entry */
+ fpos_t filePosition; /* file position recorded for the tag entry */
+} tokenInfo;
+
+/*
+* DATA DEFINITIONS
+*/
+
+static langType Lang_fortran; /* language id used when registering keywords */
+static jmp_buf Exception; /* longjmp target for parse exceptions */
+static int Ungetc; /* one character pushed back by ungetChar (); '\0' = none */
+static unsigned int Column; /* fixed-form column counter; 0 = at start of a line */
+static boolean FreeSourceForm; /* selects getFreeFormChar () over getFixedFormChar () */
+static boolean ParsingString; /* while set, '!' does not start a comment in fixed form */
+static tokenInfo *Parent;
+
+/* indexed by tagType
+ * The first column is the kind's default enabled state (consulted by
+ * includeTag ()); the letter and the name are copied into each tag entry
+ * by makeFortranTag (); the last column is a description of the kind.
+ */
+static kindOption FortranKinds [] = {
+ { TRUE, 'b', "block data", "block data"},
+ { TRUE, 'c', "common", "common blocks"},
+ { TRUE, 'e', "entry", "entry points"},
+ { TRUE, 'f', "function", "functions"},
+ { FALSE, 'i', "interface", "interface contents, generic names, and operators"},
+ { TRUE, 'k', "component", "type and structure components"},
+ { TRUE, 'l', "label", "labels"},
+ { FALSE, 'L', "local", "local, common block, and namelist variables"},
+ { TRUE, 'm', "module", "modules"},
+ { TRUE, 'n', "namelist", "namelists"},
+ { TRUE, 'p', "program", "programs"},
+ { TRUE, 's', "subroutine", "subroutines"},
+ { TRUE, 't', "type", "derived types and structures"},
+ { TRUE, 'v', "variable", "program (global) and module variables"}
+};
+
+/* For definitions of Fortran 77 with extensions:
+ * http://www.fortran.com/fortran/F77_std/rjcnf0001.html
+ * http://scienide.uwaterloo.ca/MIPSpro7/007-2362-004/sgi_html/index.html
+ *
+ * For the Compaq Fortran Reference Manual:
+ * http://h18009.www1.hp.com/fortran/docs/lrm/dflrm.htm
+ */
+
+/* Keyword spellings and their ids; buildFortranKeywordHash () registers
+ * every row with addKeyword (). Note that the spellings "dll_export" and
+ * "dll_import" contain an underscore although their ids do not.
+ */
+static const keywordDesc FortranKeywordTable [] = {
+ /* keyword keyword ID */
+ { "allocatable", KEYWORD_allocatable },
+ { "assignment", KEYWORD_assignment },
+ { "automatic", KEYWORD_automatic },
+ { "block", KEYWORD_block },
+ { "byte", KEYWORD_byte },
+ { "cexternal", KEYWORD_cexternal },
+ { "cglobal", KEYWORD_cglobal },
+ { "character", KEYWORD_character },
+ { "common", KEYWORD_common },
+ { "complex", KEYWORD_complex },
+ { "contains", KEYWORD_contains },
+ { "data", KEYWORD_data },
+ { "dimension", KEYWORD_dimension },
+ { "dll_export", KEYWORD_dllexport },
+ { "dll_import", KEYWORD_dllimport },
+ { "do", KEYWORD_do },
+ { "double", KEYWORD_double },
+ { "elemental", KEYWORD_elemental },
+ { "end", KEYWORD_end },
+ { "entry", KEYWORD_entry },
+ { "equivalence", KEYWORD_equivalence },
+ { "external", KEYWORD_external },
+ { "format", KEYWORD_format },
+ { "function", KEYWORD_function },
+ { "if", KEYWORD_if },
+ { "implicit", KEYWORD_implicit },
+ { "include", KEYWORD_include },
+ { "inline", KEYWORD_inline },
+ { "integer", KEYWORD_integer },
+ { "intent", KEYWORD_intent },
+ { "interface", KEYWORD_interface },
+ { "intrinsic", KEYWORD_intrinsic },
+ { "logical", KEYWORD_logical },
+ { "map", KEYWORD_map },
+ { "module", KEYWORD_module },
+ { "namelist", KEYWORD_namelist },
+ { "operator", KEYWORD_operator },
+ { "optional", KEYWORD_optional },
+ { "parameter", KEYWORD_parameter },
+ { "pascal", KEYWORD_pascal },
+ { "pexternal", KEYWORD_pexternal },
+ { "pglobal", KEYWORD_pglobal },
+ { "pointer", KEYWORD_pointer },
+ { "precision", KEYWORD_precision },
+ { "private", KEYWORD_private },
+ { "program", KEYWORD_program },
+ { "public", KEYWORD_public },
+ { "pure", KEYWORD_pure },
+ { "real", KEYWORD_real },
+ { "record", KEYWORD_record },
+ { "recursive", KEYWORD_recursive },
+ { "save", KEYWORD_save },
+ { "select", KEYWORD_select },
+ { "sequence", KEYWORD_sequence },
+ { "static", KEYWORD_static },
+ { "stdcall", KEYWORD_stdcall },
+ { "structure", KEYWORD_structure },
+ { "subroutine", KEYWORD_subroutine },
+ { "target", KEYWORD_target },
+ { "then", KEYWORD_then },
+ { "type", KEYWORD_type },
+ { "union", KEYWORD_union },
+ { "use", KEYWORD_use },
+ { "value", KEYWORD_value },
+ { "virtual", KEYWORD_virtual },
+ { "volatile", KEYWORD_volatile },
+ { "where", KEYWORD_where },
+ { "while", KEYWORD_while }
+};
+
+/* Growable stack of enclosing tokens, maintained by ancestorPush () and
+ * ancestorPop () and consulted when a tag needs its scope.
+ */
+static struct {
+ unsigned int count; /* number of entries currently on the stack */
+ unsigned int max; /* number of entries `list` has room for */
+ tokenInfo* list; /* entry storage; NULL until the first push */
+} Ancestors = { 0, 0, NULL };
+
+/*
+* FUNCTION PROTOTYPES
+*/
+static void parseStructureStmt (tokenInfo *const token);
+static void parseUnionStmt (tokenInfo *const token);
+static void parseDerivedTypeDef (tokenInfo *const token);
+static void parseFunctionSubprogram (tokenInfo *const token);
+static void parseSubroutineSubprogram (tokenInfo *const token);
+
+/*
+* FUNCTION DEFINITIONS
+*/
+
+/*
+ * Pushes a copy of `token` onto the Ancestors stack, growing the storage
+ * in steps of ten entries. The copy gets its own duplicate of the token
+ * string; all other fields (including the `secondary` pointer) are
+ * copied as they are.
+ */
+static void ancestorPush (tokenInfo *const token)
+{
+    enum { growBy = 10 };
+    tokenInfo *slot;
+
+    if (Ancestors.list == NULL)
+    {
+        Assert (Ancestors.max == 0);
+        Ancestors.count = 0;
+        Ancestors.max = growBy;
+        Ancestors.list = xMalloc (Ancestors.max, tokenInfo);
+    }
+    else if (Ancestors.count == Ancestors.max)
+    {
+        Ancestors.max += growBy;
+        Ancestors.list = xRealloc (Ancestors.list, Ancestors.max, tokenInfo);
+    }
+
+    slot = &Ancestors.list [Ancestors.count++];
+    *slot = *token;
+    slot->string = vStringNewCopy (token->string);
+}
+
+/*
+ * Removes the top entry of the Ancestors stack: frees the string it owns
+ * and resets the vacated slot to neutral values. The `secondary` pointer
+ * is only cleared, not freed.
+ */
+static void ancestorPop (void)
+{
+    tokenInfo *top;
+
+    Assert (Ancestors.count > 0);
+    top = &Ancestors.list [--Ancestors.count];
+    vStringDelete (top->string);
+
+    top->type = TOKEN_UNDEFINED;
+    top->keyword = KEYWORD_NONE;
+    top->secondary = NULL;
+    top->tag = TAG_UNDEFINED;
+    top->string = NULL;
+    top->lineNumber = 0L;
+}
+
+/*
+ * Returns the innermost ancestor that can serve as a scope: searching
+ * from the top of the stack downwards, the first identifier token whose
+ * tag is neither undefined nor TAG_INTERFACE. Returns NULL when there is
+ * none.
+ */
+static const tokenInfo* ancestorScope (void)
+{
+    unsigned int depth = Ancestors.count;
+
+    while (depth > 0)
+    {
+        tokenInfo *const candidate = Ancestors.list + depth - 1;
+        if (candidate->type == TOKEN_IDENTIFIER &&
+            candidate->tag != TAG_UNDEFINED &&
+            candidate->tag != TAG_INTERFACE)
+            return candidate;
+        --depth;
+    }
+    return NULL;
+}
+
+/* Returns the most recently pushed ancestor; the stack must not be empty. */
+static const tokenInfo* ancestorTop (void)
+{
+    Assert (Ancestors.count > 0);
+    return Ancestors.list + (Ancestors.count - 1);
+}
+
+#define ancestorCount() (Ancestors.count)
+
+/*
+ * Pops every remaining ancestor, releases the stack storage and returns
+ * Ancestors to its initial empty state.
+ */
+static void ancestorClear (void)
+{
+    for ( ; Ancestors.count > 0 ; )
+        ancestorPop ();
+
+    if (Ancestors.list != NULL)
+    {
+        eFree (Ancestors.list);
+        Ancestors.list = NULL;
+    }
+    Ancestors.count = 0;
+    Ancestors.max = 0;
+}
+
+/* Returns TRUE when any entry on the Ancestors stack is tagged TAG_INTERFACE. */
+static boolean insideInterface (void)
+{
+    unsigned int depth;
+
+    for (depth = 0 ; depth < Ancestors.count ; ++depth)
+    {
+        if (Ancestors.list [depth].tag == TAG_INTERFACE)
+            return TRUE;
+    }
+    return FALSE;
+}
+
+/* Registers every row of FortranKeywordTable for the Fortran language. */
+static void buildFortranKeywordHash (void)
+{
+    const keywordDesc *entry = FortranKeywordTable;
+    const keywordDesc *const end = FortranKeywordTable +
+        sizeof (FortranKeywordTable) / sizeof (FortranKeywordTable [0]);
+
+    for ( ; entry != end ; ++entry)
+        addKeyword (entry->name, Lang_fortran, (int) entry->id);
+}
+
+/*
+* Tag generation functions
+*/
+
+/*
+ * Allocates a token with an empty string, no keyword, tag or secondary
+ * token, stamped with the current source line number and input file
+ * position. Release it with deleteToken ().
+ */
+static tokenInfo *newToken (void)
+{
+    tokenInfo *const fresh = xMalloc (1, tokenInfo);
+
+    fresh->type = TOKEN_UNDEFINED;
+    fresh->keyword = KEYWORD_NONE;
+    fresh->tag = TAG_UNDEFINED;
+    fresh->string = vStringNew ();
+    fresh->secondary = NULL;
+
+    fresh->lineNumber = getSourceLineNumber ();
+    fresh->filePosition = getInputFilePosition ();
+
+    return fresh;
+}
+
+/*
+ * Returns a newly allocated copy of `token` with its own duplicate of
+ * the token string. Ownership of token->secondary moves to the copy:
+ * the source token's `secondary` pointer is cleared so the secondary
+ * token is not freed twice. Release the result with deleteToken ().
+ */
+static tokenInfo *newTokenFrom (tokenInfo *const token)
+{
+    tokenInfo *result = newToken ();
+
+    /*
+     * newToken () allocated an empty string for the result. Free it
+     * before the struct copy overwrites the only pointer to it,
+     * otherwise it is leaked on every call.
+     */
+    vStringDelete (result->string);
+
+    *result = *token;
+    result->string = vStringNewCopy (token->string);
+    token->secondary = NULL;
+    return result;
+}
+
+/*
+ * Frees a token, its string and (recursively) its secondary token.
+ * A NULL token is ignored.
+ */
+static void deleteToken (tokenInfo *const token)
+{
+    if (token == NULL)
+        return;
+
+    vStringDelete (token->string);
+    deleteToken (token->secondary);
+    token->secondary = NULL;
+    eFree (token);
+}
+
+/* Labels and local variables are the tag kinds treated as file scope. */
+static boolean isFileScope (const tagType type)
+{
+    switch (type)
+    {
+        case TAG_LABEL:
+        case TAG_LOCAL:
+            return TRUE;
+        default:
+            return FALSE;
+    }
+}
+
+/*
+ * Decides whether tags of the given kind are to be generated: the kind
+ * must be enabled in FortranKinds and, for file-scope kinds, the
+ * file-scope option must be on as well.
+ */
+static boolean includeTag (const tagType type)
+{
+    boolean enabled;
+
+    Assert (type != TAG_UNDEFINED);
+    enabled = FortranKinds [(int) type].enabled;
+    if (enabled && isFileScope (type))
+        return Option.include.fileScope;
+    return enabled;
+}
+
+/*
+ * Records `tag` on the token and, when that kind is enabled, emits a tag
+ * entry for it. The entry carries the token's line number and file
+ * position, the kind name and letter from FortranKinds and, when an
+ * enclosing scope exists on the Ancestors stack, that scope's kind and
+ * name. Entries inside an interface are emitted only when the interface
+ * kind itself is enabled.
+ */
+static void makeFortranTag (tokenInfo *const token, tagType tag)
+{
+    tagEntryInfo entry;
+    const tokenInfo *scope;
+
+    token->tag = tag;
+    if (! includeTag (token->tag))
+        return;
+
+    initTagEntry (&entry, vStringValue (token->string));
+
+    if (token->tag == TAG_COMMON_BLOCK)
+        entry.lineNumberEntry = (boolean) (Option.locate != EX_PATTERN);
+
+    entry.lineNumber = token->lineNumber;
+    entry.filePosition = token->filePosition;
+    entry.isFileScope = isFileScope (token->tag);
+    entry.kindName = FortranKinds [token->tag].name;
+    entry.kind = FortranKinds [token->tag].letter;
+    entry.truncateLine = (boolean) (token->tag != TAG_LABEL);
+
+    /* ancestorScope () yields NULL for an empty stack. */
+    scope = ancestorScope ();
+    if (scope != NULL)
+    {
+        entry.extensionFields.scope [0] = FortranKinds [scope->tag].name;
+        entry.extensionFields.scope [1] = vStringValue (scope->string);
+    }
+
+    if (! insideInterface () || includeTag (TAG_INTERFACE))
+        makeTagEntry (&entry);
+}
+
+/*
+* Parsing functions
+*/
+
+/*
+ * Consumes input up to and including the next newline. Returns the
+ * newline, or EOF when the input ends first.
+ */
+static int skipLine (void)
+{
+    int c = fileGetc ();
+
+    while (c != EOF && c != '\n')
+        c = fileGetc ();
+
+    return c;
+}
+
+/* Emits a TAG_LABEL tag whose name is the text of `label`. */
+static void makeLabelTag (vString *const label)
+{
+    tokenInfo *const labelToken = newToken ();
+
+    labelToken->type = TOKEN_LABEL;
+    vStringCopy (labelToken->string, label);
+    makeFortranTag (labelToken, TAG_LABEL);
+
+    deleteToken (labelToken);
+}
+
+/*
+ * Reads up to the first six "margin" characters of a fixed-form line and
+ * classifies the line. Digits found in the margin are collected as a
+ * statement label; if any were collected, a label tag is emitted before
+ * returning, whatever the resulting line type is.
+ */
+static lineType getLineType (void)
+{
+ vString *label = vStringNew ();
+ int column = 0;
+ lineType type = LTYPE_UNDETERMINED;
+
+ do /* read in first 6 "margin" characters */
+ {
+ int c = fileGetc ();
+
+ /* 3.2.1 Comment_Line. A comment line is any line that contains
+ * a C or an asterisk in column 1, or contains only blank characters
+ * in columns 1 through 72. A comment line that contains a C or
+ * an asterisk in column 1 may contain any character capable of
+ * representation in the processor in columns 2 through 72.
+ */
+ /* EXCEPTION! Some compilers permit '!' as a comment character here.
+ *
+ * Treat # and $ in column 1 as comment to permit preprocessor directives.
+ * Treat D and d in column 1 as comment for HP debug statements.
+ */
+ if (column == 0 && strchr ("*Cc!#$Dd", c) != NULL)
+ type = LTYPE_COMMENT;
+ else if (c == '\t') /* EXCEPTION! Some compilers permit a tab here */
+ {
+ column = 8;
+ type = LTYPE_INITIAL;
+ }
+ else if (column == 5)
+ {
+ /* 3.2.2 Initial_Line. An initial line is any line that is not
+ * a comment line and contains the character blank or the digit 0
+ * in column 6. Columns 1 through 5 may contain a statement label
+ * (3.4), or each of the columns 1 through 5 must contain the
+ * character blank.
+ */
+ if (c == ' ' || c == '0')
+ type = LTYPE_INITIAL;
+
+ /* 3.2.3 Continuation_Line. A continuation line is any line that
+ * contains any character of the FORTRAN character set other than
+ * the character blank or the digit 0 in column 6 and contains
+ * only blank characters in columns 1 through 5.
+ */
+ else if (vStringLength (label) == 0)
+ type = LTYPE_CONTINUATION;
+ else
+ type = LTYPE_INVALID;
+ }
+ /* The remaining tests apply to columns 1 through 5 only. */
+ else if (c == ' ')
+ ;
+ else if (c == EOF)
+ type = LTYPE_EOF;
+ else if (c == '\n')
+ type = LTYPE_SHORT;
+ else if (isdigit (c))
+ vStringPut (label, c);
+ else
+ type = LTYPE_INVALID;
+
+ ++column;
+ } while (column < 6 && type == LTYPE_UNDETERMINED);
+
+ Assert (type != LTYPE_UNDETERMINED);
+
+ if (vStringLength (label) > 0)
+ {
+ vStringTerminate (label);
+ makeLabelTag (label);
+ }
+ vStringDelete (label);
+ return type;
+}
+
+/*
+ * Returns the next character of a fixed-form source file, hiding the
+ * six-column margin, comment lines and continuation lines from the
+ * caller. `Column` tracks the position within the current line; 0 means
+ * the margin of the next line still has to be classified.
+ *
+ * Performs longjmp (Exception, ExceptionFixedFormat) when a line's margin
+ * is not valid fixed form, or when an '&' is immediately followed by a
+ * newline (taken as a sign of free source form).
+ */
+static int getFixedFormChar (void)
+{
+ boolean newline = FALSE;
+ lineType type;
+ int c = '\0';
+
+ if (Column > 0)
+ {
+#ifdef STRICT_FIXED_FORM
+ /* EXCEPTION! Some compilers permit more than 72 characters per line.
+ */
+ if (Column > 71)
+ c = skipLine ();
+ else
+#endif
+ {
+ c = fileGetc ();
+ ++Column;
+ }
+ if (c == '\n')
+ {
+ newline = TRUE; /* need to check for continuation line */
+ Column = 0;
+ }
+ else if (c == '!' && ! ParsingString)
+ {
+ c = skipLine ();
+ newline = TRUE; /* need to check for continuation line */
+ Column = 0;
+ }
+ else if (c == '&') /* check for free source form */
+ {
+ const int c2 = fileGetc ();
+ if (c2 == '\n')
+ longjmp (Exception, (int) ExceptionFixedFormat);
+ else
+ fileUngetc (c2);
+ }
+ }
+ /* Classify following lines until one yields a character to return. */
+ while (Column == 0)
+ {
+ type = getLineType ();
+ switch (type)
+ {
+ case LTYPE_UNDETERMINED:
+ case LTYPE_INVALID:
+ longjmp (Exception, (int) ExceptionFixedFormat);
+ break;
+
+ case LTYPE_SHORT: break;
+ case LTYPE_COMMENT: skipLine (); break;
+
+ case LTYPE_EOF:
+ Column = 6;
+ if (newline)
+ c = '\n';
+ else
+ c = EOF;
+ break;
+
+ case LTYPE_INITIAL:
+ /* A pending newline is delivered first: the previous statement
+ * ends because this line starts a new one.
+ */
+ if (newline)
+ {
+ c = '\n';
+ Column = 6;
+ break;
+ }
+ /* fall through to next case */
+ case LTYPE_CONTINUATION:
+ /* Skip blanks after the margin; if any were skipped, return a
+ * single space and push the first non-blank back.
+ */
+ Column = 5;
+ do
+ {
+ c = fileGetc ();
+ ++Column;
+ } while (isBlank (c));
+ if (c == '\n')
+ Column = 0;
+ else if (Column > 6)
+ {
+ fileUngetc (c);
+ c = ' ';
+ }
+ break;
+
+ default:
+ Assert ("Unexpected line type" == NULL);
+ }
+ }
+ return c;
+}
+
+/*
+ * Discards the rest of the current line and returns the first character
+ * of the following one, or EOF when the input ends first.
+ */
+static int skipToNextLine (void)
+{
+    if (skipLine () == EOF)
+        return EOF;
+    return fileGetc ();
+}
+
+/*
+ * Returns the next character of a free-form source file, joining
+ * continuation lines and skipping comment lines. The static `newline`
+ * flag remembers across calls whether the previously returned character
+ * was a newline, i.e. whether the next character starts a line.
+ */
+static int getFreeFormChar (void)
+{
+ static boolean newline = TRUE;
+ boolean advanceLine = FALSE;
+ int c = fileGetc ();
+
+ /* If the last nonblank, non-comment character of a FORTRAN 90
+ * free-format text line is an ampersand then the next non-comment
+ * line is a continuation line.
+ */
+ if (c == '&')
+ {
+ do
+ c = fileGetc ();
+ while (isspace (c) && c != '\n');
+ if (c == '\n')
+ {
+ newline = TRUE;
+ advanceLine = TRUE;
+ }
+ else if (c == '!')
+ advanceLine = TRUE;
+ else
+ {
+ /* The '&' was not at the end of the line: return it as is. */
+ fileUngetc (c);
+ c = '&';
+ }
+ }
+ else if (newline && (c == '!' || c == '#'))
+ advanceLine = TRUE;
+ /* Move past whitespace and whole comment lines ('!' anywhere, '#' only
+ * at the start of a line); an '&' found afterwards is consumed and the
+ * scan repeats.
+ */
+ while (advanceLine)
+ {
+ while (isspace (c))
+ c = fileGetc ();
+ if (c == '!' || (newline && c == '#'))
+ {
+ c = skipToNextLine ();
+ newline = TRUE;
+ continue;
+ }
+ if (c == '&')
+ c = fileGetc ();
+ else
+ advanceLine = FALSE;
+ }
+ newline = (boolean) (c == '\n');
+ return c;
+}
+
+/* Returns the next source character: the pushed-back character if one is
+ * pending in `Ungetc' (any value other than '\0'), otherwise the next
+ * character from the reader selected by `FreeSourceForm'.
+ */
+static int getChar (void)
+{
+    if (Ungetc != '\0')
+    {
+        const int pending = Ungetc;
+        Ungetc = '\0';
+        return pending;
+    }
+    return FreeSourceForm ? getFreeFormChar () : getFixedFormChar ();
+}
+
+/* Stores `c' in `Ungetc' so that the next getChar () returns it. Only one
+ * character can be pending; a later call overwrites an earlier one, and
+ * storing '\0' is the same as having nothing pending.
+ */
+static void ungetChar (const int c)
+{
+ Ungetc = c;
+}
+
+/* Reads an integer and returns it as a newly allocated string which the
+ * caller must delete.
+ *
+ * `c' is the first character: a digit is used as the first digit, a '-' is
+ * stored as a leading sign, and anything else (e.g. '\0') is discarded and
+ * reading starts with the next input character. A trailing '_' followed by
+ * alphabetic characters is consumed but not stored. The first character
+ * which does not belong to the integer is pushed back.
+ */
+static vString *parseInteger (int c)
+{
+    vString *const digits = vStringNew ();
+    const boolean negative = (boolean) (c == '-');
+
+    if (negative)
+        vStringPut (digits, c);
+    if (negative || ! isdigit (c))
+        c = getChar ();
+    for ( ; c != EOF && isdigit (c) ; c = getChar ())
+        vStringPut (digits, c);
+    vStringTerminate (digits);
+
+    if (c == '_')
+    {
+        /* skip the alphabetic suffix following the underscore */
+        do
+            c = getChar ();
+        while (c != EOF && isalpha (c));
+    }
+    ungetChar (c);
+
+    return digits;
+}
+
+/* Reads a numeric literal whose first character is `c' and returns it as a
+ * newly allocated string which the caller must delete.
+ *
+ * Accepted form: digits [ '.' digits ] [ ('e'|'E') [sign] digits ].
+ * The sign of the exponent is accepted here so that a literal such as
+ * "1.0e-5" is read as one token instead of being split at the '-'.
+ * The first character not belonging to the literal is pushed back.
+ */
+static vString *parseNumeric (int c)
+{
+    vString *string = vStringNew ();
+    vString *integer = parseInteger (c);
+    vStringCopy (string, integer);
+    vStringDelete (integer);
+
+    c = getChar ();
+    if (c == '.')
+    {
+        integer = parseInteger ('\0');
+        vStringPut (string, c);
+        vStringCat (string, integer);
+        vStringDelete (integer);
+        c = getChar ();
+    }
+    if (tolower (c) == 'e')
+    {
+        int sign;
+
+        vStringPut (string, c);
+        /* peek at the character after the exponent letter: keep an explicit
+         * sign, otherwise push it back for parseInteger () to read again
+         */
+        sign = getChar ();
+        if (sign == '+' || sign == '-')
+            vStringPut (string, sign);
+        else
+            ungetChar (sign);
+        integer = parseInteger ('\0');
+        vStringCat (string, integer);
+        vStringDelete (integer);
+    }
+    else
+        ungetChar (c);
+
+    vStringTerminate (string);
+
+    return string;
+}
+
+/* Reads a character string into `string', up to (not including) the closing
+ * `delimiter'. The opening delimiter has already been consumed.
+ *
+ * `ParsingString' is set while the characters are read and is cleared again
+ * before this function is left, including when it is left through longjmp,
+ * so that the flag cannot remain set for later input.
+ *
+ * An unterminated string is reported with verbose (); at EOF control leaves
+ * via ExceptionEOF, and at a line end in fixed form via ExceptionFixedFormat.
+ */
+static void parseString (vString *const string, const int delimiter)
+{
+    const unsigned long inputLineNumber = getInputLineNumber ();
+    int c;
+    ParsingString = TRUE;
+    c = getChar ();
+    while (c != delimiter && c != '\n' && c != EOF)
+    {
+        vStringPut (string, c);
+        c = getChar ();
+    }
+    /* no further characters are read below, so the flag can be cleared
+     * before any of the exits
+     */
+    ParsingString = FALSE;
+    if (c == '\n' || c == EOF)
+    {
+        verbose ("%s: unterminated character string at line %lu\n",
+                getInputFileName (), inputLineNumber);
+        if (c == EOF)
+            longjmp (Exception, (int) ExceptionEOF);
+        else if (! FreeSourceForm)
+            longjmp (Exception, (int) ExceptionFixedFormat);
+    }
+    vStringTerminate (string);
+}
+
+/* Stores `firstChar' in `string' and appends every following character for
+ * which isident () holds. The first character that is not part of the
+ * identifier is pushed back.
+ */
+static void parseIdentifier (vString *const string, const int firstChar)
+{
+    int c;
+
+    vStringPut (string, firstChar);
+    for (c = getChar () ; isident (c) ; c = getChar ())
+        vStringPut (string, c);
+
+    vStringTerminate (string);
+    ungetChar (c);      /* unget non-identifier character */
+}
+
+/* Looks for a statement label: after skipping blanks, up to five leading
+ * digits are collected and, if any were found, emitted as a TAG_LABEL tag.
+ * The first character following the blanks or digits is pushed back.
+ */
+static void checkForLabel (void)
+{
+    tokenInfo *label = NULL;
+    int digits = 0;
+    int c = getChar ();
+
+    while (isBlank (c))
+        c = getChar ();
+
+    while (isdigit (c) && digits < 5)
+    {
+        /* the token is created lazily, on the first digit */
+        if (label == NULL)
+        {
+            label = newToken ();
+            label->type = TOKEN_LABEL;
+        }
+        vStringPut (label->string, c);
+        c = getChar ();
+        ++digits;
+    }
+    if (label != NULL)
+    {
+        vStringTerminate (label->string);
+        makeFortranTag (label, TAG_LABEL);
+        deleteToken (label);
+    }
+    ungetChar (c);
+}
+
+/* Reads an identifier starting with `c' into `token' and classifies it as a
+ * keyword or a plain identifier.
+ *
+ * An identifier of the form "end<keyword>" written without a separating
+ * blank (e.g. "endsubroutine") is split: the token becomes KEYWORD_end and
+ * the trailing keyword is stored in a newly allocated secondary token. The
+ * "end" prefix is matched without regard to case, so "ENDSUBROUTINE" is
+ * handled like "endsubroutine".
+ * NOTE(review): this assumes analyzeToken () itself ignores case -- confirm.
+ */
+static void readIdentifier (tokenInfo *const token, const int c)
+{
+    parseIdentifier (token->string, c);
+    token->keyword = analyzeToken (token->string, Lang_fortran);
+    if (! isKeyword (token, KEYWORD_NONE))
+        token->type = TOKEN_KEYWORD;
+    else
+    {
+        const char *const text = vStringValue (token->string);
+
+        token->type = TOKEN_IDENTIFIER;
+        /* the && chain stops at the terminating NUL of a short string */
+        if (tolower ((unsigned char) text [0]) == 'e' &&
+            tolower ((unsigned char) text [1]) == 'n' &&
+            tolower ((unsigned char) text [2]) == 'd')
+        {
+            vString *const sub = vStringNewInit (text + 3);
+            const keywordId kw = analyzeToken (sub, Lang_fortran);
+            vStringDelete (sub);
+            if (kw != KEYWORD_NONE)
+            {
+                token->secondary = newToken ();
+                token->secondary->type = TOKEN_KEYWORD;
+                token->secondary->keyword = kw;
+                token->keyword = KEYWORD_end;
+            }
+        }
+    }
+}
+
+/* Reads the next token from the input into `token'.
+ *
+ * Any secondary token is deleted and all fields are reset first. Blanks and
+ * tabs between tokens are skipped. At end of input control leaves through
+ * longjmp with ExceptionEOF instead of returning.
+ */
+static void readToken (tokenInfo *const token)
+{
+ int c;
+
+ deleteToken (token->secondary);
+ token->type = TOKEN_UNDEFINED;
+ token->tag = TAG_UNDEFINED;
+ token->keyword = KEYWORD_NONE;
+ token->secondary = NULL;
+ vStringClear (token->string);
+
+getNextChar:
+ c = getChar ();
+
+ /* record the position at which the token starts */
+ token->lineNumber = getSourceLineNumber ();
+ token->filePosition = getInputFilePosition ();
+
+ switch (c)
+ {
+ case EOF: longjmp (Exception, (int) ExceptionEOF); break;
+ case ' ': goto getNextChar;
+ case '\t': goto getNextChar;
+ case ',': token->type = TOKEN_COMMA; break;
+ case '(': token->type = TOKEN_PAREN_OPEN; break;
+ case ')': token->type = TOKEN_PAREN_CLOSE; break;
+ case '%': token->type = TOKEN_PERCENT; break;
+
+ case '*':
+ case '/':
+ case '+':
+ case '-':
+ case '=':
+ case '<':
+ case '>':
+ {
+ /* collect a run of operator characters into one token; note that
+ * '-' may start such a run but, not being listed here, never
+ * continues one
+ */
+ const char *const operatorChars = "*/+=<>";
+ do {
+ vStringPut (token->string, c);
+ c = getChar ();
+ } while (strchr (operatorChars, c) != NULL);
+ ungetChar (c);
+ vStringTerminate (token->string);
+ token->type = TOKEN_OPERATOR;
+ break;
+ }
+
+ case '!':
+ /* comment: discard the rest of the line, then treat it like a
+ * line end
+ */
+ if (FreeSourceForm)
+ {
+ do
+ c = getChar ();
+ while (c != '\n' && c != EOF);
+ }
+ else
+ {
+ skipLine ();
+ Column = 0;
+ }
+ /* fall through to newline case */
+ case '\n':
+ token->type = TOKEN_STATEMENT_END;
+ /* in free form a statement label may start the following line */
+ if (FreeSourceForm)
+ checkForLabel ();
+ break;
+
+ case '.':
+ /* '.' followed by identifier characters and a closing '.' is an
+ * operator token; without the closing '.' the token is undefined
+ */
+ parseIdentifier (token->string, c);
+ c = getChar ();
+ if (c == '.')
+ {
+ vStringPut (token->string, c);
+ vStringTerminate (token->string);
+ token->type = TOKEN_OPERATOR;
+ }
+ else
+ {
+ ungetChar (c);
+ token->type = TOKEN_UNDEFINED;
+ }
+ break;
+
+ case '"':
+ case '\'':
+ parseString (token->string, c);
+ token->type = TOKEN_STRING;
+ break;
+
+ case ';':
+ token->type = TOKEN_STATEMENT_END;
+ break;
+
+ case ':':
+ /* only "::" is meaningful; a single ':' yields an undefined token */
+ c = getChar ();
+ if (c == ':')
+ token->type = TOKEN_DOUBLE_COLON;
+ else
+ {
+ ungetChar (c);
+ token->type = TOKEN_UNDEFINED;
+ }
+ break;
+
+ default:
+ if (isalpha (c))
+ readIdentifier (token, c);
+ else if (isdigit (c))
+ {
+ vString *numeric = parseNumeric (c);
+ vStringCat (token->string, numeric);
+ vStringDelete (numeric);
+ token->type = TOKEN_NUMERIC;
+ }
+ else
+ token->type = TOKEN_UNDEFINED;
+ break;
+ }
+}
+
+/* Ensures that `token' has a secondary token: if none is attached yet, a new
+ * token is allocated and the next input token is read into it.
+ */
+static void readSubToken (tokenInfo *const token)
+{
+    if (token->secondary != NULL)
+        return;
+    token->secondary = newToken ();
+    readToken (token->secondary);
+}
+
+/*
+* Scanning functions
+*/
+
+/* Reads tokens until `token' has the requested `type', or until the end of
+ * the statement is reached, either in `token' itself or in its secondary
+ * token. Nothing is read if one of these conditions already holds.
+ */
+static void skipToToken (tokenInfo *const token, tokenType type)
+{
+    for (;;)
+    {
+        if (isType (token, type) || isType (token, TOKEN_STATEMENT_END))
+            break;
+        if (token->secondary != NULL &&
+            isType (token->secondary, TOKEN_STATEMENT_END))
+            break;
+        readToken (token);
+    }
+}
+
+/* Skips to the next token of the given `type' and then reads the token
+ * following it. If the end of the statement is reached first, `token' is
+ * left there.
+ */
+static void skipPast (tokenInfo *const token, tokenType type)
+{
+    skipToToken (token, type);
+    if (isType (token, TOKEN_STATEMENT_END))
+        return;
+    readToken (token);
+}
+
+/* Advances `token' to the first token of the next non-empty statement:
+ * skips to the end of the current statement, reads one more token, and
+ * repeats for as long as that token is itself a statement end.
+ */
+static void skipToNextStatement (tokenInfo *const token)
+{
+    for (;;)
+    {
+        skipToToken (token, TOKEN_STATEMENT_END);
+        readToken (token);
+        if (! isType (token, TOKEN_STATEMENT_END))
+            break;
+    }
+}
+
+/* Skips a parenthesized group, including nested groups, which starts at the
+ * current token. On return `token' is the first token after the matching
+ * closing parenthesis. If the current token is not an opening parenthesis
+ * a single token is consumed; the end of the statement is never passed.
+ */
+static void skipOverParens (tokenInfo *const token)
+{
+    int depth = 0;
+
+    for (;;)
+    {
+        if (isType (token, TOKEN_STATEMENT_END))
+            return;
+        if (isType (token, TOKEN_PAREN_OPEN))
+            ++depth;
+        else if (isType (token, TOKEN_PAREN_CLOSE))
+            --depth;
+        readToken (token);
+        if (depth <= 0)
+            return;
+    }
+}
+
+/* Returns TRUE if the keyword of `token' is one of those which introduce a
+ * type specification.
+ */
+static boolean isTypeSpec (tokenInfo *const token)
+{
+    switch (token->keyword)
+    {
+        case KEYWORD_byte:
+        case KEYWORD_character:
+        case KEYWORD_complex:
+        case KEYWORD_double:
+        case KEYWORD_integer:
+        case KEYWORD_logical:
+        case KEYWORD_real:
+        case KEYWORD_record:
+        case KEYWORD_type:
+            return TRUE;
+
+        default:
+            break;
+    }
+    return FALSE;
+}
+
+/* Returns TRUE if the keyword of `token' is one of the prefixes accepted
+ * in front of FUNCTION or SUBROUTINE.
+ */
+static boolean isSubprogramPrefix (tokenInfo *const token)
+{
+    switch (token->keyword)
+    {
+        case KEYWORD_elemental:
+        case KEYWORD_pure:
+        case KEYWORD_recursive:
+        case KEYWORD_stdcall:
+            return TRUE;
+
+        default:
+            break;
+    }
+    return FALSE;
+}
+
+/* type-spec
+ * is INTEGER [kind-selector]
+ * or REAL [kind-selector] is ( etc. )
+ * or DOUBLE PRECISION
+ * or COMPLEX [kind-selector]
+ * or CHARACTER [kind-selector]
+ * or LOGICAL [kind-selector]
+ * or TYPE ( type-name )
+ *
+ * Note that INTEGER and REAL may be followed by "*N" where "N" is an integer
+ */
+static void parseTypeSpec (tokenInfo *const token)
+{
+ /* parse type-spec, leaving `token' at first token following type-spec */
+ Assert (isTypeSpec (token));
+ switch (token->keyword)
+ {
+ case KEYWORD_character:
+ /* skip char-selector */
+ readToken (token);
+ /* optional "*" followed by a parenthesized or numeric length */
+ if (isType (token, TOKEN_OPERATOR) &&
+ strcmp (vStringValue (token->string), "*") == 0)
+ readToken (token);
+ if (isType (token, TOKEN_PAREN_OPEN))
+ skipOverParens (token);
+ else if (isType (token, TOKEN_NUMERIC))
+ readToken (token);
+ break;
+
+
+ case KEYWORD_byte:
+ case KEYWORD_complex:
+ case KEYWORD_integer:
+ case KEYWORD_logical:
+ case KEYWORD_real:
+ readToken (token);
+ if (isType (token, TOKEN_PAREN_OPEN))
+ skipOverParens (token); /* skip kind-selector */
+ /* "*N" form: skip the "*" and the token following it */
+ if (isType (token, TOKEN_OPERATOR) &&
+ strcmp (vStringValue (token->string), "*") == 0)
+ {
+ readToken (token);
+ readToken (token);
+ }
+ break;
+
+ case KEYWORD_double:
+ /* DOUBLE must be followed by PRECISION or COMPLEX; anything else
+ * causes the rest of the statement to be skipped
+ */
+ readToken (token);
+ if (isKeyword (token, KEYWORD_complex) ||
+ isKeyword (token, KEYWORD_precision))
+ readToken (token);
+ else
+ skipToToken (token, TOKEN_STATEMENT_END);
+ break;
+
+ case KEYWORD_record:
+ readToken (token);
+ if (isType (token, TOKEN_OPERATOR) &&
+ strcmp (vStringValue (token->string), "/") == 0)
+ {
+ readToken (token); /* skip to structure name */
+ readToken (token); /* skip to '/' */
+ readToken (token); /* skip to variable name */
+ }
+ break;
+
+ case KEYWORD_type:
+ /* "TYPE (name)" refers to an existing type; any other form is
+ * handed to parseDerivedTypeDef () as a derived type definition
+ */
+ readToken (token);
+ if (isType (token, TOKEN_PAREN_OPEN))
+ skipOverParens (token); /* skip type-name */
+ else
+ parseDerivedTypeDef (token);
+ break;
+
+ default:
+ skipToToken (token, TOKEN_STATEMENT_END);
+ break;
+ }
+}
+
+/* If `token' is the given `keyword', skips to the next statement and
+ * returns TRUE; otherwise leaves `token' untouched and returns FALSE.
+ */
+static boolean skipStatementIfKeyword (tokenInfo *const token, keywordId keyword)
+{
+    if (! isKeyword (token, keyword))
+        return FALSE;
+    skipToNextStatement (token);
+    return TRUE;
+}
+
+/* parse a list of qualifying specifiers, leaving `token' at first token
+ * following list. Examples of such specifiers are:
+ * [[, attr-spec] ::]
+ * [[, component-attr-spec-list] ::]
+ *
+ * attr-spec
+ * is PARAMETER
+ * or access-spec (is PUBLIC or PRIVATE)
+ * or ALLOCATABLE
+ * or DIMENSION ( array-spec )
+ * or EXTERNAL
+ * or INTENT ( intent-spec )
+ * or INTRINSIC
+ * or OPTIONAL
+ * or POINTER
+ * or SAVE
+ * or TARGET
+ *
+ * component-attr-spec
+ * is POINTER
+ * or DIMENSION ( component-array-spec )
+ */
+static void parseQualifierSpecList (tokenInfo *const token)
+{
+    boolean more;
+
+    /* on entry `token' is the comma preceding the first specifier */
+    do
+    {
+        readToken (token);  /* should be an attr-spec */
+        switch (token->keyword)
+        {
+            /* specifiers which take a parenthesized argument */
+            case KEYWORD_dimension:
+            case KEYWORD_intent:
+                readToken (token);
+                skipOverParens (token);
+                break;
+
+            /* specifiers consisting of a single keyword */
+            case KEYWORD_allocatable:
+            case KEYWORD_external:
+            case KEYWORD_intrinsic:
+            case KEYWORD_optional:
+            case KEYWORD_parameter:
+            case KEYWORD_pointer:
+            case KEYWORD_private:
+            case KEYWORD_public:
+            case KEYWORD_save:
+            case KEYWORD_target:
+                readToken (token);
+                break;
+
+            /* unknown specifier: give up on this statement */
+            default:
+                skipToToken (token, TOKEN_STATEMENT_END);
+                break;
+        }
+        more = (boolean) isType (token, TOKEN_COMMA);
+    } while (more);
+
+    /* the list must be closed by "::" */
+    if (isType (token, TOKEN_DOUBLE_COLON))
+        return;
+    skipToToken (token, TOKEN_STATEMENT_END);
+}
+
+/* Chooses the tag kind for a variable declaration from the tag of the
+ * innermost enclosing scope: a component inside a derived type, a local
+ * inside a function or subroutine, and a plain variable otherwise
+ * (including when there is no enclosing scope).
+ */
+static tagType variableTagType (void)
+{
+    if (ancestorCount () > 0)
+    {
+        switch (ancestorTop ()->tag)
+        {
+            case TAG_DERIVED_TYPE:
+                return TAG_COMPONENT;
+
+            case TAG_FUNCTION:
+            case TAG_SUBROUTINE:
+                return TAG_LOCAL;
+
+            default:
+                break;
+        }
+    }
+    return TAG_VARIABLE;
+}
+
+/* Parses a single entity declaration, beginning at its name, and emits a
+ * tag for it. An array specification, a "* char-length", a "/.../"
+ * initialization and an "= ..." initialization are skipped.
+ */
+static void parseEntityDecl (tokenInfo *const token)
+{
+    Assert (isType (token, TOKEN_IDENTIFIER));
+    makeFortranTag (token, variableTagType ());
+    readToken (token);
+
+    /* optional parenthesized array specification */
+    if (isType (token, TOKEN_PAREN_OPEN))
+        skipOverParens (token);
+
+    /* optional "* char-length" */
+    if (isType (token, TOKEN_OPERATOR) &&
+        strcmp (vStringValue (token->string), "*") == 0)
+    {
+        readToken (token);
+        if (isType (token, TOKEN_PAREN_OPEN))
+            skipOverParens (token);
+        else
+            readToken (token);
+    }
+
+    /* without an operator there is no initialization to skip */
+    if (! isType (token, TOKEN_OPERATOR))
+        return;
+
+    if (strcmp (vStringValue (token->string), "/") == 0)
+    {
+        /* skip over initializations of structure field */
+        readToken (token);
+        skipPast (token, TOKEN_OPERATOR);
+    }
+    else if (strcmp (vStringValue (token->string), "=") == 0)
+    {
+        /* skip the initializer up to the next comma or statement end */
+        while (! isType (token, TOKEN_COMMA) &&
+               ! isType (token, TOKEN_STATEMENT_END))
+        {
+            readToken (token);
+            if (isType (token, TOKEN_PAREN_OPEN))
+                skipOverParens (token);
+        }
+    }
+    /* token left at either comma or statement end */
+}
+
+/* Parses a comma separated list of entity declarations. A list beginning
+ * with '%' causes the whole statement to be skipped. When the list ends at
+ * the end of the statement, `token' is advanced to the next statement.
+ */
+static void parseEntityDeclList (tokenInfo *const token)
+{
+    if (isType (token, TOKEN_PERCENT))
+    {
+        skipToNextStatement (token);
+        return;
+    }
+    for (;;)
+    {
+        /* compilers accept keywords as identifiers, except that FUNCTION
+         * and SUBROUTINE are never treated as entity names here
+         */
+        const boolean keywordAsName = (boolean) (
+            isType (token, TOKEN_KEYWORD) &&
+            ! isKeyword (token, KEYWORD_function) &&
+            ! isKeyword (token, KEYWORD_subroutine));
+
+        if (! isType (token, TOKEN_IDENTIFIER) && ! keywordAsName)
+            break;
+        if (keywordAsName)
+            token->type = TOKEN_IDENTIFIER;
+
+        parseEntityDecl (token);
+        if (isType (token, TOKEN_COMMA))
+            readToken (token);
+        else if (isType (token, TOKEN_STATEMENT_END))
+        {
+            skipToNextStatement (token);
+            break;
+        }
+    }
+}
+
+/* type-declaration-stmt is
+ * type-spec [[, attr-spec] ... ::] entity-decl-list
+ */
+static void parseTypeDeclarationStmt (tokenInfo *const token)
+{
+    Assert (isTypeSpec (token));
+    parseTypeSpec (token);
+
+    /* statement already finished by parseTypeSpec (), e.g. after a derived
+     * type definition: just move on to the next statement
+     */
+    if (isType (token, TOKEN_STATEMENT_END))
+    {
+        skipToNextStatement (token);
+        return;
+    }
+
+    if (isType (token, TOKEN_COMMA))
+        parseQualifierSpecList (token);
+    if (isType (token, TOKEN_DOUBLE_COLON))
+        readToken (token);
+    parseEntityDeclList (token);
+
+    if (isType (token, TOKEN_STATEMENT_END))
+        skipToNextStatement (token);
+}
+
+/* namelist-stmt is
+ * NAMELIST /namelist-group-name/ namelist-group-object-list
+ * [[,]/[namelist-group-name]/ namelist-block-object-list] ...
+ *
+ * namelist-group-object is
+ * variable-name
+ *
+ * common-stmt is
+ * COMMON [/[common-block-name]/] common-block-object-list
+ * [[,]/[common-block-name]/ common-block-object-list] ...
+ *
+ * common-block-object is
+ * variable-name [ ( explicit-shape-spec-list ) ]
+ */
+static void parseCommonNamelistStmt (tokenInfo *const token, tagType type)
+{
+    Assert (isKeyword (token, KEYWORD_common) ||
+            isKeyword (token, KEYWORD_namelist));
+    readToken (token);
+    do
+    {
+        const boolean atSlash = (boolean) (
+            isType (token, TOKEN_OPERATOR) &&
+            strcmp (vStringValue (token->string), "/") == 0);
+
+        if (atSlash)
+        {
+            /* "/name/": tag the block or group name with `type' */
+            readToken (token);
+            if (isType (token, TOKEN_IDENTIFIER))
+            {
+                makeFortranTag (token, type);
+                readToken (token);
+            }
+            skipPast (token, TOKEN_OPERATOR);
+        }
+
+        /* a member of the list is tagged as a local */
+        if (isType (token, TOKEN_IDENTIFIER))
+            makeFortranTag (token, TAG_LOCAL);
+        readToken (token);
+        if (isType (token, TOKEN_PAREN_OPEN))
+            skipOverParens (token);  /* skip explicit-shape-spec-list */
+        if (isType (token, TOKEN_COMMA))
+            readToken (token);
+    } while (! isType (token, TOKEN_STATEMENT_END));
+    skipToNextStatement (token);
+}
+
+/* Parses one field definition inside a STRUCTURE or MAP: a typed
+ * declaration, a nested STRUCTURE, or a UNION. Any other statement is
+ * skipped.
+ */
+static void parseFieldDefinition (tokenInfo *const token)
+{
+    if (isTypeSpec (token))
+    {
+        parseTypeDeclarationStmt (token);
+        return;
+    }
+    if (isKeyword (token, KEYWORD_structure))
+    {
+        parseStructureStmt (token);
+        return;
+    }
+    if (isKeyword (token, KEYWORD_union))
+    {
+        parseUnionStmt (token);
+        return;
+    }
+    skipToNextStatement (token);
+}
+
+/* Parses a MAP ... END MAP block: every statement up to the END keyword is
+ * treated as a field definition, then the END statement itself is skipped.
+ */
+static void parseMap (tokenInfo *const token)
+{
+ Assert (isKeyword (token, KEYWORD_map));
+ skipToNextStatement (token);
+ while (! isKeyword (token, KEYWORD_end))
+ parseFieldDefinition (token);
+ /* read the token following END unless a secondary token already exists */
+ readSubToken (token);
+ /* should be at KEYWORD_map token */
+ skipToNextStatement (token);
+}
+
+/* UNION
+ * MAP
+ * [field-definition] [field-definition] ...
+ * END MAP
+ * MAP
+ * [field-definition] [field-definition] ...
+ * END MAP
+ * [MAP
+ * [field-definition]
+ * [field-definition] ...
+ * END MAP] ...
+ * END UNION
+ * *
+ *
+ * Typed data declarations (variables or arrays) in structure declarations
+ * have the form of normal Fortran typed data declarations. Data items with
+ * different types can be freely intermixed within a structure declaration.
+ *
+ * Unnamed fields can be declared in a structure by specifying the pseudo
+ * name %FILL in place of an actual field name. You can use this mechanism to
+ * generate empty space in a record for purposes such as alignment.
+ *
+ * All mapped field declarations that are made within a UNION declaration
+ * share a common location within the containing structure. When initializing
+ * the fields within a UNION, the final initialization value assigned
+ * overlays any value previously assigned to a field definition that shares
+ * that field.
+ */
+static void parseUnionStmt (tokenInfo *const token)
+{
+ Assert (isKeyword (token, KEYWORD_union));
+ skipToNextStatement (token);
+ /* a union body consists of consecutive MAP blocks */
+ while (isKeyword (token, KEYWORD_map))
+ parseMap (token);
+ /* should be at KEYWORD_end token */
+ readSubToken (token);
+ /* secondary token should be KEYWORD_union token (from "END UNION") */
+ skipToNextStatement (token);
+}
+
+/* STRUCTURE [/structure-name/] [field-names]
+ * [field-definition]
+ * [field-definition] ...
+ * END STRUCTURE
+ *
+ * structure-name
+ * identifies the structure in a subsequent RECORD statement.
+ * Substructures can be established within a structure by means of either
+ * a nested STRUCTURE declaration or a RECORD statement.
+ *
+ * field-names
+ * (for substructure declarations only) one or more names having the
+ * structure of the substructure being defined.
+ *
+ * field-definition
+ * can be one or more of the following:
+ *
+ * Typed data declarations, which can optionally include one or more
+ * data initialization values.
+ *
+ * Substructure declarations (defined by either RECORD statements or
+ * subsequent STRUCTURE statements).
+ *
+ * UNION declarations, which are mapped fields defined by a block of
+ * statements. The syntax of a UNION declaration is described below.
+ *
+ * PARAMETER statements, which do not affect the form of the
+ * structure.
+ */
+static void parseStructureStmt (tokenInfo *const token)
+{
+ /* token describing this structure; pushed as the enclosing scope while
+ * the body is parsed and deleted before returning
+ */
+ tokenInfo *name;
+ Assert (isKeyword (token, KEYWORD_structure));
+ readToken (token);
+ if (isType (token, TOKEN_OPERATOR) &&
+ strcmp (vStringValue (token->string), "/") == 0)
+ { /* read structure name */
+ readToken (token);
+ if (isType (token, TOKEN_IDENTIFIER))
+ makeFortranTag (token, TAG_DERIVED_TYPE);
+ /* the token is copied even when it is not an identifier */
+ name = newTokenFrom (token);
+ skipPast (token, TOKEN_OPERATOR);
+ }
+ else
+ { /* fake out anonymous structure */
+ name = newToken ();
+ name->type = TOKEN_IDENTIFIER;
+ name->tag = TAG_DERIVED_TYPE;
+ vStringCopyS (name->string, "anonymous");
+ }
+ while (isType (token, TOKEN_IDENTIFIER))
+ { /* read field names */
+ makeFortranTag (token, TAG_COMPONENT);
+ readToken (token);
+ if (isType (token, TOKEN_COMMA))
+ readToken (token);
+ }
+ skipToNextStatement (token);
+ /* parse the body with this structure as the current scope */
+ ancestorPush (name);
+ while (! isKeyword (token, KEYWORD_end))
+ parseFieldDefinition (token);
+ readSubToken (token);
+ /* secondary token should be KEYWORD_structure token */
+ skipToNextStatement (token);
+ ancestorPop ();
+ deleteToken (name);
+}
+
+/* specification-stmt
+ * is access-stmt (is access-spec [[::] access-id-list)
+ * or allocatable-stmt (is ALLOCATABLE [::] array-name etc.)
+ * or common-stmt (is COMMON [ / [common-block-name] /] etc.)
+ * or data-stmt (is DATA data-stmt-list [[,] data-stmt-set] ...)
+ * or dimension-stmt (is DIMENSION [::] array-name etc.)
+ * or equivalence-stmt (is EQUIVALENCE equivalence-set-list)
+ * or external-stmt (is EXTERNAL etc.)
+ * or intent-stmt (is INTENT ( intent-spec ) [::] etc.)
+ * or intrinsic-stmt (is INTRINSIC etc.)
+ * or namelist-stmt (is NAMELIST / namelist-group-name / etc.)
+ * or optional-stmt (is OPTIONAL [::] etc.)
+ * or pointer-stmt (is POINTER [::] object-name etc.)
+ * or save-stmt (is SAVE etc.)
+ * or target-stmt (is TARGET [::] object-name etc.)
+ *
+ * access-spec is PUBLIC or PRIVATE
+ */
+static boolean parseSpecificationStmt (tokenInfo *const token)
+{
+    switch (token->keyword)
+    {
+        /* statements from which tags are generated */
+        case KEYWORD_common:
+            parseCommonNamelistStmt (token, TAG_COMMON_BLOCK);
+            return TRUE;
+
+        case KEYWORD_namelist:
+            parseCommonNamelistStmt (token, TAG_NAMELIST);
+            return TRUE;
+
+        case KEYWORD_structure:
+            parseStructureStmt (token);
+            return TRUE;
+
+        /* statements which are recognized but merely skipped */
+        case KEYWORD_allocatable:
+        case KEYWORD_data:
+        case KEYWORD_dimension:
+        case KEYWORD_equivalence:
+        case KEYWORD_external:
+        case KEYWORD_intent:
+        case KEYWORD_intrinsic:
+        case KEYWORD_optional:
+        case KEYWORD_pointer:
+        case KEYWORD_private:
+        case KEYWORD_public:
+        case KEYWORD_save:
+        case KEYWORD_target:
+            skipToNextStatement (token);
+            return TRUE;
+
+        default:
+            break;
+    }
+    /* not a specification statement; `token' has not been advanced */
+    return FALSE;
+}
+
+/* component-def-stmt is
+ * type-spec [[, component-attr-spec-list] ::] component-decl-list
+ *
+ * component-decl is
+ * component-name [ ( component-array-spec ) ] [ * char-length ]
+ */
+static void parseComponentDefStmt (tokenInfo *const token)
+{
+ Assert (isTypeSpec (token));
+ parseTypeSpec (token);
+ /* optional ", attribute-list" */
+ if (isType (token, TOKEN_COMMA))
+ parseQualifierSpecList (token);
+ /* optional "::" in front of the component names */
+ if (isType (token, TOKEN_DOUBLE_COLON))
+ readToken (token);
+ parseEntityDeclList (token);
+}
+
+/* derived-type-def is
+ * derived-type-stmt is (TYPE [[, access-spec] ::] type-name
+ * [private-sequence-stmt] ... (is PRIVATE or SEQUENCE)
+ * component-def-stmt
+ * [component-def-stmt] ...
+ * end-type-stmt
+ */
+static void parseDerivedTypeDef (tokenInfo *const token)
+{
+ /* on entry `token' is the token following the TYPE keyword */
+ if (isType (token, TOKEN_COMMA))
+ parseQualifierSpecList (token);
+ if (isType (token, TOKEN_DOUBLE_COLON))
+ readToken (token);
+ if (isType (token, TOKEN_IDENTIFIER))
+ makeFortranTag (token, TAG_DERIVED_TYPE);
+ /* the current token is pushed as the enclosing scope for the body */
+ ancestorPush (token);
+ skipToNextStatement (token);
+ if (isKeyword (token, KEYWORD_private) ||
+ isKeyword (token, KEYWORD_sequence))
+ {
+ skipToNextStatement (token);
+ }
+ /* component definitions up to the END statement; other statements are
+ * skipped
+ */
+ while (! isKeyword (token, KEYWORD_end))
+ {
+ if (isTypeSpec (token))
+ parseComponentDefStmt (token);
+ else
+ skipToNextStatement (token);
+ }
+ readSubToken (token);
+ /* secondary token should be KEYWORD_type token */
+ /* `token' is left at the end of the END statement rather than being
+ * advanced to the next statement
+ */
+ skipToToken (token, TOKEN_STATEMENT_END);
+ ancestorPop ();
+}
+
+/* interface-block
+ * interface-stmt (is INTERFACE [generic-spec])
+ * [interface-body]
+ * [module-procedure-stmt] ...
+ * end-interface-stmt (is END INTERFACE)
+ *
+ * generic-spec
+ * is generic-name
+ * or OPERATOR ( defined-operator )
+ * or ASSIGNMENT ( = )
+ *
+ * interface-body
+ * is function-stmt
+ * [specification-part]
+ * end-function-stmt
+ * or subroutine-stmt
+ * [specification-part]
+ * end-subroutine-stmt
+ *
+ * module-procedure-stmt is
+ * MODULE PROCEDURE procedure-name-list
+ */
+static void parseInterfaceBlock (tokenInfo *const token)
+{
+ /* token describing the interface; pushed as the enclosing scope while
+ * the body is parsed and deleted before returning
+ */
+ tokenInfo *name = NULL;
+ Assert (isKeyword (token, KEYWORD_interface));
+ readToken (token);
+ if (isType (token, TOKEN_IDENTIFIER))
+ {
+ /* INTERFACE generic-name */
+ makeFortranTag (token, TAG_INTERFACE);
+ name = newTokenFrom (token);
+ }
+ else if (isKeyword (token, KEYWORD_assignment) ||
+ isKeyword (token, KEYWORD_operator))
+ {
+ /* INTERFACE OPERATOR (op) / ASSIGNMENT (=): the operator is tagged */
+ readToken (token);
+ if (isType (token, TOKEN_PAREN_OPEN))
+ readToken (token);
+ if (isType (token, TOKEN_OPERATOR))
+ {
+ makeFortranTag (token, TAG_INTERFACE);
+ name = newTokenFrom (token);
+ }
+ }
+ if (name == NULL)
+ {
+ /* unnamed interface: use a placeholder token as the scope */
+ name = newToken ();
+ name->type = TOKEN_IDENTIFIER;
+ name->tag = TAG_INTERFACE;
+ }
+ ancestorPush (name);
+ while (! isKeyword (token, KEYWORD_end))
+ {
+ switch (token->keyword)
+ {
+ case KEYWORD_function: parseFunctionSubprogram (token); break;
+ case KEYWORD_subroutine: parseSubroutineSubprogram (token); break;
+
+ default:
+ /* step over a prefix or type-spec in front of FUNCTION or
+ * SUBROUTINE; skip any other statement
+ */
+ if (isSubprogramPrefix (token))
+ readToken (token);
+ else if (isTypeSpec (token))
+ parseTypeSpec (token);
+ else
+ skipToNextStatement (token);
+ break;
+ }
+ }
+ readSubToken (token);
+ /* secondary token should be KEYWORD_interface token */
+ skipToNextStatement (token);
+ ancestorPop ();
+ deleteToken (name);
+}
+
+/* entry-stmt is
+ * ENTRY entry-name [ ( dummy-arg-list ) ]
+ */
+static void parseEntryStmt (tokenInfo *const token)
+{
+ Assert (isKeyword (token, KEYWORD_entry));
+ readToken (token);
+ /* tag the entry name, then ignore the rest of the statement */
+ if (isType (token, TOKEN_IDENTIFIER))
+ makeFortranTag (token, TAG_ENTRY_POINT);
+ skipToNextStatement (token);
+}
+
+/* stmt-function-stmt is
+ * function-name ([dummy-arg-name-list]) = scalar-expr
+ */
+static boolean parseStmtFunctionStmt (tokenInfo *const token)
+{
+    boolean isStmtFunction = FALSE;
+
+    Assert (isType (token, TOKEN_IDENTIFIER));
+#if 0  /* cannot reliably parse this yet */
+    makeFortranTag (token, TAG_FUNCTION);
+#endif
+    readToken (token);
+    if (isType (token, TOKEN_PAREN_OPEN))
+    {
+        /* "name (...) =" is taken to be a statement function */
+        skipOverParens (token);
+        if (isType (token, TOKEN_OPERATOR) &&
+            strcmp (vStringValue (token->string), "=") == 0)
+            isStmtFunction = TRUE;
+    }
+    /* the statement is consumed whether or not it matched */
+    skipToNextStatement (token);
+    return isStmtFunction;
+}
+
+/* Returns TRUE if the keyword of `token' starts a declaration which the
+ * caller skips without generating tags.
+ */
+static boolean isIgnoredDeclaration (tokenInfo *const token)
+{
+    switch (token->keyword)
+    {
+        case KEYWORD_cexternal:
+        case KEYWORD_cglobal:
+        case KEYWORD_dllexport:
+        case KEYWORD_dllimport:
+        case KEYWORD_external:
+        case KEYWORD_format:
+        case KEYWORD_include:
+        case KEYWORD_inline:
+        case KEYWORD_parameter:
+        case KEYWORD_pascal:
+        case KEYWORD_pexternal:
+        case KEYWORD_pglobal:
+        case KEYWORD_static:
+        case KEYWORD_value:
+        case KEYWORD_virtual:
+        case KEYWORD_volatile:
+            return TRUE;
+
+        default:
+            break;
+    }
+    return FALSE;
+}
+
+/* declaration-construct
+ * [derived-type-def]
+ * [interface-block]
+ * [type-declaration-stmt]
+ * [specification-stmt]
+ * [parameter-stmt] (is PARAMETER ( named-constant-def-list )
+ * [format-stmt] (is FORMAT format-specification)
+ * [entry-stmt]
+ * [stmt-function-stmt]
+ */
+static boolean parseDeclarationConstruct (tokenInfo *const token)
+{
+    switch (token->keyword)
+    {
+        case KEYWORD_entry:
+            parseEntryStmt (token);
+            return TRUE;
+
+        case KEYWORD_interface:
+            parseInterfaceBlock (token);
+            return TRUE;
+
+        case KEYWORD_stdcall:
+            readToken (token);
+            return TRUE;
+
+        case KEYWORD_automatic:
+            /* AUTOMATIC followed by a type declaration */
+            readToken (token);
+            if (isTypeSpec (token))
+                parseTypeDeclarationStmt (token);
+            else
+                skipToNextStatement (token);
+            return TRUE;
+
+        default:
+            break;
+    }
+
+    if (isIgnoredDeclaration (token))
+    {
+        skipToNextStatement (token);
+        return TRUE;
+    }
+    /* derived type definitions are handled here as well, through
+     * parseTypeDeclarationStmt ()
+     */
+    if (isTypeSpec (token))
+    {
+        parseTypeDeclarationStmt (token);
+        return TRUE;
+    }
+    if (isType (token, TOKEN_IDENTIFIER))
+        return parseStmtFunctionStmt (token);
+    return parseSpecificationStmt (token);
+}
+
+/* implicit-part-stmt
+ * is [implicit-stmt] (is IMPLICIT etc.)
+ * or [parameter-stmt] (is PARAMETER etc.)
+ * or [format-stmt] (is FORMAT etc.)
+ * or [entry-stmt] (is ENTRY entry-name etc.)
+ */
+static boolean parseImplicitPartStmt (tokenInfo *const token)
+{
+    switch (token->keyword)
+    {
+        case KEYWORD_entry:
+            parseEntryStmt (token);
+            return TRUE;
+
+        /* recognized, but nothing is tagged */
+        case KEYWORD_format:
+        case KEYWORD_implicit:
+        case KEYWORD_include:
+        case KEYWORD_parameter:
+            skipToNextStatement (token);
+            return TRUE;
+
+        default:
+            break;
+    }
+    /* not an implicit-part statement; `token' has not been advanced */
+    return FALSE;
+}
+
+/* specification-part is
+ * [use-stmt] ... (is USE module-name etc.)
+ * [implicit-part] (is [implicit-part-stmt] ... [implicit-stmt])
+ * [declaration-construct] ...
+ */
+static boolean parseSpecificationPart (tokenInfo *const token)
+{
+    boolean parsedAny = FALSE;
+
+    /* the three groups are consumed in this fixed order; each loop ends at
+     * the first statement which its parser does not accept
+     */
+    while (skipStatementIfKeyword (token, KEYWORD_use))
+    {
+        parsedAny = TRUE;
+    }
+    while (parseImplicitPartStmt (token))
+    {
+        parsedAny = TRUE;
+    }
+    while (parseDeclarationConstruct (token))
+    {
+        parsedAny = TRUE;
+    }
+    return parsedAny;
+}
+
+/* block-data is
+ * block-data-stmt (is BLOCK DATA [block-data-name]
+ * [specification-part]
+ * end-block-data-stmt (is END [BLOCK DATA [block-data-name]])
+ */
+static void parseBlockData (tokenInfo *const token)
+{
+ Assert (isKeyword (token, KEYWORD_block));
+ readToken (token);
+ if (isKeyword (token, KEYWORD_data))
+ {
+ /* the block data name is optional */
+ readToken (token);
+ if (isType (token, TOKEN_IDENTIFIER))
+ makeFortranTag (token, TAG_BLOCK_DATA);
+ }
+ /* the current token is pushed as the enclosing scope for the body */
+ ancestorPush (token);
+ skipToNextStatement (token);
+ parseSpecificationPart (token);
+ /* skip anything left over until the END statement */
+ while (! isKeyword (token, KEYWORD_end))
+ skipToNextStatement (token);
+ readSubToken (token);
+ /* secondary token should be KEYWORD_NONE or KEYWORD_block token */
+ skipToNextStatement (token);
+ ancestorPop ();
+}
+
+/* internal-subprogram-part is
+ * contains-stmt (is CONTAINS)
+ * internal-subprogram
+ * [internal-subprogram] ...
+ *
+ * internal-subprogram
+ * is function-subprogram
+ * or subroutine-subprogram
+ */
+static void parseInternalSubprogramPart (tokenInfo *const token)
+{
+    if (isKeyword (token, KEYWORD_contains))
+        skipToNextStatement (token);
+
+    /* parse subprograms until the END keyword of the enclosing unit; on
+     * return `token' is that END keyword
+     */
+    for (;;)
+    {
+        switch (token->keyword)
+        {
+            case KEYWORD_end:
+                return;
+
+            case KEYWORD_function:
+                parseFunctionSubprogram (token);
+                break;
+
+            case KEYWORD_subroutine:
+                parseSubroutineSubprogram (token);
+                break;
+
+            default:
+                /* a type-spec in front of FUNCTION is parsed as such; a
+                 * prefix keyword or anything else is stepped over
+                 */
+                if (! isSubprogramPrefix (token) && isTypeSpec (token))
+                    parseTypeSpec (token);
+                else
+                    readToken (token);
+                break;
+        }
+    }
+}
+
+/* module is
+ * module-stmt (is MODULE module-name)
+ * [specification-part]
+ * [module-subprogram-part]
+ * end-module-stmt (is END [MODULE [module-name]])
+ *
+ * module-subprogram-part
+ * contains-stmt (is CONTAINS)
+ * module-subprogram
+ * [module-subprogram] ...
+ *
+ * module-subprogram
+ * is function-subprogram
+ * or subroutine-subprogram
+ */
+static void parseModule (tokenInfo *const token)
+{
+ Assert (isKeyword (token, KEYWORD_module));
+ readToken (token);
+ if (isType (token, TOKEN_IDENTIFIER))
+ makeFortranTag (token, TAG_MODULE);
+ /* the current token is pushed as the enclosing scope for the body */
+ ancestorPush (token);
+ skipToNextStatement (token);
+ parseSpecificationPart (token);
+ if (isKeyword (token, KEYWORD_contains))
+ parseInternalSubprogramPart (token);
+ /* skip anything left over until the END statement */
+ while (! isKeyword (token, KEYWORD_end))
+ skipToNextStatement (token);
+ readSubToken (token);
+ /* secondary token should be KEYWORD_NONE or KEYWORD_module token */
+ skipToNextStatement (token);
+ ancestorPop ();
+}
+
+/* execution-part
+ * executable-construct
+ *
+ * executable-construct is
+ * execution-part-construct [execution-part-construct]
+ *
+ * execution-part-construct
+ * is executable-construct
+ * or format-stmt
+ * or data-stmt
+ * or entry-stmt
+ */
+static boolean parseExecutionPart (tokenInfo *const token)
+{
+    boolean parsedAny = FALSE;
+
+    for (;;)
+    {
+        switch (token->keyword)
+        {
+            /* these start something which the caller has to handle */
+            case KEYWORD_contains:
+            case KEYWORD_function:
+            case KEYWORD_subroutine:
+                return parsedAny;
+
+            case KEYWORD_entry:
+                parseEntryStmt (token);
+                parsedAny = TRUE;
+                break;
+
+            case KEYWORD_end:
+                /* END DO, END IF, END SELECT and END WHERE close a
+                 * construct inside the execution part; any other END
+                 * finishes the execution part itself
+                 */
+                readSubToken (token);
+                if (! (isSecondaryKeyword (token, KEYWORD_do) ||
+                       isSecondaryKeyword (token, KEYWORD_if) ||
+                       isSecondaryKeyword (token, KEYWORD_select) ||
+                       isSecondaryKeyword (token, KEYWORD_where)))
+                    return parsedAny;
+                skipToNextStatement (token);
+                parsedAny = TRUE;
+                break;
+
+            default:
+                /* step over a subprogram prefix, skip any other statement */
+                if (isSubprogramPrefix (token))
+                    readToken (token);
+                else
+                    skipToNextStatement (token);
+                parsedAny = TRUE;
+                break;
+        }
+    }
+}
+
+/* Parses a program, function or subroutine from its introducing keyword up
+ * to and including its END statement, emitting a tag of kind `tag' for the
+ * name if one is present.
+ */
+static void parseSubprogram (tokenInfo *const token, const tagType tag)
+{
+ Assert (isKeyword (token, KEYWORD_program) ||
+ isKeyword (token, KEYWORD_function) ||
+ isKeyword (token, KEYWORD_subroutine));
+ readToken (token);
+ if (isType (token, TOKEN_IDENTIFIER))
+ makeFortranTag (token, tag);
+ /* the current token is pushed as the enclosing scope for the body */
+ ancestorPush (token);
+ skipToNextStatement (token);
+ parseSpecificationPart (token);
+ parseExecutionPart (token);
+ if (isKeyword (token, KEYWORD_contains))
+ parseInternalSubprogramPart (token);
+ /* should be at KEYWORD_end token */
+ readSubToken (token);
+ /* secondary token should be one of KEYWORD_NONE, KEYWORD_program,
+ * KEYWORD_function, KEYWORD_subroutine
+ */
+ skipToNextStatement (token);
+ ancestorPop ();
+}
+
+
+/* function-subprogram is
+ * function-stmt (is [prefix] FUNCTION function-name etc.)
+ * [specification-part]
+ * [execution-part]
+ * [internal-subprogram-part]
+ * end-function-stmt (is END [FUNCTION [function-name]])
+ *
+ * prefix
+ * is type-spec [RECURSIVE]
+ * or [RECURSIVE] type-spec
+ */
+static void parseFunctionSubprogram (tokenInfo *const token)
+{
+ /* shared subprogram parser; the name is tagged as a function */
+ parseSubprogram (token, TAG_FUNCTION);
+}
+
+/* subroutine-subprogram is
+ * subroutine-stmt (is [RECURSIVE] SUBROUTINE subroutine-name etc.)
+ * [specification-part]
+ * [execution-part]
+ * [internal-subprogram-part]
+ * end-subroutine-stmt (is END [SUBROUTINE [function-name]])
+ */
+static void parseSubroutineSubprogram (tokenInfo *const token)
+{
+ /* shared subprogram parser; the name is tagged as a subroutine */
+ parseSubprogram (token, TAG_SUBROUTINE);
+}
+
+/* main-program is
+ * [program-stmt] (is PROGRAM program-name)
+ * [specification-part]
+ * [execution-part]
+ * [internal-subprogram-part ]
+ * end-program-stmt
+ */
+static void parseMainProgram (tokenInfo *const token)
+{
+ /* shared subprogram parser; the name is tagged as a program */
+ parseSubprogram (token, TAG_PROGRAM);
+}
+
+/* program-unit
+ * is main-program
+ * or external-subprogram (is function-subprogram or subroutine-subprogram)
+ * or module
+ * or block-data
+ */
+static void parseProgramUnit (tokenInfo *const token)
+{
+    readToken (token);
+
+    /* This loop has no exit of its own: parsing ends when reading a token
+     * leaves through longjmp (e.g. at the end of the input).
+     */
+    for (;;)
+    {
+        if (isType (token, TOKEN_STATEMENT_END))
+        {
+            readToken (token);
+            continue;
+        }
+
+        switch (token->keyword)
+        {
+            case KEYWORD_block:
+                parseBlockData (token);
+                break;
+
+            case KEYWORD_end:
+                skipToNextStatement (token);
+                break;
+
+            case KEYWORD_function:
+                parseFunctionSubprogram (token);
+                break;
+
+            case KEYWORD_module:
+                parseModule (token);
+                break;
+
+            case KEYWORD_program:
+                parseMainProgram (token);
+                break;
+
+            case KEYWORD_subroutine:
+                parseSubroutineSubprogram (token);
+                break;
+
+            default:
+                if (isSubprogramPrefix (token))
+                    readToken (token);
+                else
+                {
+                    /* statements outside of any recognized unit; if neither
+                     * parser consumed anything, step over one token so that
+                     * progress is guaranteed
+                     */
+                    const boolean hadSpec = parseSpecificationPart (token);
+                    const boolean hadExec = parseExecutionPart (token);
+                    if (! hadSpec && ! hadExec)
+                        readToken (token);
+                }
+                break;
+        }
+    }
+}
+
+/* Parser entry point: scans the current input file for tags.
+ *
+ * `passCount' selects the source form: the first pass assumes fixed form,
+ * a later pass free form. Returns TRUE to request another pass when the
+ * fixed-form attempt was abandoned with ExceptionFixedFormat, and FALSE
+ * otherwise.
+ *
+ * Parsing is ended by a longjmp back to the setjmp below. Since such an
+ * exit can happen while a character string is being read, `ParsingString'
+ * is reset here so that a value left over from a previous pass or file
+ * cannot affect this one.
+ */
+static boolean findFortranTags (const unsigned int passCount)
+{
+    tokenInfo *token;
+    exception_t exception;
+    boolean retry;
+
+    Assert (passCount < 3);
+    Parent = newToken ();
+    token = newToken ();
+    FreeSourceForm = (boolean) (passCount > 1);
+    Column = 0;
+    ParsingString = FALSE;
+    exception = (exception_t) setjmp (Exception);
+    if (exception == ExceptionEOF)
+        retry = FALSE;
+    else if (exception == ExceptionFixedFormat && ! FreeSourceForm)
+    {
+        verbose ("%s: not fixed source form; retry as free source form\n",
+                getInputFileName ());
+        retry = TRUE;
+    }
+    else
+    {
+        /* first return of setjmp (or an exception not handled above) */
+        parseProgramUnit (token);
+        retry = FALSE;
+    }
+    ancestorClear ();
+    deleteToken (token);
+    deleteToken (Parent);
+
+    return retry;
+}
+
+/* Parser initialization callback: records the language id under which this
+ * parser was registered and builds the keyword table.
+ */
+static void initialize (const langType language)
+{
+ Lang_fortran = language;
+ buildFortranKeywordHash ();
+}
+
+/* Creates and returns the parser definition for Fortran: its kinds, the
+ * file extensions it handles, and its parse and initialize callbacks.
+ */
+extern parserDefinition* FortranParser (void)
+{
+ static const char *const extensions [] = {
+ "f", "for", "ftn", "f77", "f90", "f95",
+#ifndef CASE_INSENSITIVE_FILENAMES
+ /* upper-case variants are listed only when file names are case sensitive */
+ "F", "FOR", "FTN", "F77", "F90", "F95",
+#endif
+ NULL
+ };
+ parserDefinition* def = parserNew ("Fortran");
+ def->kinds = FortranKinds;
+ def->kindCount = KIND_COUNT (FortranKinds);
+ def->extensions = extensions;
+ /* findFortranTags () returns TRUE to request another pass over the file */
+ def->parser2 = findFortranTags;
+ def->initialize = initialize;
+ return def;
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/general.h b/general.h
new file mode 100644
index 0000000..2d1d629
--- /dev/null
+++ b/general.h
@@ -0,0 +1,127 @@
+/*
+* $Id: general.h 508 2007-05-03 03:20:59Z dhiebert $
+*
+* Copyright (c) 1998-2003, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* Provides the general (non-ctags-specific) environment assumed by all.
+*/
+#ifndef _GENERAL_H
+#define _GENERAL_H
+
+/*
+* INCLUDE FILES
+*/
+#if defined (HAVE_CONFIG_H)
+# include <config.h>
+#elif defined (AMIGA)
+# include "e_amiga.h"
+#elif defined (DJGPP)
+# include "e_djgpp.h"
+#elif defined (macintosh)
+# include "e_mac.h"
+#elif defined (MSDOS) || defined (WIN32)
+# include "e_msoft.h"
+#elif defined (OS2)
+# include "e_os2.h"
+#elif defined (QDOS)
+# include "e_qdos.h"
+#elif defined (RISCOS)
+# include "e_riscos.h"
+#elif defined (__vms) || defined (VMS)
+# include "e_vms.h"
+# ifndef VMS
+# define VMS 1
+# endif
+#endif
+
+
+/*
+* MACROS
+*/
+
+/* Define standard error destination
+ */
+#ifndef errout
+# define errout stderr
+#endif
+
+/* Define regex if supported */
+#if (defined (HAVE_REGCOMP) && !defined (REGCOMP_BROKEN))
+# define HAVE_REGEX 1
+#endif
+
+/* GCC 2.7 and later (when not compiling as C++, i.e. __GNUG__ is undefined)
+ * provides attributes to suppress warnings about unused variables and to
+ * check printf-style format arguments. With any other compiler both macros
+ * expand to nothing.
+ */
+#if (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 7)) && !defined (__GNUG__)
+# define __unused__ __attribute__((unused))
+# define __printf__(s,f) __attribute__((format (printf, s, f)))
+#else
+# define __unused__
+# define __printf__(s,f)
+#endif
+
+/*
+ * Portability macros
+ *
+ * Supply strcasecmp() and strncasecmp() when the configuration does not
+ * report them and no macro of that name exists: map them to stricmp() and
+ * strnicmp() if those are reported as available, otherwise to struppercmp()
+ * and strnuppercmp(), which are not declared in this header.
+ */
+#if !defined(HAVE_STRCASECMP) && !defined(strcasecmp)
+# ifdef HAVE_STRICMP
+# define strcasecmp(s1,s2) stricmp(s1,s2)
+# else
+# define strcasecmp(s1,s2) struppercmp(s1,s2)
+# endif
+#endif
+
+#if !defined(HAVE_STRNCASECMP) && !defined(strncasecmp)
+# ifdef HAVE_STRNICMP
+# define strncasecmp(s1,s2,n) strnicmp(s1,s2,n)
+# else
+# define strncasecmp(s1,s2,n) strnuppercmp(s1,s2,n)
+# endif
+#endif
+
+/*
+* DATA DECLARATIONS
+*/
+
+#undef FALSE
+#undef TRUE
+#ifdef VAXC
+typedef enum { FALSE, TRUE } booleanType; /* VAX C: the enum only supplies the two constants */
+typedef int boolean; /* ...while the boolean type itself is a plain int */
+#else
+# ifdef __cplusplus
+typedef bool boolean; /* C++: reuse the built-in bool and its literals */
+#define FALSE false
+#define TRUE true
+# else
+typedef enum { FALSE, TRUE } boolean; /* plain C: FALSE is 0 and TRUE is 1 */
+# endif
+#endif
+
+#if ! defined (HAVE_FGETPOS) && ! defined (fpos_t)
+# define fpos_t long /* stand-in type when fgetpos() is not reported as available */
+#endif
+
+/*
+* FUNCTION PROTOTYPES
+*/
+
+#if defined (NEED_PROTO_REMOVE) && defined (HAVE_REMOVE)
+extern int remove (const char *);
+#endif
+
+/* Declared only when remove() is not available and the system headers lack
+ * a prototype for unlink(). unlink() returns an int status (0 on success,
+ * -1 on error), not a pointer.
+ */
+#if defined (NEED_PROTO_UNLINK) && ! defined (HAVE_REMOVE)
+extern int unlink (const char *);
+#endif
+
+#ifdef NEED_PROTO_GETENV
+extern char *getenv (const char *);
+#endif
+
+#endif /* _GENERAL_H */
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/get.c b/get.c
new file mode 100644
index 0000000..d74ed95
--- /dev/null
+++ b/get.c
@@ -0,0 +1,669 @@
+/*
+* $Id: get.c 559 2007-06-17 03:30:09Z elliotth $
+*
+* Copyright (c) 1996-2002, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* This module contains the high level source read functions (preprocessor
+* directives are handled within this level).
+*/
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#include <string.h>
+
+#include "debug.h"
+#include "entry.h"
+#include "get.h"
+#include "options.h"
+#include "read.h"
+#include "vstring.h"
+
+/*
+* MACROS
+*/
+#define stringMatch(s1,s2) (strcmp (s1,s2) == 0) /* true when both C strings are equal */
+#define isspacetab(c) ((c) == SPACE || (c) == TAB) /* true for a blank or a tab; evaluates c twice */
+
+/*
+* DATA DECLARATIONS
+*/
+typedef enum { COMMENT_NONE, COMMENT_C, COMMENT_CPLUS } Comment; /* what follows a '/': no comment, a C comment, or a C++ comment */
+
+enum eCppLimits {
+ MaxCppNestingLevel = 20, /* number of entries in the ifdef[] array of cppState */
+ MaxDirectiveName = 10 /* size of the buffer that receives a directive name */
+};
+
+/* Describes one nesting level of a preprocessor conditional.
+ */
+typedef struct sConditionalInfo {
+ boolean ignoreAllBranches; /* ignoring parent conditional branch */
+ boolean singleBranch; /* choose only one branch */
+ boolean branchChosen; /* branch already selected */
+ boolean ignoring; /* current ignore state */
+} conditionalInfo;
+
+enum eState {
+ DRCTV_NONE, /* no known directive - ignore to end of line */
+ DRCTV_DEFINE, /* "#define" encountered */
+ DRCTV_HASH, /* initial '#' read; determine directive */
+ DRCTV_IF, /* directive starting with "if" ("#if", "#ifdef", "#ifndef") encountered */
+ DRCTV_PRAGMA, /* #pragma encountered */
+ DRCTV_UNDEF /* "#undef" encountered */
+};
+
+/* Defines the current state of the pre-processor.
+ */
+typedef struct sCppState {
+ int ungetch, ungetch2; /* ungotten characters, if any ('\0' marks an empty slot) */
+ boolean resolveRequired; /* must resolve if/else/elif/endif branch */
+ boolean hasAtLiteralStrings; /* supports @"c:\" strings */
+ struct sDirective {
+ enum eState state; /* current directive being processed */
+ boolean accept; /* is a directive syntactically permitted? */
+ vString * name; /* macro name */
+ unsigned int nestLevel; /* index of the current ifdef[] entry; 0 is the base entry used when no conditional is open */
+ conditionalInfo ifdef [MaxCppNestingLevel]; /* one entry per nesting level */
+ } directive;
+} cppState;
+
+/*
+* DATA DEFINITIONS
+*/
+
+/* Use brace formatting to detect end of block.
+ * Set by cppInit() and reported by isBraceFormat().
+ */
+static boolean BraceFormat = FALSE;
+
+static cppState Cpp = { /* file-wide preprocessor state; reset by cppInit() */
+ '\0', '\0', /* ungetch characters */
+ FALSE, /* resolveRequired */
+ FALSE, /* hasAtLiteralStrings */
+ {
+ DRCTV_NONE, /* state */
+ FALSE, /* accept */
+ NULL, /* name; allocated by the first cppInit() call */
+ 0, /* nestLevel */
+ { {FALSE,FALSE,FALSE,FALSE} } /* ifdef array */
+ } /* directive */
+};
+
+/*
+* FUNCTION DEFINITIONS
+*/
+
+extern boolean isBraceFormat (void) /* reports the flag stored by the last cppInit() call (FALSE before any call) */
+{
+ return BraceFormat;
+}
+
+extern unsigned int getDirectiveNestLevel (void) /* current conditional nesting level; 0 when none is open */
+{
+ return Cpp.directive.nestLevel;
+}
+
+/* Puts the preprocessor into its initial state.
+ *
+ * "state" becomes the brace-format flag and "hasAtLiteralStrings" enables
+ * recognition of @"..." strings. Both unget slots are emptied, the nesting
+ * level returns to 0 with a cleared base conditional entry, and directives
+ * are accepted again. The directive name buffer is created on first use and
+ * merely cleared on later calls.
+ */
+extern void cppInit (const boolean state, const boolean hasAtLiteralStrings)
+{
+    conditionalInfo *const base = &Cpp.directive.ifdef [0];
+
+    BraceFormat = state;
+
+    /* character-level state */
+    Cpp.ungetch2            = '\0';
+    Cpp.ungetch             = '\0';
+    Cpp.hasAtLiteralStrings = hasAtLiteralStrings;
+    Cpp.resolveRequired     = FALSE;
+
+    /* directive-level state */
+    Cpp.directive.nestLevel = 0;
+    Cpp.directive.accept    = TRUE;
+    Cpp.directive.state     = DRCTV_NONE;
+
+    /* base conditional entry */
+    base->ignoring          = FALSE;
+    base->branchChosen      = FALSE;
+    base->singleBranch      = FALSE;
+    base->ignoreAllBranches = FALSE;
+
+    if (Cpp.directive.name != NULL)
+        vStringClear (Cpp.directive.name);
+    else
+        Cpp.directive.name = vStringNew ();
+}
+
+/* Releases the directive name buffer, if one exists, and clears the pointer
+ * so that a later cppInit() allocates a fresh one.
+ */
+extern void cppTerminate (void)
+{
+    if (Cpp.directive.name == NULL)
+        return;
+
+    vStringDelete (Cpp.directive.name);
+    Cpp.directive.name = NULL;
+}
+
+extern void cppBeginStatement (void) /* flags that conditional branches must now be resolved */
+{
+ Cpp.resolveRequired = TRUE; /* read by pushConditional(), isIgnoreBranch() and chooseBranch() */
+}
+
+extern void cppEndStatement (void) /* clears the flag set by cppBeginStatement() */
+{
+ Cpp.resolveRequired = FALSE;
+}
+
+/*
+* Scanning functions
+*
+* This section handles preprocessor directives. It strips out all
+* directives and may emit a tag for #define directives.
+*/
+
+/* This puts a character back into the input queue for the source File.
+ * Up to two characters may be ungotten; the one pushed last is returned
+ * first by cppGetc(). The assertion fails if both slots are already in use.
+ * A '\0' cannot be pushed back meaningfully because '\0' marks an empty slot.
+ */
+extern void cppUngetc (const int c)
+{
+ Assert (Cpp.ungetch2 == '\0');
+ Cpp.ungetch2 = Cpp.ungetch; /* shift the earlier character to the second slot */
+ Cpp.ungetch = c;
+}
+
+/* Copies a directive name into "name". The first character is supplied in
+ * "c" and is stored unconditionally; further characters are taken from the
+ * file while they are alphabetic and while fewer than maxLength - 1
+ * characters have been stored. The character that ends the name is pushed
+ * back to the file, and "name" is always null terminated.
+ *
+ * Returns TRUE when the last character examined is a space or a tab.
+ */
+static boolean readDirective (int c, char *const name, unsigned int maxLength)
+{
+    const unsigned int limit = maxLength - 1;
+    unsigned int length = 0;
+
+    if (limit > 0)
+    {
+        name [length++] = c;
+        while (length < limit)
+        {
+            c = fileGetc ();
+            if (c == EOF || ! isalpha (c))
+            {
+                fileUngetc (c);
+                break;
+            }
+            name [length++] = c;
+        }
+    }
+    name [length] = '\0';
+
+    return (boolean) isspacetab (c);
+}
+
+/* Stores an identifier in "name", replacing its previous contents. The
+ * first character is supplied in "c"; the rest is read from the file for as
+ * long as isident() accepts the characters. The character that ends the
+ * identifier is pushed back to the file.
+ */
+static void readIdentifier (int c, vString *const name)
+{
+    vStringClear (name);
+    vStringPut (name, c);
+    for (c = fileGetc () ; c != EOF && isident (c) ; c = fileGetc ())
+        vStringPut (name, c);
+    fileUngetc (c);
+    vStringTerminate (name);
+}
+
+static conditionalInfo *currentConditional (void) /* entry of ifdef[] selected by the current nesting level */
+{
+ return &Cpp.directive.ifdef [Cpp.directive.nestLevel];
+}
+
+/* Reports whether input is being ignored at the current nesting level.
+ */
+static boolean isIgnore (void)
+{
+    const conditionalInfo *const cond = currentConditional ();
+
+    return cond->ignoring;
+}
+
+/* Sets the ignore state of the current nesting level and returns the value
+ * that was stored.
+ */
+static boolean setIgnore (const boolean ignore)
+{
+    conditionalInfo *const cond = currentConditional ();
+
+    cond->ignoring = ignore;
+    return cond->ignoring;
+}
+
+/* Decides whether the branch that is about to start (#elif or #else) must
+ * be ignored.
+ *
+ * If a statement is incomplete at this point and brace formatting is not in
+ * use, the conditional is first restricted to a single branch. Earlier
+ * branches holding complete statements may already have been followed, but
+ * no further ones will be.
+ *
+ * The branch is ignored when:
+ *   1. all branches are ignored, because the conditional sits inside an
+ *      ignored branch of its parent, or
+ *   2. a branch has already been chosen and only one branch may be followed.
+ */
+static boolean isIgnoreBranch (void)
+{
+    conditionalInfo *const cond = currentConditional ();
+
+    if (! BraceFormat && Cpp.resolveRequired)
+        cond->singleBranch = TRUE;
+
+    if (cond->ignoreAllBranches)
+        return TRUE;
+
+    return (boolean) (cond->branchChosen && cond->singleBranch);
+}
+
+/* Records whether the branch now being entered counts as the chosen one:
+ * it does when only a single branch may be followed or when a statement is
+ * currently incomplete. Nothing is recorded when brace formatting is in use.
+ */
+static void chooseBranch (void)
+{
+    conditionalInfo *cond;
+
+    if (BraceFormat)
+        return;
+
+    cond = currentConditional ();
+    cond->branchChosen = (boolean) (cond->singleBranch ||
+            Cpp.resolveRequired);
+}
+
+/* Opens a new nesting level for an #if-type directive.
+ *
+ * "firstBranchChosen" tells whether the first branch is to be followed.
+ * Returns TRUE when that first branch must be ignored. If the nesting limit
+ * has been reached, no level is opened and FALSE is returned.
+ */
+static boolean pushConditional (const boolean firstBranchChosen)
+{
+    const boolean parentIgnored = isIgnore ();  /* state of enclosing level */
+    conditionalInfo *cond;
+
+    if (Cpp.directive.nestLevel >= (unsigned int) MaxCppNestingLevel - 1)
+        return FALSE;
+
+    ++Cpp.directive.nestLevel;
+    cond = currentConditional ();
+
+    /* Snapshot whether a statement is incomplete on entry to the
+     * conditional; if it is, only one branch of the conditional may be
+     * followed.
+     */
+    cond->ignoreAllBranches = parentIgnored;
+    cond->singleBranch      = Cpp.resolveRequired;
+    cond->branchChosen      = firstBranchChosen;
+    cond->ignoring          = (boolean) (parentIgnored || (
+            ! firstBranchChosen && ! BraceFormat &&
+            (cond->singleBranch || ! Option.if0)));
+
+    return cond->ignoring;
+}
+
+/* Pops one nesting level for an #endif directive.
+ * Returns the ignore state of the level that is current afterwards. At
+ * level 0 nothing is popped, so an unmatched #endif leaves the level as is.
+ */
+static boolean popConditional (void)
+{
+ if (Cpp.directive.nestLevel > 0)
+ --Cpp.directive.nestLevel;
+
+ return isIgnore ();
+}
+
+/* Emits a tag of kind 'd' ("macro") for "name".
+ *
+ * No tag is produced when define tags are not being included, or when the
+ * input is not a header file (which makes the tag file scoped) and
+ * file-scoped tags are excluded.
+ */
+static void makeDefineTag (const char *const name)
+{
+    const boolean isFileScope = (boolean) (! isHeaderFile ());
+    tagEntryInfo tag;
+
+    if (! includingDefineTags ())
+        return;
+    if (isFileScope && ! Option.include.fileScope)
+        return;
+
+    initTagEntry (&tag, name);
+    tag.kind            = 'd';
+    tag.kindName        = "macro";
+    tag.truncateLine    = TRUE;
+    tag.isFileScope     = isFileScope;
+    tag.lineNumberEntry = (boolean) (Option.locate != EX_PATTERN);
+    makeTagEntry (&tag);
+}
+
+static void directiveDefine (const int c) /* handles the first character after "#define"; handleDirective() also uses it for "#undef" */
+{
+ if (isident1 (c))
+ {
+ readIdentifier (c, Cpp.directive.name);
+ if (! isIgnore ()) /* no tag for names inside an ignored branch */
+ makeDefineTag (vStringValue (Cpp.directive.name));
+ }
+ Cpp.directive.state = DRCTV_NONE; /* the rest of the line is skipped */
+}
+
+/* Handles the first character after "#pragma". For "#pragma weak NAME" a
+ * macro tag is generated for NAME; every other pragma is left alone. In all
+ * cases the remainder of the line is then skipped.
+ */
+static void directivePragma (int c)
+{
+    if (isident1 (c))
+    {
+        readIdentifier (c, Cpp.directive.name);
+        if (stringMatch (vStringValue (Cpp.directive.name), "weak"))
+        {
+            /* step over the blanks that separate "weak" from the name */
+            int next = fileGetc ();
+
+            while (next == SPACE)
+                next = fileGetc ();
+
+            /* generate macro tag for weak name */
+            if (isident1 (next))
+            {
+                readIdentifier (next, Cpp.directive.name);
+                makeDefineTag (vStringValue (Cpp.directive.name));
+            }
+        }
+    }
+    Cpp.directive.state = DRCTV_NONE;
+}
+
+static boolean directiveIf (const int c) /* handles the first character after a directive starting with "if" */
+{
+ DebugStatement ( const boolean ignore0 = isIgnore (); )
+ const boolean ignore = pushConditional ((boolean) (c != '0')); /* a leading '0', as in "#if 0", means the first branch is not chosen */
+
+ Cpp.directive.state = DRCTV_NONE; /* the rest of the line is skipped */
+ DebugStatement ( debugCppNest (TRUE, Cpp.directive.nestLevel);
+ if (ignore != ignore0) debugCppIgnore (ignore); )
+
+ return ignore; /* TRUE when the branch just entered must be ignored */
+}
+
+static boolean directiveHash (const int c) /* reads the directive name that follows '#' and selects the next state */
+{
+ boolean ignore = FALSE;
+ char directive [MaxDirectiveName];
+ DebugStatement ( const boolean ignore0 = isIgnore (); )
+
+ readDirective (c, directive, MaxDirectiveName);
+ if (stringMatch (directive, "define"))
+ Cpp.directive.state = DRCTV_DEFINE;
+ else if (stringMatch (directive, "undef"))
+ Cpp.directive.state = DRCTV_UNDEF;
+ else if (strncmp (directive, "if", (size_t) 2) == 0) /* prefix test: also matches "ifdef" and "ifndef" */
+ Cpp.directive.state = DRCTV_IF;
+ else if (stringMatch (directive, "elif") ||
+ stringMatch (directive, "else"))
+ {
+ ignore = setIgnore (isIgnoreBranch ());
+ if (! ignore && stringMatch (directive, "else")) /* only a followed #else is recorded as the chosen branch */
+ chooseBranch ();
+ Cpp.directive.state = DRCTV_NONE;
+ DebugStatement ( if (ignore != ignore0) debugCppIgnore (ignore); )
+ }
+ else if (stringMatch (directive, "endif"))
+ {
+ DebugStatement ( debugCppNest (FALSE, Cpp.directive.nestLevel); )
+ ignore = popConditional (); /* ignore state of the enclosing level */
+ Cpp.directive.state = DRCTV_NONE;
+ DebugStatement ( if (ignore != ignore0) debugCppIgnore (ignore); )
+ }
+ else if (stringMatch (directive, "pragma"))
+ Cpp.directive.state = DRCTV_PRAGMA;
+ else
+ Cpp.directive.state = DRCTV_NONE; /* unknown directive: skip the line */
+
+ return ignore; /* stays FALSE unless an elif/else/endif set it above */
+}
+
+/* Handles a pre-processor directive whose first character is given by "c".
+ * Dispatches on the current directive state and returns whether input is to
+ * be ignored afterwards. Handlers that return nothing leave the result at
+ * the ignore state of the current nesting level on entry.
+ */
+static boolean handleDirective (const int c)
+{
+ boolean ignore = isIgnore ();
+
+ switch (Cpp.directive.state)
+ {
+ case DRCTV_NONE: ignore = isIgnore (); break;
+ case DRCTV_DEFINE: directiveDefine (c); break;
+ case DRCTV_HASH: ignore = directiveHash (c); break;
+ case DRCTV_IF: ignore = directiveIf (c); break;
+ case DRCTV_PRAGMA: directivePragma (c); break;
+ case DRCTV_UNDEF: directiveDefine (c); break; /* "#undef" names are handled exactly like "#define" names */
+ }
+ return ignore;
+}
+
+/* Called after a slash ('/') has been read. Looks at the next character to
+ * tell whether a comment starts here and of which type. When no comment
+ * starts, the character is pushed back to the file.
+ */
+static Comment isComment (void)
+{
+    const int next = fileGetc ();
+
+    switch (next)
+    {
+        case '*':
+            return COMMENT_C;
+
+        case '/':
+            return COMMENT_CPLUS;
+
+        default:
+            fileUngetc (next);
+            return COMMENT_NONE;
+    }
+}
+
+/* Skips over the body of a C style comment up to and including the closing
+ * star-slash pair. The ANSI specification treats a comment as white space,
+ * so a SPACE is returned in its place. Returns EOF if the input ends before
+ * the comment is closed.
+ */
+int skipOverCComment (void)
+{
+    int c = fileGetc ();
+
+    while (c != EOF)
+    {
+        const boolean afterStar = (boolean) (c == '*');
+
+        c = fileGetc ();
+        if (afterStar && c == '/')
+            return SPACE;  /* replace comment with space */
+    }
+    return c;
+}
+
+/* Skips over a C++ style comment. The character following a backslash is
+ * discarded unexamined, so an escaped newline does not end the comment.
+ * Returns the NEWLINE that ended the comment, or EOF.
+ */
+static int skipOverCplusComment (void)
+{
+    int c = fileGetc ();
+
+    while (c != EOF && c != NEWLINE)
+    {
+        if (c == BACKSLASH)
+            fileGetc ();  /* throw away next character, too */
+        c = fileGetc ();
+    }
+    return c;
+}
+
+/* Consumes input up to and including the closing double quote of a string,
+ * or up to end of input. Unless "ignoreBackslash" is set, the character
+ * following a backslash is discarded unexamined, so an escaped quote does
+ * not end the string. Always returns STRING_SYMBOL, which stands for a
+ * generic string.
+ */
+static int skipToEndOfString (boolean ignoreBackslash)
+{
+    int c = fileGetc ();
+
+    while (c != EOF && c != DOUBLE_QUOTE)
+    {
+        if (c == BACKSLASH && ! ignoreBackslash)
+            fileGetc ();  /* throw away next character, too */
+        c = fileGetc ();
+    }
+    return STRING_SYMBOL;  /* symbolic representation of string */
+}
+
+/* Skips to the end of the three (possibly four) 'c' sequence, returning a
+ * special character to symbolically represent a generic character.
+ * Also detects Vera numbers that include a base specifier (ie. 'b1010).
+ * Scanning stops at the closing quote, at a newline (which is pushed back),
+ * at end of input, or, once a base specifier has been seen, at the first
+ * non-alphanumeric character (also pushed back).
+ */
+static int skipToEndOfChar (void)
+{
+ int c;
+ int count = 0, veraBase = '\0'; /* characters read so far; base letter once one is seen */
+
+ while ((c = fileGetc ()) != EOF)
+ {
+ ++count;
+ if (c == BACKSLASH)
+ fileGetc (); /* throw away next character, too */
+ else if (c == SINGLE_QUOTE)
+ break;
+ else if (c == NEWLINE)
+ {
+ fileUngetc (c);
+ break;
+ }
+ else if (count == 1 && strchr ("DHOB", toupper (c)) != NULL) /* base letter directly after the quote */
+ veraBase = c;
+ else if (veraBase != '\0' && ! isalnum (c)) /* end of the digits of a based number */
+ {
+ fileUngetc (c);
+ break;
+ }
+ }
+ return CHAR_SYMBOL; /* symbolic representation of character */
+}
+
+/* This function returns the next character, stripping out comments,
+ * C pre-processor directives, and the contents of single and double
+ * quoted strings. In short, strip anything which places a burden upon
+ * the tokenizer.
+ *
+ * Characters pushed back with cppUngetc() are returned first and bypass all
+ * processing. A string is returned as STRING_SYMBOL, a character constant
+ * as CHAR_SYMBOL, a C comment as whatever skipOverCComment() returns, and a
+ * trigraph as the character it stands for. Input is consumed without being
+ * returned for as long as a directive is being read or the current
+ * conditional branch is ignored. Returns EOF at end of input.
+ */
+extern int cppGetc (void)
+{
+    boolean directive = FALSE;  /* a '#' directive line is being read */
+    boolean ignore = FALSE;     /* input is currently being ignored */
+    int c;
+
+    if (Cpp.ungetch != '\0')
+    {
+        c = Cpp.ungetch;
+        Cpp.ungetch = Cpp.ungetch2;
+        Cpp.ungetch2 = '\0';
+        return c;  /* return here to avoid re-calling debugPutc () */
+    }
+    else do
+    {
+        c = fileGetc ();
+process:
+        switch (c)
+        {
+            case EOF:
+                /* clear both flags so that the loop terminates */
+                ignore = FALSE;
+                directive = FALSE;
+                break;
+
+            case TAB:
+            case SPACE:
+                break;  /* ignore most white space */
+
+            case NEWLINE:
+                if (directive && ! ignore)
+                    directive = FALSE;
+                Cpp.directive.accept = TRUE;  /* '#' may start the next line */
+                break;
+
+            case DOUBLE_QUOTE:
+                Cpp.directive.accept = FALSE;
+                c = skipToEndOfString (FALSE);
+                break;
+
+            case '#':
+                if (Cpp.directive.accept)
+                {
+                    directive = TRUE;
+                    Cpp.directive.state = DRCTV_HASH;
+                    Cpp.directive.accept = FALSE;
+                }
+                break;
+
+            case SINGLE_QUOTE:
+                Cpp.directive.accept = FALSE;
+                c = skipToEndOfChar ();
+                break;
+
+            case '/':
+            {
+                const Comment comment = isComment ();
+
+                if (comment == COMMENT_C)
+                    c = skipOverCComment ();
+                else if (comment == COMMENT_CPLUS)
+                {
+                    c = skipOverCplusComment ();
+                    /* push the newline back so that the NEWLINE case above
+                     * still gets to process it on the next read
+                     */
+                    if (c == NEWLINE)
+                        fileUngetc (c);
+                }
+                else
+                    Cpp.directive.accept = FALSE;
+                break;
+            }
+
+            case BACKSLASH:
+            {
+                int next = fileGetc ();
+
+                if (next == NEWLINE)
+                    continue;  /* line continuation: re-test the loop condition */
+                else if (next == '?')
+                    cppUngetc (next);  /* returned as is by the next call */
+                else
+                    fileUngetc (next);
+                break;
+            }
+
+            case '?':
+            {
+                int next = fileGetc ();
+                if (next != '?')
+                    fileUngetc (next);
+                else
+                {
+                    /* "??" read: translate a trigraph if one follows */
+                    next = fileGetc ();
+                    switch (next)
+                    {
+                        case '(':          c = '[';       break;
+                        case ')':          c = ']';       break;
+                        case '<':          c = '{';       break;
+                        case '>':          c = '}';       break;
+                        case '/':          c = BACKSLASH; goto process;
+                        case '!':          c = '|';       break;
+                        case SINGLE_QUOTE: c = '^';       break;
+                        case '-':          c = '~';       break;
+                        case '=':          c = '#';       goto process;
+                        default:
+                            /* not a trigraph: hand back both characters */
+                            fileUngetc (next);
+                            cppUngetc ('?');
+                            break;
+                    }
+                }
+            } break;
+
+            default:
+                if (c == '@' && Cpp.hasAtLiteralStrings)
+                {
+                    int next = fileGetc ();
+                    if (next == DOUBLE_QUOTE)
+                    {
+                        Cpp.directive.accept = FALSE;
+                        c = skipToEndOfString (TRUE);
+                        break;
+                    }
+                    /* Not an @"..." string: give the look-ahead character
+                     * back so that it is not silently dropped from the
+                     * input (e.g. the first letter of an @identifier).
+                     */
+                    fileUngetc (next);
+                }
+                Cpp.directive.accept = FALSE;
+                if (directive)
+                    ignore = handleDirective (c);
+                break;
+        }
+    } while (directive || ignore);
+
+    DebugStatement ( debugPutc (DEBUG_CPP, c); )
+    DebugStatement ( if (c == NEWLINE)
+                debugPrintf (DEBUG_CPP, "%6ld: ", getInputLineNumber () + 1); )
+
+    return c;
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/get.h b/get.h
new file mode 100644
index 0000000..d523437
--- /dev/null
+++ b/get.h
@@ -0,0 +1,50 @@
+/*
+* $Id: get.h 525 2007-05-28 01:50:41Z elliotth $
+*
+* Copyright (c) 1998-2002, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* External interface to get.c
+*/
+#ifndef _GET_H
+#define _GET_H
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#include "ctags.h" /* to define langType */
+
+/*
+* MACROS
+*/
+/* Is the character valid as a character of a C identifier?
+ * VMS allows '$' in identifiers.
+ * The argument is evaluated more than once, so it must have no side effects.
+ * NOTE(review): relies on isalnum() being declared by the including file;
+ * this header does not include <ctype.h> itself - confirm.
+ */
+#define isident(c) (isalnum(c) || (c) == '_' || (c) == '$')
+
+/* Is the character valid as the first character of a C identifier?
+ * C++ allows '~' in destructors.
+ * VMS allows '$' in identifiers.
+ * The argument is evaluated more than once, so it must have no side effects.
+ */
+#define isident1(c) (isalpha(c) || (c) == '_' || (c) == '~' || (c) == '$')
+
+/*
+* FUNCTION PROTOTYPES
+*/
+extern boolean isBraceFormat (void);
+extern unsigned int getDirectiveNestLevel (void);
+extern void cppInit (const boolean state, const boolean hasAtLiteralStrings);
+extern void cppTerminate (void);
+extern void cppBeginStatement (void);
+extern void cppEndStatement (void);
+extern void cppUngetc (const int c);
+extern int cppGetc (void);
+extern int skipOverCComment (void);
+
+#endif /* _GET_H */
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/gnu_regex/.svn/all-wcprops b/gnu_regex/.svn/all-wcprops
new file mode 100644
index 0000000..950b571
--- /dev/null
+++ b/gnu_regex/.svn/all-wcprops
@@ -0,0 +1,47 @@
+K 25
+svn:wc:ra_dav:version-url
+V 43
+/svnroot/ctags/!svn/ver/710/trunk/gnu_regex
+END
+regex.h
+K 25
+svn:wc:ra_dav:version-url
+V 51
+/svnroot/ctags/!svn/ver/707/trunk/gnu_regex/regex.h
+END
+regexec.c
+K 25
+svn:wc:ra_dav:version-url
+V 53
+/svnroot/ctags/!svn/ver/710/trunk/gnu_regex/regexec.c
+END
+regex_internal.c
+K 25
+svn:wc:ra_dav:version-url
+V 60
+/svnroot/ctags/!svn/ver/710/trunk/gnu_regex/regex_internal.c
+END
+README.txt
+K 25
+svn:wc:ra_dav:version-url
+V 54
+/svnroot/ctags/!svn/ver/707/trunk/gnu_regex/README.txt
+END
+regex.c
+K 25
+svn:wc:ra_dav:version-url
+V 51
+/svnroot/ctags/!svn/ver/707/trunk/gnu_regex/regex.c
+END
+regex_internal.h
+K 25
+svn:wc:ra_dav:version-url
+V 60
+/svnroot/ctags/!svn/ver/710/trunk/gnu_regex/regex_internal.h
+END
+regcomp.c
+K 25
+svn:wc:ra_dav:version-url
+V 53
+/svnroot/ctags/!svn/ver/710/trunk/gnu_regex/regcomp.c
+END
diff --git a/gnu_regex/.svn/entries b/gnu_regex/.svn/entries
new file mode 100644
index 0000000..8b5cf56
--- /dev/null
+++ b/gnu_regex/.svn/entries
@@ -0,0 +1,112 @@
+10
+
+dir
+720
+https://ctags.svn.sourceforge.net/svnroot/ctags/trunk/gnu_regex
+https://ctags.svn.sourceforge.net/svnroot/ctags
+
+
+
+2009-07-04T05:53:16.648205Z
+710
+dhiebert
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+c5d04d22-be80-434c-894e-aa346cc9e8e8
+
+regex.h
+file
+
+
+
+
+2008-01-16T10:09:47.000000Z
+2d49479cad13fa2a1c106bde452bbf5f
+2009-07-04T05:26:42.065968Z
+707
+dhiebert
+
+regexec.c
+file
+
+
+
+
+2009-07-04T05:01:35.000000Z
+de02578fbe56d47c8d9ab3817b7766bc
+2009-07-04T05:53:16.648205Z
+710
+dhiebert
+
+regex_internal.c
+file
+
+
+
+
+2009-01-08T00:22:50.000000Z
+73d91ed18d6fed41faa69ecd116338e0
+2009-07-04T05:53:16.648205Z
+710
+dhiebert
+
+README.txt
+file
+
+
+
+
+2009-07-04T05:09:02.000000Z
+248f348e18c15aabc6595f807e117388
+2009-07-04T05:26:42.065968Z
+707
+dhiebert
+
+regex.c
+file
+
+
+
+
+2009-07-03T17:10:52.000000Z
+170f8405dec70b235f2551325b43cd58
+2009-07-04T05:26:42.065968Z
+707
+dhiebert
+
+regex_internal.h
+file
+
+
+
+
+2009-07-04T05:23:55.000000Z
+9a4cbd70cb786603c081be91fb697f12
+2009-07-04T05:53:16.648205Z
+710
+dhiebert
+
+regcomp.c
+file
+
+
+
+
+2009-07-04T05:08:27.000000Z
+7fefcec74cd7fe150ffd275f589cb7db
+2009-07-04T05:53:16.648205Z
+710
+dhiebert
+
diff --git a/gnu_regex/.svn/text-base/README.txt.svn-base b/gnu_regex/.svn/text-base/README.txt.svn-base
new file mode 100644
index 0000000..8fccbea
--- /dev/null
+++ b/gnu_regex/.svn/text-base/README.txt.svn-base
@@ -0,0 +1,5 @@
+These source files were taken from the GNU glibc-2.10.1 package.
+
+ ftp://ftp.gnu.org/gnu/glibc/glibc-2.10.1.tar.bz2
+
+Minor changes were made to eliminate compiler errors and warnings.
diff --git a/gnu_regex/.svn/text-base/regcomp.c.svn-base b/gnu_regex/.svn/text-base/regcomp.c.svn-base
new file mode 100644
index 0000000..1f3daf2
--- /dev/null
+++ b/gnu_regex/.svn/text-base/regcomp.c.svn-base
@@ -0,0 +1,3818 @@
+/* Extended regular expression matching and search library.
+ Copyright (C) 2002,2003,2004,2005,2006,2007,2009
+ Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern,
+ size_t length, reg_syntax_t syntax);
+static void re_compile_fastmap_iter (regex_t *bufp,
+ const re_dfastate_t *init_state,
+ char *fastmap);
+static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len);
+#ifdef RE_ENABLE_I18N
+static void free_charset (re_charset_t *cset);
+#endif /* RE_ENABLE_I18N */
+static void free_workarea_compile (regex_t *preg);
+static reg_errcode_t create_initial_state (re_dfa_t *dfa);
+#ifdef RE_ENABLE_I18N
+static void optimize_utf8 (re_dfa_t *dfa);
+#endif
+static reg_errcode_t analyze (regex_t *preg);
+static reg_errcode_t preorder (bin_tree_t *root,
+ reg_errcode_t (fn (void *, bin_tree_t *)),
+ void *extra);
+static reg_errcode_t postorder (bin_tree_t *root,
+ reg_errcode_t (fn (void *, bin_tree_t *)),
+ void *extra);
+static reg_errcode_t optimize_subexps (void *extra, bin_tree_t *node);
+static reg_errcode_t lower_subexps (void *extra, bin_tree_t *node);
+static bin_tree_t *lower_subexp (reg_errcode_t *err, regex_t *preg,
+ bin_tree_t *node);
+static reg_errcode_t calc_first (void *extra, bin_tree_t *node);
+static reg_errcode_t calc_next (void *extra, bin_tree_t *node);
+static reg_errcode_t link_nfa_nodes (void *extra, bin_tree_t *node);
+static int duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint);
+static int search_duplicated_node (const re_dfa_t *dfa, int org_node,
+ unsigned int constraint);
+static reg_errcode_t calc_eclosure (re_dfa_t *dfa);
+static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa,
+ int node, int root);
+static reg_errcode_t calc_inveclosure (re_dfa_t *dfa);
+static int fetch_number (re_string_t *input, re_token_t *token,
+ reg_syntax_t syntax);
+static int peek_token (re_token_t *token, re_string_t *input,
+ reg_syntax_t syntax) internal_function;
+static bin_tree_t *parse (re_string_t *regexp, regex_t *preg,
+ reg_syntax_t syntax, reg_errcode_t *err);
+static bin_tree_t *parse_reg_exp (re_string_t *regexp, regex_t *preg,
+ re_token_t *token, reg_syntax_t syntax,
+ int nest, reg_errcode_t *err);
+static bin_tree_t *parse_branch (re_string_t *regexp, regex_t *preg,
+ re_token_t *token, reg_syntax_t syntax,
+ int nest, reg_errcode_t *err);
+static bin_tree_t *parse_expression (re_string_t *regexp, regex_t *preg,
+ re_token_t *token, reg_syntax_t syntax,
+ int nest, reg_errcode_t *err);
+static bin_tree_t *parse_sub_exp (re_string_t *regexp, regex_t *preg,
+ re_token_t *token, reg_syntax_t syntax,
+ int nest, reg_errcode_t *err);
+static bin_tree_t *parse_dup_op (bin_tree_t *dup_elem, re_string_t *regexp,
+ re_dfa_t *dfa, re_token_t *token,
+ reg_syntax_t syntax, reg_errcode_t *err);
+static bin_tree_t *parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa,
+ re_token_t *token, reg_syntax_t syntax,
+ reg_errcode_t *err);
+static reg_errcode_t parse_bracket_element (bracket_elem_t *elem,
+ re_string_t *regexp,
+ re_token_t *token, int token_len,
+ re_dfa_t *dfa,
+ reg_syntax_t syntax,
+ int accept_hyphen);
+static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem,
+ re_string_t *regexp,
+ re_token_t *token);
+#ifdef RE_ENABLE_I18N
+static reg_errcode_t build_equiv_class (bitset_t sbcset,
+ re_charset_t *mbcset,
+ int *equiv_class_alloc,
+ const unsigned char *name);
+static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
+ bitset_t sbcset,
+ re_charset_t *mbcset,
+ int *char_class_alloc,
+ const unsigned char *class_name,
+ reg_syntax_t syntax);
+#else /* not RE_ENABLE_I18N */
+static reg_errcode_t build_equiv_class (bitset_t sbcset,
+ const unsigned char *name);
+static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
+ bitset_t sbcset,
+ const unsigned char *class_name,
+ reg_syntax_t syntax);
+#endif /* not RE_ENABLE_I18N */
+static bin_tree_t *build_charclass_op (re_dfa_t *dfa,
+ RE_TRANSLATE_TYPE trans,
+ const unsigned char *class_name,
+ const unsigned char *extra,
+ int non_match, reg_errcode_t *err);
+static bin_tree_t *create_tree (re_dfa_t *dfa,
+ bin_tree_t *left, bin_tree_t *right,
+ re_token_type_t type);
+static bin_tree_t *create_token_tree (re_dfa_t *dfa,
+ bin_tree_t *left, bin_tree_t *right,
+ const re_token_t *token);
+static bin_tree_t *duplicate_tree (const bin_tree_t *src, re_dfa_t *dfa);
+static void free_token (re_token_t *node);
+static reg_errcode_t free_tree (void *extra, bin_tree_t *node);
+static reg_errcode_t mark_opt_subexp (void *extra, bin_tree_t *node);
+
+/* This table gives an error message for each of the error codes listed
+ in regex.h. Obviously the order here has to be same as there.
+ POSIX doesn't require that we do anything for REG_NOERROR,
+ but why not be nice?
+
+ Layout: all messages are concatenated into one char array, each message
+ followed by a NUL byte (the explicit "\0" between entries; the last
+ entry relies on the terminator of the whole literal). Every REG_*_IDX
+ macro defined below is the byte offset of its message in the array: the
+ offset of the previous message plus sizeof of the previous message's
+ literal, and sizeof already counts that message's terminating NUL. */
+
+const char __re_error_msgid[] attribute_hidden =
+ {
+#define REG_NOERROR_IDX 0
+ gettext_noop ("Success") /* REG_NOERROR */
+ "\0"
+#define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
+ gettext_noop ("No match") /* REG_NOMATCH */
+ "\0"
+#define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match")
+ gettext_noop ("Invalid regular expression") /* REG_BADPAT */
+ "\0"
+#define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
+ gettext_noop ("Invalid collation character") /* REG_ECOLLATE */
+ "\0"
+#define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character")
+ gettext_noop ("Invalid character class name") /* REG_ECTYPE */
+ "\0"
+#define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name")
+ gettext_noop ("Trailing backslash") /* REG_EESCAPE */
+ "\0"
+#define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash")
+ gettext_noop ("Invalid back reference") /* REG_ESUBREG */
+ "\0"
+#define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference")
+ gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */
+ "\0"
+#define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
+ gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */
+ "\0"
+#define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
+ gettext_noop ("Unmatched \\{") /* REG_EBRACE */
+ "\0"
+#define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{")
+ gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */
+ "\0"
+#define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
+ gettext_noop ("Invalid range end") /* REG_ERANGE */
+ "\0"
+#define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end")
+ gettext_noop ("Memory exhausted") /* REG_ESPACE */
+ "\0"
+#define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted")
+ gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */
+ "\0"
+#define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
+ gettext_noop ("Premature end of regular expression") /* REG_EEND */
+ "\0"
+#define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression")
+ gettext_noop ("Regular expression too big") /* REG_ESIZE */
+ "\0"
+#define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big")
+ gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
+ };
+
+/* Start offset of every message inside __re_error_msgid. The entries are
+ listed in the same order in which the REG_*_IDX macros are defined, and
+ the array is indexed with an error code to locate that code's message
+ (see the lookups of the form
+ __re_error_msgid + __re_error_msgid_idx[code] in this file). */
+const size_t __re_error_msgid_idx[] attribute_hidden =
+ {
+ REG_NOERROR_IDX,
+ REG_NOMATCH_IDX,
+ REG_BADPAT_IDX,
+ REG_ECOLLATE_IDX,
+ REG_ECTYPE_IDX,
+ REG_EESCAPE_IDX,
+ REG_ESUBREG_IDX,
+ REG_EBRACK_IDX,
+ REG_EPAREN_IDX,
+ REG_EBRACE_IDX,
+ REG_BADBR_IDX,
+ REG_ERANGE_IDX,
+ REG_ESPACE_IDX,
+ REG_BADRPT_IDX,
+ REG_EEND_IDX,
+ REG_ESIZE_IDX,
+ REG_ERPAREN_IDX
+ };
+
+/* Entry points for GNU code. */
+
+/* GNU-style compilation entry point.  Compile PATTERN, which is LENGTH
+   bytes long, into the pattern buffer BUFP using the syntax currently
+   stored in `re_syntax_options'.  The result is NULL when the pattern was
+   valid; otherwise it is an error string describing the failure.
+
+   The `allocated' (and perhaps `buffer') and `translate' fields of BUFP
+   are assumed to have been set by the caller.  */
+
+const char *
+re_compile_pattern (pattern, length, bufp)
+     const char *pattern;
+     size_t length;
+     struct re_pattern_buffer *bufp;
+{
+  reg_errcode_t status;
+
+  /* Anchors match at newlines.  */
+  bufp->newline_anchor = 1;
+
+  /* GNU code asks for register information by passing a non-null REGS
+     argument to re_match and friends, not through no_sub; no_sub is
+     therefore only set when RE_NO_SUB is part of the current syntax.  */
+  bufp->no_sub = (re_syntax_options & RE_NO_SUB) ? 1 : 0;
+
+  status = re_compile_internal (bufp, pattern, length, re_syntax_options);
+  if (status != 0)
+    return gettext (__re_error_msgid + __re_error_msgid_idx[(int) status]);
+
+  return NULL;
+}
+#ifdef _LIBC
+weak_alias (__re_compile_pattern, re_compile_pattern)
+#endif
+
+/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
+ also be assigned to arbitrarily: each pattern buffer stores its own
+ syntax, so it can be changed between regex compilations. */
+/* This has no initializer because initialized variables in Emacs
+ become read-only after dumping. */
+reg_syntax_t re_syntax_options;
+
+
+/* Select the regexp syntax used by subsequent compilations, which lets
+   utilities with historically different, incompatible syntaxes share this
+   code.
+
+   SYNTAX is a bit mask built from the syntax bits defined in regex.h.
+   The syntax that was in effect before the call is returned.  */
+
+reg_syntax_t
+re_set_syntax (syntax)
+     reg_syntax_t syntax;
+{
+  const reg_syntax_t previous = re_syntax_options;
+
+  re_syntax_options = syntax;
+
+  return previous;
+}
+#ifdef _LIBC
+weak_alias (__re_set_syntax, re_set_syntax)
+#endif
+
+/* Fill BUFP->fastmap from the initial states of the compiled DFA stored in
+   BUFP->buffer, then flag the fastmap as accurate.  Always returns 0.  */
+int
+re_compile_fastmap (bufp)
+     struct re_pattern_buffer *bufp;
+{
+  char *map = bufp->fastmap;
+  re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
+  const re_dfastate_t *contexts[3];
+  int k;
+
+  /* Start from an empty map and add the plain initial state first.  */
+  memset (map, '\0', sizeof (char) * SBC_MAX);
+  re_compile_fastmap_iter (bufp, dfa->init_state, map);
+
+  /* Then add each context-specific initial state that is a distinct
+     state, in the order word, newline, beginning of buffer.  */
+  contexts[0] = dfa->init_state_word;
+  contexts[1] = dfa->init_state_nl;
+  contexts[2] = dfa->init_state_begbuf;
+  for (k = 0; k < 3; ++k)
+    {
+      if (contexts[k] == dfa->init_state)
+        continue;
+      re_compile_fastmap_iter (bufp, contexts[k], map);
+    }
+
+  bufp->fastmap_accurate = 1;
+  return 0;
+}
+#ifdef _LIBC
+weak_alias (__re_compile_fastmap, re_compile_fastmap)
+#endif
+
+/* Mark byte CH in FASTMAP.  When ICASE is nonzero, tolower (CH) is marked
+   as well.  */
+static inline void
+__attribute ((always_inline))
+re_set_fastmap (char *fastmap, int icase, int ch)
+{
+  fastmap[ch] = 1;
+  if (!icase)
+    return;
+  fastmap[tolower (ch)] = 1;
+}
+
+/* Helper function for re_compile_fastmap.
+ Compile fastmap for the initial_state INIT_STATE.
+
+ Each node in INIT_STATE contributes the bytes that can start a match
+ through it: a CHARACTER node marks its own byte, a SIMPLE_BRACKET node
+ marks every byte whose bit is set in its bitset, and (only with
+ RE_ENABLE_I18N) a COMPLEX_BRACKET node marks candidate lead bytes.
+ An OP_PERIOD, OP_UTF8_PERIOD or END_OF_RE node marks all SBC_MAX
+ entries and ends the scan immediately; END_OF_RE additionally sets
+ BUFP->can_be_null. */
+
+static void
+re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
+ char *fastmap)
+{
+ re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
+ int node_cnt;
+ /* Case folding through re_set_fastmap is only requested when the DFA
+ works on single bytes (mb_cur_max == 1) and RE_ICASE is set. */
+ int icase = (dfa->mb_cur_max == 1 && (bufp->syntax & RE_ICASE));
+ for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt)
+ {
+ int node = init_state->nodes.elems[node_cnt];
+ re_token_type_t type = dfa->nodes[node].type;
+
+ if (type == CHARACTER)
+ {
+ re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c);
+#ifdef RE_ENABLE_I18N
+ if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
+ {
+ /* Multibyte case-insensitive match: gather this byte and the
+ directly following CHARACTER nodes flagged mb_partial into BUF,
+ decode them as one wide character, and also mark the first
+ byte of its lowercase encoding. Note that NODE is advanced
+ while gathering. */
+ unsigned char *buf = alloca (dfa->mb_cur_max), *p;
+ wchar_t wc;
+ mbstate_t state;
+
+ p = buf;
+ *p++ = dfa->nodes[node].opr.c;
+ while (++node < dfa->nodes_len
+ && dfa->nodes[node].type == CHARACTER
+ && dfa->nodes[node].mb_partial)
+ *p++ = dfa->nodes[node].opr.c;
+ memset (&state, '\0', sizeof (state));
+ if (__mbrtowc (&wc, (const char *) buf, p - buf,
+ &state) == p - buf
+ && (__wcrtomb ((char *) buf, towlower (wc), &state)
+ != (size_t) -1))
+ re_set_fastmap (fastmap, 0, buf[0]);
+ }
+#endif
+ }
+ else if (type == SIMPLE_BRACKET)
+ {
+ /* Walk the bitset word by word; CH counts the byte value that
+ corresponds to bit J of word I. */
+ int i, ch;
+ for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
+ {
+ int j;
+ bitset_word_t w = dfa->nodes[node].opr.sbcset[i];
+ for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
+ if (w & ((bitset_word_t) 1 << j))
+ re_set_fastmap (fastmap, icase, ch);
+ }
+ }
+#ifdef RE_ENABLE_I18N
+ else if (type == COMPLEX_BRACKET)
+ {
+ re_charset_t *cset = dfa->nodes[node].opr.mbcset;
+ int i;
+
+# ifdef _LIBC
+ /* See if we have to try all bytes which start multiple collation
+ elements.
+ e.g. In da_DK, we want to catch 'a' since "aa" is a valid
+ collation element, and don't catch 'b' since 'b' is
+ the only collation element which starts from 'b' (and
+ it is caught by SIMPLE_BRACKET). */
+ if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0
+ && (cset->ncoll_syms || cset->nranges))
+ {
+ const int32_t *table = (const int32_t *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
+ for (i = 0; i < SBC_MAX; ++i)
+ if (table[i] < 0)
+ re_set_fastmap (fastmap, icase, i);
+ }
+# endif /* _LIBC */
+
+ /* See if we have to start the match at all multibyte characters,
+ i.e. where we would not find an invalid sequence. This only
+ applies to multibyte character sets; for single byte character
+ sets, the SIMPLE_BRACKET again suffices. */
+ if (dfa->mb_cur_max > 1
+ && (cset->nchar_classes || cset->non_match
+# ifdef _LIBC
+ || cset->nequiv_classes
+# endif /* _LIBC */
+ ))
+ {
+ /* C is an unsigned char, so the do/while below visits every byte
+ value once and stops when the increment wraps back to 0. A
+ byte is marked when __mbrtowc reports (size_t) -2 for it,
+ i.e. it is an incomplete multibyte sequence on its own. */
+ unsigned char c = 0;
+ do
+ {
+ mbstate_t mbs;
+ memset (&mbs, 0, sizeof (mbs));
+ if (__mbrtowc (NULL, (char *) &c, 1, &mbs) == (size_t) -2)
+ re_set_fastmap (fastmap, false, (int) c);
+ }
+ while (++c != 0);
+ }
+
+ else
+ {
+ /* ... Else catch all bytes which can start the mbchars. */
+ for (i = 0; i < cset->nmbchars; ++i)
+ {
+ char buf[256];
+ mbstate_t state;
+ memset (&state, '\0', sizeof (state));
+ if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1)
+ re_set_fastmap (fastmap, icase, *(unsigned char *) buf);
+ if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
+ {
+ if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state)
+ != (size_t) -1)
+ re_set_fastmap (fastmap, false, *(unsigned char *) buf);
+ }
+ }
+ }
+ }
+#endif /* RE_ENABLE_I18N */
+ else if (type == OP_PERIOD
+#ifdef RE_ENABLE_I18N
+ || type == OP_UTF8_PERIOD
+#endif /* RE_ENABLE_I18N */
+ || type == END_OF_RE)
+ {
+ /* Any byte may start a match here, so mark the whole map and stop
+ looking at the remaining nodes. */
+ memset (fastmap, '\1', sizeof (char) * SBC_MAX);
+ if (type == END_OF_RE)
+ bufp->can_be_null = 1;
+ return;
+ }
+ }
+}
+
+/* Entry point for POSIX code. */
+/* regcomp takes a regular expression as a string and compiles it.
+
+ PREG is a regex_t *. We do not expect any fields to be initialized,
+ since POSIX says we shouldn't. Thus, we set
+
+ `buffer' to the compiled pattern;
+ `used' to the length of the compiled pattern;
+ `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
+ REG_EXTENDED bit in CFLAGS is set; otherwise, to
+ RE_SYNTAX_POSIX_BASIC;
+ `newline_anchor' to REG_NEWLINE being set in CFLAGS;
+ `fastmap' to an allocated space for the fastmap;
+ `fastmap_accurate' to zero;
+ `re_nsub' to the number of subexpressions in PATTERN.
+
+ PATTERN is the address of the pattern string.
+
+ CFLAGS is a series of bits which affect compilation.
+
+ If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
+ use POSIX basic syntax.
+
+ If REG_NEWLINE is set, then . and [^...] don't match newline.
+ Also, regexec will try a match beginning after every newline.
+
+ If REG_ICASE is set, then we consider upper- and lowercase
+ versions of letters to be equivalent when matching.
+
+ If REG_NOSUB is set, then when PREG is passed to regexec, that
+ routine will report only success or failure, and nothing about the
+ registers.
+
+ It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
+ the return codes and their meanings.) */
+
+int
+regcomp (preg, pattern, cflags)
+     regex_t *__restrict preg;
+     const char *__restrict pattern;
+     int cflags;
+{
+  reg_errcode_t status;
+  reg_syntax_t syntax;
+
+  /* Base syntax: POSIX extended when requested, POSIX basic otherwise.  */
+  if (cflags & REG_EXTENDED)
+    syntax = RE_SYNTAX_POSIX_EXTENDED;
+  else
+    syntax = RE_SYNTAX_POSIX_BASIC;
+
+  /* Nothing in PREG is assumed to be initialized by the caller.  */
+  preg->buffer = NULL;
+  preg->allocated = 0;
+  preg->used = 0;
+
+  /* The fastmap is allocated up front; give up if that fails.  */
+  preg->fastmap = re_malloc (char, SBC_MAX);
+  if (BE (preg->fastmap == NULL, 0))
+    return REG_ESPACE;
+
+  if (cflags & REG_ICASE)
+    syntax |= RE_ICASE;
+
+  /* REG_NEWLINE means that neither . nor [^...] match a newline, and that
+     anchors also match at newlines.  */
+  if (cflags & REG_NEWLINE)
+    {
+      syntax &= ~RE_DOT_NEWLINE;
+      syntax |= RE_HAT_LISTS_NOT_NEWLINE;
+      preg->newline_anchor = 1;
+    }
+  else
+    {
+      preg->newline_anchor = 0;
+    }
+
+  preg->no_sub = (cflags & REG_NOSUB) ? 1 : 0;
+  preg->translate = NULL;
+
+  status = re_compile_internal (preg, pattern, strlen (pattern), syntax);
+
+  /* POSIX has a single code, REG_EPAREN, for both an unmatched open-group
+     and an unmatched close-group.  */
+  if (status == REG_ERPAREN)
+    status = REG_EPAREN;
+
+  if (BE (status != REG_NOERROR, 0))
+    {
+      /* Compilation failed: the fastmap is of no use.  */
+      re_free (preg->fastmap);
+      preg->fastmap = NULL;
+      return (int) status;
+    }
+
+  /* preg->fastmap is known to be non-null here.  Compute the fastmap now,
+     since regexec cannot modify the pattern buffer.  This function never
+     fails in this implementation.  */
+  (void) re_compile_fastmap (preg);
+
+  return (int) status;
+}
+#ifdef _LIBC
+weak_alias (__regcomp, regcomp)
+#endif
+
+/* Produce the message for ERRCODE, an error code returned by regcomp or
+   regexec.  PREG is not used.
+
+   When ERRBUF_SIZE is nonzero, the message is copied into ERRBUF,
+   truncated if necessary so that it fits together with its terminating
+   null.  The return value is the size needed for the whole message,
+   including the null, whether or not it was truncated.  */
+
+size_t
+regerror (
+  int errcode,
+  const regex_t *__restrict preg,
+  char *__restrict errbuf,
+  size_t errbuf_size)
+{
+  const size_t n_codes = (sizeof (__re_error_msgid_idx)
+                          / sizeof (__re_error_msgid_idx[0]));
+  const char *text;
+  size_t needed;
+
+  /* Only error codes produced by the regex code itself are legitimate
+     here.  Anything else means the program has a bug, so dump core to
+     make it fixable.  */
+  if (BE (errcode < 0 || errcode >= (int) n_codes, 0))
+    abort ();
+
+  text = gettext (__re_error_msgid + __re_error_msgid_idx[errcode]);
+  needed = strlen (text) + 1; /* Includes the null. */
+
+  /* No buffer space at all: just report the required size.  */
+  if (BE (errbuf_size == 0, 0))
+    return needed;
+
+  if (BE (needed <= errbuf_size, 1))
+    {
+      /* The whole message, terminator included, fits.  */
+      memcpy (errbuf, text, needed);
+      return needed;
+    }
+
+  /* Truncate: copy what fits and terminate explicitly.  */
+#if defined HAVE_MEMPCPY || defined _LIBC
+  *((char *) __mempcpy (errbuf, text, errbuf_size - 1)) = '\0';
+#else
+  memcpy (errbuf, text, errbuf_size - 1);
+  errbuf[errbuf_size - 1] = 0;
+#endif
+
+  return needed;
+}
+#ifdef _LIBC
+weak_alias (__regerror, regerror)
+#endif
+
+
+#ifdef RE_ENABLE_I18N
+/* This static array is used for the map to single-byte characters when
+ UTF-8 is used. Otherwise we would allocate memory just to initialize
+ it the same all the time. UTF-8 is the preferred encoding so this is
+ a worthwhile optimization. */
+static const bitset_t utf8_sb_map =
+{
+ /* Set the first 128 bits. The "[first ... last] =" range designator is
+ a GNU C extension: it stores BITSET_WORD_MAX in each of the first
+ 0x80 / BITSET_WORD_BITS words, and the words not named here are
+ implicitly zero-initialized. */
+ [0 ... 0x80 / BITSET_WORD_BITS - 1] = BITSET_WORD_MAX
+};
+#endif
+
+
+/* Release everything owned by DFA: the node tokens, the per-node arrays
+   and node sets, the state table with all of its states, the auxiliary
+   maps, and finally DFA itself.  */
+static void
+free_dfa_content (re_dfa_t *dfa)
+{
+  int idx;
+
+  /* Tokens are only walked when the node array exists.  */
+  if (dfa->nodes != NULL)
+    {
+      for (idx = 0; idx < dfa->nodes_len; ++idx)
+        free_token (&dfa->nodes[idx]);
+    }
+  re_free (dfa->nexts);
+
+  /* Node sets stored in the per-node arrays; each array may be absent.  */
+  for (idx = 0; idx < dfa->nodes_len; ++idx)
+    {
+      if (dfa->eclosures != NULL)
+        re_node_set_free (&dfa->eclosures[idx]);
+      if (dfa->inveclosures != NULL)
+        re_node_set_free (&dfa->inveclosures[idx]);
+      if (dfa->edests != NULL)
+        re_node_set_free (&dfa->edests[idx]);
+    }
+  re_free (dfa->edests);
+  re_free (dfa->eclosures);
+  re_free (dfa->inveclosures);
+  re_free (dfa->nodes);
+
+  /* The hash table has state_hash_mask + 1 buckets.  */
+  if (dfa->state_table != NULL)
+    {
+      for (idx = 0; idx <= dfa->state_hash_mask; ++idx)
+        {
+          struct re_state_table_entry *bucket = &dfa->state_table[idx];
+          int slot;
+
+          for (slot = 0; slot < bucket->num; ++slot)
+            free_state (bucket->array[slot]);
+          re_free (bucket->array);
+        }
+    }
+  re_free (dfa->state_table);
+
+#ifdef RE_ENABLE_I18N
+  /* utf8_sb_map is a static table and must not be freed.  */
+  if (dfa->sb_char != utf8_sb_map)
+    re_free (dfa->sb_char);
+#endif
+  re_free (dfa->subexp_map);
+#ifdef DEBUG
+  re_free (dfa->re_str);
+#endif
+
+  re_free (dfa);
+}
+
+
+/* Release the dynamically allocated storage referenced by PREG (the
+   compiled DFA, the fastmap and the translate table) and reset the
+   corresponding fields.  */
+
+void
+regfree (preg)
+     regex_t *preg;
+{
+  if (BE (preg->buffer != NULL, 1))
+    free_dfa_content ((re_dfa_t *) preg->buffer);
+  preg->allocated = 0;
+  preg->buffer = NULL;
+
+  re_free (preg->translate);
+  preg->translate = NULL;
+
+  re_free (preg->fastmap);
+  preg->fastmap = NULL;
+}
+#ifdef _LIBC
+weak_alias (__regfree, regfree)
+#endif
+
+/* Entry points compatible with 4.2 BSD regex library. We don't define
+ them unless specifically requested. */
+
+#if defined _REGEX_RE_COMP || defined _LIBC
+
+/* BSD has one and only one pattern buffer. */
+static struct re_pattern_buffer re_comp_buf;
+
+/* 4.2 BSD entry point: compile S into the single static pattern buffer.
+   With a null S, report whether a previously compiled pattern exists.
+   Returns NULL (0) on success and an error message otherwise.  */
+char *
+# ifdef _LIBC
+/* Make these definitions weak in libc, so POSIX programs can redefine
+   these names if they don't use our functions, and still use
+   regcomp/regexec above without link errors.  */
+weak_function
+# endif
+re_comp (s)
+     const char *s;
+{
+  reg_errcode_t status;
+
+  if (s == NULL)
+    {
+      if (re_comp_buf.buffer != NULL)
+        return 0;
+      return gettext ("No previous regular expression");
+    }
+
+  if (re_comp_buf.buffer != NULL)
+    {
+      /* Drop the previous pattern, but detach the fastmap first so that
+         __regfree leaves it alone and it can be reused.  */
+      char *saved_map = re_comp_buf.fastmap;
+
+      re_comp_buf.fastmap = NULL;
+      __regfree (&re_comp_buf);
+      memset (&re_comp_buf, '\0', sizeof (re_comp_buf));
+      re_comp_buf.fastmap = saved_map;
+    }
+
+  if (re_comp_buf.fastmap == NULL)
+    {
+      re_comp_buf.fastmap = (char *) malloc (SBC_MAX);
+      if (re_comp_buf.fastmap == NULL)
+        return (char *) gettext (__re_error_msgid
+                                 + __re_error_msgid_idx[(int) REG_ESPACE]);
+    }
+
+  /* `re_exec' always passes NULL for the `regs' argument, so the pattern
+     buffer fields that only affect register handling need no setup.  */
+
+  /* Match anchors at newlines.  */
+  re_comp_buf.newline_anchor = 1;
+
+  status = re_compile_internal (&re_comp_buf, s, strlen (s),
+                                re_syntax_options);
+  if (status != 0)
+    {
+      /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
+      return (char *) gettext (__re_error_msgid
+                               + __re_error_msgid_idx[(int) status]);
+    }
+
+  return NULL;
+}
+
+#ifdef _LIBC
+/* Only built inside libc: hands the single static BSD pattern buffer,
+ re_comp_buf, to __regfree so that its storage is released. */
+libc_freeres_fn (free_mem)
+{
+ __regfree (&re_comp_buf);
+}
+#endif
+
+#endif /* _REGEX_RE_COMP */
+
+/* Internal entry point.
+   Compile the regular expression PATTERN, whose length is LENGTH bytes,
+   into PREG.  SYNTAX indicates the regular expression's syntax.
+
+   PREG->buffer is (re)allocated to hold a re_dfa_t when PREG->allocated
+   is smaller than that.  Returns REG_NOERROR on success.  If that
+   allocation fails, REG_ESPACE is returned and PREG->buffer is left as it
+   was.  On any later failure the DFA is freed, PREG->buffer is set to
+   NULL, PREG->allocated is set to 0 and the error code is returned.  */
+
+static reg_errcode_t
+re_compile_internal (regex_t *preg, const char * pattern, size_t length,
+                     reg_syntax_t syntax)
+{
+  reg_errcode_t err = REG_NOERROR;
+  re_dfa_t *dfa;
+  re_string_t regexp;
+
+  /* Initialize the pattern buffer.  */
+  preg->fastmap_accurate = 0;
+  preg->syntax = syntax;
+  preg->not_bol = preg->not_eol = 0;
+  preg->used = 0;
+  preg->re_nsub = 0;
+  preg->can_be_null = 0;
+  preg->regs_allocated = REGS_UNALLOCATED;
+
+  /* Initialize the dfa.  */
+  dfa = (re_dfa_t *) preg->buffer;
+  if (BE (preg->allocated < sizeof (re_dfa_t), 0))
+    {
+      /* If zero allocated, but buffer is non-null, try to realloc
+         enough space.  This loses if buffer's address is bogus, but
+         that is the user's responsibility.  If ->buffer is NULL this
+         is a simple allocation.  */
+      dfa = re_realloc (preg->buffer, re_dfa_t, 1);
+      if (dfa == NULL)
+        return REG_ESPACE;
+      preg->allocated = sizeof (re_dfa_t);
+      preg->buffer = (unsigned char *) dfa;
+    }
+  preg->used = sizeof (re_dfa_t);
+
+  err = init_dfa (dfa, length);
+  if (BE (err != REG_NOERROR, 0))
+    {
+      free_dfa_content (dfa);
+      preg->buffer = NULL;
+      preg->allocated = 0;
+      return err;
+    }
+#ifdef DEBUG
+  /* Keep a NUL-terminated copy of the pattern for debugging.  PATTERN is
+     described by an explicit LENGTH and need not be NUL-terminated, so
+     copy exactly LENGTH bytes and terminate the copy by hand rather than
+     letting strncpy read PATTERN[LENGTH].
+     Note: length+1 will not overflow since it is checked in init_dfa.  */
+  dfa->re_str = re_malloc (char, length + 1);
+  if (BE (dfa->re_str == NULL, 0))
+    {
+      free_dfa_content (dfa);
+      preg->buffer = NULL;
+      preg->allocated = 0;
+      return REG_ESPACE;
+    }
+  memcpy (dfa->re_str, pattern, length);
+  dfa->re_str[length] = '\0';
+#endif
+
+  __libc_lock_init (dfa->lock);
+
+  err = re_string_construct (&regexp, pattern, length, preg->translate,
+                             syntax & RE_ICASE, dfa);
+  if (BE (err != REG_NOERROR, 0))
+    {
+      /* Shared failure exit: the later stages jump here with ERR set so
+         that the work areas, the input string and the DFA are all
+         released in one place.  */
+    re_compile_internal_free_return:
+      free_workarea_compile (preg);
+      re_string_destruct (&regexp);
+      free_dfa_content (dfa);
+      preg->buffer = NULL;
+      preg->allocated = 0;
+      return err;
+    }
+
+  /* Parse the regular expression, and build a structure tree.  */
+  preg->re_nsub = 0;
+  dfa->str_tree = parse (&regexp, preg, syntax, &err);
+  if (BE (dfa->str_tree == NULL, 0))
+    goto re_compile_internal_free_return;
+
+  /* Analyze the tree and create the nfa.  */
+  err = analyze (preg);
+  if (BE (err != REG_NOERROR, 0))
+    goto re_compile_internal_free_return;
+
+#ifdef RE_ENABLE_I18N
+  /* If possible, do searching in single byte encoding to speed things up.  */
+  if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->translate == NULL)
+    optimize_utf8 (dfa);
+#endif
+
+  /* Then create the initial state of the dfa.  */
+  err = create_initial_state (dfa);
+
+  /* Release work areas.  */
+  free_workarea_compile (preg);
+  re_string_destruct (&regexp);
+
+  if (BE (err != REG_NOERROR, 0))
+    {
+      free_dfa_content (dfa);
+      preg->buffer = NULL;
+      preg->allocated = 0;
+    }
+
+  return err;
+}
+
+/* Initialize DFA.  We use the length of the regular expression PAT_LEN
+   as the initial length of some arrays.
+
+   DFA is cleared first.  Returns REG_NOERROR on success and REG_ESPACE
+   when PAT_LEN is too large to be handled safely or when an allocation
+   fails.  Anything already allocated stays attached to DFA on failure;
+   the caller releases it with free_dfa_content.  */
+
+static reg_errcode_t
+init_dfa (re_dfa_t *dfa, size_t pat_len)
+{
+  unsigned int table_size;
+#ifndef _LIBC
+  char *codeset_name;
+#endif
+
+  memset (dfa, '\0', sizeof (re_dfa_t));
+
+  /* Force allocation of str_tree_storage the first time.  */
+  dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
+
+  /* Avoid overflows.  PAT_LEN + 1 must not wrap, the byte size of the
+     node array must be representable in size_t, and PAT_LEN must leave
+     room for the power-of-two search below: table_size is an unsigned
+     int, so for a PAT_LEN above half of its range the doubling would wrap
+     to zero and the loop would never terminate.  */
+  if (pat_len == SIZE_MAX
+      || pat_len + 1 > SIZE_MAX / sizeof (re_token_t)
+      || pat_len > ((unsigned int) -1) / 2)
+    return REG_ESPACE;
+
+  dfa->nodes_alloc = pat_len + 1;
+  dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc);
+
+  /*  table_size = 2 ^ ceil(log pat_len) */
+  for (table_size = 1; ; table_size <<= 1)
+    if (table_size > pat_len)
+      break;
+
+  dfa->state_table = calloc (sizeof (struct re_state_table_entry), table_size);
+  dfa->state_hash_mask = table_size - 1;
+
+  dfa->mb_cur_max = MB_CUR_MAX;
+#ifdef _LIBC
+  if (dfa->mb_cur_max == 6
+      && strcmp (_NL_CURRENT (LC_CTYPE, _NL_CTYPE_CODESET_NAME), "UTF-8") == 0)
+    dfa->is_utf8 = 1;
+  dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII)
+                       != 0);
+#else
+# ifdef HAVE_LANGINFO_CODESET
+  codeset_name = nl_langinfo (CODESET);
+# else
+  /* Without nl_langinfo, take the codeset from the locale environment
+     variables: the part after the first '.' of LC_ALL, LC_CTYPE or LANG,
+     whichever is the first one that is set and non-empty.  */
+  codeset_name = getenv ("LC_ALL");
+  if (codeset_name == NULL || codeset_name[0] == '\0')
+    codeset_name = getenv ("LC_CTYPE");
+  if (codeset_name == NULL || codeset_name[0] == '\0')
+    codeset_name = getenv ("LANG");
+  if (codeset_name == NULL)
+    codeset_name = "";
+  else if (strchr (codeset_name, '.') !=  NULL)
+    codeset_name = strchr (codeset_name, '.') + 1;
+# endif
+
+  if (strcasecmp (codeset_name, "UTF-8") == 0
+      || strcasecmp (codeset_name, "UTF8") == 0)
+    dfa->is_utf8 = 1;
+
+  /* We check exhaustively in the loop below if this charset is a
+     superset of ASCII.  */
+  dfa->map_notascii = 0;
+#endif
+
+#ifdef RE_ENABLE_I18N
+  if (dfa->mb_cur_max > 1)
+    {
+      if (dfa->is_utf8)
+        dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map;
+      else
+        {
+          int i, j, ch;
+
+          dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
+          if (BE (dfa->sb_char == NULL, 0))
+            return REG_ESPACE;
+
+          /* Set the bits corresponding to single byte chars.  */
+          for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
+            for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
+              {
+                wint_t wch = __btowc (ch);
+                if (wch != WEOF)
+                  dfa->sb_char[i] |= (bitset_word_t) 1 << j;
+# ifndef _LIBC
+                if (isascii (ch) && wch != ch)
+                  dfa->map_notascii = 1;
+# endif
+              }
+        }
+    }
+#endif
+
+  /* The two allocations made near the top are checked only now, after the
+     rest of DFA has been set up.  */
+  if (BE (dfa->nodes == NULL || dfa->state_table == NULL, 0))
+    return REG_ESPACE;
+  return REG_NOERROR;
+}
+
+/* Fill in the WORD_CHAR table of DFA, which records which characters are
+   "word" characters, i.e. the word-constituent characters that operators
+   such as "\<" and "\>" test for: alphanumerics and '_'.  Also record in
+   DFA that word operators are in use.  */
+
+static void
+internal_function
+init_word_char (re_dfa_t *dfa)
+{
+  int word, bit;
+  int ch = 0;
+
+  dfa->word_ops_used = 1;
+
+  /* CH is the character that corresponds to bit BIT of word WORD.  */
+  for (word = 0; word < BITSET_WORDS; ++word)
+    {
+      for (bit = 0; bit < BITSET_WORD_BITS; ++bit)
+        {
+          if (ch == '_' || isalnum (ch))
+            dfa->word_char[word] |= (bitset_word_t) 1 << bit;
+          ++ch;
+        }
+    }
+}
+
+/* Free the work areas of PREG's DFA that are only used while compiling:
+   the chain of parse-tree storage blocks and the org_indices array.  The
+   corresponding DFA fields are reset afterwards.  */
+
+static void
+free_workarea_compile (regex_t *preg)
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  bin_tree_storage_t *chunk = dfa->str_tree_storage;
+
+  /* Walk the singly linked list, reading each link before its block is
+     released.  */
+  while (chunk != NULL)
+    {
+      bin_tree_storage_t *following = chunk->next;
+
+      re_free (chunk);
+      chunk = following;
+    }
+
+  dfa->str_tree = NULL;
+  dfa->str_tree_storage = NULL;
+  dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
+
+  re_free (dfa->org_indices);
+  dfa->org_indices = NULL;
+}
+
+/* Create initial states for all contexts.
+
+   The node set of the initial state is the epsilon closure of the first
+   node of the regular expression, extended as described below when the
+   pattern contains back-references.  From it the plain initial state is
+   acquired and, when that state has constraints, one state each for the
+   word, newline and beginning-of-buffer contexts; otherwise all four
+   pointers share the plain state.
+
+   Returns REG_NOERROR on success, otherwise the error reported by the
+   failing helper.  The temporary node set is released on every path once
+   it has been initialized.  */
+
+static reg_errcode_t
+create_initial_state (re_dfa_t *dfa)
+{
+  int first, i;
+  reg_errcode_t err;
+  re_node_set init_nodes;
+
+  /* Initial states have the epsilon closure of the node which is
+     the first node of the regular expression.  */
+  first = dfa->str_tree->first->node_idx;
+  dfa->init_node = first;
+  err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first);
+  if (BE (err != REG_NOERROR, 0))
+    return err;
+
+  /* The back-references which are in initial states can epsilon transit,
+     since in this case all of the subexpressions can be null.
+     Then we add epsilon closures of the nodes which are the next nodes of
+     the back-references.  */
+  if (dfa->nbackref > 0)
+    for (i = 0; i < init_nodes.nelem; ++i)
+      {
+        int node_idx = init_nodes.elems[i];
+        int clexp_idx;
+        int dest_idx;
+
+        if (dfa->nodes[node_idx].type != OP_BACK_REF)
+          continue;
+
+        /* Look for the OP_CLOSE_SUBEXP of the referenced subexpression
+           among the initial nodes; without it there is nothing to add.  */
+        for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx)
+          {
+            re_token_t *clexp_node;
+            clexp_node = dfa->nodes + init_nodes.elems[clexp_idx];
+            if (clexp_node->type == OP_CLOSE_SUBEXP
+                && clexp_node->opr.idx == dfa->nodes[node_idx].opr.idx)
+              break;
+          }
+        if (clexp_idx == init_nodes.nelem)
+          continue;
+
+        dest_idx = dfa->edests[node_idx].elems[0];
+        if (!re_node_set_contains (&init_nodes, dest_idx))
+          {
+            /* The merge can run out of memory; do not ignore that.  */
+            err = re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx);
+            if (BE (err != REG_NOERROR, 0))
+              goto free_return;
+            /* The set changed, so rescan it.  */
+            i = 0;
+          }
+      }
+
+  /* It must be the first time to invoke acquire_state.  */
+  dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0);
+  /* We don't check ERR here, since the initial state must not be NULL.  */
+  if (BE (dfa->init_state == NULL, 0))
+    goto free_return;
+  if (dfa->init_state->has_constraint)
+    {
+      dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes,
+                                                       CONTEXT_WORD);
+      dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes,
+                                                     CONTEXT_NEWLINE);
+      dfa->init_state_begbuf = re_acquire_state_context (&err, dfa,
+                                                         &init_nodes,
+                                                         CONTEXT_NEWLINE
+                                                         | CONTEXT_BEGBUF);
+      if (BE (dfa->init_state_word == NULL || dfa->init_state_nl == NULL
+              || dfa->init_state_begbuf == NULL, 0))
+        goto free_return;
+    }
+  else
+    dfa->init_state_word = dfa->init_state_nl
+      = dfa->init_state_begbuf = dfa->init_state;
+
+  re_node_set_free (&init_nodes);
+  return REG_NOERROR;
+
+ free_return:
+  /* Failure exit: the temporary node set is still owned here, so release
+     it before reporting ERR exactly as the failing helper left it.  */
+  re_node_set_free (&init_nodes);
+  return err;
+}
+
+#ifdef RE_ENABLE_I18N
+/* If it is possible to do searching in single byte encoding instead of UTF-8
+ to speed things up, set dfa->mb_cur_max to 1, clear is_utf8 and change
+ DFA nodes where needed.
+
+ The first loop inspects every node and returns, leaving DFA untouched,
+ as soon as it meets something the byte-wise search cannot handle: an
+ ANCHOR whose context type is not LINE_FIRST, LINE_LAST, BUF_FIRST or
+ BUF_LAST, a COMPLEX_BRACKET, or a SIMPLE_BRACKET with any bit set for
+ a byte at or above 0x80. A node type not listed in the switch aborts
+ the program. While scanning it records whether a CHARACTER node at or
+ above 0x80 (mb_chars) or an OP_PERIOD (has_period) was seen; only then
+ does the second loop need to rewrite nodes. */
+
+static void
+optimize_utf8 (re_dfa_t *dfa)
+{
+ int node, i, mb_chars = 0, has_period = 0;
+
+ for (node = 0; node < dfa->nodes_len; ++node)
+ switch (dfa->nodes[node].type)
+ {
+ case CHARACTER:
+ if (dfa->nodes[node].opr.c >= 0x80)
+ mb_chars = 1;
+ break;
+ case ANCHOR:
+ switch (dfa->nodes[node].opr.ctx_type)
+ {
+ case LINE_FIRST:
+ case LINE_LAST:
+ case BUF_FIRST:
+ case BUF_LAST:
+ break;
+ default:
+ /* Word anchors etc. cannot be handled. It's okay to test
+ opr.ctx_type since constraints (for all DFA nodes) are
+ created by ORing one or more opr.ctx_type values. */
+ return;
+ }
+ break;
+ case OP_PERIOD:
+ has_period = 1;
+ break;
+ case OP_BACK_REF:
+ case OP_ALT:
+ case END_OF_RE:
+ case OP_DUP_ASTERISK:
+ case OP_OPEN_SUBEXP:
+ case OP_CLOSE_SUBEXP:
+ break;
+ case COMPLEX_BRACKET:
+ return;
+ case SIMPLE_BRACKET:
+ /* Just double check. The non-ASCII range starts at 0x80. */
+ assert (0x80 % BITSET_WORD_BITS == 0);
+ for (i = 0x80 / BITSET_WORD_BITS; i < BITSET_WORDS; ++i)
+ if (dfa->nodes[node].opr.sbcset[i])
+ return;
+ break;
+ default:
+ abort ();
+ }
+
+ if (mb_chars || has_period)
+ for (node = 0; node < dfa->nodes_len; ++node)
+ {
+ if (dfa->nodes[node].type == CHARACTER
+ && dfa->nodes[node].opr.c >= 0x80)
+ dfa->nodes[node].mb_partial = 0;
+ else if (dfa->nodes[node].type == OP_PERIOD)
+ dfa->nodes[node].type = OP_UTF8_PERIOD;
+ }
+
+ /* The search can be in single byte locale. has_mb_node stays set when
+ the pattern has back-references or a period. */
+ dfa->mb_cur_max = 1;
+ dfa->is_utf8 = 0;
+ dfa->has_mb_node = dfa->nbackref > 0 || has_period;
+}
+#endif
+
+ /* Analyze the structure tree, and calculate "first", "next", "edest",
+ "eclosure", and "inveclosure".
+
+ PREG->buffer must point to the re_dfa_t whose str_tree is to be analyzed.
+ Returns REG_NOERROR on success, REG_ESPACE when one of the mandatory
+ arrays cannot be allocated, or the first error reported by one of the
+ passes run below. */
+
+ static reg_errcode_t
+ analyze (regex_t *preg)
+ {
+ re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+ reg_errcode_t ret;
+
+ /* Allocate arrays. */
+ dfa->nexts = re_malloc (int, dfa->nodes_alloc);
+ dfa->org_indices = re_malloc (int, dfa->nodes_alloc);
+ dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc);
+ dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc);
+ if (BE (dfa->nexts == NULL || dfa->org_indices == NULL || dfa->edests == NULL
+ || dfa->eclosures == NULL, 0))
+ return REG_ESPACE;
+
+ /* The subexpression map is optional: when its allocation fails the
+ optimize_subexps pass is simply not run. */
+ dfa->subexp_map = re_malloc (int, preg->re_nsub);
+ if (dfa->subexp_map != NULL)
+ {
+ int i;
+ /* Start from the identity mapping. */
+ for (i = 0; i < preg->re_nsub; i++)
+ dfa->subexp_map[i] = i;
+ preorder (dfa->str_tree, optimize_subexps, dfa);
+ /* If the pass left the mapping as the identity, the map carries no
+ information and is released again. */
+ for (i = 0; i < preg->re_nsub; i++)
+ if (dfa->subexp_map[i] != i)
+ break;
+ if (i == preg->re_nsub)
+ {
+ free (dfa->subexp_map);
+ dfa->subexp_map = NULL;
+ }
+ }
+
+ /* Run the tree passes in order; each one depends on the results of the
+ previous one. */
+ ret = postorder (dfa->str_tree, lower_subexps, preg);
+ if (BE (ret != REG_NOERROR, 0))
+ return ret;
+ ret = postorder (dfa->str_tree, calc_first, dfa);
+ if (BE (ret != REG_NOERROR, 0))
+ return ret;
+ preorder (dfa->str_tree, calc_next, dfa);
+ ret = preorder (dfa->str_tree, link_nfa_nodes, dfa);
+ if (BE (ret != REG_NOERROR, 0))
+ return ret;
+ ret = calc_eclosure (dfa);
+ if (BE (ret != REG_NOERROR, 0))
+ return ret;
+
+ /* We only need this during the prune_impossible_nodes pass in regexec.c;
+ skip it if p_i_n will not run, as calc_inveclosure can be quadratic. */
+ if ((!preg->no_sub && preg->re_nsub > 0 && dfa->has_plural_match)
+ || dfa->nbackref)
+ {
+ dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len);
+ if (BE (dfa->inveclosures == NULL, 0))
+ return REG_ESPACE;
+ ret = calc_inveclosure (dfa);
+ }
+
+ /* RET is REG_NOERROR here unless calc_inveclosure ran and failed. */
+ return ret;
+ }
+
+ /* Our parse trees are very unbalanced, so we cannot use a stack to
+ implement parse tree visits. Instead, we use parent pointers and
+ some hairy code in these two functions. */
+ /* Call FN (EXTRA, node) for the nodes of the tree rooted at ROOT, visiting
+ the children of a node before the node itself. Returns the first value
+ other than REG_NOERROR that FN returns, or REG_NOERROR once FN has been
+ called on a node whose parent pointer is NULL. */
+ static reg_errcode_t
+ postorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)),
+ void *extra)
+ {
+ bin_tree_t *node, *prev;
+
+ for (node = root; ; )
+ {
+ /* Descend down the tree, preferably to the left (or to the right
+ if that's the only child). */
+ while (node->left || node->right)
+ if (node->left)
+ node = node->left;
+ else
+ node = node->right;
+
+ /* NODE is now a leaf. Visit it, then keep visiting ancestors for as
+ long as we arrive from their last remaining child. */
+ do
+ {
+ reg_errcode_t err = fn (extra, node);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ if (node->parent == NULL)
+ return REG_NOERROR;
+ prev = node;
+ node = node->parent;
+ }
+ /* Go up while we have a node that is reached from the right. */
+ while (node->right == prev || node->right == NULL);
+ /* We came up from the left and a right subtree exists: handle it
+ before visiting NODE itself. */
+ node = node->right;
+ }
+ }
+
+ /* Call FN (EXTRA, node) for the nodes of the tree rooted at ROOT, visiting
+ a node before its children. Like postorder, the walk uses the parent
+ pointers instead of a stack. Returns the first value other than
+ REG_NOERROR that FN returns, or REG_NOERROR when the climb back up runs
+ past a node whose parent pointer is NULL. */
+ static reg_errcode_t
+ preorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)),
+ void *extra)
+ {
+ bin_tree_t *node;
+
+ for (node = root; ; )
+ {
+ reg_errcode_t err = fn (extra, node);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+
+ /* Go to the left node, or up and to the right. */
+ if (node->left)
+ node = node->left;
+ else
+ {
+ /* PREV starts as NULL so that a node without a left child still
+ gets its own right child visited before we start climbing. */
+ bin_tree_t *prev = NULL;
+ while (node->right == prev || node->right == NULL)
+ {
+ prev = node;
+ node = node->parent;
+ if (!node)
+ return REG_NOERROR;
+ }
+ node = node->right;
+ }
+ }
+ }
+
+ /* Optimization pass: if a SUBEXP is entirely contained, strip it and tell
+ re_search_internal to map the inner one's opr.idx to this one's. Adjust
+ backreferences as well. Requires a preorder visit.
+
+ EXTRA is the re_dfa_t being compiled. The SUBEXP branch writes to
+ dfa->subexp_map without testing it; the only call visible here (in
+ analyze) runs this pass only when the map was allocated. Always returns
+ REG_NOERROR. */
+ static reg_errcode_t
+ optimize_subexps (void *extra, bin_tree_t *node)
+ {
+ re_dfa_t *dfa = (re_dfa_t *) extra;
+
+ if (node->token.type == OP_BACK_REF && dfa->subexp_map)
+ {
+ /* Redirect the back reference through the map and mark the
+ resulting subexpression index as used. */
+ int idx = node->token.opr.idx;
+ node->token.opr.idx = dfa->subexp_map[idx];
+ dfa->used_bkref_map |= 1 << node->token.opr.idx;
+ }
+
+ else if (node->token.type == SUBEXP
+ && node->left && node->left->token.type == SUBEXP)
+ {
+ int other_idx = node->left->token.opr.idx;
+
+ /* Splice the inner SUBEXP out of the tree: its body (possibly NULL)
+ becomes the direct child of NODE. */
+ node->left = node->left->left;
+ if (node->left)
+ node->left->parent = node;
+
+ /* The removed inner group now aliases whatever NODE maps to, and is
+ no longer a back-reference target of its own. */
+ dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx];
+ if (other_idx < BITSET_WORD_BITS)
+ dfa->used_bkref_map &= ~((bitset_word_t) 1 << other_idx);
+ }
+
+ return REG_NOERROR;
+ }
+
+ /* Lowering pass, run on every tree node: each child of NODE that is a
+    SUBEXP is replaced by whatever lower_subexp returns for it (the
+    OP_OPEN_SUBEXP / body / OP_CLOSE_SUBEXP concatenation, or just the body).
+    EXTRA is the regex_t being compiled.  Returns the error code that
+    lower_subexp stored, or REG_NOERROR.  */
+ static reg_errcode_t
+ lower_subexps (void *extra, bin_tree_t *node)
+ {
+   regex_t *preg = (regex_t *) extra;
+   reg_errcode_t status = REG_NOERROR;
+   bin_tree_t **slots[2];
+   int k;
+
+   /* Handle the left child first, then the right one.  */
+   slots[0] = &node->left;
+   slots[1] = &node->right;
+
+   for (k = 0; k < 2; ++k)
+     {
+       bin_tree_t *child = *slots[k];
+
+       if (child == NULL || child->token.type != SUBEXP)
+         continue;
+
+       child = lower_subexp (&status, preg, child);
+       *slots[k] = child;
+       /* lower_subexp returns NULL on failure; nothing to re-parent then.  */
+       if (child != NULL)
+         child->parent = node;
+     }
+
+   return status;
+ }
+
+ /* Build the replacement for the SUBEXP node NODE and return it.
+ When PREG->no_sub is set, the group has a body, and the group's index is
+ not marked in dfa->used_bkref_map (or does not fit in that bitmap), the
+ body itself is returned and no open/close nodes are created. Otherwise
+ the result is CONCAT (OP_OPEN_SUBEXP, CONCAT (body, OP_CLOSE_SUBEXP)), or
+ CONCAT (OP_OPEN_SUBEXP, OP_CLOSE_SUBEXP) for an empty group.
+ On allocation failure *ERR is set to REG_ESPACE and NULL is returned. */
+ static bin_tree_t *
+ lower_subexp (reg_errcode_t *err, regex_t *preg, bin_tree_t *node)
+ {
+ re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+ bin_tree_t *body = node->left;
+ bin_tree_t *op, *cls, *tree1, *tree;
+
+ if (preg->no_sub
+ /* We do not optimize empty subexpressions, because otherwise we may
+ have bad CONCAT nodes with NULL children. This is obviously not
+ very common, so we do not lose much. An example that triggers
+ this case is the sed "script" /\(\)/x. */
+ && node->left != NULL
+ && (node->token.opr.idx >= BITSET_WORD_BITS
+ || !(dfa->used_bkref_map
+ & ((bitset_word_t) 1 << node->token.opr.idx))))
+ return node->left;
+
+ /* Convert the SUBEXP node to the concatenation of an
+ OP_OPEN_SUBEXP, the contents, and an OP_CLOSE_SUBEXP. */
+ op = create_tree (dfa, NULL, NULL, OP_OPEN_SUBEXP);
+ cls = create_tree (dfa, NULL, NULL, OP_CLOSE_SUBEXP);
+ tree1 = body ? create_tree (dfa, body, cls, CONCAT) : cls;
+ tree = create_tree (dfa, op, tree1, CONCAT);
+ /* All four results are checked together, after every call was made. */
+ if (BE (tree == NULL || tree1 == NULL || op == NULL || cls == NULL, 0))
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
+
+ /* Both delimiter nodes inherit the index and opt_subexp flag of the
+ SUBEXP they replace. */
+ op->token.opr.idx = cls->token.opr.idx = node->token.opr.idx;
+ op->token.opt_subexp = cls->token.opt_subexp = node->token.opt_subexp;
+ return tree;
+ }
+
+ /* Pass 1 in building the NFA (postorder visit): set NODE->first and
+    NODE->node_idx.  A CONCAT node borrows both values from its left child;
+    any other node becomes its own FIRST and gets a new automaton node added
+    to the DFA passed in EXTRA.  Returns REG_ESPACE when the automaton node
+    cannot be added, REG_NOERROR otherwise.  */
+ static reg_errcode_t
+ calc_first (void *extra, bin_tree_t *node)
+ {
+   re_dfa_t *dfa = (re_dfa_t *) extra;
+   int new_idx;
+
+   if (node->token.type == CONCAT)
+     {
+       bin_tree_t *lhs = node->left;
+
+       node->first = lhs->first;
+       node->node_idx = lhs->node_idx;
+       return REG_NOERROR;
+     }
+
+   node->first = node;
+   new_idx = re_dfa_add_node (dfa, node->token);
+   node->node_idx = new_idx;
+   if (BE (new_idx == -1, 0))
+     return REG_ESPACE;
+
+   /* An anchor's context type doubles as the constraint of its node.  */
+   if (node->token.type == ANCHOR)
+     dfa->nodes[new_idx].constraint = node->token.opr.ctx_type;
+
+   return REG_NOERROR;
+ }
+
+ /* Pass 2 (preorder visit): push the NEXT pointer of NODE down to its
+    children.  EXTRA is not used.  Always returns REG_NOERROR.  */
+ static reg_errcode_t
+ calc_next (void *extra, bin_tree_t *node)
+ {
+   bin_tree_t *lhs = node->left;
+   bin_tree_t *rhs = node->right;
+
+   if (node->token.type == OP_DUP_ASTERISK)
+     {
+       /* The repeated operand loops back to the star itself.  */
+       lhs->next = node;
+     }
+   else if (node->token.type == CONCAT)
+     {
+       /* The left part is followed by the start of the right part; the
+          right part is followed by whatever follows the whole CONCAT.  */
+       lhs->next = rhs->first;
+       rhs->next = node->next;
+     }
+   else
+     {
+       /* Every existing child simply inherits NODE's successor.  */
+       if (lhs)
+         lhs->next = node->next;
+       if (rhs)
+         rhs->next = node->next;
+     }
+
+   return REG_NOERROR;
+ }
+
+ /* Pass 3: link all DFA nodes to their NEXT node (any order will do).
+    EXTRA is the re_dfa_t under construction and NODE the tree node being
+    visited.  Epsilon-type nodes get their destinations stored in
+    dfa->edests; other nodes get their successor stored in dfa->nexts.
+    A back reference gets both.
+    Returns REG_NOERROR, or the error reported by re_node_set_init_1 /
+    re_node_set_init_2 when a destination set cannot be created.  */
+ static reg_errcode_t
+ link_nfa_nodes (void *extra, bin_tree_t *node)
+ {
+   re_dfa_t *dfa = (re_dfa_t *) extra;
+   int idx = node->node_idx;
+   reg_errcode_t err = REG_NOERROR;
+
+   switch (node->token.type)
+     {
+     case CONCAT:
+       /* CONCAT has no automaton node of its own.  */
+       break;
+
+     case END_OF_RE:
+       assert (node->next == NULL);
+       break;
+
+     case OP_DUP_ASTERISK:
+     case OP_ALT:
+       {
+         int left, right;
+         dfa->has_plural_match = 1;
+         /* A missing child means that side of the branch goes straight to
+            whatever follows NODE.  */
+         if (node->left != NULL)
+           left = node->left->first->node_idx;
+         else
+           left = node->next->node_idx;
+         if (node->right != NULL)
+           right = node->right->first->node_idx;
+         else
+           right = node->next->node_idx;
+         assert (left > -1);
+         assert (right > -1);
+         err = re_node_set_init_2 (dfa->edests + idx, left, right);
+       }
+       break;
+
+     case ANCHOR:
+     case OP_OPEN_SUBEXP:
+     case OP_CLOSE_SUBEXP:
+       err = re_node_set_init_1 (dfa->edests + idx, node->next->node_idx);
+       break;
+
+     case OP_BACK_REF:
+       dfa->nexts[idx] = node->next->node_idx;
+       /* Keep the result: a failed allocation here must abort the
+          compilation instead of leaving an empty destination set.  */
+       err = re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]);
+       break;
+
+     default:
+       assert (!IS_EPSILON_NODE (node->token.type));
+       dfa->nexts[idx] = node->next->node_idx;
+       break;
+     }
+
+   return err;
+ }
+
+ /* Duplicate the epsilon closure of the node ROOT_NODE.
+ Note that duplicated nodes have constraint INIT_CONSTRAINT in addition
+ to their own constraint.
+
+ The walk starts at the pair TOP_ORG_NODE / TOP_CLONE_NODE and follows the
+ original node's destinations, creating a clone for each destination and
+ recording it in dfa->edests of the current clone. Returns REG_NOERROR,
+ or REG_ESPACE when a node cannot be duplicated or a set cannot grow. */
+
+ static reg_errcode_t
+ internal_function
+ duplicate_node_closure (re_dfa_t *dfa, int top_org_node, int top_clone_node,
+ int root_node, unsigned int init_constraint)
+ {
+ int org_node, clone_node, ret;
+ unsigned int constraint = init_constraint;
+ /* The loop has no condition of its own: it ends through one of the
+ `break' statements or an error return below. */
+ for (org_node = top_org_node, clone_node = top_clone_node;;)
+ {
+ int org_dest, clone_dest;
+ if (dfa->nodes[org_node].type == OP_BACK_REF)
+ {
+ /* If the back reference epsilon-transit, its destination must
+ also have the constraint. Then duplicate the epsilon closure
+ of the destination of the back reference, and store it in
+ edests of the back reference. */
+ org_dest = dfa->nexts[org_node];
+ re_node_set_empty (dfa->edests + clone_node);
+ clone_dest = duplicate_node (dfa, org_dest, constraint);
+ if (BE (clone_dest == -1, 0))
+ return REG_ESPACE;
+ dfa->nexts[clone_node] = dfa->nexts[org_node];
+ ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+ if (BE (ret < 0, 0))
+ return REG_ESPACE;
+ }
+ else if (dfa->edests[org_node].nelem == 0)
+ {
+ /* In case of the node can't epsilon-transit, don't duplicate the
+ destination and store the original destination as the
+ destination of the node. */
+ dfa->nexts[clone_node] = dfa->nexts[org_node];
+ break;
+ }
+ else if (dfa->edests[org_node].nelem == 1)
+ {
+ /* In case of the node can epsilon-transit, and it has only one
+ destination. */
+ org_dest = dfa->edests[org_node].elems[0];
+ re_node_set_empty (dfa->edests + clone_node);
+ /* If the node is root_node itself, it means the epsilon closure
+ has a loop. Then tie it to the destination of the root_node. */
+ if (org_node == root_node && clone_node != org_node)
+ {
+ ret = re_node_set_insert (dfa->edests + clone_node, org_dest);
+ if (BE (ret < 0, 0))
+ return REG_ESPACE;
+ break;
+ }
+ /* In case of the node has another constraint, add it. */
+ constraint |= dfa->nodes[org_node].constraint;
+ clone_dest = duplicate_node (dfa, org_dest, constraint);
+ if (BE (clone_dest == -1, 0))
+ return REG_ESPACE;
+ ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+ if (BE (ret < 0, 0))
+ return REG_ESPACE;
+ }
+ else /* dfa->edests[org_node].nelem == 2 */
+ {
+ /* In case of the node can epsilon-transit, and it has two
+ destinations. In the bin_tree_t and DFA, that's '|' and '*'. */
+ org_dest = dfa->edests[org_node].elems[0];
+ re_node_set_empty (dfa->edests + clone_node);
+ /* Search for a duplicated node which satisfies the constraint. */
+ clone_dest = search_duplicated_node (dfa, org_dest, constraint);
+ if (clone_dest == -1)
+ {
+ /* There is no such duplicated node, create a new one. */
+ reg_errcode_t err;
+ clone_dest = duplicate_node (dfa, org_dest, constraint);
+ if (BE (clone_dest == -1, 0))
+ return REG_ESPACE;
+ ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+ if (BE (ret < 0, 0))
+ return REG_ESPACE;
+ /* The first destination is handled by recursion; the second
+ one is followed by the enclosing loop. */
+ err = duplicate_node_closure (dfa, org_dest, clone_dest,
+ root_node, constraint);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ }
+ else
+ {
+ /* There is a duplicated node which satisfies the constraint,
+ use it to avoid infinite loop. */
+ ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+ if (BE (ret < 0, 0))
+ return REG_ESPACE;
+ }
+
+ org_dest = dfa->edests[org_node].elems[1];
+ clone_dest = duplicate_node (dfa, org_dest, constraint);
+ if (BE (clone_dest == -1, 0))
+ return REG_ESPACE;
+ ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+ if (BE (ret < 0, 0))
+ return REG_ESPACE;
+ }
+ /* Continue the walk from the destination pair just linked. */
+ org_node = org_dest;
+ clone_node = clone_dest;
+ }
+ return REG_NOERROR;
+ }
+
+ /* Search for a node which is duplicated from the node ORG_NODE, and
+    satisfies the constraint CONSTRAINT.
+
+    The scan runs backwards from the last node and stops at the first node
+    that is not marked as duplicated; node 0 is never examined.  Returns the
+    index of the matching node, or -1 if there is none.  */
+
+ static int
+ search_duplicated_node (const re_dfa_t *dfa, int org_node,
+                         unsigned int constraint)
+ {
+   int idx;
+   /* Test the index before using it, so that an empty node array is never
+      dereferenced.  */
+   for (idx = dfa->nodes_len - 1; idx > 0 && dfa->nodes[idx].duplicated; --idx)
+     {
+       if (org_node == dfa->org_indices[idx]
+           && constraint == dfa->nodes[idx].constraint)
+         return idx; /* Found.  */
+     }
+   return -1; /* Not found.  */
+ }
+
+ /* Append a copy of node ORG_IDX to the DFA.  The copy's constraint is
+    CONSTRAINT combined with the original node's constraint, it is flagged
+    as duplicated, and dfa->org_indices remembers which node it came from.
+    Returns the index of the copy, or -1 if insufficient storage is
+    available.  */
+
+ static int
+ duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint)
+ {
+   int clone = re_dfa_add_node (dfa, dfa->nodes[org_idx]);
+
+   if (BE (clone == -1, 0))
+     return clone;
+
+   dfa->nodes[clone].constraint
+     = constraint | dfa->nodes[org_idx].constraint;
+   dfa->nodes[clone].duplicated = 1;
+
+   /* Remember where the copy came from.  */
+   dfa->org_indices[clone] = org_idx;
+   return clone;
+ }
+
+ /* Fill dfa->inveclosures from dfa->eclosures: for every node SRC and every
+    member M of SRC's epsilon closure, SRC is appended to inveclosures[M].
+    Returns REG_ESPACE when an append fails, REG_NOERROR otherwise.  */
+ static reg_errcode_t
+ calc_inveclosure (re_dfa_t *dfa)
+ {
+   int from, k;
+
+   /* Start with every inverse closure empty.  */
+   for (k = 0; k < dfa->nodes_len; ++k)
+     re_node_set_init_empty (dfa->inveclosures + k);
+
+   for (from = 0; from < dfa->nodes_len; ++from)
+     {
+       const re_node_set *closure = dfa->eclosures + from;
+
+       for (k = 0; k < closure->nelem; ++k)
+         {
+           re_node_set *target = dfa->inveclosures + closure->elems[k];
+
+           if (BE (re_node_set_insert_last (target, from) == -1, 0))
+             return REG_ESPACE;
+         }
+     }
+
+   return REG_NOERROR;
+ }
+
+ /* Calculate "eclosure" for all the nodes in DFA.
+ Nodes whose closure could not be completed in one sweep (their nelem is
+ still 0 afterwards) cause another sweep over all nodes. Returns
+ REG_NOERROR, or the error reported by calc_eclosure_iter. */
+
+ static reg_errcode_t
+ calc_eclosure (re_dfa_t *dfa)
+ {
+ int node_idx, incomplete;
+ #ifdef DEBUG
+ assert (dfa->nodes_len > 0);
+ #endif
+ incomplete = 0;
+ /* For each nodes, calculate epsilon closure. */
+ for (node_idx = 0; ; ++node_idx)
+ {
+ reg_errcode_t err;
+ re_node_set eclosure_elem;
+ /* End of a sweep: stop if everything is complete, otherwise restart
+ from node 0. */
+ if (node_idx == dfa->nodes_len)
+ {
+ if (!incomplete)
+ break;
+ incomplete = 0;
+ node_idx = 0;
+ }
+
+ #ifdef DEBUG
+ assert (dfa->eclosures[node_idx].nelem != -1);
+ #endif
+
+ /* If we have already calculated, skip it. */
+ if (dfa->eclosures[node_idx].nelem != 0)
+ continue;
+ /* Calculate epsilon closure of `node_idx'. */
+ err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, 1);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+
+ /* Still empty: remember that another sweep is needed and drop the
+ set that was handed back. */
+ if (dfa->eclosures[node_idx].nelem == 0)
+ {
+ incomplete = 1;
+ re_node_set_free (&eclosure_elem);
+ }
+ }
+ return REG_NOERROR;
+ }
+
+ /* Calculate epsilon closure of NODE.
+
+    The closure is returned through NEW_SET.  It is also stored in
+    dfa->eclosures[NODE], unless the result is incomplete and ROOT is zero,
+    in which case the stored nelem is reset to 0 so that the node is
+    recomputed later.  While the computation is running the stored nelem is
+    -1, which is how cycles are detected.
+    Returns REG_NOERROR on success, or an error code when a set cannot be
+    allocated or enlarged or when duplicating a constrained closure fails.  */
+
+ static reg_errcode_t
+ calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, int node, int root)
+ {
+   reg_errcode_t err;
+   int i, incomplete;
+   re_node_set eclosure;
+   incomplete = 0;
+   err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1);
+   if (BE (err != REG_NOERROR, 0))
+     return err;
+
+   /* This indicates that we are calculating this node now.
+      We reference this value to avoid infinite loop.  */
+   dfa->eclosures[node].nelem = -1;
+
+   /* If the current node has constraints, duplicate all nodes
+      since they must inherit the constraints.  */
+   if (dfa->nodes[node].constraint
+       && dfa->edests[node].nelem
+       && !dfa->nodes[dfa->edests[node].elems[0]].duplicated)
+     {
+       err = duplicate_node_closure (dfa, node, node, node,
+                                     dfa->nodes[node].constraint);
+       if (BE (err != REG_NOERROR, 0))
+         return err;
+     }
+
+   /* Expand each epsilon destination nodes.  */
+   if (IS_EPSILON_NODE(dfa->nodes[node].type))
+     for (i = 0; i < dfa->edests[node].nelem; ++i)
+       {
+         re_node_set eclosure_elem;
+         int edest = dfa->edests[node].elems[i];
+         /* If calculating the epsilon closure of `edest' is in progress,
+            return intermediate result.  */
+         if (dfa->eclosures[edest].nelem == -1)
+           {
+             incomplete = 1;
+             continue;
+           }
+         /* If we haven't calculated the epsilon closure of `edest' yet,
+            calculate now.  Otherwise use calculated epsilon closure.  */
+         if (dfa->eclosures[edest].nelem == 0)
+           {
+             err = calc_eclosure_iter (&eclosure_elem, dfa, edest, 0);
+             if (BE (err != REG_NOERROR, 0))
+               return err;
+           }
+         else
+           eclosure_elem = dfa->eclosures[edest];
+         /* Merge the epsilon closure of `edest'.  A failed merge must not
+            be ignored: it would leave this closure silently truncated.  */
+         err = re_node_set_merge (&eclosure, &eclosure_elem);
+         if (BE (err != REG_NOERROR, 0))
+           return err;
+         /* If the epsilon closure of `edest' is incomplete,
+            the epsilon closure of this node is also incomplete.  */
+         if (dfa->eclosures[edest].nelem == 0)
+           {
+             incomplete = 1;
+             re_node_set_free (&eclosure_elem);
+           }
+       }
+
+   /* Epsilon closures include itself.  As elsewhere in this file, a
+      negative result from re_node_set_insert means out of memory.  */
+   if (BE (re_node_set_insert (&eclosure, node) < 0, 0))
+     return REG_ESPACE;
+   if (incomplete && !root)
+     dfa->eclosures[node].nelem = 0;
+   else
+     dfa->eclosures[node] = eclosure;
+   *new_set = eclosure;
+   return REG_NOERROR;
+ }
+
+/* Functions for token which are used in the parser. */
+
+ /* Read the token at the current position of INPUT into RESULT and step
+    INPUT past it.  SYNTAX is handed to peek_token unchanged.
+    We must not use this function inside bracket expressions.  */
+
+ static void
+ internal_function
+ fetch_token (re_token_t *result, re_string_t *input, reg_syntax_t syntax)
+ {
+   int consumed = peek_token (result, input, syntax);
+
+   re_string_skip_bytes (input, consumed);
+ }
+
+ /* Peek a token from INPUT, and return the length of the token.
+ We must not use this function inside bracket expressions.
+
+ TOKEN receives the token type and, depending on the type, the byte
+ (opr.c), back-reference index (opr.idx) or anchor kind (opr.ctx_type).
+ SYNTAX selects which characters act as operators. The return value is 0
+ at the end of INPUT, 2 for a backslash followed by another byte, and 1 in
+ every other case. */
+
+ static int
+ internal_function
+ peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
+ {
+ unsigned char c;
+
+ if (re_string_eoi (input))
+ {
+ token->type = END_OF_RE;
+ return 0;
+ }
+
+ c = re_string_peek_byte (input, 0);
+ token->opr.c = c;
+
+ token->word_char = 0;
+ #ifdef RE_ENABLE_I18N
+ token->mb_partial = 0;
+ /* A byte that is not the first byte of a character is always an ordinary
+ CHARACTER token, flagged as a partial multibyte character. */
+ if (input->mb_cur_max > 1 &&
+ !re_string_first_byte (input, re_string_cur_idx (input)))
+ {
+ token->type = CHARACTER;
+ token->mb_partial = 1;
+ return 1;
+ }
+ #endif
+ if (c == '\\')
+ {
+ unsigned char c2;
+ /* A backslash that is the last byte of the pattern. */
+ if (re_string_cur_idx (input) + 1 >= re_string_length (input))
+ {
+ token->type = BACK_SLASH;
+ return 1;
+ }
+
+ /* Default for an escaped byte: the ordinary character C2. The switch
+ below overrides the type for the escapes SYNTAX enables. */
+ c2 = re_string_peek_byte_case (input, 1);
+ token->opr.c = c2;
+ token->type = CHARACTER;
+ #ifdef RE_ENABLE_I18N
+ if (input->mb_cur_max > 1)
+ {
+ wint_t wc = re_string_wchar_at (input,
+ re_string_cur_idx (input) + 1);
+ token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
+ }
+ else
+ #endif
+ token->word_char = IS_WORD_CHAR (c2) != 0;
+
+ switch (c2)
+ {
+ case '|':
+ if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_NO_BK_VBAR))
+ token->type = OP_ALT;
+ break;
+ case '1': case '2': case '3': case '4': case '5':
+ case '6': case '7': case '8': case '9':
+ if (!(syntax & RE_NO_BK_REFS))
+ {
+ token->type = OP_BACK_REF;
+ /* The stored index is zero-based: \1 becomes 0. */
+ token->opr.idx = c2 - '1';
+ }
+ break;
+ case '<':
+ if (!(syntax & RE_NO_GNU_OPS))
+ {
+ token->type = ANCHOR;
+ token->opr.ctx_type = WORD_FIRST;
+ }
+ break;
+ case '>':
+ if (!(syntax & RE_NO_GNU_OPS))
+ {
+ token->type = ANCHOR;
+ token->opr.ctx_type = WORD_LAST;
+ }
+ break;
+ case 'b':
+ if (!(syntax & RE_NO_GNU_OPS))
+ {
+ token->type = ANCHOR;
+ token->opr.ctx_type = WORD_DELIM;
+ }
+ break;
+ case 'B':
+ if (!(syntax & RE_NO_GNU_OPS))
+ {
+ token->type = ANCHOR;
+ token->opr.ctx_type = NOT_WORD_DELIM;
+ }
+ break;
+ case 'w':
+ if (!(syntax & RE_NO_GNU_OPS))
+ token->type = OP_WORD;
+ break;
+ case 'W':
+ if (!(syntax & RE_NO_GNU_OPS))
+ token->type = OP_NOTWORD;
+ break;
+ case 's':
+ if (!(syntax & RE_NO_GNU_OPS))
+ token->type = OP_SPACE;
+ break;
+ case 'S':
+ if (!(syntax & RE_NO_GNU_OPS))
+ token->type = OP_NOTSPACE;
+ break;
+ case '`':
+ if (!(syntax & RE_NO_GNU_OPS))
+ {
+ token->type = ANCHOR;
+ token->opr.ctx_type = BUF_FIRST;
+ }
+ break;
+ case '\'':
+ if (!(syntax & RE_NO_GNU_OPS))
+ {
+ token->type = ANCHOR;
+ token->opr.ctx_type = BUF_LAST;
+ }
+ break;
+ case '(':
+ if (!(syntax & RE_NO_BK_PARENS))
+ token->type = OP_OPEN_SUBEXP;
+ break;
+ case ')':
+ if (!(syntax & RE_NO_BK_PARENS))
+ token->type = OP_CLOSE_SUBEXP;
+ break;
+ case '+':
+ if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
+ token->type = OP_DUP_PLUS;
+ break;
+ case '?':
+ if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
+ token->type = OP_DUP_QUESTION;
+ break;
+ case '{':
+ if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
+ token->type = OP_OPEN_DUP_NUM;
+ break;
+ case '}':
+ if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
+ token->type = OP_CLOSE_DUP_NUM;
+ break;
+ default:
+ break;
+ }
+ return 2;
+ }
+
+ /* Unescaped byte: an ordinary character unless the switch below turns it
+ into an operator. */
+ token->type = CHARACTER;
+ #ifdef RE_ENABLE_I18N
+ if (input->mb_cur_max > 1)
+ {
+ wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input));
+ token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
+ }
+ else
+ #endif
+ token->word_char = IS_WORD_CHAR (token->opr.c);
+
+ switch (c)
+ {
+ case '\n':
+ if (syntax & RE_NEWLINE_ALT)
+ token->type = OP_ALT;
+ break;
+ case '|':
+ if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_NO_BK_VBAR))
+ token->type = OP_ALT;
+ break;
+ case '*':
+ token->type = OP_DUP_ASTERISK;
+ break;
+ case '+':
+ if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
+ token->type = OP_DUP_PLUS;
+ break;
+ case '?':
+ if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
+ token->type = OP_DUP_QUESTION;
+ break;
+ case '{':
+ if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
+ token->type = OP_OPEN_DUP_NUM;
+ break;
+ case '}':
+ if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
+ token->type = OP_CLOSE_DUP_NUM;
+ break;
+ case '(':
+ if (syntax & RE_NO_BK_PARENS)
+ token->type = OP_OPEN_SUBEXP;
+ break;
+ case ')':
+ if (syntax & RE_NO_BK_PARENS)
+ token->type = OP_CLOSE_SUBEXP;
+ break;
+ case '[':
+ token->type = OP_OPEN_BRACKET;
+ break;
+ case '.':
+ token->type = OP_PERIOD;
+ break;
+ case '^':
+ /* Away from the start of the pattern, and without a syntax bit that
+ makes '^' an anchor here, it is an anchor only when RE_NEWLINE_ALT
+ is set and the previous byte is a newline. */
+ if (!(syntax & (RE_CONTEXT_INDEP_ANCHORS | RE_CARET_ANCHORS_HERE)) &&
+ re_string_cur_idx (input) != 0)
+ {
+ char prev = re_string_peek_byte (input, -1);
+ if (!(syntax & RE_NEWLINE_ALT) || prev != '\n')
+ break;
+ }
+ token->type = ANCHOR;
+ token->opr.ctx_type = LINE_FIRST;
+ break;
+ case '$':
+ /* Without RE_CONTEXT_INDEP_ANCHORS, a '$' that is not the last byte
+ is an anchor only if the token after it is an alternation or a
+ closing parenthesis. Step over the '$', peek that token, and step
+ back again. */
+ if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) &&
+ re_string_cur_idx (input) + 1 != re_string_length (input))
+ {
+ re_token_t next;
+ re_string_skip_bytes (input, 1);
+ peek_token (&next, input, syntax);
+ re_string_skip_bytes (input, -1);
+ if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP)
+ break;
+ }
+ token->type = ANCHOR;
+ token->opr.ctx_type = LINE_LAST;
+ break;
+ default:
+ break;
+ }
+ return 1;
+ }
+
+ /* Peek a token from INPUT, and return the length of the token.
+ We must not use this function outside of bracket expressions.
+
+ The return value is 0 at the end of INPUT, 2 for the two-byte openers
+ "[.", "[=" and (with RE_CHAR_CLASSES) "[:", and 1 otherwise. */
+
+ static int
+ internal_function
+ peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
+ {
+ unsigned char c;
+ if (re_string_eoi (input))
+ {
+ token->type = END_OF_RE;
+ return 0;
+ }
+ c = re_string_peek_byte (input, 0);
+ token->opr.c = c;
+
+ #ifdef RE_ENABLE_I18N
+ /* A byte that is not the first byte of a character is always an ordinary
+ CHARACTER token. */
+ if (input->mb_cur_max > 1 &&
+ !re_string_first_byte (input, re_string_cur_idx (input)))
+ {
+ token->type = CHARACTER;
+ return 1;
+ }
+ #endif /* RE_ENABLE_I18N */
+
+ if (c == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS)
+ && re_string_cur_idx (input) + 1 < re_string_length (input))
+ {
+ /* In this case, '\' escape a character. */
+ unsigned char c2;
+ /* NOTE: this path calls re_string_skip_bytes on INPUT, so the
+ backslash has already been stepped over when the function returns;
+ the reported length of 1 covers only the escaped byte. */
+ re_string_skip_bytes (input, 1);
+ c2 = re_string_peek_byte (input, 0);
+ token->opr.c = c2;
+ token->type = CHARACTER;
+ return 1;
+ }
+ if (c == '[') /* '[' is a special char in a bracket exps. */
+ {
+ unsigned char c2;
+ int token_len;
+ /* Look at the byte after '[', using 0 when there is none. */
+ if (re_string_cur_idx (input) + 1 < re_string_length (input))
+ c2 = re_string_peek_byte (input, 1);
+ else
+ c2 = 0;
+ token->opr.c = c2;
+ token_len = 2;
+ switch (c2)
+ {
+ case '.':
+ token->type = OP_OPEN_COLL_ELEM;
+ break;
+ case '=':
+ token->type = OP_OPEN_EQUIV_CLASS;
+ break;
+ case ':':
+ if (syntax & RE_CHAR_CLASSES)
+ {
+ token->type = OP_OPEN_CHAR_CLASS;
+ break;
+ }
+ /* else fall through. */
+ default:
+ /* Not a two-byte opener: '[' is an ordinary character here. */
+ token->type = CHARACTER;
+ token->opr.c = c;
+ token_len = 1;
+ break;
+ }
+ return token_len;
+ }
+ switch (c)
+ {
+ case '-':
+ token->type = OP_CHARSET_RANGE;
+ break;
+ case ']':
+ token->type = OP_CLOSE_BRACKET;
+ break;
+ case '^':
+ token->type = OP_NON_MATCH_LIST;
+ break;
+ default:
+ token->type = CHARACTER;
+ }
+ return 1;
+ }
+
+/* Functions for parser. */
+
+ /* Entry point of the parser.
+    Parse the regular expression REGEXP and return the structure tree.
+    If an error occurs, *ERR is set to the error code and NULL is returned.
+    This function builds the following tree from regular expression <reg_exp>:
+            CAT
+            / \
+           /   \
+    <reg_exp>  EOR
+
+    CAT means concatenation.
+    EOR means end of regular expression.
+    When <reg_exp> produces no tree, the EOR node alone is the result.  */
+
+ static bin_tree_t *
+ parse (re_string_t *regexp, regex_t *preg, reg_syntax_t syntax,
+        reg_errcode_t *err)
+ {
+   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+   re_token_t lookahead;
+   bin_tree_t *body, *end_marker, *top;
+
+   dfa->syntax = syntax;
+
+   /* Prime the one-token lookahead; '^' is an anchor at the very start.  */
+   fetch_token (&lookahead, regexp, syntax | RE_CARET_ANCHORS_HERE);
+   body = parse_reg_exp (regexp, preg, &lookahead, syntax, 0, err);
+   if (BE (body == NULL && *err != REG_NOERROR, 0))
+     return NULL;
+
+   end_marker = create_tree (dfa, NULL, NULL, END_OF_RE);
+   top = (body == NULL
+          ? end_marker
+          : create_tree (dfa, body, end_marker, CONCAT));
+   if (BE (end_marker == NULL || top == NULL, 0))
+     {
+       *err = REG_ESPACE;
+       return NULL;
+     }
+
+   return top;
+ }
+
+ /* This function builds the following tree from regular expression
+    <branch1>|<branch2>:
+            ALT
+            / \
+           /   \
+    <branch1> <branch2>
+
+    ALT means alternative, which represents the operator `|'.
+    A missing branch is represented by a NULL child.  On failure *ERR is set
+    and NULL is returned.  */
+
+ static bin_tree_t *
+ parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
+                reg_syntax_t syntax, int nest, reg_errcode_t *err)
+ {
+   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+   bin_tree_t *alt_tree;
+
+   alt_tree = parse_branch (regexp, preg, token, syntax, nest, err);
+   if (BE (alt_tree == NULL && *err != REG_NOERROR, 0))
+     return NULL;
+
+   while (token->type == OP_ALT)
+     {
+       bin_tree_t *rhs = NULL;
+       int branch_is_empty;
+
+       fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE);
+
+       /* The branch after `|' is empty when it is immediately followed by
+          another `|', the end of the pattern, or (inside a group) the
+          closing parenthesis.  */
+       branch_is_empty = (token->type == OP_ALT
+                          || token->type == END_OF_RE
+                          || (nest != 0 && token->type == OP_CLOSE_SUBEXP));
+       if (!branch_is_empty)
+         {
+           rhs = parse_branch (regexp, preg, token, syntax, nest, err);
+           if (BE (rhs == NULL && *err != REG_NOERROR, 0))
+             return NULL;
+         }
+
+       alt_tree = create_tree (dfa, alt_tree, rhs, OP_ALT);
+       if (BE (alt_tree == NULL, 0))
+         {
+           *err = REG_ESPACE;
+           return NULL;
+         }
+     }
+
+   return alt_tree;
+ }
+
+ /* This function builds the following tree from regular expression
+    <exp1><exp2>:
+         CAT
+         / \
+        /   \
+    <exp1> <exp2>
+
+    CAT means concatenation.
+    Expressions that produce no tree are skipped.  On failure *ERR is set
+    and NULL is returned.  */
+
+ static bin_tree_t *
+ parse_branch (re_string_t *regexp, regex_t *preg, re_token_t *token,
+               reg_syntax_t syntax, int nest, reg_errcode_t *err)
+ {
+   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+   bin_tree_t *seq, *item;
+
+   seq = parse_expression (regexp, preg, token, syntax, nest, err);
+   if (BE (seq == NULL && *err != REG_NOERROR, 0))
+     return NULL;
+
+   for (;;)
+     {
+       /* A branch ends at `|', at the end of the pattern, or (inside a
+          group) at the closing parenthesis.  */
+       if (token->type == OP_ALT || token->type == END_OF_RE
+           || (nest != 0 && token->type == OP_CLOSE_SUBEXP))
+         break;
+
+       item = parse_expression (regexp, preg, token, syntax, nest, err);
+       if (BE (item == NULL && *err != REG_NOERROR, 0))
+         return NULL;
+
+       /* Nothing parsed: there is nothing to append.  */
+       if (item == NULL)
+         continue;
+
+       /* First real expression of the branch.  */
+       if (seq == NULL)
+         {
+           seq = item;
+           continue;
+         }
+
+       seq = create_tree (dfa, seq, item, CONCAT);
+       if (seq == NULL)
+         {
+           *err = REG_ESPACE;
+           return NULL;
+         }
+     }
+
+   return seq;
+ }
+
+ /* This function build the following tree, from regular expression a*:
+ *
+ |
+ a
+
+ It parses one atom, selected by the type of *TOKEN, followed by any
+ repetition operators, and leaves the first unconsumed token in *TOKEN.
+ Returns the tree for the expression. NULL is returned with *ERR set on
+ failure, and also, without touching *ERR, when *TOKEN is OP_ALT,
+ END_OF_RE or an unexpected token type.
+ */
+
+ static bin_tree_t *
+ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
+ reg_syntax_t syntax, int nest, reg_errcode_t *err)
+ {
+ re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+ bin_tree_t *tree;
+ switch (token->type)
+ {
+ case CHARACTER:
+ tree = create_token_tree (dfa, NULL, NULL, token);
+ if (BE (tree == NULL, 0))
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
+ #ifdef RE_ENABLE_I18N
+ if (dfa->mb_cur_max > 1)
+ {
+ /* Chain the remaining bytes of a multibyte character onto the
+ first one with CONCAT nodes. */
+ while (!re_string_eoi (regexp)
+ && !re_string_first_byte (regexp, re_string_cur_idx (regexp)))
+ {
+ bin_tree_t *mbc_remain;
+ fetch_token (token, regexp, syntax);
+ mbc_remain = create_token_tree (dfa, NULL, NULL, token);
+ tree = create_tree (dfa, tree, mbc_remain, CONCAT);
+ if (BE (mbc_remain == NULL || tree == NULL, 0))
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
+ }
+ }
+ #endif
+ break;
+ case OP_OPEN_SUBEXP:
+ tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err);
+ if (BE (*err != REG_NOERROR && tree == NULL, 0))
+ return NULL;
+ break;
+ case OP_OPEN_BRACKET:
+ tree = parse_bracket_exp (regexp, dfa, token, syntax, err);
+ if (BE (*err != REG_NOERROR && tree == NULL, 0))
+ return NULL;
+ break;
+ case OP_BACK_REF:
+ /* The referenced group must be marked in completed_bkref_map. */
+ if (!BE (dfa->completed_bkref_map & (1 << token->opr.idx), 1))
+ {
+ *err = REG_ESUBREG;
+ return NULL;
+ }
+ dfa->used_bkref_map |= 1 << token->opr.idx;
+ tree = create_token_tree (dfa, NULL, NULL, token);
+ if (BE (tree == NULL, 0))
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
+ ++dfa->nbackref;
+ dfa->has_mb_node = 1;
+ break;
+ case OP_OPEN_DUP_NUM:
+ if (syntax & RE_CONTEXT_INVALID_DUP)
+ {
+ *err = REG_BADRPT;
+ return NULL;
+ }
+ /* FALLTHROUGH */
+ case OP_DUP_ASTERISK:
+ case OP_DUP_PLUS:
+ case OP_DUP_QUESTION:
+ /* A repetition operator with nothing in front of it: an error, skipped,
+ or taken literally, depending on SYNTAX. */
+ if (syntax & RE_CONTEXT_INVALID_OPS)
+ {
+ *err = REG_BADRPT;
+ return NULL;
+ }
+ else if (syntax & RE_CONTEXT_INDEP_OPS)
+ {
+ fetch_token (token, regexp, syntax);
+ return parse_expression (regexp, preg, token, syntax, nest, err);
+ }
+ /* else fall through */
+ case OP_CLOSE_SUBEXP:
+ /* The type is re-tested because the cases above fall through to
+ here. */
+ if ((token->type == OP_CLOSE_SUBEXP) &&
+ !(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD))
+ {
+ *err = REG_ERPAREN;
+ return NULL;
+ }
+ /* else fall through */
+ case OP_CLOSE_DUP_NUM:
+ /* We treat it as a normal character. */
+
+ /* Then we can treat these characters as normal characters. */
+ token->type = CHARACTER;
+ /* mb_partial and word_char bits should be initialized already
+ by peek_token. */
+ tree = create_token_tree (dfa, NULL, NULL, token);
+ if (BE (tree == NULL, 0))
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
+ break;
+ case ANCHOR:
+ /* Run init_word_char for a word-type anchor while word_ops_used is
+ still 0. */
+ if ((token->opr.ctx_type
+ & (WORD_DELIM | NOT_WORD_DELIM | WORD_FIRST | WORD_LAST))
+ && dfa->word_ops_used == 0)
+ init_word_char (dfa);
+ if (token->opr.ctx_type == WORD_DELIM
+ || token->opr.ctx_type == NOT_WORD_DELIM)
+ {
+ /* These two anchors are expanded into an alternation of two simpler
+ anchors: WORD_FIRST | WORD_LAST for WORD_DELIM, and
+ INSIDE_WORD | INSIDE_NOTWORD for NOT_WORD_DELIM. */
+ bin_tree_t *tree_first, *tree_last;
+ if (token->opr.ctx_type == WORD_DELIM)
+ {
+ token->opr.ctx_type = WORD_FIRST;
+ tree_first = create_token_tree (dfa, NULL, NULL, token);
+ token->opr.ctx_type = WORD_LAST;
+ }
+ else
+ {
+ token->opr.ctx_type = INSIDE_WORD;
+ tree_first = create_token_tree (dfa, NULL, NULL, token);
+ token->opr.ctx_type = INSIDE_NOTWORD;
+ }
+ tree_last = create_token_tree (dfa, NULL, NULL, token);
+ tree = create_tree (dfa, tree_first, tree_last, OP_ALT);
+ if (BE (tree_first == NULL || tree_last == NULL || tree == NULL, 0))
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
+ }
+ else
+ {
+ tree = create_token_tree (dfa, NULL, NULL, token);
+ if (BE (tree == NULL, 0))
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
+ }
+ /* We must return here, since ANCHORs can't be followed
+ by repetition operators.
+ eg. RE"^*" is invalid or "<ANCHOR(^)><CHAR(*)>",
+ it must not be "<ANCHOR(^)><REPEAT(*)>". */
+ fetch_token (token, regexp, syntax);
+ return tree;
+ case OP_PERIOD:
+ tree = create_token_tree (dfa, NULL, NULL, token);
+ if (BE (tree == NULL, 0))
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
+ if (dfa->mb_cur_max > 1)
+ dfa->has_mb_node = 1;
+ break;
+ case OP_WORD:
+ case OP_NOTWORD:
+ /* Built from the "alnum" class plus the extra character "_"; the
+ flag argument is set for OP_NOTWORD. */
+ tree = build_charclass_op (dfa, regexp->trans,
+ (const unsigned char *) "alnum",
+ (const unsigned char *) "_",
+ token->type == OP_NOTWORD, err);
+ if (BE (*err != REG_NOERROR && tree == NULL, 0))
+ return NULL;
+ break;
+ case OP_SPACE:
+ case OP_NOTSPACE:
+ /* Built from the "space" class with no extra characters; the flag
+ argument is set for OP_NOTSPACE. */
+ tree = build_charclass_op (dfa, regexp->trans,
+ (const unsigned char *) "space",
+ (const unsigned char *) "",
+ token->type == OP_NOTSPACE, err);
+ if (BE (*err != REG_NOERROR && tree == NULL, 0))
+ return NULL;
+ break;
+ case OP_ALT:
+ case END_OF_RE:
+ /* Nothing to parse here; *ERR is left untouched and the token is not
+ consumed. */
+ return NULL;
+ case BACK_SLASH:
+ *err = REG_EESCAPE;
+ return NULL;
+ default:
+ /* Must not happen? */
+ #ifdef DEBUG
+ assert (0);
+ #endif
+ return NULL;
+ }
+ /* Step past the atom, then apply every repetition operator that follows
+ it. */
+ fetch_token (token, regexp, syntax);
+
+ while (token->type == OP_DUP_ASTERISK || token->type == OP_DUP_PLUS
+ || token->type == OP_DUP_QUESTION || token->type == OP_OPEN_DUP_NUM)
+ {
+ tree = parse_dup_op (tree, regexp, dfa, token, syntax, err);
+ if (BE (*err != REG_NOERROR && tree == NULL, 0))
+ return NULL;
+ /* In BRE consecutive duplications are not allowed. */
+ if ((syntax & RE_CONTEXT_INVALID_DUP)
+ && (token->type == OP_DUP_ASTERISK
+ || token->type == OP_OPEN_DUP_NUM))
+ {
+ *err = REG_BADRPT;
+ return NULL;
+ }
+ }
+
+ return tree;
+ }
+
+/* Parse a parenthesized group and build the following tree from its
+   contents (<reg_exp>):
+           SUBEXP
+             |
+         <reg_exp>
+
+   The token that is current on entry is skipped first.  Unless the group
+   is empty, its body is then parsed with parse_reg_exp; an empty group
+   yields a SUBEXP node without a child.  PREG->re_nsub is incremented and
+   its previous value becomes the index stored in the new node.  On
+   success TOKEN is left on the OP_CLOSE_SUBEXP token.
+
+   Returns the SUBEXP node, or NULL with *ERR set: REG_EPAREN when the
+   group is not closed, REG_ESPACE when the SUBEXP node cannot be created,
+   or the error reported by parse_reg_exp.  */
+
+static bin_tree_t *
+parse_sub_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
+               reg_syntax_t syntax, int nest, reg_errcode_t *err)
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  bin_tree_t *tree;
+  size_t cur_nsub;
+  cur_nsub = preg->re_nsub++;
+
+  fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE);
+
+  /* The subexpression may be a null string.  */
+  if (token->type == OP_CLOSE_SUBEXP)
+    tree = NULL;
+  else
+    {
+      tree = parse_reg_exp (regexp, preg, token, syntax, nest, err);
+      if (BE (*err == REG_NOERROR && token->type != OP_CLOSE_SUBEXP, 0))
+        {
+          /* The body parsed without error but the group was never closed.
+             The subtree is about to become unreachable, so discard it
+             the same way parse_dup_op discards an unused operand.  */
+          if (tree != NULL)
+            postorder (tree, free_tree, NULL);
+          *err = REG_EPAREN;
+        }
+      if (BE (*err != REG_NOERROR, 0))
+        return NULL;
+    }
+
+  /* The completed-group bitmap only tracks the first nine groups
+     (indices 0 .. '9' - '1').  */
+  if (cur_nsub <= '9' - '1')
+    dfa->completed_bkref_map |= 1 << cur_nsub;
+
+  tree = create_tree (dfa, tree, NULL, SUBEXP);
+  if (BE (tree == NULL, 0))
+    {
+      *err = REG_ESPACE;
+      return NULL;
+    }
+  tree->token.opr.idx = cur_nsub;
+  return tree;
+}
+
+/* Parse a repetition operator ("*", "+", "?" or an interval such as
+   "{1,3}") applied to the already parsed operand ELEM.
+
+   On entry TOKEN is the repetition operator.  On a normal return the token
+   following the operator has been fetched into TOKEN.
+
+   Return value:
+   - the tree for the repeated expression;
+   - ELEM unchanged when an invalid interval is rolled back because SYNTAX
+     has RE_INVALID_INTERVAL_ORD set (TOKEN is then restored and retyped
+     as CHARACTER so the caller treats the '{' literally);
+   - NULL without touching *ERR when ELEM is NULL or the repetition is
+     {0} / {0,0} (ELEM is discarded in the latter case);
+   - NULL with *ERR set to REG_BADBR, REG_EBRACE or REG_ESPACE on error.  */
+
+static bin_tree_t *
+parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
+              re_token_t *token, reg_syntax_t syntax, reg_errcode_t *err)
+{
+  bin_tree_t *tree = NULL, *old_tree = NULL;
+  int i, start, end, start_idx = re_string_cur_idx (regexp);
+  re_token_t start_token = *token;
+
+  if (token->type == OP_OPEN_DUP_NUM)
+    {
+      end = 0;
+      start = fetch_number (regexp, token, syntax);
+      if (start == -1)
+        {
+          if (token->type == CHARACTER && token->opr.c == ',')
+            start = 0; /* We treat "{,m}" as "{0,m}".  */
+          else
+            {
+              *err = REG_BADBR; /* <re>{} is invalid.  */
+              return NULL;
+            }
+        }
+      if (BE (start != -2, 1))
+        {
+          /* We treat "{n}" as "{n,n}".  */
+          end = ((token->type == OP_CLOSE_DUP_NUM) ? start
+                 : ((token->type == CHARACTER && token->opr.c == ',')
+                    ? fetch_number (regexp, token, syntax) : -2));
+        }
+      if (BE (start == -2 || end == -2, 0))
+        {
+          /* Invalid sequence.  */
+          if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0))
+            {
+              if (token->type == END_OF_RE)
+                *err = REG_EBRACE;
+              else
+                *err = REG_BADBR;
+
+              return NULL;
+            }
+
+          /* If the syntax bit is set, rollback.  */
+          re_string_set_index (regexp, start_idx);
+          *token = start_token;
+          token->type = CHARACTER;
+          /* mb_partial and word_char bits should be already initialized by
+             peek_token.  */
+          return elem;
+        }
+
+      /* END == -1 means "no upper bound".  */
+      if (BE (end != -1 && start > end, 0))
+        {
+          /* First number greater than second.  */
+          *err = REG_BADBR;
+          return NULL;
+        }
+    }
+  else
+    {
+      start = (token->type == OP_DUP_PLUS) ? 1 : 0;
+      end = (token->type == OP_DUP_QUESTION) ? 1 : -1;
+    }
+
+  fetch_token (token, regexp, syntax);
+
+  if (BE (elem == NULL, 0))
+    return NULL;
+  if (BE (start == 0 && end == 0, 0))
+    {
+      /* Zero repetitions: the operand matches nothing, drop it.  */
+      postorder (elem, free_tree, NULL);
+      return NULL;
+    }
+
+  /* Extract "<re>{n,m}" to "<re><re>...<re><re>{0,<m-n>}".  */
+  if (BE (start > 0, 0))
+    {
+      tree = elem;
+      for (i = 2; i <= start; ++i)
+        {
+          elem = duplicate_tree (elem, dfa);
+          tree = create_tree (dfa, tree, elem, CONCAT);
+          if (BE (elem == NULL || tree == NULL, 0))
+            goto parse_dup_op_espace;
+        }
+
+      if (start == end)
+        return tree;
+
+      /* Duplicate ELEM before it is marked optional.  */
+      elem = duplicate_tree (elem, dfa);
+      /* ELEM is dereferenced right below, so a failed duplication must
+         be reported here instead of crashing.  */
+      if (BE (elem == NULL, 0))
+        goto parse_dup_op_espace;
+      old_tree = tree;
+    }
+  else
+    old_tree = NULL;
+
+  if (elem->token.type == SUBEXP)
+    postorder (elem, mark_opt_subexp, (void *) (long) elem->token.opr.idx);
+
+  tree = create_tree (dfa, elem, NULL, (end == -1 ? OP_DUP_ASTERISK : OP_ALT));
+  if (BE (tree == NULL, 0))
+    goto parse_dup_op_espace;
+
+  /* This loop is actually executed only when end != -1,
+     to rewrite <re>{0,n} as (<re>(<re>...<re>?)?)?...  We have
+     already created the start+1-th copy.  */
+  for (i = start + 2; i <= end; ++i)
+    {
+      elem = duplicate_tree (elem, dfa);
+      tree = create_tree (dfa, tree, elem, CONCAT);
+      if (BE (elem == NULL || tree == NULL, 0))
+        goto parse_dup_op_espace;
+
+      tree = create_tree (dfa, tree, NULL, OP_ALT);
+      if (BE (tree == NULL, 0))
+        goto parse_dup_op_espace;
+    }
+
+  if (old_tree)
+    {
+      /* Prepend the mandatory copies.  A NULL result is an allocation
+         failure like every other create_tree call in this function and
+         must not be returned as a silent "empty" tree.  */
+      tree = create_tree (dfa, old_tree, tree, CONCAT);
+      if (BE (tree == NULL, 0))
+        goto parse_dup_op_espace;
+    }
+
+  return tree;
+
+ parse_dup_op_espace:
+  *err = REG_ESPACE;
+  return NULL;
+}
+
+/* Size of the buffers that hold the name of a collating symbol,
+ equivalence class or character class, terminating NUL included.
+ The value is a guess at a sufficient upper bound. */
+#define BRACKET_NAME_BUF_SIZE 32
+
+#ifndef _LIBC
+/* Helper for parse_bracket_exp, used only when not building as part of
+   libc (no _LIBC).  Build the range expression that starts at START_ELEM
+   and ends at END_ELEM.  The result is written to SBCSET and, with
+   RE_ENABLE_I18N and a non-NULL MBCSET, also appended to
+   mbcset->range_starts / mbcset->range_ends.
+   RANGE_ALLOC is the allocated size of those two arrays; it is a pointer
+   argument since we may update it.
+
+   Returns REG_NOERROR on success, REG_ERANGE when an end point is an
+   equivalence or character class or the range is reversed, REG_ECOLLATE
+   when an end point cannot be handled without libc support, and
+   REG_ESPACE when the range arrays cannot be grown.  */
+
+static reg_errcode_t
+internal_function
+# ifdef RE_ENABLE_I18N
+build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc,
+                 bracket_elem_t *start_elem, bracket_elem_t *end_elem)
+# else /* not RE_ENABLE_I18N */
+build_range_exp (bitset_t sbcset, bracket_elem_t *start_elem,
+                 bracket_elem_t *end_elem)
+# endif /* not RE_ENABLE_I18N */
+{
+  unsigned int start_ch, end_ch;
+  /* Equivalence Classes and Character Classes can't be a range start/end.  */
+  if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
+          || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,
+          0))
+    return REG_ERANGE;
+
+  /* We can handle no multi character collating elements without libc
+     support.  */
+  if (BE ((start_elem->type == COLL_SYM
+           && strlen ((char *) start_elem->opr.name) > 1)
+          || (end_elem->type == COLL_SYM
+              && strlen ((char *) end_elem->opr.name) > 1), 0))
+    return REG_ECOLLATE;
+
+# ifdef RE_ENABLE_I18N
+  {
+    wchar_t wc;
+    wint_t start_wc;
+    wint_t end_wc;
+    /* Three one-character wide strings laid out as
+       [0] = range start, [2] = candidate, [4] = range end,
+       each followed by a terminating L'\0'.  */
+    wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
+
+    start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch
+                : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
+                   : 0));
+    end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch
+              : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
+                 : 0));
+    start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM)
+                ? __btowc (start_ch) : start_elem->opr.wch);
+    end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM)
+              ? __btowc (end_ch) : end_elem->opr.wch);
+    if (start_wc == WEOF || end_wc == WEOF)
+      return REG_ECOLLATE;
+    cmp_buf[0] = start_wc;
+    cmp_buf[4] = end_wc;
+    if (wcscoll (cmp_buf, cmp_buf + 4) > 0)
+      return REG_ERANGE;
+
+    /* Got valid collation sequence values, add them as a new entry.
+       However, for !_LIBC we have no collation elements: if the
+       character set is single byte, the single byte character set
+       that we build below suffices.  parse_bracket_exp passes
+       no MBCSET if dfa->mb_cur_max == 1.  */
+    if (mbcset)
+      {
+        /* Check the space of the arrays.  */
+        if (BE (*range_alloc == mbcset->nranges, 0))
+          {
+            /* There is not enough space, need realloc.  */
+            wchar_t *new_array_start, *new_array_end;
+            int new_nranges;
+
+            /* +1 in case of mbcset->nranges is 0.  */
+            new_nranges = 2 * mbcset->nranges + 1;
+            /* Use realloc since mbcset->range_starts and mbcset->range_ends
+               are NULL if *range_alloc == 0.
+               Store each successful result at once: assuming re_realloc
+               behaves like realloc, the old block is no longer valid
+               after a successful call, so MBCSET must never be left
+               pointing at it when the other call fails.  */
+            new_array_start = re_realloc (mbcset->range_starts, wchar_t,
+                                          new_nranges);
+            if (new_array_start != NULL)
+              mbcset->range_starts = new_array_start;
+            new_array_end = re_realloc (mbcset->range_ends, wchar_t,
+                                        new_nranges);
+            if (new_array_end != NULL)
+              mbcset->range_ends = new_array_end;
+
+            /* *RANGE_ALLOC is left untouched on failure, which is still a
+               valid lower bound for both arrays.  */
+            if (BE (new_array_start == NULL || new_array_end == NULL, 0))
+              return REG_ESPACE;
+
+            *range_alloc = new_nranges;
+          }
+
+        mbcset->range_starts[mbcset->nranges] = start_wc;
+        mbcset->range_ends[mbcset->nranges++] = end_wc;
+      }
+
+    /* Build the table for single byte characters.  */
+    for (wc = 0; wc < SBC_MAX; ++wc)
+      {
+        cmp_buf[2] = wc;
+        if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
+            && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
+          bitset_set (sbcset, wc);
+      }
+  }
+# else /* not RE_ENABLE_I18N */
+  {
+    unsigned int ch;
+    start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch
+                : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
+                   : 0));
+    end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch
+              : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
+                 : 0));
+    if (start_ch > end_ch)
+      return REG_ERANGE;
+    /* Build the table for single byte characters.  */
+    for (ch = 0; ch < SBC_MAX; ++ch)
+      if (start_ch <= ch && ch <= end_ch)
+        bitset_set (sbcset, ch);
+  }
+# endif /* not RE_ENABLE_I18N */
+  return REG_NOERROR;
+}
+#endif /* not _LIBC */
+
+#ifndef _LIBC
+/* Helper for parse_bracket_exp, used only when not building as part of
+   libc (no _LIBC).  Add the collating element called NAME to SBCSET.
+   Only names that are exactly one byte long are accepted; anything else
+   yields REG_ECOLLATE.  With RE_ENABLE_I18N the MBCSET and COLL_SYM_ALLOC
+   arguments are part of the signature but are not used by this version.  */
+
+static reg_errcode_t
+internal_function
+# ifdef RE_ENABLE_I18N
+build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
+                        int *coll_sym_alloc, const unsigned char *name)
+# else /* not RE_ENABLE_I18N */
+build_collating_symbol (bitset_t sbcset, const unsigned char *name)
+# endif /* not RE_ENABLE_I18N */
+{
+  /* Reject empty and multi-byte names up front.  */
+  if (BE (strlen ((const char *) name) != 1, 0))
+    return REG_ECOLLATE;
+
+  /* A one-byte collating element is just that byte.  */
+  bitset_set (sbcset, name[0]);
+  return REG_NOERROR;
+}
+#endif /* not _LIBC */
+
+/* Parse a bracket expression such as "[abc]", "[a-c]" or "[[.a-a.]]" and
+   return the tree that matches it.
+
+   Parsing begins by peeking at the next bracket token (a leading
+   non-matching-list marker is handled here) and consumes input up to and
+   including the closing ']'.  Single byte members are collected in a
+   bitset (SBCSET).  With RE_ENABLE_I18N, multibyte characters, ranges,
+   collating symbols, equivalence classes and character classes are also
+   recorded in a re_charset_t (MBCSET).  The result is a SIMPLE_BRACKET
+   node, a COMPLEX_BRACKET node, or an OP_ALT node joining one of each.
+   TOKEN is used as scratch storage for the token being examined.
+
+   Returns NULL with *ERR set on failure: REG_ESPACE on allocation
+   failure, REG_BADPAT when the pattern ends before the first element,
+   REG_EBRACK when it ends before the closing ']', or the error reported
+   by one of the element/range/class helpers.  Both sets are released on
+   every failure path.  */
+
+static bin_tree_t *
+parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
+                   reg_syntax_t syntax, reg_errcode_t *err)
+{
+#ifdef _LIBC
+  const unsigned char *collseqmb;
+  const char *collseqwc;
+  uint32_t nrules;
+  int32_t table_size;
+  const int32_t *symb_table;
+  const unsigned char *extra;
+
+  /* Local function for parse_bracket_exp used in _LIBC environment.
+     Seek the collating symbol entry corresponding to NAME.
+     Return the index of the symbol in the SYMB_TABLE.  */
+
+  auto inline int32_t
+  __attribute ((always_inline))
+  seek_collating_symbol_entry (name, name_len)
+         const unsigned char *name;
+         size_t name_len;
+    {
+      int32_t hash = elem_hash ((const char *) name, name_len);
+      int32_t elem = hash % table_size;
+      if (symb_table[2 * elem] != 0)
+        {
+          int32_t second = hash % (table_size - 2) + 1;
+
+          do
+            {
+              /* First compare the hashing value.  */
+              if (symb_table[2 * elem] == hash
+                  /* Compare the length of the name.  */
+                  && name_len == extra[symb_table[2 * elem + 1]]
+                  /* Compare the name.  */
+                  && memcmp (name, &extra[symb_table[2 * elem + 1] + 1],
+                             name_len) == 0)
+                {
+                  /* Yep, this is the entry.  */
+                  break;
+                }
+
+              /* Next entry.
+                 NOTE(review): ELEM is advanced without being reduced
+                 modulo TABLE_SIZE; confirm the table layout guarantees
+                 an empty slot is reached in bounds.  */
+              elem += second;
+            }
+          while (symb_table[2 * elem] != 0);
+        }
+      return elem;
+    }
+
+  /* Local function for parse_bracket_exp used in _LIBC environment.
+     Look up the collation sequence value of BR_ELEM.
+     Return the value if succeeded, UINT_MAX otherwise.  */
+
+  auto inline unsigned int
+  __attribute ((always_inline))
+  lookup_collation_sequence_value (br_elem)
+         bracket_elem_t *br_elem;
+    {
+      if (br_elem->type == SB_CHAR)
+        {
+          /*
+          if (MB_CUR_MAX == 1)
+          */
+          if (nrules == 0)
+            return collseqmb[br_elem->opr.ch];
+          else
+            {
+              wint_t wc = __btowc (br_elem->opr.ch);
+              return __collseq_table_lookup (collseqwc, wc);
+            }
+        }
+      else if (br_elem->type == MB_CHAR)
+        {
+          if (nrules != 0)
+            return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
+        }
+      else if (br_elem->type == COLL_SYM)
+        {
+          size_t sym_name_len = strlen ((char *) br_elem->opr.name);
+          if (nrules != 0)
+            {
+              int32_t elem, idx;
+              elem = seek_collating_symbol_entry (br_elem->opr.name,
+                                                  sym_name_len);
+              if (symb_table[2 * elem] != 0)
+                {
+                  /* We found the entry.  */
+                  idx = symb_table[2 * elem + 1];
+                  /* Skip the name of collating element name.  */
+                  idx += 1 + extra[idx];
+                  /* Skip the byte sequence of the collating element.  */
+                  idx += 1 + extra[idx];
+                  /* Adjust for the alignment.  */
+                  idx = (idx + 3) & ~3;
+                  /* Skip the multibyte collation sequence value.  */
+                  idx += sizeof (unsigned int);
+                  /* Skip the wide char sequence of the collating element.  */
+                  idx += sizeof (unsigned int) *
+                    (1 + *(unsigned int *) (extra + idx));
+                  /* Return the collation sequence value.  */
+                  return *(unsigned int *) (extra + idx);
+                }
+              else if (symb_table[2 * elem] == 0 && sym_name_len == 1)
+                {
+                  /* No valid character.  Match it as a single byte
+                     character.  */
+                  return collseqmb[br_elem->opr.name[0]];
+                }
+            }
+          else if (sym_name_len == 1)
+            return collseqmb[br_elem->opr.name[0]];
+        }
+      return UINT_MAX;
+    }
+
+  /* Local function for parse_bracket_exp used in _LIBC environment.
+     Build the range expression which starts from START_ELEM, and ends
+     at END_ELEM.  The result is written to MBCSET and SBCSET.
+     RANGE_ALLOC is the allocated size of mbcset->range_starts and
+     mbcset->range_ends; it is a pointer argument since we may
+     update it.  */
+
+  auto inline reg_errcode_t
+  __attribute ((always_inline))
+  build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem)
+         re_charset_t *mbcset;
+         int *range_alloc;
+         bitset_t sbcset;
+         bracket_elem_t *start_elem, *end_elem;
+    {
+      unsigned int ch;
+      uint32_t start_collseq;
+      uint32_t end_collseq;
+
+      /* Equivalence Classes and Character Classes can't be a range
+         start/end.  */
+      if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
+              || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,
+              0))
+        return REG_ERANGE;
+
+      start_collseq = lookup_collation_sequence_value (start_elem);
+      end_collseq = lookup_collation_sequence_value (end_elem);
+      /* Check start/end collation sequence values.  */
+      if (BE (start_collseq == UINT_MAX || end_collseq == UINT_MAX, 0))
+        return REG_ECOLLATE;
+      if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0))
+        return REG_ERANGE;
+
+      /* Got valid collation sequence values, add them as a new entry.
+         However, if we have no collation elements, and the character set
+         is single byte, the single byte character set that we
+         build below suffices.  */
+      if (nrules > 0 || dfa->mb_cur_max > 1)
+        {
+          /* Check the space of the arrays.  */
+          if (BE (*range_alloc == mbcset->nranges, 0))
+            {
+              /* There is not enough space, need realloc.  */
+              uint32_t *new_array_start;
+              uint32_t *new_array_end;
+              int new_nranges;
+
+              /* +1 in case of mbcset->nranges is 0.  */
+              new_nranges = 2 * mbcset->nranges + 1;
+              /* Store each successful result at once: assuming
+                 re_realloc behaves like realloc, the old block is no
+                 longer valid after a successful call, so MBCSET must
+                 not keep pointing at it when the other call fails.  */
+              new_array_start = re_realloc (mbcset->range_starts, uint32_t,
+                                            new_nranges);
+              if (new_array_start != NULL)
+                mbcset->range_starts = new_array_start;
+              new_array_end = re_realloc (mbcset->range_ends, uint32_t,
+                                          new_nranges);
+              if (new_array_end != NULL)
+                mbcset->range_ends = new_array_end;
+
+              if (BE (new_array_start == NULL || new_array_end == NULL, 0))
+                return REG_ESPACE;
+
+              *range_alloc = new_nranges;
+            }
+
+          mbcset->range_starts[mbcset->nranges] = start_collseq;
+          mbcset->range_ends[mbcset->nranges++] = end_collseq;
+        }
+
+      /* Build the table for single byte characters.  */
+      for (ch = 0; ch < SBC_MAX; ch++)
+        {
+          uint32_t ch_collseq;
+          /*
+          if (MB_CUR_MAX == 1)
+          */
+          if (nrules == 0)
+            ch_collseq = collseqmb[ch];
+          else
+            ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
+          if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
+            bitset_set (sbcset, ch);
+        }
+      return REG_NOERROR;
+    }
+
+  /* Local function for parse_bracket_exp used in _LIBC environment.
+     Build the collating element which is represented by NAME.
+     The result is written to MBCSET and SBCSET.
+     COLL_SYM_ALLOC is the allocated size of mbcset->coll_syms; it is a
+     pointer argument since we may update it.  */
+
+  auto inline reg_errcode_t
+  __attribute ((always_inline))
+  build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name)
+         re_charset_t *mbcset;
+         int *coll_sym_alloc;
+         bitset_t sbcset;
+         const unsigned char *name;
+    {
+      int32_t elem, idx;
+      size_t name_len = strlen ((const char *) name);
+      if (nrules != 0)
+        {
+          elem = seek_collating_symbol_entry (name, name_len);
+          if (symb_table[2 * elem] != 0)
+            {
+              /* We found the entry.  */
+              idx = symb_table[2 * elem + 1];
+              /* Skip the name of collating element name.  */
+              idx += 1 + extra[idx];
+            }
+          else if (symb_table[2 * elem] == 0 && name_len == 1)
+            {
+              /* No valid character, treat it as a normal
+                 character.  */
+              bitset_set (sbcset, name[0]);
+              return REG_NOERROR;
+            }
+          else
+            return REG_ECOLLATE;
+
+          /* Got valid collation sequence, add it as a new entry.  */
+          /* Check the space of the arrays.  */
+          if (BE (*coll_sym_alloc == mbcset->ncoll_syms, 0))
+            {
+              /* Not enough, realloc it.  */
+              /* +1 in case of mbcset->ncoll_syms is 0.  */
+              int new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
+              /* Use realloc since mbcset->coll_syms is NULL
+                 if *alloc == 0.  */
+              int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
+                                                   new_coll_sym_alloc);
+              if (BE (new_coll_syms == NULL, 0))
+                return REG_ESPACE;
+              mbcset->coll_syms = new_coll_syms;
+              *coll_sym_alloc = new_coll_sym_alloc;
+            }
+          mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
+          return REG_NOERROR;
+        }
+      else
+        {
+          if (BE (name_len != 1, 0))
+            return REG_ECOLLATE;
+          else
+            {
+              bitset_set (sbcset, name[0]);
+              return REG_NOERROR;
+            }
+        }
+    }
+#endif
+
+  re_token_t br_token;
+  re_bitset_ptr_t sbcset;
+#ifdef RE_ENABLE_I18N
+  re_charset_t *mbcset;
+  int coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0;
+  int equiv_class_alloc = 0, char_class_alloc = 0;
+#endif /* RE_ENABLE_I18N */
+  int non_match = 0;
+  bin_tree_t *work_tree;
+  int token_len;
+  int first_round = 1;
+#ifdef _LIBC
+  collseqmb = (const unsigned char *)
+    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
+  nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+  if (nrules)
+    {
+      /*
+      if (MB_CUR_MAX > 1)
+      */
+      collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
+      table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB);
+      symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE,
+                                                  _NL_COLLATE_SYMB_TABLEMB);
+      extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
+                                                   _NL_COLLATE_SYMB_EXTRAMB);
+    }
+#endif
+  sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
+#ifdef RE_ENABLE_I18N
+  mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
+#endif /* RE_ENABLE_I18N */
+#ifdef RE_ENABLE_I18N
+  if (BE (sbcset == NULL || mbcset == NULL, 0))
+#else
+  if (BE (sbcset == NULL, 0))
+#endif /* RE_ENABLE_I18N */
+    {
+      /* One of the two allocations may have succeeded; release it so it
+         is not leaked.  MBCSET is still all zeros here, so a plain
+         re_free is enough (re_free is expected to accept NULL the way
+         free does).  */
+      re_free (sbcset);
+#ifdef RE_ENABLE_I18N
+      re_free (mbcset);
+#endif /* RE_ENABLE_I18N */
+      *err = REG_ESPACE;
+      return NULL;
+    }
+
+  token_len = peek_token_bracket (token, regexp, syntax);
+  if (BE (token->type == END_OF_RE, 0))
+    {
+      *err = REG_BADPAT;
+      goto parse_bracket_exp_free_return;
+    }
+  if (token->type == OP_NON_MATCH_LIST)
+    {
+#ifdef RE_ENABLE_I18N
+      mbcset->non_match = 1;
+#endif /* RE_ENABLE_I18N */
+      non_match = 1;
+      /* SBCSET is inverted at the end, so setting '\n' now excludes it
+         from the final non-matching list.  */
+      if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
+        bitset_set (sbcset, '\n');
+      re_string_skip_bytes (regexp, token_len); /* Skip a token.  */
+      token_len = peek_token_bracket (token, regexp, syntax);
+      if (BE (token->type == END_OF_RE, 0))
+        {
+          *err = REG_BADPAT;
+          goto parse_bracket_exp_free_return;
+        }
+    }
+
+  /* We treat the first ']' as a normal character.  */
+  if (token->type == OP_CLOSE_BRACKET)
+    token->type = CHARACTER;
+
+  /* One iteration per bracket element or range.  */
+  while (1)
+    {
+      bracket_elem_t start_elem, end_elem;
+      unsigned char start_name_buf[BRACKET_NAME_BUF_SIZE];
+      unsigned char end_name_buf[BRACKET_NAME_BUF_SIZE];
+      reg_errcode_t ret;
+      int token_len2 = 0, is_range_exp = 0;
+      re_token_t token2;
+
+      start_elem.opr.name = start_name_buf;
+      ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa,
+                                   syntax, first_round);
+      if (BE (ret != REG_NOERROR, 0))
+        {
+          *err = ret;
+          goto parse_bracket_exp_free_return;
+        }
+      first_round = 0;
+
+      /* Get information about the next token.  We need it in any case.  */
+      token_len = peek_token_bracket (token, regexp, syntax);
+
+      /* Do not check for ranges if we know they are not allowed.  */
+      if (start_elem.type != CHAR_CLASS && start_elem.type != EQUIV_CLASS)
+        {
+          if (BE (token->type == END_OF_RE, 0))
+            {
+              *err = REG_EBRACK;
+              goto parse_bracket_exp_free_return;
+            }
+          if (token->type == OP_CHARSET_RANGE)
+            {
+              re_string_skip_bytes (regexp, token_len); /* Skip '-'.  */
+              token_len2 = peek_token_bracket (&token2, regexp, syntax);
+              if (BE (token2.type == END_OF_RE, 0))
+                {
+                  *err = REG_EBRACK;
+                  goto parse_bracket_exp_free_return;
+                }
+              if (token2.type == OP_CLOSE_BRACKET)
+                {
+                  /* We treat the last '-' as a normal character.  */
+                  re_string_skip_bytes (regexp, -token_len);
+                  token->type = CHARACTER;
+                }
+              else
+                is_range_exp = 1;
+            }
+        }
+
+      if (is_range_exp == 1)
+        {
+          end_elem.opr.name = end_name_buf;
+          ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2,
+                                       dfa, syntax, 1);
+          if (BE (ret != REG_NOERROR, 0))
+            {
+              *err = ret;
+              goto parse_bracket_exp_free_return;
+            }
+
+          token_len = peek_token_bracket (token, regexp, syntax);
+
+#ifdef _LIBC
+          *err = build_range_exp (sbcset, mbcset, &range_alloc,
+                                  &start_elem, &end_elem);
+#else
+# ifdef RE_ENABLE_I18N
+          *err = build_range_exp (sbcset,
+                                  dfa->mb_cur_max > 1 ? mbcset : NULL,
+                                  &range_alloc, &start_elem, &end_elem);
+# else
+          *err = build_range_exp (sbcset, &start_elem, &end_elem);
+# endif
+#endif /* not _LIBC */
+          if (BE (*err != REG_NOERROR, 0))
+            goto parse_bracket_exp_free_return;
+        }
+      else
+        {
+          switch (start_elem.type)
+            {
+            case SB_CHAR:
+              bitset_set (sbcset, start_elem.opr.ch);
+              break;
+#ifdef RE_ENABLE_I18N
+            case MB_CHAR:
+              /* Check whether the array has enough space.  */
+              if (BE (mbchar_alloc == mbcset->nmbchars, 0))
+                {
+                  wchar_t *new_mbchars;
+                  /* Not enough, realloc it.  */
+                  /* +1 in case of mbcset->nmbchars is 0.  */
+                  mbchar_alloc = 2 * mbcset->nmbchars + 1;
+                  /* Use realloc since array is NULL if *alloc == 0.  */
+                  new_mbchars = re_realloc (mbcset->mbchars, wchar_t,
+                                            mbchar_alloc);
+                  if (BE (new_mbchars == NULL, 0))
+                    goto parse_bracket_exp_espace;
+                  mbcset->mbchars = new_mbchars;
+                }
+              mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch;
+              break;
+#endif /* RE_ENABLE_I18N */
+            case EQUIV_CLASS:
+              *err = build_equiv_class (sbcset,
+#ifdef RE_ENABLE_I18N
+                                        mbcset, &equiv_class_alloc,
+#endif /* RE_ENABLE_I18N */
+                                        start_elem.opr.name);
+              if (BE (*err != REG_NOERROR, 0))
+                goto parse_bracket_exp_free_return;
+              break;
+            case COLL_SYM:
+              *err = build_collating_symbol (sbcset,
+#ifdef RE_ENABLE_I18N
+                                             mbcset, &coll_sym_alloc,
+#endif /* RE_ENABLE_I18N */
+                                             start_elem.opr.name);
+              if (BE (*err != REG_NOERROR, 0))
+                goto parse_bracket_exp_free_return;
+              break;
+            case CHAR_CLASS:
+              *err = build_charclass (regexp->trans, sbcset,
+#ifdef RE_ENABLE_I18N
+                                      mbcset, &char_class_alloc,
+#endif /* RE_ENABLE_I18N */
+                                      start_elem.opr.name, syntax);
+              if (BE (*err != REG_NOERROR, 0))
+                goto parse_bracket_exp_free_return;
+              break;
+            default:
+              assert (0);
+              break;
+            }
+        }
+      if (BE (token->type == END_OF_RE, 0))
+        {
+          *err = REG_EBRACK;
+          goto parse_bracket_exp_free_return;
+        }
+      if (token->type == OP_CLOSE_BRACKET)
+        break;
+    }
+
+  re_string_skip_bytes (regexp, token_len); /* Skip a token.  */
+
+  /* If it is non-matching list.  */
+  if (non_match)
+    bitset_not (sbcset);
+
+#ifdef RE_ENABLE_I18N
+  /* Ensure only single byte characters are set.  */
+  if (dfa->mb_cur_max > 1)
+    bitset_mask (sbcset, dfa->sb_char);
+
+  if (mbcset->nmbchars || mbcset->ncoll_syms || mbcset->nequiv_classes
+      || mbcset->nranges || (dfa->mb_cur_max > 1 && (mbcset->nchar_classes
+                                                     || mbcset->non_match)))
+    {
+      bin_tree_t *mbc_tree;
+      int sbc_idx;
+      /* Build a tree for complex bracket.  */
+      dfa->has_mb_node = 1;
+      br_token.type = COMPLEX_BRACKET;
+      br_token.opr.mbcset = mbcset;
+      mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
+      if (BE (mbc_tree == NULL, 0))
+        goto parse_bracket_exp_espace;
+      for (sbc_idx = 0; sbc_idx < BITSET_WORDS; ++sbc_idx)
+        if (sbcset[sbc_idx])
+          break;
+      /* If there are no bits set in sbcset, there is no point
+         of having both SIMPLE_BRACKET and COMPLEX_BRACKET.  */
+      if (sbc_idx < BITSET_WORDS)
+        {
+          /* Build a tree for simple bracket.  */
+          br_token.type = SIMPLE_BRACKET;
+          br_token.opr.sbcset = sbcset;
+          work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
+          if (BE (work_tree == NULL, 0))
+            goto parse_bracket_exp_espace;
+
+          /* Then join them by ALT node.  */
+          work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT);
+          if (BE (work_tree == NULL, 0))
+            goto parse_bracket_exp_espace;
+        }
+      else
+        {
+          re_free (sbcset);
+          work_tree = mbc_tree;
+        }
+    }
+  else
+#endif /* RE_ENABLE_I18N */
+    {
+      /* Build a tree for simple bracket.  */
+      br_token.type = SIMPLE_BRACKET;
+      br_token.opr.sbcset = sbcset;
+      work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
+      if (BE (work_tree == NULL, 0))
+        goto parse_bracket_exp_espace;
+#ifdef RE_ENABLE_I18N
+      /* MBCSET is not needed for a simple bracket.  It is released only
+         after the node has been created, because the failure path above
+         calls free_charset on it as well and must not do so twice.  */
+      free_charset (mbcset);
+#endif /* RE_ENABLE_I18N */
+    }
+  return work_tree;
+
+ parse_bracket_exp_espace:
+  *err = REG_ESPACE;
+ parse_bracket_exp_free_return:
+  re_free (sbcset);
+#ifdef RE_ENABLE_I18N
+  free_charset (mbcset);
+#endif /* RE_ENABLE_I18N */
+  return NULL;
+}
+
+/* Parse one element of a bracket expression into ELEM.
+
+   TOKEN is the already peeked token for the element and TOKEN_LEN the
+   number of bytes it occupies.  With RE_ENABLE_I18N a character wider
+   than one byte at the current position is taken as an MB_CHAR element
+   directly.  Otherwise TOKEN is consumed; bracket symbols ("[.", "[:"
+   and "[=") are handed to parse_bracket_symbol and everything else
+   becomes an SB_CHAR element.
+
+   ACCEPT_HYPHEN tells whether a '-' may stand for itself at this
+   position; when it is zero a '-' is accepted only if the closing
+   bracket follows, otherwise REG_ERANGE is returned.  */
+
+static reg_errcode_t
+parse_bracket_element (bracket_elem_t *elem, re_string_t *regexp,
+                       re_token_t *token, int token_len, re_dfa_t *dfa,
+                       reg_syntax_t syntax, int accept_hyphen)
+{
+#ifdef RE_ENABLE_I18N
+  const int mb_len
+    = re_string_char_size_at (regexp, re_string_cur_idx (regexp));
+
+  if (mb_len > 1)
+    {
+      /* A multibyte character is an element of its own.  */
+      elem->type = MB_CHAR;
+      elem->opr.wch = re_string_wchar_at (regexp, re_string_cur_idx (regexp));
+      re_string_skip_bytes (regexp, mb_len);
+      return REG_NOERROR;
+    }
+#endif /* RE_ENABLE_I18N */
+
+  re_string_skip_bytes (regexp, token_len); /* Skip a token.  */
+
+  switch (token->type)
+    {
+    case OP_OPEN_COLL_ELEM:
+    case OP_OPEN_CHAR_CLASS:
+    case OP_OPEN_EQUIV_CLASS:
+      return parse_bracket_symbol (elem, regexp, token);
+
+    case OP_CHARSET_RANGE:
+      if (!accept_hyphen)
+        {
+          /* A '-' that is not a range indicator is only valid right
+             before the closing bracket.  The error value for any other
+             position is not standardized since the case is undefined,
+             but ERANGE makes good sense.  */
+          re_token_t next;
+          (void) peek_token_bracket (&next, regexp, syntax);
+          if (next.type != OP_CLOSE_BRACKET)
+            return REG_ERANGE;
+        }
+      break;
+
+    default:
+      break;
+    }
+
+  /* Anything else is an ordinary single byte character.  */
+  elem->type = SB_CHAR;
+  elem->opr.ch = token->opr.c;
+  return REG_NOERROR;
+}
+
+/* Parse a bracket symbol inside a bracket expression, i.e. one of
+   [:<character_class>:], [.<collating_element>.] and
+   [=<equivalent_class>=].
+
+   TOKEN is the opening token; its opr.c is the delimiter byte that, when
+   followed by ']', ends the symbol.  The bytes in between are copied to
+   ELEM->opr.name and NUL terminated, and ELEM->type is set according to
+   the kind of opening token.  Bytes of a character class name are read
+   with re_string_fetch_byte_case, all others with re_string_fetch_byte.
+
+   Returns REG_EBRACK when the input ends before the symbol is closed or
+   when the name does not fit in BRACKET_NAME_BUF_SIZE bytes including
+   the terminator.  */
+
+static reg_errcode_t
+parse_bracket_symbol (bracket_elem_t *elem, re_string_t *regexp,
+                      re_token_t *token)
+{
+  const unsigned char closer = token->opr.c;
+  int len;
+
+  if (re_string_eoi(regexp))
+    return REG_EBRACK;
+
+  for (len = 0; len < BRACKET_NAME_BUF_SIZE; ++len)
+    {
+      unsigned char byte;
+
+      if (token->type == OP_OPEN_CHAR_CLASS)
+        byte = re_string_fetch_byte_case (regexp);
+      else
+        byte = re_string_fetch_byte (regexp);
+
+      /* At least the closing ']' must still follow.  */
+      if (re_string_eoi(regexp))
+        return REG_EBRACK;
+
+      if (byte == closer && re_string_peek_byte (regexp, 0) == ']')
+        {
+          /* End of the symbol: consume the ']' and finish the name.  */
+          re_string_skip_bytes (regexp, 1);
+          elem->opr.name[len] = '\0';
+
+          if (token->type == OP_OPEN_COLL_ELEM)
+            elem->type = COLL_SYM;
+          else if (token->type == OP_OPEN_EQUIV_CLASS)
+            elem->type = EQUIV_CLASS;
+          else if (token->type == OP_OPEN_CHAR_CLASS)
+            elem->type = CHAR_CLASS;
+
+          return REG_NOERROR;
+        }
+
+      elem->opr.name[len] = byte;
+    }
+
+  /* The name buffer is full and no terminator was seen.  */
+  return REG_EBRACK;
+}
+
+ /* Helper function for parse_bracket_exp.
+ Build the equivalence class which is represented by NAME.
+ The results are written to MBCSET and SBCSET.
+ EQUIV_CLASS_ALLOC is the allocated size of mbcset->equiv_classes;
+ it is a pointer argument since we may update it.
+
+ Under _LIBC with collation rules present (nrules != 0), every single
+ byte whose collation weights equal those of NAME is set in SBCSET and
+ the index found for NAME is appended to mbcset->equiv_classes.
+ Otherwise NAME must be exactly one byte long and only that byte is
+ set in SBCSET.
+
+ Returns REG_NOERROR on success, REG_ECOLLATE when NAME does not
+ denote a valid character, and REG_ESPACE when mbcset->equiv_classes
+ cannot be grown. */
+
+ static reg_errcode_t
+#ifdef RE_ENABLE_I18N
+ build_equiv_class (bitset_t sbcset, re_charset_t *mbcset,
+ int *equiv_class_alloc, const unsigned char *name)
+#else /* not RE_ENABLE_I18N */
+ build_equiv_class (bitset_t sbcset, const unsigned char *name)
+#endif /* not RE_ENABLE_I18N */
+{
+#ifdef _LIBC
+ uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+ if (nrules != 0)
+ {
+ const int32_t *table, *indirect;
+ const unsigned char *weights, *extra, *cp;
+ unsigned char char_buf[2];
+ int32_t idx1, idx2;
+ unsigned int ch;
+ size_t len;
+ /* This #include defines a local function (findidx, presumably; it is
+ the only function used below that is not declared in this block). */
+# include <locale/weight.h>
+ /* Calculate the index for equivalence class. */
+ cp = name;
+ table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
+ weights = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
+ _NL_COLLATE_WEIGHTMB);
+ extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
+ _NL_COLLATE_EXTRAMB);
+ indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
+ _NL_COLLATE_INDIRECTMB);
+ idx1 = findidx (&cp);
+ if (BE (idx1 == 0 || cp < name + strlen ((const char *) name), 0))
+ /* This isn't a valid character: either no index was found or
+ findidx did not advance CP to the end of NAME. */
+ return REG_ECOLLATE;
+
+ /* Build single byte matching table for this equivalence class.
+ CHAR_BUF holds each candidate byte as a NUL terminated string. */
+ char_buf[1] = (unsigned char) '\0';
+ len = weights[idx1 & 0xffffff];
+ for (ch = 0; ch < SBC_MAX; ++ch)
+ {
+ char_buf[0] = ch;
+ cp = char_buf;
+ idx2 = findidx (&cp);
+/*
+ idx2 = table[ch];
+*/
+ if (idx2 == 0)
+ /* This isn't a valid character. */
+ continue;
+ /* Compare only if the length matches and the collation rule
+ index is the same. The low 24 bits of an index locate the
+ weight data (a length byte followed by the weights); the
+ bits above them are compared as the rule index. */
+ if (len == weights[idx2 & 0xffffff] && (idx1 >> 24) == (idx2 >> 24))
+ {
+ int cnt = 0;
+
+ while (cnt <= len &&
+ weights[(idx1 & 0xffffff) + 1 + cnt]
+ == weights[(idx2 & 0xffffff) + 1 + cnt])
+ ++cnt;
+
+ if (cnt > len)
+ bitset_set (sbcset, ch);
+ }
+ }
+ /* Check whether the array has enough space. */
+ if (BE (*equiv_class_alloc == mbcset->nequiv_classes, 0))
+ {
+ /* Not enough, realloc it. */
+ /* +1 in case of mbcset->nequiv_classes is 0. */
+ int new_equiv_class_alloc = 2 * mbcset->nequiv_classes + 1;
+ /* Use realloc since the array is NULL if *alloc == 0. */
+ int32_t *new_equiv_classes = re_realloc (mbcset->equiv_classes,
+ int32_t,
+ new_equiv_class_alloc);
+ if (BE (new_equiv_classes == NULL, 0))
+ return REG_ESPACE;
+ mbcset->equiv_classes = new_equiv_classes;
+ *equiv_class_alloc = new_equiv_class_alloc;
+ }
+ mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1;
+ }
+ else
+#endif /* _LIBC */
+ {
+ if (BE (strlen ((const char *) name) != 1, 0))
+ return REG_ECOLLATE;
+ bitset_set (sbcset, *name);
+ }
+ return REG_NOERROR;
+}
+
+ /* Helper function for parse_bracket_exp.
+ Build the character class which is represented by CLASS_NAME.
+ The results are written to MBCSET and SBCSET.
+ CHAR_CLASS_ALLOC is the allocated size of mbcset->char_classes;
+ it is a pointer argument since we may update it. */
+
+static reg_errcode_t
+#ifdef RE_ENABLE_I18N
+build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
+ re_charset_t *mbcset, int *char_class_alloc,
+ const unsigned char *class_name, reg_syntax_t syntax)
+#else /* not RE_ENABLE_I18N */
+build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
+ const unsigned char *class_name, reg_syntax_t syntax)
+#endif /* not RE_ENABLE_I18N */
+{
+ int i;
+ const char *name = (const char *) class_name;
+
+ /* In case of REG_ICASE "upper" and "lower" match the both of
+ upper and lower cases. */
+ if ((syntax & RE_ICASE)
+ && (strcmp (name, "upper") == 0 || strcmp (name, "lower") == 0))
+ name = "alpha";
+
+#ifdef RE_ENABLE_I18N
+ /* Check the space of the arrays. */
+ if (BE (*char_class_alloc == mbcset->nchar_classes, 0))
+ {
+ /* Not enough, realloc it. */
+ /* +1 in case of mbcset->nchar_classes is 0. */
+ int new_char_class_alloc = 2 * mbcset->nchar_classes + 1;
+ /* Use realloc since array is NULL if *alloc == 0. */
+ wctype_t *new_char_classes = re_realloc (mbcset->char_classes, wctype_t,
+ new_char_class_alloc);
+ if (BE (new_char_classes == NULL, 0))
+ return REG_ESPACE;
+ mbcset->char_classes = new_char_classes;
+ *char_class_alloc = new_char_class_alloc;
+ }
+ mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name);
+#endif /* RE_ENABLE_I18N */
+
+#define BUILD_CHARCLASS_LOOP(ctype_func) \
+ do { \
+ if (BE (trans != NULL, 0)) \
+ { \
+ for (i = 0; i < SBC_MAX; ++i) \
+ if (ctype_func (i)) \
+ bitset_set (sbcset, trans[i]); \
+ } \
+ else \
+ { \
+ for (i = 0; i < SBC_MAX; ++i) \
+ if (ctype_func (i)) \
+ bitset_set (sbcset, i); \
+ } \
+ } while (0)
+
+ if (strcmp (name, "alnum") == 0)
+ BUILD_CHARCLASS_LOOP (isalnum);
+ else if (strcmp (name, "cntrl") == 0)
+ BUILD_CHARCLASS_LOOP (iscntrl);
+ else if (strcmp (name, "lower") == 0)
+ BUILD_CHARCLASS_LOOP (islower);
+ else if (strcmp (name, "space") == 0)
+ BUILD_CHARCLASS_LOOP (isspace);
+ else if (strcmp (name, "alpha") == 0)
+ BUILD_CHARCLASS_LOOP (isalpha);
+ else if (strcmp (name, "digit") == 0)
+ BUILD_CHARCLASS_LOOP (isdigit);
+ else if (strcmp (name, "print") == 0)
+ BUILD_CHARCLASS_LOOP (isprint);
+ else if (strcmp (name, "upper") == 0)
+ BUILD_CHARCLASS_LOOP (isupper);
+ else if (strcmp (name, "blank") == 0)
+ BUILD_CHARCLASS_LOOP (isblank);
+ else if (strcmp (name, "graph") == 0)
+ BUILD_CHARCLASS_LOOP (isgraph);
+ else if (strcmp (name, "punct") == 0)
+ BUILD_CHARCLASS_LOOP (ispunct);
+ else if (strcmp (name, "xdigit") == 0)
+ BUILD_CHARCLASS_LOOP (isxdigit);
+ else
+ return REG_ECTYPE;
+
+ return REG_NOERROR;
+}
+
+/* Build the parse tree for a named-class operator (for instance \w,
+   which is the class "alnum" plus the extra byte '_').
+
+   CLASS_NAME is the character class to match, EXTRA is a NUL-terminated
+   list of additional single bytes to add to the set, NON_MATCH is
+   nonzero to build the complemented set, and TRANS is the optional
+   translate table handed on to build_charclass.
+
+   On success return the new tree: a SIMPLE_BRACKET node, joined through
+   an OP_ALT node with a COMPLEX_BRACKET node when DFA->mb_cur_max > 1.
+   On failure return NULL, release both sets and store the error code
+   in *ERR.  */
+
+static bin_tree_t *
+build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
+                    const unsigned char *class_name,
+                    const unsigned char *extra, int non_match,
+                    reg_errcode_t *err)
+{
+  re_bitset_ptr_t sbcset;
+#ifdef RE_ENABLE_I18N
+  re_charset_t *mbcset;
+  int alloc = 0;
+#endif /* RE_ENABLE_I18N */
+  reg_errcode_t ret;
+  re_token_t br_token;
+  bin_tree_t *tree;
+
+  sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
+  if (BE (sbcset == NULL, 0))
+    {
+      *err = REG_ESPACE;
+      return NULL;
+    }
+#ifdef RE_ENABLE_I18N
+  mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
+  if (BE (mbcset == NULL, 0))
+    {
+      /* Do not leak the bitset that was allocated just above.  */
+      re_free (sbcset);
+      *err = REG_ESPACE;
+      return NULL;
+    }
+  if (non_match)
+    mbcset->non_match = 1;
+#endif /* RE_ENABLE_I18N */
+
+  /* We don't care about the syntax in this case.  */
+  ret = build_charclass (trans, sbcset,
+#ifdef RE_ENABLE_I18N
+                         mbcset, &alloc,
+#endif /* RE_ENABLE_I18N */
+                         class_name, 0);
+
+  if (BE (ret != REG_NOERROR, 0))
+    {
+      re_free (sbcset);
+#ifdef RE_ENABLE_I18N
+      free_charset (mbcset);
+#endif /* RE_ENABLE_I18N */
+      *err = ret;
+      return NULL;
+    }
+  /* Add the extra bytes, e.g. '_' for \w.  */
+  for (; *extra; extra++)
+    bitset_set (sbcset, *extra);
+
+  /* If it is a non-matching list, complement the set.  */
+  if (non_match)
+    bitset_not (sbcset);
+
+#ifdef RE_ENABLE_I18N
+  /* Ensure only single byte characters are set.  */
+  if (dfa->mb_cur_max > 1)
+    bitset_mask (sbcset, dfa->sb_char);
+#endif
+
+  /* Build a tree for simple bracket.  */
+  br_token.type = SIMPLE_BRACKET;
+  br_token.opr.sbcset = sbcset;
+  tree = create_token_tree (dfa, NULL, NULL, &br_token);
+  if (BE (tree == NULL, 0))
+    goto build_word_op_espace;
+
+#ifdef RE_ENABLE_I18N
+  if (dfa->mb_cur_max > 1)
+    {
+      bin_tree_t *mbc_tree;
+      /* Build a tree for complex bracket.  */
+      br_token.type = COMPLEX_BRACKET;
+      br_token.opr.mbcset = mbcset;
+      dfa->has_mb_node = 1;
+      mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
+      if (BE (mbc_tree == NULL, 0))
+        goto build_word_op_espace;
+      /* Then join them by ALT node.  */
+      tree = create_tree (dfa, tree, mbc_tree, OP_ALT);
+      /* Test the ALT node itself: MBC_TREE is already known to be
+         non-NULL here, so testing it would hide an allocation failure
+         and return NULL without setting *ERR.  */
+      if (BE (tree != NULL, 1))
+        return tree;
+    }
+  else
+    {
+      free_charset (mbcset);
+      return tree;
+    }
+#else /* not RE_ENABLE_I18N */
+  return tree;
+#endif /* not RE_ENABLE_I18N */
+
+ build_word_op_espace:
+  re_free (sbcset);
+#ifdef RE_ENABLE_I18N
+  free_charset (mbcset);
+#endif /* RE_ENABLE_I18N */
+  *err = REG_ESPACE;
+  return NULL;
+}
+
+/* Parse one bound of an interval expression such as "a{1,3}".
+   Tokens are fetched from INPUT until a ',' or the closing token of
+   the interval is seen.
+   Return the decimal number read, -1 if the field is empty (as in
+   "{,1}"), or -2 if an error occurred: a token that is not a digit,
+   a value larger than RE_DUP_MAX, or the end of the pattern.  */
+
+static int
+fetch_number (re_string_t *input, re_token_t *token, reg_syntax_t syntax)
+{
+  int value = -1;
+
+  for (;;)
+    {
+      unsigned char ch;
+
+      fetch_token (token, input, syntax);
+      ch = token->opr.c;
+      if (BE (token->type == END_OF_RE, 0))
+        return -2;
+      if (token->type == OP_CLOSE_DUP_NUM || ch == ',')
+        return value;
+
+      /* Once an error has been seen the value stays at -2, but tokens
+         are still consumed up to the terminator.  */
+      if (value == -2 || token->type != CHARACTER || ch < '0' || '9' < ch)
+        value = -2;
+      else if (value == -1)
+        value = ch - '0';
+      else
+        value = value * 10 + ch - '0';
+
+      if (value > RE_DUP_MAX)
+        value = -2;
+    }
+}
+
+#ifdef RE_ENABLE_I18N
+/* Release CSET together with every array it owns.  */
+
+static void
+free_charset (re_charset_t *cset)
+{
+  re_free (cset->char_classes);
+  re_free (cset->mbchars);
+# ifdef _LIBC
+  re_free (cset->range_ends);
+  re_free (cset->range_starts);
+  re_free (cset->equiv_classes);
+  re_free (cset->coll_syms);
+# endif
+  /* The structure itself goes last, after all of its members.  */
+  re_free (cset);
+}
+#endif /* RE_ENABLE_I18N */
+
+/* Functions for binary tree operation. */
+
+/* Create a tree node of kind TYPE whose children are LEFT and RIGHT.
+   Only the type of the node's token is filled in here.  Return NULL
+   if create_token_tree fails.  */
+
+static bin_tree_t *
+create_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
+             re_token_type_t type)
+{
+  re_token_t tok;
+
+  tok.type = type;
+  return create_token_tree (dfa, left, right, &tok);
+}
+
+/* Take the next free node from DFA's tree storage, copy TOKEN into it
+   and attach LEFT and RIGHT (either may be NULL) as its children.
+   Return the node, or NULL if a new storage block cannot be allocated.  */
+static bin_tree_t *
+create_token_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
+                   const re_token_t *token)
+{
+  bin_tree_t *node;
+
+  /* Chain in a fresh storage block once the current one is full.  */
+  if (BE (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE, 0))
+    {
+      bin_tree_storage_t *block = re_malloc (bin_tree_storage_t, 1);
+
+      if (block == NULL)
+        return NULL;
+      block->next = dfa->str_tree_storage;
+      dfa->str_tree_storage = block;
+      dfa->str_tree_storage_idx = 0;
+    }
+  node = dfa->str_tree_storage->data + dfa->str_tree_storage_idx;
+  dfa->str_tree_storage_idx++;
+
+  /* Copy the token, then clear the flags that belong to the node.  */
+  node->token = *token;
+  node->token.duplicated = 0;
+  node->token.opt_subexp = 0;
+  node->parent = NULL;
+  node->first = NULL;
+  node->next = NULL;
+  node->node_idx = -1;
+
+  /* Link the children both ways.  */
+  node->left = left;
+  if (left != NULL)
+    left->parent = node;
+  node->right = right;
+  if (right != NULL)
+    right->parent = node;
+  return node;
+}
+
+/* Tree-walk callback: flag NODE as an optional subexpression when it is
+   the SUBEXP node whose index was passed, cast to a pointer, in EXTRA.
+   To be called from preorder or postorder.  Always returns REG_NOERROR.  */
+
+static reg_errcode_t
+mark_opt_subexp (void *extra, bin_tree_t *node)
+{
+  int wanted = (int) (long) extra;
+
+  if (node->token.type != SUBEXP || node->token.opr.idx != wanted)
+    return REG_NOERROR;
+
+  node->token.opt_subexp = 1;
+  return REG_NOERROR;
+}
+
+/* Free the character set owned by the token NODE, if it has one.
+   Tokens marked as duplicated are left alone: duplicate_tree copies
+   the token, so a duplicate points at the same set as its original.  */
+
+static void
+free_token (re_token_t *node)
+{
+  if (node->duplicated != 0)
+    return;
+
+#ifdef RE_ENABLE_I18N
+  if (node->type == COMPLEX_BRACKET)
+    {
+      free_charset (node->opr.mbcset);
+      return;
+    }
+#endif /* RE_ENABLE_I18N */
+  if (node->type == SIMPLE_BRACKET)
+    re_free (node->opr.sbcset);
+}
+
+/* Worker function for tree walking.  Free the memory owned by the token
+   of NODE; this function itself does not visit the children of NODE.
+   EXTRA is unused.  Always returns REG_NOERROR.  */
+
+static reg_errcode_t
+free_tree (void *extra, bin_tree_t *node)
+{
+  re_token_t *tok = &node->token;
+
+  (void) extra;
+  free_token (tok);
+  return REG_NOERROR;
+}
+
+
+/* Duplicate the tree rooted at ROOT, taking the new nodes from DFA, and
+ return the root of the copy, or NULL if a node cannot be created.
+ Every token in the copy is marked as duplicated, and the copy's root
+ receives the same parent pointer as ROOT. This is a preorder
+ visit similar to the one implemented by the generic visitor, but
+ we need more infrastructure to maintain two parallel trees --- so,
+ it's easier to duplicate.
+ NOTE(review): the upward walk below stops only at a NULL parent, so
+ this looks like it expects ROOT->parent to be NULL -- confirm with
+ the callers. */
+
+static bin_tree_t *
+duplicate_tree (const bin_tree_t *root, re_dfa_t *dfa)
+{
+ const bin_tree_t *node;
+ bin_tree_t *dup_root;
+ bin_tree_t **p_new = &dup_root, *dup_node = root->parent; /* P_NEW is the slot that receives the next copy. */
+
+ for (node = root; ; )
+ {
+ /* Create a new tree and link it back to the current parent. */
+ *p_new = create_token_tree (dfa, NULL, NULL, &node->token);
+ if (*p_new == NULL)
+ return NULL;
+ (*p_new)->parent = dup_node;
+ (*p_new)->token.duplicated = 1; /* free_token skips duplicated tokens. */
+ dup_node = *p_new;
+
+ /* Go to the left node, or up and to the right. */
+ if (node->left)
+ {
+ node = node->left;
+ p_new = &dup_node->left;
+ }
+ else
+ {
+ const bin_tree_t *prev = NULL;
+ /* Climb both trees in step until reaching a node whose right
+ child exists and is not the one we just came up from. */
+ while (node->right == prev || node->right == NULL)
+ {
+ prev = node;
+ node = node->parent;
+ dup_node = dup_node->parent;
+ if (!node)
+ return dup_root; /* Walked past the top: the copy is complete. */
+ }
+ node = node->right;
+ p_new = &dup_node->right;
+ }
+ }
+}
diff --git a/gnu_regex/.svn/text-base/regex.c.svn-base b/gnu_regex/.svn/text-base/regex.c.svn-base
new file mode 100644
index 0000000..bec9f9d
--- /dev/null
+++ b/gnu_regex/.svn/text-base/regex.c.svn-base
@@ -0,0 +1,74 @@
+/* Extended regular expression matching and search library.
+ Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+/* Make sure no one compiles this code with a C++ compiler. */
+#ifdef __cplusplus
+# error "This is C code, use a C compiler"
+#endif
+
+#ifdef _LIBC
+/* We have to keep the namespace clean. */
+# define regfree(preg) __regfree (preg)
+# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
+# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
+# define regerror(errcode, preg, errbuf, errbuf_size) \
+ __regerror(errcode, preg, errbuf, errbuf_size)
+# define re_set_registers(bu, re, nu, st, en) \
+ __re_set_registers (bu, re, nu, st, en)
+# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
+ __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
+# define re_match(bufp, string, size, pos, regs) \
+ __re_match (bufp, string, size, pos, regs)
+# define re_search(bufp, string, size, startpos, range, regs) \
+ __re_search (bufp, string, size, startpos, range, regs)
+# define re_compile_pattern(pattern, length, bufp) \
+ __re_compile_pattern (pattern, length, bufp)
+# define re_set_syntax(syntax) __re_set_syntax (syntax)
+# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
+ __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
+# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
+
+# include "../locale/localeinfo.h"
+#endif
+
+/* On some systems, limits.h sets RE_DUP_MAX to a lower value than
+ GNU regex allows. Include it before <regex.h>, which correctly
+ #undefs RE_DUP_MAX and sets it to the right value. */
+#include <limits.h>
+
+#include "regex.h"
+#include "regex_internal.h"
+
+#include "regex_internal.c"
+#include "regcomp.c"
+#include "regexec.c"
+
+/* Binary backward compatibility. */
+#if _LIBC
+# include <shlib-compat.h>
+# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3)
+link_warning (re_max_failures, "the 're_max_failures' variable is obsolete and will go away.")
+int re_max_failures = 2000;
+# endif
+#endif
diff --git a/gnu_regex/.svn/text-base/regex.h.svn-base b/gnu_regex/.svn/text-base/regex.h.svn-base
new file mode 100644
index 0000000..2132772
--- /dev/null
+++ b/gnu_regex/.svn/text-base/regex.h.svn-base
@@ -0,0 +1,575 @@
+/* Definitions for data structures and routines for the regular
+ expression library.
+ Copyright (C) 1985,1989-93,1995-98,2000,2001,2002,2003,2005,2006,2008
+ Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#ifndef _REGEX_H
+#define _REGEX_H 1
+
+#include <sys/types.h>
+
+/* Allow the use in C++ code. */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* The following two types have to be signed and unsigned integer type
+ wide enough to hold a value of a pointer. For most ANSI compilers
+ ptrdiff_t and size_t should be likely OK. Still size of these two
+ types is 2 for Microsoft C. Ugh... */
+typedef long int s_reg_t;
+typedef unsigned long int active_reg_t;
+
+/* The following bits are used to determine the regexp syntax we
+ recognize. The set/not-set meanings are chosen so that Emacs syntax
+ remains the value 0. The bits are given in alphabetical order, and
+ the definitions shifted by one from the previous bit; thus, when we
+ add or remove a bit, only one other definition need change. */
+typedef unsigned long int reg_syntax_t;
+
+#ifdef __USE_GNU
+/* If this bit is not set, then \ inside a bracket expression is literal.
+ If set, then such a \ quotes the following character. */
+# define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1)
+
+/* If this bit is not set, then + and ? are operators, and \+ and \? are
+ literals.
+ If set, then \+ and \? are operators and + and ? are literals. */
+# define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
+
+/* If this bit is set, then character classes are supported. They are:
+ [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:],
+ [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
+ If not set, then character classes are not supported. */
+# define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
+
+/* If this bit is set, then ^ and $ are always anchors (outside bracket
+ expressions, of course).
+ If this bit is not set, then it depends:
+ ^ is an anchor if it is at the beginning of a regular
+ expression or after an open-group or an alternation operator;
+ $ is an anchor if it is at the end of a regular expression, or
+ before a close-group or an alternation operator.
+
+ This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
+ POSIX draft 11.2 says that * etc. in leading positions is undefined.
+ We already implemented a previous draft which made those constructs
+ invalid, though, so we haven't changed the code back. */
+# define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
+
+/* If this bit is set, then special characters are always special
+ regardless of where they are in the pattern.
+ If this bit is not set, then special characters are special only in
+ some contexts; otherwise they are ordinary. Specifically,
+ * + ? and intervals are only special when not after the beginning,
+ open-group, or alternation operator. */
+# define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
+
+/* If this bit is set, then *, +, ?, and { cannot be first in an re or
+ immediately after an alternation or begin-group operator. */
+# define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
+
+/* If this bit is set, then . matches newline.
+ If not set, then it doesn't. */
+# define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
+
+/* If this bit is set, then . doesn't match NUL.
+ If not set, then it does. */
+# define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
+
+/* If this bit is set, nonmatching lists [^...] do not match newline.
+ If not set, they do. */
+# define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
+
+/* If this bit is set, either \{...\} or {...} defines an
+ interval, depending on RE_NO_BK_BRACES.
+ If not set, \{, \}, {, and } are literals. */
+# define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
+
+/* If this bit is set, +, ? and | aren't recognized as operators.
+ If not set, they are. */
+# define RE_LIMITED_OPS (RE_INTERVALS << 1)
+
+/* If this bit is set, newline is an alternation operator.
+ If not set, newline is literal. */
+# define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
+
+/* If this bit is set, then `{...}' defines an interval, and \{ and \}
+ are literals.
+ If not set, then `\{...\}' defines an interval. */
+# define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
+
+/* If this bit is set, (...) defines a group, and \( and \) are literals.
+ If not set, \(...\) defines a group, and ( and ) are literals. */
+# define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
+
+/* If this bit is set, then \<digit> matches <digit>.
+ If not set, then \<digit> is a back-reference. */
+# define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
+
+/* If this bit is set, then | is an alternation operator, and \| is literal.
+ If not set, then \| is an alternation operator, and | is literal. */
+# define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
+
+/* If this bit is set, then an ending range point collating higher
+ than the starting range point, as in [z-a], is invalid.
+ If not set, then when ending range point collates higher than the
+ starting range point, the range is ignored. */
+# define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
+
+/* If this bit is set, then an unmatched ) is ordinary.
+ If not set, then an unmatched ) is invalid. */
+# define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
+
+/* If this bit is set, succeed as soon as we match the whole pattern,
+ without further backtracking. */
+# define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)
+
+/* If this bit is set, do not process the GNU regex operators.
+ If not set, then the GNU regex operators are recognized. */
+# define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1)
+
+/* If this bit is set, turn on internal regex debugging.
+ If not set, and debugging was on, turn it off.
+ This only works if regex.c is compiled -DDEBUG.
+ We define this bit always, so that all that's needed to turn on
+ debugging is to recompile regex.c; the calling code can always have
+ this bit set, and it won't affect anything in the normal case. */
+# define RE_DEBUG (RE_NO_GNU_OPS << 1)
+
+/* If this bit is set, a syntactically invalid interval is treated as
+ a string of ordinary characters. For example, the ERE 'a{1' is
+ treated as 'a\{1'. */
+# define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1)
+
+/* If this bit is set, then ignore case when matching.
+ If not set, then case is significant. */
+# define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1)
+
+/* This bit is used internally like RE_CONTEXT_INDEP_ANCHORS but only
+ for ^, because it is difficult to scan the regex backwards to find
+ whether ^ should be special. */
+# define RE_CARET_ANCHORS_HERE (RE_ICASE << 1)
+
+/* If this bit is set, then \{ cannot be first in an bre or
+ immediately after an alternation or begin-group operator. */
+# define RE_CONTEXT_INVALID_DUP (RE_CARET_ANCHORS_HERE << 1)
+
+/* If this bit is set, then no_sub will be set to 1 during
+ re_compile_pattern. */
+# define RE_NO_SUB (RE_CONTEXT_INVALID_DUP << 1)
+#endif
+
+/* This global variable defines the particular regexp syntax to use (for
+ some interfaces). When a regexp is compiled, the syntax used is
+ stored in the pattern buffer, so changing this does not affect
+ already-compiled regexps. */
+extern reg_syntax_t re_syntax_options;
+
+#ifdef __USE_GNU
+/* Define combinations of the above bits for the standard possibilities.
+ (The [[[ comments delimit what gets put into the Texinfo file, so
+ don't delete them!) */
+/* [[[begin syntaxes]]] */
+#define RE_SYNTAX_EMACS 0
+
+#define RE_SYNTAX_AWK \
+ (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \
+ | RE_NO_BK_PARENS | RE_NO_BK_REFS \
+ | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \
+ | RE_DOT_NEWLINE | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)
+
+#define RE_SYNTAX_GNU_AWK \
+ ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG) \
+ & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS \
+ | RE_CONTEXT_INVALID_OPS ))
+
+#define RE_SYNTAX_POSIX_AWK \
+ (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \
+ | RE_INTERVALS | RE_NO_GNU_OPS)
+
+#define RE_SYNTAX_GREP \
+ (RE_BK_PLUS_QM | RE_CHAR_CLASSES \
+ | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \
+ | RE_NEWLINE_ALT)
+
+#define RE_SYNTAX_EGREP \
+ (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \
+ | RE_NEWLINE_ALT | RE_NO_BK_PARENS \
+ | RE_NO_BK_VBAR)
+
+#define RE_SYNTAX_POSIX_EGREP \
+ (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES \
+ | RE_INVALID_INTERVAL_ORD)
+
+/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */
+#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
+
+#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
+
+/* Syntax bits common to both basic and extended POSIX regex syntax. */
+#define _RE_SYNTAX_POSIX_COMMON \
+ (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \
+ | RE_INTERVALS | RE_NO_EMPTY_RANGES)
+
+#define RE_SYNTAX_POSIX_BASIC \
+ (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM | RE_CONTEXT_INVALID_DUP)
+
+/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
+ RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this
+ isn't minimal, since other operators, such as \`, aren't disabled. */
+#define RE_SYNTAX_POSIX_MINIMAL_BASIC \
+ (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
+
+#define RE_SYNTAX_POSIX_EXTENDED \
+ (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \
+ | RE_NO_BK_PARENS | RE_NO_BK_VBAR \
+ | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD)
+
+/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is
+ removed and RE_NO_BK_REFS is added. */
+#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \
+ (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \
+ | RE_NO_BK_PARENS | RE_NO_BK_REFS \
+ | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD)
+/* [[[end syntaxes]]] */
+
+/* Maximum number of duplicates an interval can allow. Some systems
+ (erroneously) define this in other header files, but we want our
+ value, so remove any previous define. */
+# ifdef RE_DUP_MAX
+# undef RE_DUP_MAX
+# endif
+/* If sizeof(int) == 2, then ((1 << 15) - 1) overflows. */
+# define RE_DUP_MAX (0x7fff)
+#endif
+
+
+/* POSIX `cflags' bits (i.e., information for `regcomp'). */
+
+/* If this bit is set, then use extended regular expression syntax.
+ If not set, then use basic regular expression syntax. */
+#define REG_EXTENDED 1
+
+/* If this bit is set, then ignore case when matching.
+ If not set, then case is significant. */
+#define REG_ICASE (REG_EXTENDED << 1)
+
+/* If this bit is set, then anchors do not match at newline
+ characters in the string.
+ If not set, then anchors do match at newlines. */
+#define REG_NEWLINE (REG_ICASE << 1)
+
+/* If this bit is set, then report only success or fail in regexec.
+ If not set, then returns differ between not matching and errors. */
+#define REG_NOSUB (REG_NEWLINE << 1)
+
+
+/* POSIX `eflags' bits (i.e., information for regexec). */
+
+/* If this bit is set, then the beginning-of-line operator doesn't match
+ the beginning of the string (presumably because it's not the
+ beginning of a line).
+ If not set, then the beginning-of-line operator does match the
+ beginning of the string. */
+#define REG_NOTBOL 1
+
+/* Like REG_NOTBOL, except for the end-of-line. */
+#define REG_NOTEOL (1 << 1)
+
+/* Use PMATCH[0] to delimit the start and end of the search in the
+ buffer. */
+#define REG_STARTEND (1 << 2)
+
+
+/* Error codes used by the regex compile and match routines.
+ If any error codes are removed, changed, or added, update the
+ `re_error_msg' table in regex.c. */
+typedef enum
+{
+#if defined _XOPEN_SOURCE || defined __USE_XOPEN2K
+ REG_ENOSYS = -1, /* This will never happen for this implementation. */
+#endif
+
+ REG_NOERROR = 0, /* Success. */
+ REG_NOMATCH, /* Didn't find a match (for regexec). */
+
+ /* POSIX regcomp return error codes. (In the order listed in the
+ standard.) */
+ REG_BADPAT, /* Invalid pattern. */
+ REG_ECOLLATE, /* Invalid collating element. */
+ REG_ECTYPE, /* Invalid character class name. */
+ REG_EESCAPE, /* Trailing backslash. */
+ REG_ESUBREG, /* Invalid back reference. */
+ REG_EBRACK, /* Unmatched left bracket. */
+ REG_EPAREN, /* Parenthesis imbalance. */
+ REG_EBRACE, /* Unmatched \{. */
+ REG_BADBR, /* Invalid contents of \{\}. */
+ REG_ERANGE, /* Invalid range end. */
+ REG_ESPACE, /* Ran out of memory. */
+ REG_BADRPT, /* No preceding re for repetition op. */
+
+ /* Error codes we've added. */
+ REG_EEND, /* Premature end. */
+ REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */
+ REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */
+} reg_errcode_t;
+
+/* This data structure represents a compiled pattern. Before calling
+ the pattern compiler, the fields `buffer', `allocated', `fastmap',
+ `translate', and `no_sub' can be set. After the pattern has been
+ compiled, the `re_nsub' field is available. All other fields are
+ private to the regex routines. */
+
+#ifndef RE_TRANSLATE_TYPE
+# define __RE_TRANSLATE_TYPE unsigned char *
+# ifdef __USE_GNU
+# define RE_TRANSLATE_TYPE __RE_TRANSLATE_TYPE
+# endif
+#endif
+
+#ifdef __USE_GNU
+# define __REPB_PREFIX(name) name
+#else
+# define __REPB_PREFIX(name) __##name
+#endif
+
+struct re_pattern_buffer
+{
+ /* Space that holds the compiled pattern. It is declared as
+ `unsigned char *' because its elements are sometimes used as
+ array indexes. */
+ unsigned char *__REPB_PREFIX(buffer);
+
+ /* Number of bytes to which `buffer' points. */
+ unsigned long int __REPB_PREFIX(allocated);
+
+ /* Number of bytes actually used in `buffer'. */
+ unsigned long int __REPB_PREFIX(used);
+
+ /* Syntax setting with which the pattern was compiled. */
+ reg_syntax_t __REPB_PREFIX(syntax);
+
+ /* Pointer to a fastmap, if any, otherwise zero. re_search uses the
+ fastmap, if there is one, to skip over impossible starting points
+ for matches. */
+ char *__REPB_PREFIX(fastmap);
+
+ /* Either a translate table to apply to all characters before
+ comparing them, or zero for no translation. The translation is
+ applied to a pattern when it is compiled and to a string when it
+ is matched. */
+ __RE_TRANSLATE_TYPE __REPB_PREFIX(translate);
+
+ /* Number of subexpressions found by the compiler. */
+ size_t re_nsub;
+
+ /* Zero if this pattern cannot match the empty string, one else.
+ Well, in truth it's used only in `re_search_2', to see whether or
+ not we should use the fastmap, so we don't set this absolutely
+ perfectly; see `re_compile_fastmap' (the `duplicate' case). */
+ unsigned __REPB_PREFIX(can_be_null) : 1;
+
+ /* If REGS_UNALLOCATED, allocate space in the `regs' structure
+ for `max (RE_NREGS, re_nsub + 1)' groups.
+ If REGS_REALLOCATE, reallocate space if necessary.
+ If REGS_FIXED, use what's there. */
+#ifdef __USE_GNU
+# define REGS_UNALLOCATED 0
+# define REGS_REALLOCATE 1
+# define REGS_FIXED 2
+#endif
+ unsigned __REPB_PREFIX(regs_allocated) : 2;
+
+ /* Set to zero when `regex_compile' compiles a pattern; set to one
+ by `re_compile_fastmap' if it updates the fastmap. */
+ unsigned __REPB_PREFIX(fastmap_accurate) : 1;
+
+ /* If set, `re_match_2' does not return information about
+ subexpressions. */
+ unsigned __REPB_PREFIX(no_sub) : 1;
+
+ /* If set, a beginning-of-line anchor doesn't match at the beginning
+ of the string. */
+ unsigned __REPB_PREFIX(not_bol) : 1;
+
+ /* Similarly for an end-of-line anchor. */
+ unsigned __REPB_PREFIX(not_eol) : 1;
+
+ /* If true, an anchor at a newline matches. */
+ unsigned __REPB_PREFIX(newline_anchor) : 1;
+};
+
+typedef struct re_pattern_buffer regex_t;
+
+/* Type for byte offsets within the string. POSIX mandates this. */
+typedef int regoff_t;
+
+
+#ifdef __USE_GNU
+/* This is the structure we store register match data in. See
+ regex.texinfo for a full description of what registers match. */
+struct re_registers
+{
+ unsigned num_regs;
+ regoff_t *start;
+ regoff_t *end;
+};
+
+
+/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
+ `re_match_2' returns information about at least this many registers
+ the first time a `regs' structure is passed. */
+# ifndef RE_NREGS
+# define RE_NREGS 30
+# endif
+#endif
+
+
+/* POSIX specification for registers. Aside from the different names than
+ `re_registers', POSIX uses an array of structures, instead of a
+ structure of arrays. */
+typedef struct
+{
+ regoff_t rm_so; /* Byte offset from string's start to substring's start. */
+ regoff_t rm_eo; /* Byte offset from string's start to substring's end. */
+} regmatch_t;
+
+/* Declarations for routines. */
+
+#ifdef __USE_GNU
+/* Sets the current default syntax to SYNTAX, and return the old syntax.
+ You can also simply assign to the `re_syntax_options' variable. */
+extern reg_syntax_t re_set_syntax (reg_syntax_t __syntax);
+
+/* Compile the regular expression PATTERN, with length LENGTH
+ and syntax given by the global `re_syntax_options', into the buffer
+ BUFFER. Return NULL if successful, and an error string if not. */
+extern const char *re_compile_pattern (const char *__pattern, size_t __length,
+ struct re_pattern_buffer *__buffer);
+
+
+/* Compile a fastmap for the compiled pattern in BUFFER; used to
+ accelerate searches. Return 0 if successful and -2 if there was an
+ internal error. */
+extern int re_compile_fastmap (struct re_pattern_buffer *__buffer);
+
+
+/* Search in the string STRING (with length LENGTH) for the pattern
+ compiled into BUFFER. Start searching at position START, for RANGE
+ characters. Return the starting position of the match, -1 for no
+ match, or -2 for an internal error. Also return register
+ information in REGS (if REGS is nonzero and BUFFER->no_sub is zero). */
+extern int re_search (struct re_pattern_buffer *__buffer, const char *__string,
+ int __length, int __start, int __range,
+ struct re_registers *__regs);
+
+
+/* Like `re_search', but search in the concatenation of STRING1 and
+ STRING2. Also, stop searching at index START + STOP. */
+extern int re_search_2 (struct re_pattern_buffer *__buffer,
+ const char *__string1, int __length1,
+ const char *__string2, int __length2, int __start,
+ int __range, struct re_registers *__regs, int __stop);
+
+
+/* Like `re_search', but return how many characters in STRING the regexp
+ in BUFFER matched, starting at position START. */
+extern int re_match (struct re_pattern_buffer *__buffer, const char *__string,
+ int __length, int __start, struct re_registers *__regs);
+
+
+/* Relates to `re_match' as `re_search_2' relates to `re_search'. */
+extern int re_match_2 (struct re_pattern_buffer *__buffer,
+ const char *__string1, int __length1,
+ const char *__string2, int __length2, int __start,
+ struct re_registers *__regs, int __stop);
+
+
+/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
+ ENDS. Subsequent matches using BUFFER and REGS will use this memory
+ for recording register information. STARTS and ENDS must be
+ allocated with malloc, and must each be at least `NUM_REGS * sizeof
+ (regoff_t)' bytes long.
+
+ If NUM_REGS == 0, then subsequent matches should allocate their own
+ register data.
+
+ Unless this function is called, the first search or match using
+ PATTERN_BUFFER will allocate its own register data, without
+ freeing the old data. */
+extern void re_set_registers (struct re_pattern_buffer *__buffer,
+ struct re_registers *__regs,
+ unsigned int __num_regs,
+ regoff_t *__starts, regoff_t *__ends);
+#endif /* Use GNU */
+
+#if defined _REGEX_RE_COMP || (defined _LIBC && defined __USE_BSD)
+# ifndef _CRAY
+/* 4.2 bsd compatibility. */
+extern char *re_comp (const char *);
+extern int re_exec (const char *);
+# endif
+#endif
+
+/* GCC 2.95 and later have "__restrict"; C99 compilers have
+ "restrict", and "configure" may have defined "restrict". */
+#ifndef __restrict
+# if ! (2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__))
+# if defined restrict || 199901L <= __STDC_VERSION__
+# define __restrict restrict
+# else
+# define __restrict
+# endif
+# endif
+#endif
+/* gcc 3.1 and up support the [restrict] syntax. */
+#ifndef __restrict_arr
+# if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) \
+ && !defined __GNUG__
+# define __restrict_arr __restrict
+# else
+# define __restrict_arr
+# endif
+#endif
+
+/* POSIX compatibility. */
+extern int regcomp (regex_t *__restrict __preg,
+ const char *__restrict __pattern,
+ int __cflags);
+
+extern int regexec (const regex_t *__restrict __preg,
+ const char *__restrict __string, size_t __nmatch,
+ regmatch_t __pmatch[__restrict_arr],
+ int __eflags);
+
+extern size_t regerror (int __errcode, const regex_t *__restrict __preg,
+ char *__restrict __errbuf, size_t __errbuf_size);
+
+extern void regfree (regex_t *__preg);
+
+
+#ifdef __cplusplus
+}
+#endif /* C++ */
+
+#endif /* regex.h */
diff --git a/gnu_regex/.svn/text-base/regex_internal.c.svn-base b/gnu_regex/.svn/text-base/regex_internal.c.svn-base
new file mode 100644
index 0000000..c9da2b9
--- /dev/null
+++ b/gnu_regex/.svn/text-base/regex_internal.c.svn-base
@@ -0,0 +1,1713 @@
+/* Extended regular expression matching and search library.
+ Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+static void re_string_construct_common (const char *str, int len,
+ re_string_t *pstr,
+ RE_TRANSLATE_TYPE trans, int icase,
+ const re_dfa_t *dfa) internal_function;
+static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
+ const re_node_set *nodes,
+ unsigned int hash) internal_function;
+static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
+ const re_node_set *nodes,
+ unsigned int context,
+ unsigned int hash) internal_function;
+
+/* Functions for string operation. */
+
+/* Set up PSTR for the LEN-byte string STR and allocate its initial
+   buffers.  The buffers are sized for INIT_LEN bytes, raised to at least
+   DFA->mb_cur_max and capped at LEN + 1.  Nothing is converted here: it
+   is necessary to call re_string_reconstruct before using the object.
+   Returns REG_NOERROR, or the error from re_string_realloc_buffers.  */
+
+static reg_errcode_t
+internal_function
+re_string_allocate (re_string_t *pstr, const char *str, int len, int init_len,
+                    RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa)
+{
+  int first_alloc;
+  reg_errcode_t err;
+
+  /* The buffers must be able to hold at least one whole character.  */
+  if (init_len < dfa->mb_cur_max)
+    init_len = dfa->mb_cur_max;
+
+  /* Never allocate more than the string plus one byte requires.  */
+  first_alloc = init_len;
+  if (len + 1 < init_len)
+    first_alloc = len + 1;
+
+  re_string_construct_common (str, len, pstr, trans, icase, dfa);
+
+  err = re_string_realloc_buffers (pstr, first_alloc);
+  if (BE (err != REG_NOERROR, 0))
+    return err;
+
+  pstr->word_char = dfa->word_char;
+  pstr->word_ops_used = dfa->word_ops_used;
+
+  /* Without a private copy, mbs simply aliases the caller's string.  */
+  if (!pstr->mbs_allocated)
+    pstr->mbs = (unsigned char *) str;
+
+  /* Only an aliased single-byte string is usable as is.  */
+  if (pstr->mbs_allocated || dfa->mb_cur_max > 1)
+    pstr->valid_len = 0;
+  else
+    pstr->valid_len = len;
+  pstr->valid_raw_len = pstr->valid_len;
+  return REG_NOERROR;
+}
+
+/* This function allocates the buffers of PSTR for the LEN-byte string STR
+ and initializes them immediately: PSTR is zeroed, the common fields are
+ filled in, and the string is converted according to ICASE, TRANS and
+ DFA->mb_cur_max. Returns REG_NOERROR on success, otherwise the error
+ reported by re_string_realloc_buffers or build_wcs_upper_buffer. */
+
+static reg_errcode_t
+internal_function
+re_string_construct (re_string_t *pstr, const char *str, int len,
+ RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa)
+{
+ reg_errcode_t ret;
+ memset (pstr, '\0', sizeof (re_string_t));
+ re_string_construct_common (str, len, pstr, trans, icase, dfa);
+
+ /* An empty string needs no buffers at all. */
+ if (len > 0)
+ {
+ ret = re_string_realloc_buffers (pstr, len + 1);
+ if (BE (ret != REG_NOERROR, 0))
+ return ret;
+ }
+ /* Without a private copy, mbs aliases the caller's string. */
+ pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
+
+ if (icase)
+ {
+#ifdef RE_ENABLE_I18N
+ if (dfa->mb_cur_max > 1)
+ {
+ /* Keep converting until the whole raw string is consumed. If the
+ builder stopped while fewer than mb_cur_max free bytes remain,
+ double the buffers and try again. */
+ while (1)
+ {
+ ret = build_wcs_upper_buffer (pstr);
+ if (BE (ret != REG_NOERROR, 0))
+ return ret;
+ if (pstr->valid_raw_len >= len)
+ break;
+ if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
+ break;
+ ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
+ if (BE (ret != REG_NOERROR, 0))
+ return ret;
+ }
+ }
+ else
+#endif /* RE_ENABLE_I18N */
+ build_upper_buffer (pstr);
+ }
+ else
+ {
+#ifdef RE_ENABLE_I18N
+ if (dfa->mb_cur_max > 1)
+ build_wcs_buffer (pstr);
+ else
+#endif /* RE_ENABLE_I18N */
+ {
+ if (trans != NULL)
+ re_string_translate_buffer (pstr);
+ else
+ {
+ /* Nothing to convert: everything in the buffer is valid. */
+ pstr->valid_len = pstr->bufs_len;
+ pstr->valid_raw_len = pstr->bufs_len;
+ }
+ }
+ }
+
+ return REG_NOERROR;
+}
+
+/* Helper functions for re_string_allocate, and re_string_construct. */
+
+/* Resize the buffers of PSTR to NEW_BUF_LEN elements each: the wide
+   character buffer (and the offsets table, if one exists) when
+   PSTR->mb_cur_max > 1, and the byte buffer when PSTR owns it.  Returns
+   REG_ESPACE as soon as one reallocation fails, leaving bufs_len
+   untouched; otherwise records the new length and returns REG_NOERROR.  */
+static reg_errcode_t
+internal_function
+re_string_realloc_buffers (re_string_t *pstr, int new_buf_len)
+{
+#ifdef RE_ENABLE_I18N
+  if (pstr->mb_cur_max > 1)
+    {
+      wint_t *wide = re_realloc (pstr->wcs, wint_t, new_buf_len);
+      if (BE (wide == NULL, 0))
+        return REG_ESPACE;
+      pstr->wcs = wide;
+
+      /* The offsets table is only resized if it was ever created.  */
+      if (pstr->offsets != NULL)
+        {
+          int *map = re_realloc (pstr->offsets, int, new_buf_len);
+          if (BE (map == NULL, 0))
+            return REG_ESPACE;
+          pstr->offsets = map;
+        }
+    }
+#endif /* RE_ENABLE_I18N */
+
+  if (pstr->mbs_allocated)
+    {
+      unsigned char *bytes = re_realloc (pstr->mbs, unsigned char,
+                                         new_buf_len);
+      if (BE (bytes == NULL, 0))
+        return REG_ESPACE;
+      pstr->mbs = bytes;
+    }
+
+  pstr->bufs_len = new_buf_len;
+  return REG_NOERROR;
+}
+
+
+/* Fill in the fields of PSTR that depend only on the arguments: the raw
+   string STR and its length LEN, the translation table TRANS, the
+   case-folding flag ICASE, and the locale properties copied from DFA.
+   No memory is allocated here.  */
+static void
+internal_function
+re_string_construct_common (const char *str, int len, re_string_t *pstr,
+                            RE_TRANSLATE_TYPE trans, int icase,
+                            const re_dfa_t *dfa)
+{
+  /* The raw input and its extent; matching may go up to the very end.  */
+  pstr->raw_mbs = (const unsigned char *) str;
+  pstr->raw_len = len;
+  pstr->len = len;
+  pstr->stop = len;
+  pstr->raw_stop = len;
+
+  /* A private byte buffer is needed whenever the input gets rewritten.  */
+  pstr->trans = trans;
+  pstr->icase = icase ? 1 : 0;
+  pstr->mbs_allocated = (trans != NULL || icase);
+
+  /* Locale properties cached from the compiled pattern.  */
+  pstr->mb_cur_max = dfa->mb_cur_max;
+  pstr->is_utf8 = dfa->is_utf8;
+  pstr->map_notascii = dfa->map_notascii;
+}
+
+#ifdef RE_ENABLE_I18N
+
+/* Build wide character buffer PSTR->WCS.
+ If the byte sequence of the string are:
+ <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
+ Then wide character buffer will be:
+ <wc1> , WEOF , <wc2> , WEOF , <wc3>
+ We use WEOF for padding, they indicate that the position isn't
+ a first byte of a multibyte character.
+
+ Note that this function assumes PSTR->VALID_LEN elements are already
+ built and starts from PSTR->VALID_LEN. On return VALID_LEN and
+ VALID_RAW_LEN are both set to the index where building stopped. */
+
+static void
+internal_function
+build_wcs_buffer (re_string_t *pstr)
+{
+#ifdef _LIBC
+ unsigned char buf[MB_LEN_MAX];
+ assert (MB_LEN_MAX >= pstr->mb_cur_max);
+#else
+ unsigned char buf[64];
+#endif
+ mbstate_t prev_st;
+ int byte_idx, end_idx, remain_len;
+ size_t mbclen;
+
+ /* Build the buffers from pstr->valid_len to either pstr->len or
+ pstr->bufs_len. */
+ end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
+ for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
+ {
+ wchar_t wc;
+ const char *p;
+
+ remain_len = end_idx - byte_idx;
+ /* Remember the shift state so a failed conversion can be undone. */
+ prev_st = pstr->cur_state;
+ /* Apply the translation if we need. */
+ if (BE (pstr->trans != NULL, 0))
+ {
+ int i, ch;
+
+ /* Translate up to one character's worth of bytes into both BUF
+ (used for the conversion) and the mbs buffer. */
+ for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
+ {
+ ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
+ buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];
+ }
+ p = (const char *) buf;
+ }
+ else
+ p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
+ mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
+ if (BE (mbclen == (size_t) -2, 0))
+ {
+ /* The buffer doesn't have enough space, finish to build. */
+ /* NOTE(review): this also triggers for a truncated character at
+ the real end of the input, leaving valid_len short of len;
+ confirm that callers cope with that. */
+ pstr->cur_state = prev_st;
+ break;
+ }
+ else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
+ {
+ /* We treat these cases as a singlebyte character. */
+ mbclen = 1;
+ wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
+ if (BE (pstr->trans != NULL, 0))
+ wc = pstr->trans[wc];
+ pstr->cur_state = prev_st;
+ }
+
+ /* Write wide character and padding. */
+ pstr->wcs[byte_idx++] = wc;
+ /* Write paddings. REMAIN_LEN is reused here as the index one past
+ the last byte of this character. */
+ for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
+ pstr->wcs[byte_idx++] = WEOF;
+ }
+ pstr->valid_len = byte_idx;
+ pstr->valid_raw_len = byte_idx;
+}
+
+/* Build wide character buffer PSTR->WCS like build_wcs_buffer,
+ but for REG_ICASE: lower case characters are stored upper-cased in
+ both PSTR->MBS and PSTR->WCS. When the upper case form of a character
+ has a different byte length than the original, PSTR->OFFSETS is
+ created to map converted positions back to raw positions and
+ PSTR->LEN / PSTR->STOP are adjusted by the difference.
+ Returns REG_ESPACE if the offsets table cannot be allocated,
+ REG_NOERROR otherwise. */
+
+static reg_errcode_t
+internal_function
+build_wcs_upper_buffer (re_string_t *pstr)
+{
+ mbstate_t prev_st;
+ int src_idx, byte_idx, end_idx, remain_len;
+ size_t mbclen;
+#ifdef _LIBC
+ char buf[MB_LEN_MAX];
+ assert (MB_LEN_MAX >= pstr->mb_cur_max);
+#else
+ char buf[64];
+#endif
+
+ byte_idx = pstr->valid_len;
+ end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
+
+ /* The following optimization assumes that ASCII characters can be
+ mapped to wide characters with a simple cast. */
+ if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
+ {
+ /* Fast path: raw and converted indices coincide, so BYTE_IDX serves
+ for both. */
+ while (byte_idx < end_idx)
+ {
+ wchar_t wc;
+
+ if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
+ && mbsinit (&pstr->cur_state))
+ {
+ /* In case of a singlebyte character. */
+ pstr->mbs[byte_idx]
+ = toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
+ /* The next step uses the assumption that wchar_t is encoded
+ ASCII-safe: all ASCII values can be converted like this. */
+ pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
+ ++byte_idx;
+ continue;
+ }
+
+ remain_len = end_idx - byte_idx;
+ prev_st = pstr->cur_state;
+ mbclen = __mbrtowc (&wc,
+ ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
+ + byte_idx), remain_len, &pstr->cur_state);
+ /* True unless mbclen is 0, (size_t) -1 or (size_t) -2, i.e. a
+ character was really converted. */
+ if (BE (mbclen + 2 > 2, 1))
+ {
+ wchar_t wcu = wc;
+ if (iswlower (wc))
+ {
+ size_t mbcdlen;
+
+ wcu = towupper (wc);
+ mbcdlen = wcrtomb (buf, wcu, &prev_st);
+ if (BE (mbclen == mbcdlen, 1))
+ memcpy (pstr->mbs + byte_idx, buf, mbclen);
+ else
+ {
+ /* The upper case form has a different byte length;
+ continue in the slower loop below, which maintains
+ the offsets table. */
+ src_idx = byte_idx;
+ goto offsets_needed;
+ }
+ }
+ else
+ memcpy (pstr->mbs + byte_idx,
+ pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
+ pstr->wcs[byte_idx++] = wcu;
+ /* Write paddings. */
+ for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
+ pstr->wcs[byte_idx++] = WEOF;
+ }
+ else if (mbclen == (size_t) -1 || mbclen == 0)
+ {
+ /* It is an invalid character or '\0'. Just use the byte. */
+ int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
+ pstr->mbs[byte_idx] = ch;
+ /* And also cast it to wide char. */
+ pstr->wcs[byte_idx++] = (wchar_t) ch;
+ if (BE (mbclen == (size_t) -1, 0))
+ pstr->cur_state = prev_st;
+ }
+ else
+ {
+ /* The buffer doesn't have enough space, finish to build. */
+ pstr->cur_state = prev_st;
+ break;
+ }
+ }
+ pstr->valid_len = byte_idx;
+ pstr->valid_raw_len = byte_idx;
+ return REG_NOERROR;
+ }
+ else
+ /* General path: SRC_IDX walks the raw string while BYTE_IDX walks the
+ converted buffers; the two may drift apart. */
+ for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
+ {
+ wchar_t wc;
+ const char *p;
+ offsets_needed:
+ remain_len = end_idx - byte_idx;
+ prev_st = pstr->cur_state;
+ if (BE (pstr->trans != NULL, 0))
+ {
+ int i, ch;
+
+ /* Translate up to one character's worth of raw bytes into BUF. */
+ for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
+ {
+ ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
+ buf[i] = pstr->trans[ch];
+ }
+ p = (const char *) buf;
+ }
+ else
+ p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
+ mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
+ /* True unless mbclen is 0, (size_t) -1 or (size_t) -2. */
+ if (BE (mbclen + 2 > 2, 1))
+ {
+ wchar_t wcu = wc;
+ if (iswlower (wc))
+ {
+ size_t mbcdlen;
+
+ wcu = towupper (wc);
+ mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
+ if (BE (mbclen == mbcdlen, 1))
+ memcpy (pstr->mbs + byte_idx, buf, mbclen);
+ else if (mbcdlen != (size_t) -1)
+ {
+ size_t i;
+
+ /* Stop if the converted character does not fit into the
+ buffers. */
+ if (byte_idx + mbcdlen > pstr->bufs_len)
+ {
+ pstr->cur_state = prev_st;
+ break;
+ }
+
+ if (pstr->offsets == NULL)
+ {
+ pstr->offsets = re_malloc (int, pstr->bufs_len);
+
+ if (pstr->offsets == NULL)
+ return REG_ESPACE;
+ }
+ if (!pstr->offsets_needed)
+ {
+ /* Up to here converted and raw positions were equal. */
+ for (i = 0; i < (size_t) byte_idx; ++i)
+ pstr->offsets[i] = i;
+ pstr->offsets_needed = 1;
+ }
+
+ memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
+ pstr->wcs[byte_idx] = wcu;
+ pstr->offsets[byte_idx] = src_idx;
+ /* Map the trailing converted bytes onto the raw bytes,
+ clamping to the last raw byte of the character. */
+ for (i = 1; i < mbcdlen; ++i)
+ {
+ pstr->offsets[byte_idx + i]
+ = src_idx + (i < mbclen ? i : mbclen - 1);
+ pstr->wcs[byte_idx + i] = WEOF;
+ }
+ /* The converted string changes length by the difference. */
+ pstr->len += mbcdlen - mbclen;
+ if (pstr->raw_stop > src_idx)
+ pstr->stop += mbcdlen - mbclen;
+ end_idx = (pstr->bufs_len > pstr->len)
+ ? pstr->len : pstr->bufs_len;
+ byte_idx += mbcdlen;
+ src_idx += mbclen;
+ continue;
+ }
+ else
+ /* wcrtomb failed: keep the original bytes. */
+ memcpy (pstr->mbs + byte_idx, p, mbclen);
+ }
+ else
+ memcpy (pstr->mbs + byte_idx, p, mbclen);
+
+ if (BE (pstr->offsets_needed != 0, 0))
+ {
+ size_t i;
+ for (i = 0; i < mbclen; ++i)
+ pstr->offsets[byte_idx + i] = src_idx + i;
+ }
+ src_idx += mbclen;
+
+ pstr->wcs[byte_idx++] = wcu;
+ /* Write paddings. */
+ for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
+ pstr->wcs[byte_idx++] = WEOF;
+ }
+ else if (mbclen == (size_t) -1 || mbclen == 0)
+ {
+ /* It is an invalid character or '\0'. Just use the byte. */
+ int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
+
+ if (BE (pstr->trans != NULL, 0))
+ ch = pstr->trans [ch];
+ pstr->mbs[byte_idx] = ch;
+
+ if (BE (pstr->offsets_needed != 0, 0))
+ pstr->offsets[byte_idx] = src_idx;
+ ++src_idx;
+
+ /* And also cast it to wide char. */
+ pstr->wcs[byte_idx++] = (wchar_t) ch;
+ if (BE (mbclen == (size_t) -1, 0))
+ pstr->cur_state = prev_st;
+ }
+ else
+ {
+ /* The buffer doesn't have enough space, finish to build. */
+ pstr->cur_state = prev_st;
+ break;
+ }
+ }
+ pstr->valid_len = byte_idx;
+ pstr->valid_raw_len = src_idx;
+ return REG_NOERROR;
+}
+
+/* Skip characters of the raw input of PSTR, starting right after the
+   part that is already validated, until the raw index is no longer below
+   NEW_RAW_IDX.  Store the last character stepped over in *LAST_WC (WEOF
+   if nothing was skipped) and return the raw index that was reached.  */
+
+static int
+internal_function
+re_string_skip_chars (re_string_t *pstr, int new_raw_idx, wint_t *last_wc)
+{
+  mbstate_t prev_st;
+  int rawbuf_idx;
+  size_t mbclen;
+  /* Kept as wint_t: WEOF is a wint_t value and need not be representable
+     in wchar_t.  */
+  wint_t wc = WEOF;
+
+  /* Skip the characters which are not necessary to check.  */
+  for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
+       rawbuf_idx < new_raw_idx;)
+    {
+      wchar_t wc2;
+      /* RAWBUF_IDX is an absolute index into raw_mbs, so the bytes left
+         must be measured against raw_len.  pstr->len is relative to
+         raw_mbs_idx and would understate (or even make negative) the
+         remaining length once raw_mbs_idx is non-zero.  */
+      int remain_len = pstr->raw_len - rawbuf_idx;
+
+      prev_st = pstr->cur_state;
+      mbclen = __mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx,
+                          remain_len, &pstr->cur_state);
+      if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
+        {
+          /* We treat these cases as a single byte character.  */
+          if (mbclen == 0 || remain_len == 0)
+            wc = L'\0';
+          else
+            wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx);
+          mbclen = 1;
+          pstr->cur_state = prev_st;
+        }
+      else
+        wc = wc2;
+      /* Then proceed the next character.  */
+      rawbuf_idx += mbclen;
+    }
+  *last_wc = wc;
+  return rawbuf_idx;
+}
+#endif /* RE_ENABLE_I18N */
+
+/* Fill PSTR->MBS for the single-byte REG_ICASE case: every byte from
+   PSTR->VALID_LEN up to the smaller of PSTR->LEN and PSTR->BUFS_LEN is
+   translated (when a translation table is set) and then upper-cased.
+   VALID_LEN and VALID_RAW_LEN are advanced to the stopping index.  */
+
+static void
+internal_function
+build_upper_buffer (re_string_t *pstr)
+{
+  int limit = pstr->len;
+  int pos = pstr->valid_len;
+
+  if (pstr->bufs_len <= pstr->len)
+    limit = pstr->bufs_len;
+
+  while (pos < limit)
+    {
+      int ch = pstr->raw_mbs[pstr->raw_mbs_idx + pos];
+
+      if (BE (pstr->trans != NULL, 0))
+        ch = pstr->trans[ch];
+      pstr->mbs[pos] = islower (ch) ? toupper (ch) : ch;
+      ++pos;
+    }
+
+  pstr->valid_raw_len = pos;
+  pstr->valid_len = pos;
+}
+
+/* Copy the raw bytes of PSTR through PSTR->TRANS into PSTR->MBS, from
+   PSTR->VALID_LEN up to the smaller of PSTR->LEN and PSTR->BUFS_LEN, and
+   advance VALID_LEN / VALID_RAW_LEN to the stopping index.  */
+
+static void
+internal_function
+re_string_translate_buffer (re_string_t *pstr)
+{
+  int limit = pstr->len;
+  int pos = pstr->valid_len;
+
+  if (pstr->bufs_len <= pstr->len)
+    limit = pstr->bufs_len;
+
+  while (pos < limit)
+    {
+      int raw = pstr->raw_mbs[pstr->raw_mbs_idx + pos];
+
+      pstr->mbs[pos] = pstr->trans[raw];
+      ++pos;
+    }
+
+  pstr->valid_raw_len = pos;
+  pstr->valid_len = pos;
+}
+
+/* This function re-constructs the buffers of PSTR so that they start at
+ raw index IDX.
+ Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
+ convert to upper case in case of REG_ICASE, apply translation.
+ Already converted data that is still needed is moved to the front of
+ the buffers; otherwise the characters before IDX are skipped and only
+ the context of the character preceding IDX is kept in tip_context.
+ EFLAGS are the regexec flags (REG_NOTBOL is consulted here).
+ Returns REG_NOERROR, or the error from build_wcs_upper_buffer. */
+
+static reg_errcode_t
+internal_function
+re_string_reconstruct (re_string_t *pstr, int idx, int eflags)
+{
+ int offset = idx - pstr->raw_mbs_idx;
+ if (BE (offset < 0, 0))
+ {
+ /* Reset buffer. */
+#ifdef RE_ENABLE_I18N
+ if (pstr->mb_cur_max > 1)
+ memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
+#endif /* RE_ENABLE_I18N */
+ pstr->len = pstr->raw_len;
+ pstr->stop = pstr->raw_stop;
+ pstr->valid_len = 0;
+ pstr->raw_mbs_idx = 0;
+ pstr->valid_raw_len = 0;
+ pstr->offsets_needed = 0;
+ pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
+ : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
+ if (!pstr->mbs_allocated)
+ pstr->mbs = (unsigned char *) pstr->raw_mbs;
+ offset = idx;
+ }
+
+ if (BE (offset != 0, 1))
+ {
+ /* Should the already checked characters be kept? */
+ if (BE (offset < pstr->valid_raw_len, 1))
+ {
+ /* Yes, move them to the front of the buffer. */
+#ifdef RE_ENABLE_I18N
+ if (BE (pstr->offsets_needed, 0))
+ {
+ /* Binary search the offsets table for the converted position
+ that corresponds to raw position OFFSET. */
+ int low = 0, high = pstr->valid_len, mid;
+ do
+ {
+ mid = (high + low) / 2;
+ if (pstr->offsets[mid] > offset)
+ high = mid;
+ else if (pstr->offsets[mid] < offset)
+ low = mid + 1;
+ else
+ break;
+ }
+ while (low < high);
+ if (pstr->offsets[mid] < offset)
+ ++mid;
+ pstr->tip_context = re_string_context_at (pstr, mid - 1,
+ eflags);
+ /* This can be quite complicated, so handle specially
+ only the common and easy case where the character with
+ different length representation of lower and upper
+ case is present at or after offset. */
+ if (pstr->valid_len > offset
+ && mid == offset && pstr->offsets[mid] == offset)
+ {
+ memmove (pstr->wcs, pstr->wcs + offset,
+ (pstr->valid_len - offset) * sizeof (wint_t));
+ memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset);
+ pstr->valid_len -= offset;
+ pstr->valid_raw_len -= offset;
+ for (low = 0; low < pstr->valid_len; low++)
+ pstr->offsets[low] = pstr->offsets[low + offset] - offset;
+ }
+ else
+ {
+ /* Otherwise, just find out how long the partial multibyte
+ character at offset is and fill it with WEOF/255. */
+ pstr->len = pstr->raw_len - idx + offset;
+ pstr->stop = pstr->raw_stop - idx + offset;
+ pstr->offsets_needed = 0;
+ while (mid > 0 && pstr->offsets[mid - 1] == offset)
+ --mid;
+ while (mid < pstr->valid_len)
+ if (pstr->wcs[mid] != WEOF)
+ break;
+ else
+ ++mid;
+ if (mid == pstr->valid_len)
+ pstr->valid_len = 0;
+ else
+ {
+ pstr->valid_len = pstr->offsets[mid] - offset;
+ if (pstr->valid_len)
+ {
+ for (low = 0; low < pstr->valid_len; ++low)
+ pstr->wcs[low] = WEOF;
+ memset (pstr->mbs, 255, pstr->valid_len);
+ }
+ }
+ pstr->valid_raw_len = pstr->valid_len;
+ }
+ }
+ else
+#endif
+ {
+ pstr->tip_context = re_string_context_at (pstr, offset - 1,
+ eflags);
+#ifdef RE_ENABLE_I18N
+ if (pstr->mb_cur_max > 1)
+ memmove (pstr->wcs, pstr->wcs + offset,
+ (pstr->valid_len - offset) * sizeof (wint_t));
+#endif /* RE_ENABLE_I18N */
+ if (BE (pstr->mbs_allocated, 0))
+ memmove (pstr->mbs, pstr->mbs + offset,
+ pstr->valid_len - offset);
+ pstr->valid_len -= offset;
+ pstr->valid_raw_len -= offset;
+#if DEBUG
+ assert (pstr->valid_len > 0);
+#endif
+ }
+ }
+ else
+ {
+ /* No, skip all characters until IDX. */
+ int prev_valid_len = pstr->valid_len;
+
+#ifdef RE_ENABLE_I18N
+ if (BE (pstr->offsets_needed, 0))
+ {
+ pstr->len = pstr->raw_len - idx + offset;
+ pstr->stop = pstr->raw_stop - idx + offset;
+ pstr->offsets_needed = 0;
+ }
+#endif
+ pstr->valid_len = 0;
+#ifdef RE_ENABLE_I18N
+ if (pstr->mb_cur_max > 1)
+ {
+ int wcs_idx;
+ wint_t wc = WEOF;
+
+ if (pstr->is_utf8)
+ {
+ const unsigned char *raw, *p, *q, *end;
+
+ /* Special case UTF-8. Multi-byte chars start with any
+ byte other than 0x80 - 0xbf. */
+ raw = pstr->raw_mbs + pstr->raw_mbs_idx;
+ end = raw + (offset - pstr->mb_cur_max);
+ if (end < pstr->raw_mbs)
+ end = pstr->raw_mbs;
+ p = raw + offset - 1;
+#ifdef _LIBC
+ /* We know the wchar_t encoding is UCS4, so for the simple
+ case, ASCII characters, skip the conversion step. */
+ if (isascii (*p) && BE (pstr->trans == NULL, 1))
+ {
+ memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
+ /* pstr->valid_len = 0; */
+ wc = (wchar_t) *p;
+ }
+ else
+#endif
+ /* Scan backwards for the lead byte of the character that
+ precedes (or straddles) the new start position. */
+ for (; p >= end; --p)
+ if ((*p & 0xc0) != 0x80)
+ {
+ mbstate_t cur_state;
+ wchar_t wc2;
+ int mlen = raw + pstr->len - p;
+ unsigned char buf[6];
+ size_t mbclen;
+
+ q = p;
+ if (BE (pstr->trans != NULL, 0))
+ {
+ int i = mlen < 6 ? mlen : 6;
+ while (--i >= 0)
+ buf[i] = pstr->trans[p[i]];
+ q = buf;
+ }
+ /* XXX Don't use mbrtowc, we know which conversion
+ to use (UTF-8 -> UCS4). */
+ memset (&cur_state, 0, sizeof (cur_state));
+ /* Convert from Q, not P: Q points at the translated
+ copy in BUF when a translation table is in use, and
+ at the raw bytes otherwise. */
+ mbclen = __mbrtowc (&wc2, (const char *) q, mlen,
+ &cur_state);
+ if (raw + offset - p <= mbclen
+ && mbclen < (size_t) -2)
+ {
+ memset (&pstr->cur_state, '\0',
+ sizeof (mbstate_t));
+ /* Bytes of this character that lie at or after the
+ new start are marked as already consumed. */
+ pstr->valid_len = mbclen - (raw + offset - p);
+ wc = wc2;
+ }
+ break;
+ }
+ }
+
+ /* Fall back to converting forward from the validated part. */
+ if (wc == WEOF)
+ pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
+ if (wc == WEOF)
+ pstr->tip_context
+ = re_string_context_at (pstr, prev_valid_len - 1, eflags);
+ else
+ pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
+ && IS_WIDE_WORD_CHAR (wc))
+ ? CONTEXT_WORD
+ : ((IS_WIDE_NEWLINE (wc)
+ && pstr->newline_anchor)
+ ? CONTEXT_NEWLINE : 0));
+ if (BE (pstr->valid_len, 0))
+ {
+ /* Pad the tail of the straddling character with WEOF/255. */
+ for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
+ pstr->wcs[wcs_idx] = WEOF;
+ if (pstr->mbs_allocated)
+ memset (pstr->mbs, 255, pstr->valid_len);
+ }
+ pstr->valid_raw_len = pstr->valid_len;
+ }
+ else
+#endif /* RE_ENABLE_I18N */
+ {
+ /* Single-byte case: the context comes from the one byte just
+ before the new start. */
+ int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
+ pstr->valid_raw_len = 0;
+ if (pstr->trans)
+ c = pstr->trans[c];
+ pstr->tip_context = (bitset_contain (pstr->word_char, c)
+ ? CONTEXT_WORD
+ : ((IS_NEWLINE (c) && pstr->newline_anchor)
+ ? CONTEXT_NEWLINE : 0));
+ }
+ }
+ /* An aliased buffer is shifted simply by moving the pointer. */
+ if (!BE (pstr->mbs_allocated, 0))
+ pstr->mbs += offset;
+ }
+ pstr->raw_mbs_idx = idx;
+ pstr->len -= offset;
+ pstr->stop -= offset;
+
+ /* Then build the buffers. */
+#ifdef RE_ENABLE_I18N
+ if (pstr->mb_cur_max > 1)
+ {
+ if (pstr->icase)
+ {
+ reg_errcode_t ret = build_wcs_upper_buffer (pstr);
+ if (BE (ret != REG_NOERROR, 0))
+ return ret;
+ }
+ else
+ build_wcs_buffer (pstr);
+ }
+ else
+#endif /* RE_ENABLE_I18N */
+ if (BE (pstr->mbs_allocated, 0))
+ {
+ if (pstr->icase)
+ build_upper_buffer (pstr);
+ else if (pstr->trans != NULL)
+ re_string_translate_buffer (pstr);
+ }
+ else
+ pstr->valid_len = pstr->len;
+
+ pstr->cur_idx = 0;
+ return REG_NOERROR;
+}
+
+/* Return the byte IDX positions past the current index of PSTR without
+   advancing.  When PSTR has a private (translated / case-folded) buffer,
+   the byte is taken from the raw input instead, except for bytes inside
+   a multibyte character and non-ASCII bytes reached through the offsets
+   table, which are read from the converted buffer.  */
+static unsigned char
+internal_function __attribute ((pure))
+re_string_peek_byte_case (const re_string_t *pstr, int idx)
+{
+  int pos = pstr->cur_idx + idx;
+  int raw_byte;
+
+  /* Handle the common (easiest) cases first.  */
+  if (BE (!pstr->mbs_allocated, 1))
+    return re_string_peek_byte (pstr, idx);
+
+#ifdef RE_ENABLE_I18N
+  if (pstr->mb_cur_max > 1
+      && ! re_string_is_single_byte_char (pstr, pos))
+    return re_string_peek_byte (pstr, idx);
+
+  /* Map the converted position back to the raw one if they differ.  */
+  if (pstr->offsets_needed)
+    pos = pstr->offsets[pos];
+#endif
+
+  raw_byte = pstr->raw_mbs[pstr->raw_mbs_idx + pos];
+
+#ifdef RE_ENABLE_I18N
+  /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
+     this function returns CAPITAL LETTER I instead of first byte of
+     DOTLESS SMALL LETTER I.  The latter would confuse the parser,
+     since peek_byte_case doesn't advance cur_idx in any way.  */
+  if (pstr->offsets_needed && !isascii (raw_byte))
+    return re_string_peek_byte (pstr, idx);
+#endif
+
+  return raw_byte;
+}
+
+/* Return the byte at the current index of PSTR and advance past it.
+   When PSTR has a private (translated / case-folded) buffer the byte is
+   taken from the raw input, with the exceptions described below for
+   strings that need an offsets table.
+
+   This function is deliberately NOT declared pure: it modifies
+   PSTR->cur_idx, so the compiler must not elide or merge calls.  */
+static unsigned char
+internal_function
+re_string_fetch_byte_case (re_string_t *pstr)
+{
+  if (BE (!pstr->mbs_allocated, 1))
+    return re_string_fetch_byte (pstr);
+
+#ifdef RE_ENABLE_I18N
+  if (pstr->offsets_needed)
+    {
+      int off, ch;
+
+      /* For tr_TR.UTF-8 [[:islower:]] there is
+         [[: CAPITAL LETTER I WITH DOT lower:]] in mbs.  Skip
+         in that case the whole multi-byte character and return
+         the original letter.  On the other side, with
+         [[: DOTLESS SMALL LETTER I return [[:I, as doing
+         anything else would complicate things too much.  */
+
+      if (!re_string_first_byte (pstr, pstr->cur_idx))
+        return re_string_fetch_byte (pstr);
+
+      off = pstr->offsets[pstr->cur_idx];
+      ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
+
+      if (! isascii (ch))
+        return re_string_fetch_byte (pstr);
+
+      /* The raw character is ASCII: consume the whole converted
+         character and hand back the original byte.  */
+      re_string_skip_bytes (pstr,
+                            re_string_char_size_at (pstr, pstr->cur_idx));
+      return ch;
+    }
+#endif
+
+  return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
+}
+
+/* Free the buffers owned by PSTR: the byte buffer if PSTR allocated it,
+   and (with RE_ENABLE_I18N) the wide character and offsets buffers.
+   PSTR itself is not freed.  */
+static void
+internal_function
+re_string_destruct (re_string_t *pstr)
+{
+  /* mbs may merely alias the caller's string; free it only if owned.  */
+  if (pstr->mbs_allocated)
+    re_free (pstr->mbs);
+#ifdef RE_ENABLE_I18N
+  re_free (pstr->offsets);
+  re_free (pstr->wcs);
+#endif /* RE_ENABLE_I18N */
+}
+
+/* Return the context flags for position IDX of INPUT: the stored
+   tip_context for a negative IDX, an end-of-buffer context (honouring
+   REG_NOTEOL in EFLAGS) when IDX equals INPUT->len, and otherwise
+   CONTEXT_WORD, CONTEXT_NEWLINE or 0 depending on the character there.  */
+
+static unsigned int
+internal_function
+re_string_context_at (const re_string_t *input, int idx, int eflags)
+{
+  if (BE (idx < 0, 0))
+    /* In this case, we use the value stored in input->tip_context,
+       since we can't know the character in input->mbs[-1] here.  */
+    return input->tip_context;
+
+  if (BE (idx == input->len, 0))
+    {
+      if (eflags & REG_NOTEOL)
+        return CONTEXT_ENDBUF;
+      return CONTEXT_NEWLINE | CONTEXT_ENDBUF;
+    }
+
+#ifdef RE_ENABLE_I18N
+  if (input->mb_cur_max > 1)
+    {
+      wint_t wch;
+      int pos;
+
+      /* Step back over padding to the first byte of the character.  */
+      for (pos = idx; input->wcs[pos] == WEOF; )
+        {
+#ifdef DEBUG
+          /* It must not happen.  */
+          assert (pos >= 0);
+#endif
+          if (--pos < 0)
+            return input->tip_context;
+        }
+
+      wch = input->wcs[pos];
+      if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wch))
+        return CONTEXT_WORD;
+      return (IS_WIDE_NEWLINE (wch) && input->newline_anchor
+              ? CONTEXT_NEWLINE : 0);
+    }
+#endif
+
+  {
+    int byte = re_string_byte_at (input, idx);
+
+    if (bitset_contain (input->word_char, byte))
+      return CONTEXT_WORD;
+    return IS_NEWLINE (byte) && input->newline_anchor ? CONTEXT_NEWLINE : 0;
+  }
+}
+
+/* Functions for set operation. */
+
+/* Initialize SET as an empty set with room for SIZE elements.
+   Returns REG_NOERROR on success.  If the allocation fails, returns
+   REG_ESPACE and leaves SET empty with no capacity (alloc == 0,
+   elems == NULL), the same state re_node_set_init_1 leaves on failure,
+   so SET never claims capacity it does not have.  */
+static reg_errcode_t
+internal_function
+re_node_set_alloc (re_node_set *set, int size)
+{
+  set->nelem = 0;
+  set->elems = re_malloc (int, size);
+  if (BE (set->elems == NULL, 0))
+    {
+      set->alloc = 0;
+      return REG_ESPACE;
+    }
+  set->alloc = size;
+  return REG_NOERROR;
+}
+
+/* Initialize SET as the one-element set { ELEM }.
+   Returns REG_NOERROR on success.  On allocation failure returns
+   REG_ESPACE and leaves SET empty (alloc == nelem == 0, elems == NULL).  */
+static reg_errcode_t
+internal_function
+re_node_set_init_1 (re_node_set *set, int elem)
+{
+  int *slot = re_malloc (int, 1);
+
+  set->elems = slot;
+  if (BE (slot == NULL, 0))
+    {
+      set->nelem = 0;
+      set->alloc = 0;
+      return REG_ESPACE;
+    }
+
+  slot[0] = elem;
+  set->nelem = 1;
+  set->alloc = 1;
+  return REG_NOERROR;
+}
+
+/* Initialize SET as the set { ELEM1, ELEM2 }, stored in ascending order;
+   if the two are equal the set holds a single element.
+   Returns REG_NOERROR on success.  On allocation failure returns
+   REG_ESPACE and leaves SET empty (alloc == nelem == 0, elems == NULL),
+   consistent with re_node_set_init_1.  */
+static reg_errcode_t
+internal_function
+re_node_set_init_2 (re_node_set *set, int elem1, int elem2)
+{
+  set->elems = re_malloc (int, 2);
+  if (BE (set->elems == NULL, 0))
+    {
+      set->alloc = set->nelem = 0;
+      return REG_ESPACE;
+    }
+  set->alloc = 2;
+  if (elem1 == elem2)
+    {
+      set->nelem = 1;
+      set->elems[0] = elem1;
+    }
+  else
+    {
+      set->nelem = 2;
+      if (elem1 < elem2)
+        {
+          set->elems[0] = elem1;
+          set->elems[1] = elem2;
+        }
+      else
+        {
+          set->elems[0] = elem2;
+          set->elems[1] = elem1;
+        }
+    }
+  return REG_NOERROR;
+}
+
+/* Initialize DEST as a copy of SRC.  An empty SRC yields an empty DEST
+   without allocating.  Returns REG_NOERROR on success; on allocation
+   failure returns REG_ESPACE with DEST left empty.  */
+static reg_errcode_t
+internal_function
+re_node_set_init_copy (re_node_set *dest, const re_node_set *src)
+{
+  if (src->nelem <= 0)
+    {
+      re_node_set_init_empty (dest);
+      return REG_NOERROR;
+    }
+
+  dest->elems = re_malloc (int, src->nelem);
+  if (BE (dest->elems == NULL, 0))
+    {
+      dest->alloc = dest->nelem = 0;
+      return REG_ESPACE;
+    }
+
+  /* The copy is allocated with exactly as many slots as elements.  */
+  dest->nelem = src->nelem;
+  dest->alloc = src->nelem;
+  memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
+  return REG_NOERROR;
+}
+
+/* Calculate the intersection of the sets SRC1 and SRC2, and merge it
+ into DEST, keeping DEST sorted and free of duplicates.
+ Returns REG_NOERROR on success, or REG_ESPACE if DEST cannot be grown.
+ Note: We assume dest->elems is NULL, when dest->alloc is 0. */
+
+static reg_errcode_t
+internal_function
+re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1,
+ const re_node_set *src2)
+{
+ int i1, i2, is, id, delta, sbase;
+ /* An empty operand means an empty intersection: nothing to add. */
+ if (src1->nelem == 0 || src2->nelem == 0)
+ return REG_NOERROR;
+
+ /* We need dest->nelem + 2 * elems_in_intersection; this is a
+ conservative estimate. */
+ if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
+ {
+ int new_alloc = src1->nelem + src2->nelem + dest->alloc;
+ int *new_elems = re_realloc (dest->elems, int, new_alloc);
+ if (BE (new_elems == NULL, 0))
+ return REG_ESPACE;
+ dest->elems = new_elems;
+ dest->alloc = new_alloc;
+ }
+
+ /* Find the items in the intersection of SRC1 and SRC2, and copy
+ into the top of DEST those that are not already in DEST itself. */
+ /* SBASE starts one past the top of the scratch area and moves down as
+ items are stored; all three sets are walked from their largest
+ element downwards. */
+ sbase = dest->nelem + src1->nelem + src2->nelem;
+ i1 = src1->nelem - 1;
+ i2 = src2->nelem - 1;
+ id = dest->nelem - 1;
+ for (;;)
+ {
+ if (src1->elems[i1] == src2->elems[i2])
+ {
+ /* Try to find the item in DEST. Maybe we could binary search? */
+ while (id >= 0 && dest->elems[id] > src1->elems[i1])
+ --id;
+
+ if (id < 0 || dest->elems[id] != src1->elems[i1])
+ dest->elems[--sbase] = src1->elems[i1];
+
+ if (--i1 < 0 || --i2 < 0)
+ break;
+ }
+
+ /* Lower the highest of the two items. */
+ else if (src1->elems[i1] < src2->elems[i2])
+ {
+ if (--i2 < 0)
+ break;
+ }
+ else
+ {
+ if (--i1 < 0)
+ break;
+ }
+ }
+
+ /* ID: last original DEST element; IS: last scratch element;
+ DELTA: number of new elements stored in the scratch area. */
+ id = dest->nelem - 1;
+ is = dest->nelem + src1->nelem + src2->nelem - 1;
+ delta = is - sbase + 1;
+
+ /* Now copy. When DELTA becomes zero, the remaining
+ DEST elements are already in place; this is more or
+ less the same loop that is in re_node_set_merge. */
+ dest->nelem += delta;
+ if (delta > 0 && id >= 0)
+ for (;;)
+ {
+ if (dest->elems[is] > dest->elems[id])
+ {
+ /* Copy from the top. */
+ dest->elems[id + delta--] = dest->elems[is--];
+ if (delta == 0)
+ break;
+ }
+ else
+ {
+ /* Slide from the bottom. */
+ dest->elems[id + delta] = dest->elems[id];
+ if (--id < 0)
+ break;
+ }
+ }
+
+ /* Copy remaining SRC elements. */
+ /* DELTA is zero here unless the original DEST elements ran out first
+ (or DEST was empty), in which case the DELTA smallest new items go
+ to the front. */
+ memcpy (dest->elems, dest->elems + sbase, delta * sizeof (int));
+
+ return REG_NOERROR;
+}
+
+/* Initialize DEST with the union of the sorted sets SRC1 and SRC2.
+   Either source may be NULL or empty, in which case DEST becomes a copy
+   of the other one (or an empty set when both are).
+   Returns REG_NOERROR on success, REG_ESPACE on allocation failure.  */
+
+static reg_errcode_t
+internal_function
+re_node_set_init_union (re_node_set *dest, const re_node_set *src1,
+                        const re_node_set *src2)
+{
+  int i1, i2, id;
+  int have1 = (src1 != NULL && src1->nelem > 0);
+  int have2 = (src2 != NULL && src2->nelem > 0);
+
+  /* With at most one non-empty operand the union is a plain copy.  */
+  if (!have1 || !have2)
+    {
+      if (have1)
+        return re_node_set_init_copy (dest, src1);
+      if (have2)
+        return re_node_set_init_copy (dest, src2);
+      re_node_set_init_empty (dest);
+      return REG_NOERROR;
+    }
+
+  dest->alloc = src1->nelem + src2->nelem;
+  dest->elems = re_malloc (int, dest->alloc);
+  if (BE (dest->elems == NULL, 0))
+    return REG_ESPACE;
+
+  /* Standard merge of two ascending sequences, dropping duplicates.  */
+  i1 = i2 = id = 0;
+  while (i1 < src1->nelem && i2 < src2->nelem)
+    {
+      int a = src1->elems[i1];
+      int b = src2->elems[i2];
+
+      if (a > b)
+        {
+          dest->elems[id++] = b;
+          ++i2;
+        }
+      else
+        {
+          if (a == b)
+            ++i2;
+          dest->elems[id++] = a;
+          ++i1;
+        }
+    }
+
+  /* At most one of the sources still has elements left; append them.  */
+  if (i1 < src1->nelem)
+    {
+      int rest = src1->nelem - i1;
+      memcpy (dest->elems + id, src1->elems + i1, rest * sizeof (int));
+      id += rest;
+    }
+  else if (i2 < src2->nelem)
+    {
+      int rest = src2->nelem - i2;
+      memcpy (dest->elems + id, src2->elems + i2, rest * sizeof (int));
+      id += rest;
+    }
+
+  dest->nelem = id;
+  return REG_NOERROR;
+}
+
+/* Calculate the union of the sets DEST and SRC, and store it in DEST,
+ keeping DEST sorted and free of duplicates. A NULL or empty SRC leaves
+ DEST unchanged. Returns REG_NOERROR on success, or REG_ESPACE if DEST
+ cannot be grown. */
+
+static reg_errcode_t
+internal_function
+re_node_set_merge (re_node_set *dest, const re_node_set *src)
+{
+ int is, id, sbase, delta;
+ if (src == NULL || src->nelem == 0)
+ return REG_NOERROR;
+ /* Room is needed for DEST, the merged-in items, and a scratch area at
+ the top that temporarily holds the items of SRC missing from DEST. */
+ if (dest->alloc < 2 * src->nelem + dest->nelem)
+ {
+ int new_alloc = 2 * (src->nelem + dest->alloc);
+ int *new_buffer = re_realloc (dest->elems, int, new_alloc);
+ if (BE (new_buffer == NULL, 0))
+ return REG_ESPACE;
+ dest->elems = new_buffer;
+ dest->alloc = new_alloc;
+ }
+
+ /* An empty DEST simply becomes a copy of SRC. */
+ if (BE (dest->nelem == 0, 0))
+ {
+ dest->nelem = src->nelem;
+ memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
+ return REG_NOERROR;
+ }
+
+ /* Copy into the top of DEST the items of SRC that are not
+ found in DEST. Maybe we could binary search in DEST? */
+ for (sbase = dest->nelem + 2 * src->nelem,
+ is = src->nelem - 1, id = dest->nelem - 1; is >= 0 && id >= 0; )
+ {
+ if (dest->elems[id] == src->elems[is])
+ is--, id--;
+ else if (dest->elems[id] < src->elems[is])
+ dest->elems[--sbase] = src->elems[is--];
+ else /* if (dest->elems[id] > src->elems[is]) */
+ --id;
+ }
+
+ if (is >= 0)
+ {
+ /* If DEST is exhausted, the remaining items of SRC must be unique. */
+ sbase -= is + 1;
+ memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (int));
+ }
+
+ /* ID: last original DEST element; IS: last scratch element;
+ DELTA: number of new elements stored in the scratch area. */
+ id = dest->nelem - 1;
+ is = dest->nelem + 2 * src->nelem - 1;
+ delta = is - sbase + 1;
+ if (delta == 0)
+ return REG_NOERROR;
+
+ /* Now copy. When DELTA becomes zero, the remaining
+ DEST elements are already in place. */
+ dest->nelem += delta;
+ for (;;)
+ {
+ if (dest->elems[is] > dest->elems[id])
+ {
+ /* Copy from the top. */
+ dest->elems[id + delta--] = dest->elems[is--];
+ if (delta == 0)
+ break;
+ }
+ else
+ {
+ /* Slide from the bottom. */
+ dest->elems[id + delta] = dest->elems[id];
+ if (--id < 0)
+ {
+ /* Copy remaining SRC elements. */
+ memcpy (dest->elems, dest->elems + sbase,
+ delta * sizeof (int));
+ break;
+ }
+ }
+ }
+
+ return REG_NOERROR;
+}
+
+/* Insert the new element ELEM into the re_node_set SET, at the position
+   that keeps SET->elems sorted in ascending order.
+   SET should not already have ELEM.
+   Return -1 if an error occurred (memory exhausted), return 1 otherwise.
+   When enlarging the buffer fails, SET is left exactly as it was.  */
+
+static int
+internal_function
+re_node_set_insert (re_node_set *set, int elem)
+{
+  int idx;
+
+  /* No buffer has been allocated yet: build a one-element set.  */
+  if (set->alloc == 0)
+    {
+      if (BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1))
+        return 1;
+      else
+        return -1;
+    }
+
+  if (BE (set->nelem, 0) == 0)
+    {
+      /* We already guaranteed above that set->alloc != 0.  */
+      set->elems[0] = elem;
+      ++set->nelem;
+      return 1;
+    }
+
+  /* Realloc if we need.  The new capacity is stored only once realloc
+     has succeeded; otherwise a failure would leave SET->alloc larger
+     than the buffer that is really allocated.  */
+  if (set->alloc == set->nelem)
+    {
+      int new_alloc = set->alloc * 2;
+      int *new_elems = re_realloc (set->elems, int, new_alloc);
+      if (BE (new_elems == NULL, 0))
+        return -1;
+      set->elems = new_elems;
+      set->alloc = new_alloc;
+    }
+
+  /* Move the elements which follow the new element.  Test the
+     first element separately to skip a check in the inner loop.  */
+  if (elem < set->elems[0])
+    {
+      /* ELEM becomes the smallest element: shift everything up.  */
+      for (idx = set->nelem; idx > 0; idx--)
+        set->elems[idx] = set->elems[idx - 1];
+    }
+  else
+    {
+      /* elems[0] <= ELEM, so the scan stops before IDX reaches 0.  */
+      for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
+        set->elems[idx] = set->elems[idx - 1];
+    }
+
+  /* Insert the new element.  */
+  set->elems[idx] = elem;
+  ++set->nelem;
+  return 1;
+}
+
+/* Append the new element ELEM at the end of the re_node_set SET.
+   SET should not already have any element greater than or equal to ELEM;
+   this is not checked.
+   Return -1 if an error occurred (memory exhausted), return 1 otherwise.
+   When enlarging the buffer fails, SET is left exactly as it was.  */
+
+static int
+internal_function
+re_node_set_insert_last (re_node_set *set, int elem)
+{
+  /* Realloc if we need.  The "+ 1" lets a set without any buffer
+     (alloc == 0) grow too.  The new capacity is stored only once realloc
+     has succeeded; otherwise a failure would leave SET->alloc larger
+     than the buffer that is really allocated.  */
+  if (set->alloc == set->nelem)
+    {
+      int new_alloc = (set->alloc + 1) * 2;
+      int *new_elems = re_realloc (set->elems, int, new_alloc);
+      if (BE (new_elems == NULL, 0))
+        return -1;
+      set->elems = new_elems;
+      set->alloc = new_alloc;
+    }
+
+  /* Insert the new element.  */
+  set->elems[set->nelem++] = elem;
+  return 1;
+}
+
+/* Tell whether the node sets SET1 and SET2 hold the same elements.
+   Return 1 if both are non-NULL, have the same number of elements and
+   agree element by element; return 0 otherwise.  */
+
+static int
+internal_function __attribute ((pure))
+re_node_set_compare (const re_node_set *set1, const re_node_set *set2)
+{
+  int pos;
+
+  if (set1 == NULL || set2 == NULL)
+    return 0;
+  if (set1->nelem != set2->nelem)
+    return 0;
+
+  /* Compare from the last element down to the first one.  */
+  pos = set1->nelem;
+  while (pos > 0)
+    {
+      --pos;
+      if (set1->elems[pos] != set2->elems[pos])
+        return 0;
+    }
+  return 1;
+}
+
+/* Look ELEM up in SET with a binary search (SET->elems must therefore be
+   sorted in ascending order).  Return the index of ELEM plus one when it
+   is found, and 0 when it is not or when SET is empty.  */
+
+static int
+internal_function __attribute ((pure))
+re_node_set_contains (const re_node_set *set, int elem)
+{
+  unsigned int lo, hi;
+
+  if (set->nelem <= 0)
+    return 0;
+
+  /* Narrow [LO, HI] down to the first element that is not below ELEM.  */
+  for (lo = 0, hi = set->nelem - 1; lo < hi; )
+    {
+      unsigned int probe = (lo + hi) / 2;
+      if (set->elems[probe] >= elem)
+        hi = probe;
+      else
+        lo = probe + 1;
+    }
+
+  if (set->elems[lo] != elem)
+    return 0;
+  return lo + 1;
+}
+
+/* Remove the element stored at index IDX from SET, moving the elements
+   behind it one slot down.  An IDX outside [0, SET->nelem) is ignored.  */
+static void
+internal_function
+re_node_set_remove_at (re_node_set *set, int idx)
+{
+  int last;
+
+  if (idx < 0 || idx >= set->nelem)
+    return;
+
+  last = set->nelem - 1;
+  while (idx < last)
+    {
+      set->elems[idx] = set->elems[idx + 1];
+      ++idx;
+    }
+  set->nelem = last;
+}
+
+
+/* Add the token TOKEN to dfa->nodes, and return the index of the token.
+   Return -1 if an error occurred (size overflow or memory exhausted).
+
+   The new node gets no constraint, no successor (nexts[] == -1) and
+   empty edests/eclosures sets.  When the node array is full, it and the
+   four arrays indexed in parallel with it are doubled in size.  On
+   failure every pointer stored in DFA still refers to a live allocation,
+   so DFA can be freed as usual.  */
+
+static int
+internal_function
+re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
+{
+  int type = token.type;
+  if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
+    {
+      size_t new_nodes_alloc = dfa->nodes_alloc * 2;
+      /* Size of the largest element type reallocated below; an int is
+         never larger than a re_node_set, which contains ints.  */
+      size_t max_elem_size = (sizeof (re_token_t) > sizeof (re_node_set)
+                              ? sizeof (re_token_t) : sizeof (re_node_set));
+      int *new_nexts, *new_indices;
+      re_node_set *new_edests, *new_eclosures;
+      re_token_t *new_nodes;
+
+      /* Avoid overflows, both of the element count and of the byte
+         counts handed to realloc.  */
+      if (BE (new_nodes_alloc < dfa->nodes_alloc
+              || new_nodes_alloc > SIZE_MAX / max_elem_size, 0))
+        return -1;
+
+      new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
+      if (BE (new_nodes == NULL, 0))
+        return -1;
+      dfa->nodes = new_nodes;
+
+      /* A successful realloc may have released the old block, so every
+         new pointer is stored at once.  Storing them only when all four
+         calls succeeded would leave DFA holding dangling pointers after
+         a partial failure.  */
+      new_nexts = re_realloc (dfa->nexts, int, new_nodes_alloc);
+      if (new_nexts != NULL)
+        dfa->nexts = new_nexts;
+      new_indices = re_realloc (dfa->org_indices, int, new_nodes_alloc);
+      if (new_indices != NULL)
+        dfa->org_indices = new_indices;
+      new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
+      if (new_edests != NULL)
+        dfa->edests = new_edests;
+      new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
+      if (new_eclosures != NULL)
+        dfa->eclosures = new_eclosures;
+
+      /* nodes_alloc keeps its old value on failure: some arrays may be
+         larger than it says, which is harmless.  */
+      if (BE (new_nexts == NULL || new_indices == NULL
+              || new_edests == NULL || new_eclosures == NULL, 0))
+        return -1;
+      dfa->nodes_alloc = new_nodes_alloc;
+    }
+  dfa->nodes[dfa->nodes_len] = token;
+  dfa->nodes[dfa->nodes_len].constraint = 0;
+#ifdef RE_ENABLE_I18N
+  dfa->nodes[dfa->nodes_len].accept_mb =
+    (type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET;
+#endif
+  dfa->nexts[dfa->nodes_len] = -1;
+  re_node_set_init_empty (dfa->edests + dfa->nodes_len);
+  re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
+  return dfa->nodes_len++;
+}
+
+/* Hash value of a state: the number of nodes in NODES plus CONTEXT plus
+   the sum of all node indices, in unsigned (wrapping) arithmetic.  */
+static inline unsigned int
+internal_function
+calc_state_hash (const re_node_set *nodes, unsigned int context)
+{
+  unsigned int sum = context;
+  int k = nodes->nelem;
+
+  sum += nodes->nelem;
+  /* Unsigned addition wraps around, so the summation order is free.  */
+  while (--k >= 0)
+    sum += nodes->elems[k];
+  return sum;
+}
+
+/* Look up, in DFA's state table, the context-independent state whose
+   node set is equivalent to NODES; create and register it when it does
+   not exist yet.  Return the state, or NULL:
+     - NULL with *ERR set to REG_NOERROR stands for the invalid state,
+       which is what an empty NODES yields;
+     - NULL with *ERR set to REG_ESPACE means the state could not be
+       created.
+   *ERR is not written when a state is returned, and a non-NULL value is
+   never returned in case of an error.  */
+
+static re_dfastate_t *
+internal_function
+re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa,
+                  const re_node_set *nodes)
+{
+  struct re_state_table_entry *bucket;
+  re_dfastate_t *created;
+  unsigned int key;
+  int slot;
+
+  if (BE (nodes->nelem == 0, 0))
+    {
+      *err = REG_NOERROR;
+      return NULL;
+    }
+
+  key = calc_state_hash (nodes, 0);
+  bucket = &dfa->state_table[key & dfa->state_hash_mask];
+
+  /* Reuse a registered state when one has the same hash and nodes.  */
+  for (slot = 0; slot < bucket->num; ++slot)
+    {
+      re_dfastate_t *candidate = bucket->array[slot];
+      if (candidate->hash == key
+          && re_node_set_compare (&candidate->nodes, nodes))
+        return candidate;
+    }
+
+  /* There is no appropriate state in the dfa, create a new one.  */
+  created = create_ci_newstate (dfa, nodes, key);
+  if (BE (created == NULL, 0))
+    *err = REG_ESPACE;
+  return created;
+}
+
+/* Look up, in DFA's state table, the state whose entrance node set is
+   equivalent to NODES and whose context equals CONTEXT; create and
+   register it when it does not exist yet.  Return the state, or NULL:
+     - NULL with *ERR set to REG_NOERROR stands for the invalid state,
+       which is what an empty NODES yields;
+     - NULL with *ERR set to REG_ESPACE means the state could not be
+       created.
+   *ERR is not written when a state is returned, and a non-NULL value is
+   never returned in case of an error.  */
+
+static re_dfastate_t *
+internal_function
+re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa,
+                          const re_node_set *nodes, unsigned int context)
+{
+  struct re_state_table_entry *bucket;
+  re_dfastate_t *created;
+  unsigned int key;
+  int slot;
+
+  if (nodes->nelem == 0)
+    {
+      *err = REG_NOERROR;
+      return NULL;
+    }
+
+  key = calc_state_hash (nodes, context);
+  bucket = &dfa->state_table[key & dfa->state_hash_mask];
+
+  for (slot = 0; slot < bucket->num; ++slot)
+    {
+      re_dfastate_t *candidate = bucket->array[slot];
+      /* Cheap rejections first, the element-wise comparison last.  */
+      if (candidate->hash != key || candidate->context != context)
+        continue;
+      if (re_node_set_compare (candidate->entrance_nodes, nodes))
+        return candidate;
+    }
+
+  /* There is no appropriate state in `dfa', create a new one.  */
+  created = create_cd_newstate (dfa, nodes, context, key);
+  if (BE (created == NULL, 0))
+    *err = REG_ESPACE;
+  return created;
+}
+
+/* Finish initialization of the new state NEWSTATE, and using its hash value
+   HASH put it in the appropriate bucket of DFA's state table: record HASH,
+   fill NEWSTATE->non_eps_nodes with the nodes of NEWSTATE->nodes that
+   are not epsilon nodes, and append NEWSTATE to the bucket.
+   Return REG_NOERROR on success and REG_ESPACE if memory is exhausted;
+   in the latter case NEWSTATE is not in the table and still belongs to
+   the caller.  */
+
+static reg_errcode_t
+register_state (const re_dfa_t *dfa, re_dfastate_t *newstate,
+                unsigned int hash)
+{
+  struct re_state_table_entry *spot;
+  reg_errcode_t err;
+  int i;
+
+  newstate->hash = hash;
+  err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
+  if (BE (err != REG_NOERROR, 0))
+    return REG_ESPACE;
+  for (i = 0; i < newstate->nodes.nelem; i++)
+    {
+      int elem = newstate->nodes.elems[i];
+      /* re_node_set_insert_last returns -1 when it cannot grow the set;
+         do not silently lose the node in that case.  */
+      if (!IS_EPSILON_NODE (dfa->nodes[elem].type)
+          && BE (re_node_set_insert_last (&newstate->non_eps_nodes, elem) < 0,
+                 0))
+        return REG_ESPACE;
+    }
+
+  spot = dfa->state_table + (hash & dfa->state_hash_mask);
+  if (BE (spot->alloc <= spot->num, 0))
+    {
+      /* The bucket is full: make room for more states.  */
+      int new_alloc = 2 * spot->num + 2;
+      re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *,
+                                              new_alloc);
+      if (BE (new_array == NULL, 0))
+        return REG_ESPACE;
+      spot->array = new_array;
+      spot->alloc = new_alloc;
+    }
+  spot->array[spot->num++] = newstate;
+  return REG_NOERROR;
+}
+
+/* Release STATE and everything it owns: its node sets, its transition
+   tables and, when it is a separate allocation rather than an alias of
+   STATE->nodes, its entrance node set.  STATE may be only partially
+   initialized (zero-filled members, including a NULL entrance_nodes),
+   which is how the state constructors hand it over on failure.  */
+static void
+free_state (re_dfastate_t *state)
+{
+  re_node_set_free (&state->non_eps_nodes);
+  re_node_set_free (&state->inveclosure);
+  /* entrance_nodes is NULL until a constructor has set it up;
+     dereferencing it then would crash instead of cleaning up.  */
+  if (state->entrance_nodes != NULL
+      && state->entrance_nodes != &state->nodes)
+    {
+      re_node_set_free (state->entrance_nodes);
+      re_free (state->entrance_nodes);
+    }
+  re_node_set_free (&state->nodes);
+  re_free (state->word_trtable);
+  re_free (state->trtable);
+  re_free (state);
+}
+
+/* Create the state for NODES that is independent of contexts, and
+   register it in DFA's state table under HASH.
+   Return the new state if succeeded, otherwise return NULL.  */
+
+static re_dfastate_t *
+internal_function
+create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
+                    unsigned int hash)
+{
+  re_dfastate_t *state;
+  int n;
+
+  state = (re_dfastate_t *) calloc (1, sizeof (re_dfastate_t));
+  if (BE (state == NULL, 0))
+    return NULL;
+  if (BE (re_node_set_init_copy (&state->nodes, nodes) != REG_NOERROR, 0))
+    {
+      re_free (state);
+      return NULL;
+    }
+
+  /* Without context dependence the entrance nodes are the nodes.  */
+  state->entrance_nodes = &state->nodes;
+
+  /* Derive the state's flags from the nodes it contains.  */
+  for (n = 0; n < nodes->nelem; ++n)
+    {
+      re_token_t *node = &dfa->nodes[nodes->elems[n]];
+      re_token_type_t type = node->type;
+
+      /* A plain character without constraint contributes nothing.  */
+      if (type != CHARACTER || node->constraint)
+        {
+#ifdef RE_ENABLE_I18N
+          state->accept_mb |= node->accept_mb;
+#endif /* RE_ENABLE_I18N */
+
+          switch (type)
+            {
+            case END_OF_RE:
+              /* If the state has the halt node, it is a halt state.  */
+              state->halt = 1;
+              break;
+            case OP_BACK_REF:
+              state->has_backref = 1;
+              break;
+            default:
+              if (type == ANCHOR || node->constraint)
+                state->has_constraint = 1;
+              break;
+            }
+        }
+    }
+
+  if (BE (register_state (dfa, state, hash) != REG_NOERROR, 0))
+    {
+      free_state (state);
+      return NULL;
+    }
+  return state;
+}
+
+/* Create the state for NODES which depends on the context CONTEXT, and
+   register it in DFA's state table under HASH.
+   Return the new state if succeeded, otherwise return NULL.
+
+   The state's node set starts as a copy of NODES.  As soon as a node
+   with a constraint is met, an untouched copy of NODES is kept as the
+   state's entrance nodes, and every node whose constraint on the
+   previous context is not satisfied by CONTEXT is removed from the
+   state's own node set.  */
+
+static re_dfastate_t *
+internal_function
+create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
+                    unsigned int context, unsigned int hash)
+{
+  int i, nctx_nodes = 0;
+  reg_errcode_t err;
+  re_dfastate_t *newstate;
+
+  newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
+  if (BE (newstate == NULL, 0))
+    return NULL;
+  err = re_node_set_init_copy (&newstate->nodes, nodes);
+  if (BE (err != REG_NOERROR, 0))
+    {
+      re_free (newstate);
+      return NULL;
+    }
+
+  newstate->context = context;
+  newstate->entrance_nodes = &newstate->nodes;
+
+  for (i = 0 ; i < nodes->nelem ; i++)
+    {
+      re_token_t *node = dfa->nodes + nodes->elems[i];
+      re_token_type_t type = node->type;
+      unsigned int constraint = node->constraint;
+
+      /* A plain character without constraint contributes nothing.  */
+      if (type == CHARACTER && !constraint)
+        continue;
+#ifdef RE_ENABLE_I18N
+      newstate->accept_mb |= node->accept_mb;
+#endif /* RE_ENABLE_I18N */
+
+      /* If the state has the halt node, the state is a halt state.  */
+      if (type == END_OF_RE)
+        newstate->halt = 1;
+      else if (type == OP_BACK_REF)
+        newstate->has_backref = 1;
+
+      if (constraint)
+        {
+          if (newstate->entrance_nodes == &newstate->nodes)
+            {
+              /* First constrained node: give the state its own copy of
+                 NODES as entrance nodes.  The copy is built in a local
+                 and attached only when complete, so that on failure
+                 entrance_nodes still aliases newstate->nodes and
+                 free_state never sees a NULL or half-built set.  */
+              re_node_set *entrance = re_malloc (re_node_set, 1);
+              if (BE (entrance == NULL, 0))
+                {
+                  free_state (newstate);
+                  return NULL;
+                }
+              if (BE (re_node_set_init_copy (entrance, nodes)
+                      != REG_NOERROR, 0))
+                {
+                  re_free (entrance);
+                  free_state (newstate);
+                  return NULL;
+                }
+              newstate->entrance_nodes = entrance;
+              nctx_nodes = 0;
+              newstate->has_constraint = 1;
+            }
+
+          if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
+            {
+              /* NCTX_NODES nodes were removed already, so node I of
+                 NODES now sits at index I - NCTX_NODES.  */
+              re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
+              ++nctx_nodes;
+            }
+        }
+    }
+  err = register_state (dfa, newstate, hash);
+  if (BE (err != REG_NOERROR, 0))
+    {
+      free_state (newstate);
+      newstate = NULL;
+    }
+  return newstate;
+}
diff --git a/gnu_regex/.svn/text-base/regex_internal.h.svn-base b/gnu_regex/.svn/text-base/regex_internal.h.svn-base
new file mode 100644
index 0000000..71c4a38
--- /dev/null
+++ b/gnu_regex/.svn/text-base/regex_internal.h.svn-base
@@ -0,0 +1,773 @@
+/* Extended regular expression matching and search library.
+ Copyright (C) 2002-2005, 2007, 2008 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#ifndef _REGEX_INTERNAL_H
+#define _REGEX_INTERNAL_H 1
+
+#include <assert.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if defined HAVE_LANGINFO_H || defined HAVE_LANGINFO_CODESET || defined _LIBC
+# include <langinfo.h>
+#endif
+#if defined HAVE_LOCALE_H || defined _LIBC
+# include <locale.h>
+#endif
+#if defined HAVE_WCHAR_H || defined _LIBC
+# include <wchar.h>
+#endif /* HAVE_WCHAR_H || _LIBC */
+#if defined HAVE_WCTYPE_H || defined _LIBC
+# include <wctype.h>
+#endif /* HAVE_WCTYPE_H || _LIBC */
+#if defined HAVE_STDBOOL_H || defined _LIBC
+# include <stdbool.h>
+#endif /* HAVE_STDBOOL_H || _LIBC */
+#if defined HAVE_STDINT_H || defined _LIBC
+# include <stdint.h>
+#endif /* HAVE_STDINT_H || _LIBC */
+#if defined _LIBC
+# include <bits/libc-lock.h>
+#else
+# define __libc_lock_define(CLASS,NAME)
+# define __libc_lock_init(NAME) do { } while (0)
+# define __libc_lock_lock(NAME) do { } while (0)
+# define __libc_lock_unlock(NAME) do { } while (0)
+#endif
+
+/* In case that the system doesn't have isblank(). */
+#if !defined _LIBC && !defined HAVE_ISBLANK && !defined isblank
+# define isblank(ch) ((ch) == ' ' || (ch) == '\t')
+#endif
+
+#ifdef _LIBC
+# ifndef _RE_DEFINE_LOCALE_FUNCTIONS
+# define _RE_DEFINE_LOCALE_FUNCTIONS 1
+# include <locale/localeinfo.h>
+# include <locale/elem-hash.h>
+# include <locale/coll-lookup.h>
+# endif
+#endif
+
+/* This is for other GNU distributions with internationalized messages. */
+#if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC
+# include <libintl.h>
+# ifdef _LIBC
+# undef gettext
+# define gettext(msgid) \
+ INTUSE(__dcgettext) (_libc_intl_domainname, msgid, LC_MESSAGES)
+# endif
+#else
+# define gettext(msgid) (msgid)
+#endif
+
+#ifndef gettext_noop
+/* This define is so xgettext can find the internationalizable
+ strings. */
+# define gettext_noop(String) String
+#endif
+
+/* For loser systems without the definition. */
+#ifndef SIZE_MAX
+# define SIZE_MAX ((size_t) -1)
+#endif
+
+#if (defined MB_CUR_MAX && HAVE_LOCALE_H && HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_WCRTOMB && HAVE_MBRTOWC && HAVE_WCSCOLL) || _LIBC
+# define RE_ENABLE_I18N
+#endif
+/* BE (EXPR, VAL) evaluates to EXPR. With GCC 3 or later it additionally
+ tells the compiler that EXPR is expected to equal VAL (a branch
+ prediction hint). With any other compiler the hint is dropped and the
+ `inline' keyword is defined away. */
+#if __GNUC__ >= 3
+# define BE(expr, val) __builtin_expect (expr, val)
+#else
+# define BE(expr, val) (expr)
+# define inline
+#endif
+
+/* Number of single byte character. */
+#define SBC_MAX 256
+
+#define COLL_ELEM_LEN_MAX 8
+
+/* The character which represents newline. */
+#define NEWLINE_CHAR '\n'
+#define WIDE_NEWLINE_CHAR L'\n'
+
+/* Rename to standard API for using out of glibc. */
+#ifndef _LIBC
+# define __wctype wctype
+# define __iswctype iswctype
+# define __btowc btowc
+# define __mbrtowc mbrtowc
+# define __mempcpy mempcpy
+# define __wcrtomb wcrtomb
+# define __regfree regfree
+# define attribute_hidden
+#endif /* not _LIBC */
+
+#ifdef __GNUC__
+# define __attribute(arg) __attribute__ (arg)
+#else
+# define __attribute(arg)
+#endif
+
+extern const char __re_error_msgid[] attribute_hidden;
+extern const size_t __re_error_msgid_idx[] attribute_hidden;
+
+/* An integer used to represent a set of bits.  It must be unsigned,
+   and must be at least as wide as unsigned int.  */
+typedef unsigned long int bitset_word_t;
+/* All bits set in a bitset_word_t.  (ULONG_MAX comes from <limits.h>.)  */
+#define BITSET_WORD_MAX ULONG_MAX
+/* Number of bits in a bitset_word_t.  (CHAR_BIT comes from <limits.h>.)  */
+#define BITSET_WORD_BITS (sizeof (bitset_word_t) * CHAR_BIT)
+/* Number of bitset_word_t in a bit_set.  */
+#define BITSET_WORDS (SBC_MAX / BITSET_WORD_BITS)
+/* A set with one bit per single byte character (SBC_MAX bits).  */
+typedef bitset_word_t bitset_t[BITSET_WORDS];
+typedef bitset_word_t *re_bitset_ptr_t;
+typedef const bitset_word_t *re_const_bitset_ptr_t;
+
+/* Set, clear and test bit I of SET.  The arguments are parenthesized so
+   that expressions such as `c + 1' or `p->set' can be passed safely.
+   Both SET and I are evaluated exactly once, I being used twice through
+   the same sub-expression is avoided by the compiler only if it has no
+   side effects: do not pass arguments with side effects for I.  */
+#define bitset_set(set,i) \
+  ((set)[(i) / BITSET_WORD_BITS] |= (bitset_word_t) 1 << (i) % BITSET_WORD_BITS)
+#define bitset_clear(set,i) \
+  ((set)[(i) / BITSET_WORD_BITS] &= ~((bitset_word_t) 1 << (i) % BITSET_WORD_BITS))
+#define bitset_contain(set,i) \
+  ((set)[(i) / BITSET_WORD_BITS] & ((bitset_word_t) 1 << (i) % BITSET_WORD_BITS))
+/* Whole-set operations; each works on sizeof (bitset_t) bytes.  */
+#define bitset_empty(set) memset (set, '\0', sizeof (bitset_t))
+#define bitset_set_all(set) memset (set, '\xff', sizeof (bitset_t))
+#define bitset_copy(dest,src) memcpy (dest, src, sizeof (bitset_t))
+/* Bit flags describing what has to surround the position at which a
+ node may match. The PREV_* flags are checked against the context of
+ the preceding character by NOT_SATISFY_PREV_CONSTRAINT, the NEXT_*
+ flags against the context of the following one by
+ NOT_SATISFY_NEXT_CONSTRAINT. There are ten flags, which is the width
+ of the `constraint' bit-field of re_token_t. */
+#define PREV_WORD_CONSTRAINT 0x0001
+#define PREV_NOTWORD_CONSTRAINT 0x0002
+#define NEXT_WORD_CONSTRAINT 0x0004
+#define NEXT_NOTWORD_CONSTRAINT 0x0008
+#define PREV_NEWLINE_CONSTRAINT 0x0010
+#define NEXT_NEWLINE_CONSTRAINT 0x0020
+#define PREV_BEGBUF_CONSTRAINT 0x0040
+#define NEXT_ENDBUF_CONSTRAINT 0x0080
+#define WORD_DELIM_CONSTRAINT 0x0100 /* NOTE(review): not tested by the NOT_SATISFY_* macros; presumably handled elsewhere. */
+#define NOT_WORD_DELIM_CONSTRAINT 0x0200 /* NOTE(review): likewise. */
+
+typedef enum /* Named combinations of the flags above, stored in the ctx_type member of an ANCHOR token. */
+{
+ INSIDE_WORD = PREV_WORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
+ WORD_FIRST = PREV_NOTWORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
+ WORD_LAST = PREV_WORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
+ INSIDE_NOTWORD = PREV_NOTWORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
+ LINE_FIRST = PREV_NEWLINE_CONSTRAINT,
+ LINE_LAST = NEXT_NEWLINE_CONSTRAINT,
+ BUF_FIRST = PREV_BEGBUF_CONSTRAINT,
+ BUF_LAST = NEXT_ENDBUF_CONSTRAINT,
+ WORD_DELIM = WORD_DELIM_CONSTRAINT,
+ NOT_WORD_DELIM = NOT_WORD_DELIM_CONSTRAINT
+} re_context_type;
+/* A set of node indices, stored in a growable array of int. The lookup
+ and merge helpers of regex_internal.c binary search and merge ELEMS, so
+ the elements are expected to be kept sorted in ascending order. */
+typedef struct
+{
+ int alloc; /* Number of ints ELEMS has room for. */
+ int nelem; /* Number of elements currently stored. */
+ int *elems; /* The elements; freed by re_node_set_free. */
+} re_node_set;
+/* Type tag shared by tokens, nodes and parse tree elements. The values
+ are laid out in ranges: 1-7 are nodes that are not epsilon nodes, 8-15
+ have EPSILON_BIT set (see IS_EPSILON_NODE), 16-17 occur in trees only
+ and 18 upwards in tokens only. */
+typedef enum
+{
+ NON_TYPE = 0,
+
+ /* Node type, These are used by token, node, tree. */
+ CHARACTER = 1,
+ END_OF_RE = 2,
+ SIMPLE_BRACKET = 3,
+ OP_BACK_REF = 4,
+ OP_PERIOD = 5,
+#ifdef RE_ENABLE_I18N
+ COMPLEX_BRACKET = 6,
+ OP_UTF8_PERIOD = 7,
+#endif /* RE_ENABLE_I18N */
+
+ /* We define EPSILON_BIT as a macro so that OP_OPEN_SUBEXP is used
+ when the debugger shows values of this enum type. */
+#define EPSILON_BIT 8
+ OP_OPEN_SUBEXP = EPSILON_BIT | 0,
+ OP_CLOSE_SUBEXP = EPSILON_BIT | 1,
+ OP_ALT = EPSILON_BIT | 2,
+ OP_DUP_ASTERISK = EPSILON_BIT | 3,
+ ANCHOR = EPSILON_BIT | 4,
+
+ /* Tree type, these are used only by tree. */
+ CONCAT = 16,
+ SUBEXP = 17,
+
+ /* Token type, these are used only by token. */
+ OP_DUP_PLUS = 18,
+ OP_DUP_QUESTION,
+ OP_OPEN_BRACKET,
+ OP_CLOSE_BRACKET,
+ OP_CHARSET_RANGE,
+ OP_OPEN_DUP_NUM,
+ OP_CLOSE_DUP_NUM,
+ OP_NON_MATCH_LIST,
+ OP_OPEN_COLL_ELEM,
+ OP_CLOSE_COLL_ELEM,
+ OP_OPEN_EQUIV_CLASS,
+ OP_CLOSE_EQUIV_CLASS,
+ OP_OPEN_CHAR_CLASS,
+ OP_CLOSE_CHAR_CLASS,
+ OP_WORD,
+ OP_NOTWORD,
+ OP_SPACE,
+ OP_NOTSPACE,
+ BACK_SLASH
+
+} re_token_type_t;
+
+#ifdef RE_ENABLE_I18N
+typedef struct
+{
+ /* Multibyte characters. */
+ wchar_t *mbchars;
+
+ /* Collating symbols. */
+# ifdef _LIBC
+ int32_t *coll_syms;
+# endif
+
+ /* Equivalence classes. */
+# ifdef _LIBC
+ int32_t *equiv_classes;
+# endif
+
+ /* Range expressions. */
+# ifdef _LIBC
+ uint32_t *range_starts;
+ uint32_t *range_ends;
+# else /* not _LIBC */
+ wchar_t *range_starts;
+ wchar_t *range_ends;
+# endif /* not _LIBC */
+
+ /* Character classes. */
+ wctype_t *char_classes;
+
+ /* If this character set is the non-matching list. */
+ unsigned int non_match : 1;
+
+ /* # of multibyte characters. */
+ int nmbchars;
+
+ /* # of collating symbols. */
+ int ncoll_syms;
+
+ /* # of equivalence classes. */
+ int nequiv_classes;
+
+ /* # of range expressions. */
+ int nranges;
+
+ /* # of character classes. */
+ int nchar_classes;
+} re_charset_t;
+#endif /* RE_ENABLE_I18N */
+/* A token of the pattern; the same structure is used for the elements
+ of dfa->nodes. TYPE tells which member of OPR is meaningful. */
+typedef struct
+{
+ union
+ {
+ unsigned char c; /* for CHARACTER */
+ re_bitset_ptr_t sbcset; /* for SIMPLE_BRACKET */
+#ifdef RE_ENABLE_I18N
+ re_charset_t *mbcset; /* for COMPLEX_BRACKET */
+#endif /* RE_ENABLE_I18N */
+ int idx; /* for BACK_REF */
+ re_context_type ctx_type; /* for ANCHOR */
+ } opr;
+#if __GNUC__ >= 2
+ re_token_type_t type : 8; /* GCC accepts an enum bit-field; 8 bits hold every re_token_type_t value. */
+#else
+ re_token_type_t type;
+#endif
+ unsigned int constraint : 10; /* context constraint: a mask of the *_CONSTRAINT flags; cleared by re_dfa_add_node */
+ unsigned int duplicated : 1;
+ unsigned int opt_subexp : 1;
+#ifdef RE_ENABLE_I18N
+ unsigned int accept_mb : 1; /* Set by re_dfa_add_node for COMPLEX_BRACKET, and for OP_PERIOD when mb_cur_max > 1. */
+ /* These 2 bits can be moved into the union if needed (e.g. if running out
+ of bits; move opr.c to opr.c.c and move the flags to opr.c.flags). */
+ unsigned int mb_partial : 1;
+#endif
+ unsigned int word_char : 1;
+} re_token_t;
+
+#define IS_EPSILON_NODE(type) ((type) & EPSILON_BIT) /* Non-zero for the types 8-15 of re_token_type_t. */
+
+struct re_string_t
+{
+ /* Indicate the raw buffer which is the original string passed as an
+ argument of regexec(), re_search(), etc.. */
+ const unsigned char *raw_mbs;
+ /* Store the multibyte string. In case of "case insensitive mode" like
+ REG_ICASE, upper cases of the string are stored, otherwise MBS points
+ the same address that RAW_MBS points. */
+ unsigned char *mbs;
+#ifdef RE_ENABLE_I18N
+ /* Store the wide character string which is corresponding to MBS. */
+ wint_t *wcs;
+ int *offsets;
+ mbstate_t cur_state;
+#endif
+ /* Index in RAW_MBS. Each character mbs[i] corresponds to
+ raw_mbs[raw_mbs_idx + i]. */
+ int raw_mbs_idx;
+ /* The length of the valid characters in the buffers. */
+ int valid_len;
+ /* The corresponding number of bytes in raw_mbs array. */
+ int valid_raw_len;
+ /* The length of the buffers MBS and WCS. */
+ int bufs_len;
+ /* The index in MBS, which is updated by re_string_fetch_byte. */
+ int cur_idx;
+ /* length of RAW_MBS array. */
+ int raw_len;
+ /* This is RAW_LEN - RAW_MBS_IDX + VALID_LEN - VALID_RAW_LEN. */
+ int len;
+ /* End of the buffer may be shorter than its length in the cases such
+ as re_match_2, re_search_2. Then, we use STOP for end of the buffer
+ instead of LEN. */
+ int raw_stop;
+ /* This is RAW_STOP - RAW_MBS_IDX adjusted through OFFSETS. */
+ int stop;
+
+ /* The context of mbs[0]. We store the context independently, since
+ the context of mbs[0] may be different from raw_mbs[0], which is
+ the beginning of the input string. */
+ unsigned int tip_context;
+ /* The translation passed as a part of an argument of re_compile_pattern. */
+ RE_TRANSLATE_TYPE trans;
+ /* Copy of re_dfa_t's word_char. */
+ re_const_bitset_ptr_t word_char;
+ /* 1 if REG_ICASE. */
+ unsigned char icase;
+ unsigned char is_utf8;
+ unsigned char map_notascii;
+ unsigned char mbs_allocated;
+ unsigned char offsets_needed;
+ unsigned char newline_anchor;
+ unsigned char word_ops_used;
+ int mb_cur_max;
+};
+typedef struct re_string_t re_string_t;
+
+
+struct re_dfa_t;
+typedef struct re_dfa_t re_dfa_t;
+
+#ifndef _LIBC
+# ifdef __i386__
+# define internal_function __attribute ((regparm (3), stdcall))
+# else
+# define internal_function
+# endif
+#endif
+
+#ifndef NOT_IN_libc
+static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr,
+ int new_buf_len)
+ internal_function;
+# ifdef RE_ENABLE_I18N
+static void build_wcs_buffer (re_string_t *pstr) internal_function;
+static reg_errcode_t build_wcs_upper_buffer (re_string_t *pstr)
+ internal_function;
+# endif /* RE_ENABLE_I18N */
+static void build_upper_buffer (re_string_t *pstr) internal_function;
+static void re_string_translate_buffer (re_string_t *pstr) internal_function;
+static unsigned int re_string_context_at (const re_string_t *input, int idx,
+ int eflags)
+ internal_function __attribute ((pure));
+#endif
+/* Accessors for a re_string_t.  PSTR is a pointer to the string object;
+   every macro argument is parenthesized so that arbitrary expressions
+   can be passed.  Some macros evaluate an argument more than once: do
+   not pass arguments with side effects.  */
+
+/* The byte OFFSET positions after the current index, without moving.  */
+#define re_string_peek_byte(pstr, offset) \
+  ((pstr)->mbs[(pstr)->cur_idx + (offset)])
+/* The byte at the current index; the index is advanced by one.  */
+#define re_string_fetch_byte(pstr) \
+  ((pstr)->mbs[(pstr)->cur_idx++])
+/* Non-zero if IDX is the end of the valid data or wcs[IDX] is not WEOF.  */
+#define re_string_first_byte(pstr, idx) \
+  ((idx) == (pstr)->valid_len || (pstr)->wcs[(idx)] != WEOF)
+/* Non-zero if wcs[IDX] is not WEOF and neither is its successor (or
+   there is no successor in the valid data).  */
+#define re_string_is_single_byte_char(pstr, idx) \
+  ((pstr)->wcs[(idx)] != WEOF && ((pstr)->valid_len == (idx) + 1 \
+                                || (pstr)->wcs[(idx) + 1] != WEOF))
+/* Non-zero once the current index has reached STOP.  */
+#define re_string_eoi(pstr) ((pstr)->stop <= (pstr)->cur_idx)
+#define re_string_cur_idx(pstr) ((pstr)->cur_idx)
+#define re_string_get_buffer(pstr) ((pstr)->mbs)
+#define re_string_length(pstr) ((pstr)->len)
+#define re_string_byte_at(pstr,idx) ((pstr)->mbs[(idx)])
+#define re_string_skip_bytes(pstr,idx) ((pstr)->cur_idx += (idx))
+#define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx))
+
+#ifdef WIN32
+# include <malloc.h>
+#else
+# include <alloca.h>
+#endif
+
+#ifndef _LIBC
+# if HAVE_ALLOCA
+/* The OS usually guarantees only one guard page at the bottom of the stack,
+ and a page size can be as small as 4096 bytes. So we cannot safely
+ allocate anything larger than 4096 bytes. Also care for the possibility
+ of a few compiler-allocated temporary stack slots. */
+# define __libc_use_alloca(n) ((n) < 4032)
+# else
+/* alloca is implemented with malloc, so just use malloc. */
+# define __libc_use_alloca(n) 0
+# endif
+#endif
+/* Typed wrappers around malloc, realloc and free: T is the element type
+ and N the number of elements. N * sizeof (T) is computed without any
+ overflow check, so callers have to bound N themselves. */
+#define re_malloc(t,n) ((t *) malloc ((n) * sizeof (t)))
+#define re_realloc(p,t,n) ((t *) realloc (p, (n) * sizeof (t)))
+#define re_free(p) free (p)
+
+struct bin_tree_t
+{
+ struct bin_tree_t *parent;
+ struct bin_tree_t *left;
+ struct bin_tree_t *right;
+ struct bin_tree_t *first;
+ struct bin_tree_t *next;
+
+ re_token_t token;
+
+ /* `node_idx' is the index in dfa->nodes, if `type' == 0.
+ Otherwise `type' indicate the type of this node. */
+ int node_idx;
+};
+typedef struct bin_tree_t bin_tree_t;
+
+#define BIN_TREE_STORAGE_SIZE \
+ ((1024 - sizeof (void *)) / sizeof (bin_tree_t))
+
+struct bin_tree_storage_t
+{
+ struct bin_tree_storage_t *next;
+ bin_tree_t data[BIN_TREE_STORAGE_SIZE];
+};
+typedef struct bin_tree_storage_t bin_tree_storage_t;
+
+/* Bits describing the context of a position, i.e. the kind of the
+   neighbouring character.  */
+#define CONTEXT_WORD 1
+#define CONTEXT_NEWLINE (CONTEXT_WORD << 1)
+#define CONTEXT_BEGBUF (CONTEXT_NEWLINE << 1)
+#define CONTEXT_ENDBUF (CONTEXT_BEGBUF << 1)
+
+/* Tests on a context value C.  */
+#define IS_WORD_CONTEXT(c) ((c) & CONTEXT_WORD)
+#define IS_NEWLINE_CONTEXT(c) ((c) & CONTEXT_NEWLINE)
+#define IS_BEGBUF_CONTEXT(c) ((c) & CONTEXT_BEGBUF)
+#define IS_ENDBUF_CONTEXT(c) ((c) & CONTEXT_ENDBUF)
+#define IS_ORDINARY_CONTEXT(c) ((c) == 0)
+
+/* Character classification used to compute contexts.  */
+#define IS_WORD_CHAR(ch) (isalnum (ch) || (ch) == '_')
+#define IS_NEWLINE(ch) ((ch) == NEWLINE_CHAR)
+#define IS_WIDE_WORD_CHAR(ch) (iswalnum (ch) || (ch) == L'_')
+#define IS_WIDE_NEWLINE(ch) ((ch) == WIDE_NEWLINE_CHAR)
+
+/* Non-zero if CONTEXT, the context of the previous character, violates
+   one of the PREV_* flags set in CONSTRAINT.  Both arguments are
+   parenthesized at every use, so expressions such as `a | b' can be
+   passed; they are evaluated several times.  */
+#define NOT_SATISFY_PREV_CONSTRAINT(constraint,context) \
+ ((((constraint) & PREV_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
+  || (((constraint) & PREV_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
+  || (((constraint) & PREV_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context))\
+  || (((constraint) & PREV_BEGBUF_CONSTRAINT) && !IS_BEGBUF_CONTEXT (context)))
+
+/* Non-zero if CONTEXT, the context of the next character, violates one
+   of the NEXT_* flags set in CONSTRAINT.  */
+#define NOT_SATISFY_NEXT_CONSTRAINT(constraint,context) \
+ ((((constraint) & NEXT_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
+  || (((constraint) & NEXT_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
+  || (((constraint) & NEXT_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context)) \
+  || (((constraint) & NEXT_ENDBUF_CONSTRAINT) && !IS_ENDBUF_CONTEXT (context)))
+/* A state of the DFA: a set of nodes plus flags derived from them. The
+ states are created by create_ci_newstate / create_cd_newstate, stored
+ in the hash table of re_dfa_t and released by free_state. */
+struct re_dfastate_t
+{
+ unsigned int hash; /* Hash value the state is registered under. */
+ re_node_set nodes; /* The nodes of this state. */
+ re_node_set non_eps_nodes; /* The members of NODES that are not epsilon nodes. */
+ re_node_set inveclosure;
+ re_node_set *entrance_nodes; /* Either &nodes or a separately allocated copy of the original node set. */
+ struct re_dfastate_t **trtable, **word_trtable; /* Owned by the state: released by free_state. */
+ unsigned int context : 4; /* Four bits, as many as there are CONTEXT_* flags. */
+ unsigned int halt : 1; /* Set when the state contains an END_OF_RE node. */
+ /* If this state can accept `multi byte'.
+ Note that we refer to multibyte characters, and multi character
+ collating elements as `multi byte'. */
+ unsigned int accept_mb : 1;
+ /* If this state has backreference node(s). */
+ unsigned int has_backref : 1;
+ unsigned int has_constraint : 1; /* Set when a node of the state is an ANCHOR or carries a constraint. */
+};
+typedef struct re_dfastate_t re_dfastate_t;
+/* One bucket of the state hash table of re_dfa_t: a growable array of
+ the states whose hash selects this bucket. */
+struct re_state_table_entry
+{
+ int num; /* Number of states stored in ARRAY. */
+ int alloc; /* Number of states ARRAY has room for. */
+ re_dfastate_t **array;
+};
+
+/* Array type used in re_sub_match_last_t and re_sub_match_top_t. */
+
+typedef struct
+{
+ int next_idx;
+ int alloc;
+ re_dfastate_t **array;
+} state_array_t;
+
+/* Store information about the node NODE whose type is OP_CLOSE_SUBEXP. */
+
+typedef struct
+{
+ int node;
+ int str_idx; /* The position NODE match at. */
+ state_array_t path;
+} re_sub_match_last_t;
+
+/* Store information about the node NODE whose type is OP_OPEN_SUBEXP.
+ And information about the node, whose type is OP_CLOSE_SUBEXP,
+ corresponding to NODE is stored in LASTS. */
+
+typedef struct
+{
+ int str_idx;
+ int node;
+ state_array_t *path;
+ int alasts; /* Allocation size of LASTS. */
+ int nlasts; /* The number of LASTS. */
+ re_sub_match_last_t **lasts;
+} re_sub_match_top_t;
+
+struct re_backref_cache_entry
+{
+ int node;
+ int str_idx;
+ int subexp_from;
+ int subexp_to;
+ char more;
+ char unused;
+ unsigned short int eps_reachable_subexps_map;
+};
+
+typedef struct
+{
+ /* The string object corresponding to the input string. */
+ re_string_t input;
+#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
+ const re_dfa_t *const dfa;
+#else
+ const re_dfa_t *dfa;
+#endif
+ /* EFLAGS of the argument of regexec. */
+ int eflags;
+ /* Where the matching ends. */
+ int match_last;
+ int last_node;
+ /* The state log used by the matcher. */
+ re_dfastate_t **state_log;
+ int state_log_top;
+ /* Back reference cache. */
+ int nbkref_ents;
+ int abkref_ents;
+ struct re_backref_cache_entry *bkref_ents;
+ int max_mb_elem_len;
+ int nsub_tops;
+ int asub_tops;
+ re_sub_match_top_t **sub_tops;
+} re_match_context_t;
+
+typedef struct
+{
+ re_dfastate_t **sifted_states;
+ re_dfastate_t **limited_states;
+ int last_node;
+ int last_str_idx;
+ re_node_set limits;
+} re_sift_context_t;
+
+struct re_fail_stack_ent_t
+{
+ int idx;
+ int node;
+ regmatch_t *regs;
+ re_node_set eps_via_nodes;
+};
+
+struct re_fail_stack_t
+{
+ int num;
+ int alloc;
+ struct re_fail_stack_ent_t *stack;
+};
+/* The compiled form of a pattern. NODES, NEXTS, ORG_INDICES, EDESTS and
+ ECLOSURES are parallel arrays indexed by node number; re_dfa_add_node
+ grows them together. */
+struct re_dfa_t
+{
+ re_token_t *nodes;
+ size_t nodes_alloc; /* Number of nodes the parallel arrays have room for. */
+ size_t nodes_len; /* Number of nodes in use. */
+ int *nexts; /* -1 for a freshly added node. */
+ int *org_indices;
+ re_node_set *edests; /* Empty for a freshly added node. */
+ re_node_set *eclosures; /* Empty for a freshly added node. */
+ re_node_set *inveclosures;
+ struct re_state_table_entry *state_table; /* Hash table of states, indexed by hash & state_hash_mask. */
+ re_dfastate_t *init_state;
+ re_dfastate_t *init_state_word;
+ re_dfastate_t *init_state_nl;
+ re_dfastate_t *init_state_begbuf;
+ bin_tree_t *str_tree;
+ bin_tree_storage_t *str_tree_storage;
+ re_bitset_ptr_t sb_char;
+ int str_tree_storage_idx;
+
+ /* number of subexpressions `re_nsub' is in regex_t. */
+ unsigned int state_hash_mask;
+ int init_node;
+ int nbackref; /* The number of backreference in this dfa. */
+
+ /* Bitmap expressing which backreference is used. */
+ bitset_word_t used_bkref_map;
+ bitset_word_t completed_bkref_map;
+
+ unsigned int has_plural_match : 1;
+ /* If this dfa has "multibyte node", which is a backreference or
+ a node which can accept multibyte character or multi character
+ collating element. */
+ unsigned int has_mb_node : 1;
+ unsigned int is_utf8 : 1;
+ unsigned int map_notascii : 1;
+ unsigned int word_ops_used : 1;
+ int mb_cur_max;
+ bitset_t word_char;
+ reg_syntax_t syntax;
+ int *subexp_map;
+#ifdef DEBUG
+ char* re_str;
+#endif
+ __libc_lock_define (, lock) /* Expands to nothing unless _LIBC is defined. */
+};
+/* Small helpers for re_node_set objects; SET and P are pointers. */
+#define re_node_set_init_empty(set) memset (set, '\0', sizeof (re_node_set)) /* Zero every member; an old buffer is not freed. */
+#define re_node_set_remove(set,id) \
+ (re_node_set_remove_at (set, re_node_set_contains (set, id) - 1)) /* A missing ID gives index -1, which re_node_set_remove_at ignores. */
+#define re_node_set_empty(p) ((p)->nelem = 0) /* Forget the elements but keep the buffer. */
+#define re_node_set_free(set) re_free ((set)->elems) /* Free the buffer only; the members are left as they are. */
+
+
+typedef enum
+{
+ SB_CHAR,
+ MB_CHAR,
+ EQUIV_CLASS,
+ COLL_SYM,
+ CHAR_CLASS
+} bracket_elem_type;
+
+typedef struct
+{
+ bracket_elem_type type;
+ union
+ {
+ unsigned char ch;
+ unsigned char *name;
+ wchar_t wch;
+ } opr;
+} bracket_elem_t;
+
+
+/* Inline functions for bitset operation.  */
+
+/* Complement every bit of SET in place.  */
+static inline void
+bitset_not (bitset_t set)
+{
+  bitset_word_t *word = set;
+  bitset_word_t *const end = set + BITSET_WORDS;
+
+  for (; word != end; ++word)
+    *word = ~*word;
+}
+
+/* Add every bit of SRC to DEST (DEST |= SRC).  */
+static inline void
+bitset_merge (bitset_t dest, const bitset_t src)
+{
+  bitset_word_t *out = dest;
+  bitset_word_t *const end = dest + BITSET_WORDS;
+
+  while (out != end)
+    *out++ |= *src++;
+}
+
+/* Keep in DEST only the bits that are also in SRC (DEST &= SRC).  */
+static inline void
+bitset_mask (bitset_t dest, const bitset_t src)
+{
+  bitset_word_t *out = dest;
+  bitset_word_t *const end = dest + BITSET_WORDS;
+
+  while (out != end)
+    *out++ &= *src++;
+}
+
+#ifdef RE_ENABLE_I18N
+/* Inline functions for re_string. */
+/* Number of bytes taken by the character of PSTR that starts at IDX:
+   1 when the locale is single byte, otherwise 1 plus the number of
+   following positions, within the valid data, whose wide character is
+   WEOF.  */
+static inline int
+internal_function __attribute ((pure))
+re_string_char_size_at (const re_string_t *pstr, int idx)
+{
+  int size = 1;
+
+  if (pstr->mb_cur_max == 1)
+    return 1;
+  while (idx + size < pstr->valid_len && pstr->wcs[idx + size] == WEOF)
+    ++size;
+  return size;
+}
+
+/* The character of PSTR at index IDX as a wint_t: taken from the byte
+   buffer when the locale is single byte, from the wide character buffer
+   otherwise.  */
+static inline wint_t
+internal_function __attribute ((pure))
+re_string_wchar_at (const re_string_t *pstr, int idx)
+{
+  return (pstr->mb_cur_max == 1
+          ? (wint_t) pstr->mbs[idx]
+          : (wint_t) pstr->wcs[idx]);
+}
+/* Return the length in bytes of the element of PSTR that starts at IDX.
+ Outside of glibc (_LIBC undefined) the answer is always 1. Inside
+ glibc, when the current locale has collation rules, it is the number
+ of bytes findidx consumes from PSTR->mbs + IDX. */
+# ifndef NOT_IN_libc
+static int
+internal_function __attribute ((pure))
+re_string_elem_size_at (const re_string_t *pstr, int idx)
+{
+# ifdef _LIBC
+ const unsigned char *p, *extra;
+ const int32_t *table, *indirect;
+ int32_t tmp;
+# include <locale/weight.h>
+ uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+
+ if (nrules != 0)
+ {
+ table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); /* NOTE(review): table, extra and indirect are not used below; presumably read by the code from weight.h -- confirm. */
+ extra = (const unsigned char *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
+ indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
+ _NL_COLLATE_INDIRECTMB);
+ p = pstr->mbs + idx;
+ tmp = findidx (&p); /* Only the advance of P matters; TMP itself is not used. */
+ return p - pstr->mbs - idx;
+ }
+ else
+# endif /* _LIBC */
+ return 1;
+}
+# endif
+#endif /* RE_ENABLE_I18N */
+
+#endif /* _REGEX_INTERNAL_H */
diff --git a/gnu_regex/.svn/text-base/regexec.c.svn-base b/gnu_regex/.svn/text-base/regexec.c.svn-base
new file mode 100644
index 0000000..560921d
--- /dev/null
+++ b/gnu_regex/.svn/text-base/regexec.c.svn-base
@@ -0,0 +1,4338 @@
+/* Extended regular expression matching and search library.
+ Copyright (C) 2002, 2003, 2004, 2005, 2007, 2009 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
+ int n) internal_function;
+static void match_ctx_clean (re_match_context_t *mctx) internal_function;
+static void match_ctx_free (re_match_context_t *cache) internal_function;
+static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
+ int str_idx, int from, int to)
+ internal_function;
+static int search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx)
+ internal_function;
+static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node,
+ int str_idx) internal_function;
+static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
+ int node, int str_idx)
+ internal_function;
+static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
+ re_dfastate_t **limited_sts, int last_node,
+ int last_str_idx)
+ internal_function;
+static reg_errcode_t re_search_internal (const regex_t *preg,
+ const char *string, int length,
+ int start, int range, int stop,
+ size_t nmatch, regmatch_t pmatch[],
+ int eflags) internal_function;
+static int re_search_2_stub (struct re_pattern_buffer *bufp,
+ const char *string1, int length1,
+ const char *string2, int length2,
+ int start, int range, struct re_registers *regs,
+ int stop, int ret_len) internal_function;
+static int re_search_stub (struct re_pattern_buffer *bufp,
+ const char *string, int length, int start,
+ int range, int stop, struct re_registers *regs,
+ int ret_len) internal_function;
+static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
+ int nregs, int regs_allocated) internal_function;
+static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
+ internal_function;
+static int check_matching (re_match_context_t *mctx, int fl_longest_match,
+ int *p_match_first) internal_function;
+static int check_halt_state_context (const re_match_context_t *mctx,
+ const re_dfastate_t *state, int idx)
+ internal_function;
+static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
+ regmatch_t *prev_idx_match, int cur_node,
+ int cur_idx, int nmatch) internal_function;
+static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
+ int str_idx, int dest_node, int nregs,
+ regmatch_t *regs,
+ re_node_set *eps_via_nodes)
+ internal_function;
+static reg_errcode_t set_regs (const regex_t *preg,
+ const re_match_context_t *mctx,
+ size_t nmatch, regmatch_t *pmatch,
+ int fl_backtrack) internal_function;
+static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs)
+ internal_function;
+
+#ifdef RE_ENABLE_I18N
+static int sift_states_iter_mb (const re_match_context_t *mctx,
+ re_sift_context_t *sctx,
+ int node_idx, int str_idx, int max_str_idx)
+ internal_function;
+#endif /* RE_ENABLE_I18N */
+static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
+ re_sift_context_t *sctx)
+ internal_function;
+static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
+ re_sift_context_t *sctx, int str_idx,
+ re_node_set *cur_dest)
+ internal_function;
+static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
+ re_sift_context_t *sctx,
+ int str_idx,
+ re_node_set *dest_nodes)
+ internal_function;
+static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
+ re_node_set *dest_nodes,
+ const re_node_set *candidates)
+ internal_function;
+static int check_dst_limits (const re_match_context_t *mctx,
+ re_node_set *limits,
+ int dst_node, int dst_idx, int src_node,
+ int src_idx) internal_function;
+static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
+ int boundaries, int subexp_idx,
+ int from_node, int bkref_idx)
+ internal_function;
+static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
+ int limit, int subexp_idx,
+ int node, int str_idx,
+ int bkref_idx) internal_function;
+static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
+ re_node_set *dest_nodes,
+ const re_node_set *candidates,
+ re_node_set *limits,
+ struct re_backref_cache_entry *bkref_ents,
+ int str_idx) internal_function;
+static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
+ re_sift_context_t *sctx,
+ int str_idx, const re_node_set *candidates)
+ internal_function;
+static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
+ re_dfastate_t **dst,
+ re_dfastate_t **src, int num)
+ internal_function;
+static re_dfastate_t *find_recover_state (reg_errcode_t *err,
+ re_match_context_t *mctx) internal_function;
+static re_dfastate_t *transit_state (reg_errcode_t *err,
+ re_match_context_t *mctx,
+ re_dfastate_t *state) internal_function;
+static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
+ re_match_context_t *mctx,
+ re_dfastate_t *next_state)
+ internal_function;
+static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
+ re_node_set *cur_nodes,
+ int str_idx) internal_function;
+#if 0
+static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
+ re_match_context_t *mctx,
+ re_dfastate_t *pstate)
+ internal_function;
+#endif
+#ifdef RE_ENABLE_I18N
+static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
+ re_dfastate_t *pstate)
+ internal_function;
+#endif /* RE_ENABLE_I18N */
+static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
+ const re_node_set *nodes)
+ internal_function;
+static reg_errcode_t get_subexp (re_match_context_t *mctx,
+ int bkref_node, int bkref_str_idx)
+ internal_function;
+static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
+ const re_sub_match_top_t *sub_top,
+ re_sub_match_last_t *sub_last,
+ int bkref_node, int bkref_str)
+ internal_function;
+static int find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
+ int subexp_idx, int type) internal_function;
+static reg_errcode_t check_arrival (re_match_context_t *mctx,
+ state_array_t *path, int top_node,
+ int top_str, int last_node, int last_str,
+ int type) internal_function;
+static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
+ int str_idx,
+ re_node_set *cur_nodes,
+ re_node_set *next_nodes)
+ internal_function;
+static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
+ re_node_set *cur_nodes,
+ int ex_subexp, int type)
+ internal_function;
+static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
+ re_node_set *dst_nodes,
+ int target, int ex_subexp,
+ int type) internal_function;
+static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
+ re_node_set *cur_nodes, int cur_str,
+ int subexp_num, int type)
+ internal_function;
+static int build_trtable (const re_dfa_t *dfa,
+ re_dfastate_t *state) internal_function;
+#ifdef RE_ENABLE_I18N
+static int check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
+ const re_string_t *input, int idx)
+ internal_function;
+# ifdef _LIBC
+static unsigned int find_collation_sequence_value (const unsigned char *mbs,
+ size_t name_len)
+ internal_function;
+# endif /* _LIBC */
+#endif /* RE_ENABLE_I18N */
+static int group_nodes_into_DFAstates (const re_dfa_t *dfa,
+ const re_dfastate_t *state,
+ re_node_set *states_node,
+ bitset_t *states_ch) internal_function;
+static int check_node_accept (const re_match_context_t *mctx,
+ const re_token_t *node, int idx)
+ internal_function;
+static reg_errcode_t extend_buffers (re_match_context_t *mctx)
+ internal_function;
+
+/* Entry point for POSIX code. */
+
+/* regexec searches for a given pattern, specified by PREG, in the
+ string STRING.
+
+ If NMATCH is zero or REG_NOSUB was set in the cflags argument to
+ `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
+ least NMATCH elements, and we set them to the offsets of the
+ corresponding matched substrings.
+
+ EFLAGS specifies `execution flags' which affect matching: if
+ REG_NOTBOL is set, then ^ does not match at the beginning of the
+ string; if REG_NOTEOL is set, then $ does not match at the end.
+
+ We return 0 if we find a match and REG_NOMATCH if not. */
+
+int
+regexec (preg, string, nmatch, pmatch, eflags)
+ const regex_t *__restrict preg;
+ const char *__restrict string;
+ size_t nmatch;
+ regmatch_t pmatch[];
+ int eflags;
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  reg_errcode_t status;
+  int begin = 0;
+  int end;
+
+  /* Reject any execution flag other than the three we understand.  */
+  if ((eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND)) != 0)
+    return REG_BADPAT;
+
+  /* With REG_STARTEND the caller supplies the bounds in PMATCH[0];
+     otherwise the whole NUL-terminated STRING is searched.  */
+  if ((eflags & REG_STARTEND) == 0)
+    end = strlen (string);
+  else
+    {
+      begin = pmatch[0].rm_so;
+      end = pmatch[0].rm_eo;
+    }
+
+  /* A pattern compiled without sub-match support reports no offsets.  */
+  if (preg->no_sub)
+    {
+      nmatch = 0;
+      pmatch = NULL;
+    }
+
+  __libc_lock_lock (dfa->lock);
+  status = re_search_internal (preg, string, end, begin, end - begin,
+                               end, nmatch, pmatch, eflags);
+  __libc_lock_unlock (dfa->lock);
+
+  /* Any result other than REG_NOERROR is reported as 1.  */
+  return status != REG_NOERROR;
+}
+
+#ifdef _LIBC
+# include <shlib-compat.h>
+versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
+
+# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
+__typeof__ (__regexec) __compat_regexec;
+
+int
+attribute_compat_text_section
+__compat_regexec (const regex_t *__restrict preg,
+                  const char *__restrict string, size_t nmatch,
+                  regmatch_t pmatch[], int eflags)
+{
+  /* The compatibility entry point forwards only REG_NOTBOL and REG_NOTEOL;
+     every other bit of EFLAGS is dropped before calling regexec.  */
+  const int legacy_flags = eflags & (REG_NOTBOL | REG_NOTEOL);
+
+  return regexec (preg, string, nmatch, pmatch, legacy_flags);
+}
+compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
+# endif
+#endif
+
+/* Entry points for GNU code. */
+
+/* re_match, re_search, re_match_2, re_search_2
+
+ The former two functions operate on STRING with length LENGTH,
+ while the latter two operate on the concatenation of STRING1 and STRING2
+ with lengths LENGTH1 and LENGTH2, respectively.
+
+ re_match() matches the compiled pattern in BUFP against the string,
+ starting at index START.
+
+ re_search() first tries matching at index START, then it tries to match
+ starting from index START + 1, and so on. The last start position tried
+ is START + RANGE. (Thus RANGE = 0 forces re_search to operate the same
+ way as re_match().)
+
+ The parameter STOP of re_{match,search}_2 specifies that no match exceeding
+ the first STOP characters of the concatenation of the strings is
+ considered.
+
+ If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
+ and all groups are stored in REGS. (For the "_2" variants, the offsets are
+ computed relative to the concatenation, not relative to the individual
+ strings.)
+
+ On success, re_match* functions return the length of the match, re_search*
+ return the position of the start of the match. Return value -1 means no
+ match was found and -2 indicates an internal error. */
+
+int
+re_match (bufp, string, length, start, regs)
+ struct re_pattern_buffer *bufp;
+ const char *string;
+ int length, start;
+ struct re_registers *regs;
+{
+  /* A zero range confines the attempt to START, and a nonzero RET_LEN
+     makes the stub return the match length instead of its position.  */
+  const int range = 0;
+  const int ret_len = 1;
+
+  return re_search_stub (bufp, string, length, start, range, length, regs,
+                         ret_len);
+}
+#ifdef _LIBC
+weak_alias (__re_match, re_match)
+#endif
+
+int
+re_search (bufp, string, length, start, range, regs)
+ struct re_pattern_buffer *bufp;
+ const char *string;
+ int length, start, range;
+ struct re_registers *regs;
+{
+  /* STOP is the full LENGTH; a zero RET_LEN makes the stub return the
+     position of the match rather than its length.  */
+  const int ret_len = 0;
+
+  return re_search_stub (bufp, string, length, start, range, length, regs,
+                         ret_len);
+}
+#ifdef _LIBC
+weak_alias (__re_search, re_search)
+#endif
+
+int
+re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
+ struct re_pattern_buffer *bufp;
+ const char *string1, *string2;
+ int length1, length2, start, stop;
+ struct re_registers *regs;
+{
+  /* A zero range confines the attempt to START, and a nonzero RET_LEN
+     makes the stub return the match length instead of its position.  */
+  const int range = 0;
+  const int ret_len = 1;
+
+  return re_search_2_stub (bufp, string1, length1, string2, length2,
+                           start, range, regs, stop, ret_len);
+}
+#ifdef _LIBC
+weak_alias (__re_match_2, re_match_2)
+#endif
+
+int
+re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
+ struct re_pattern_buffer *bufp;
+ const char *string1, *string2;
+ int length1, length2, start, range, stop;
+ struct re_registers *regs;
+{
+  /* A zero RET_LEN makes the stub return the position of the match
+     rather than its length.  */
+  const int ret_len = 0;
+
+  return re_search_2_stub (bufp, string1, length1, string2, length2,
+                           start, range, regs, stop, ret_len);
+}
+#ifdef _LIBC
+weak_alias (__re_search_2, re_search_2)
+#endif
+
+/* Common implementation of re_match_2 and re_search_2.
+   Searches the concatenation of STRING1 (LENGTH1 bytes) and STRING2
+   (LENGTH2 bytes) by handing a single buffer to re_search_stub; START,
+   RANGE, STOP, REGS and RET_LEN are passed through unchanged.  When both
+   pieces are non-empty a temporary copy of the concatenation is allocated
+   and released before returning.
+   Returns whatever re_search_stub returns, or -2 if a length or STOP is
+   negative, if the combined length does not fit in an int, or if the
+   temporary buffer cannot be allocated.  */
+static int
+re_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs,
+                  stop, ret_len)
+ struct re_pattern_buffer *bufp;
+ const char *string1, *string2;
+ int length1, length2, start, range, stop, ret_len;
+ struct re_registers *regs;
+{
+  const char *str;
+  char *joined = NULL;
+  unsigned int total;
+  int rval;
+  int len;
+
+  if (BE (length1 < 0 || length2 < 0 || stop < 0, 0))
+    return -2;
+
+  /* Add the lengths only after they are known to be non-negative, and do
+     it in unsigned arithmetic: a signed LENGTH1 + LENGTH2 could overflow,
+     which is undefined behaviour.  */
+  total = (unsigned int) length1 + (unsigned int) length2;
+  if (BE (total > ((unsigned int) -1) / 2, 0))
+    return -2;
+  len = (int) total;
+
+  /* Concatenate the strings, copying only when both are non-empty.  */
+  if (length2 <= 0)
+    str = string1;
+  else if (length1 <= 0)
+    str = string2;
+  else
+    {
+      joined = re_malloc (char, len);
+      if (BE (joined == NULL, 0))
+        return -2;
+      memcpy (joined, string1, length1);
+      memcpy (joined + length1, string2, length2);
+      str = joined;
+    }
+
+  rval = re_search_stub (bufp, str, len, start, range, stop, regs,
+                         ret_len);
+  /* JOINED is NULL unless a temporary buffer was allocated above.  */
+  re_free (joined);
+  return rval;
+}
+
+/* The parameters have the same meaning as those of re_search.
+ Additional parameters:
+ If RET_LEN is nonzero the length of the match is returned (re_match style);
+ otherwise the position of the match is returned. */
+
+/* Common implementation of re_match and re_search (see the comment above
+   for the parameters).
+   Returns the match length (RET_LEN nonzero) or the match position
+   (RET_LEN zero) on success, -1 if START is out of range or no match was
+   found, and -2 on an internal error such as memory exhaustion.  */
+static int
+re_search_stub (bufp, string, length, start, range, stop, regs, ret_len)
+ struct re_pattern_buffer *bufp;
+ const char *string;
+ int length, start, range, stop, ret_len;
+ struct re_registers *regs;
+{
+  reg_errcode_t result;
+  regmatch_t *pmatch;
+  int nregs, rval;
+  int eflags = 0;
+  re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
+
+  /* Check for out-of-range.  */
+  if (BE (start < 0 || start > length, 0))
+    return -1;
+  /* Clamp RANGE so that START + RANGE stays within [0, LENGTH].  Since
+     0 <= START <= LENGTH here, comparing RANGE against LENGTH - START and
+     -START cannot overflow, unlike computing START + RANGE directly.  */
+  if (BE (range > length - start, 0))
+    range = length - start;
+  else if (BE (range < -start, 0))
+    range = -start;
+
+  __libc_lock_lock (dfa->lock);
+
+  eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
+  eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
+
+  /* Compile fastmap if we haven't yet.  */
+  if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate)
+    re_compile_fastmap (bufp);
+
+  if (BE (bufp->no_sub, 0))
+    regs = NULL;
+
+  /* We need at least 1 register.  */
+  if (regs == NULL)
+    nregs = 1;
+  else if (BE (bufp->regs_allocated == REGS_FIXED &&
+               regs->num_regs < bufp->re_nsub + 1, 0))
+    {
+      nregs = regs->num_regs;
+      if (BE (nregs < 1, 0))
+        {
+          /* Nothing can be copied to regs.  */
+          regs = NULL;
+          nregs = 1;
+        }
+    }
+  else
+    nregs = bufp->re_nsub + 1;
+  pmatch = re_malloc (regmatch_t, nregs);
+  if (BE (pmatch == NULL, 0))
+    {
+      rval = -2;
+      goto out;
+    }
+
+  result = re_search_internal (bufp, string, length, start, range, stop,
+                               nregs, pmatch, eflags);
+
+  rval = 0;
+
+  /* I hope we needn't fill the regs with -1's when no match was found.
+     Only REG_NOMATCH means "no match" (-1); every other failure is an
+     internal error and is reported as -2.  */
+  if (result != REG_NOERROR)
+    rval = (result == REG_NOMATCH) ? -1 : -2;
+  else if (regs != NULL)
+    {
+      /* If caller wants register contents data back, copy them.  */
+      bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
+                                           bufp->regs_allocated);
+      if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
+        rval = -2;
+    }
+
+  if (BE (rval == 0, 1))
+    {
+      if (ret_len)
+        {
+          assert (pmatch[0].rm_so == start);
+          rval = pmatch[0].rm_eo - start;
+        }
+      else
+        rval = pmatch[0].rm_so;
+    }
+  re_free (pmatch);
+ out:
+  __libc_lock_unlock (dfa->lock);
+  return rval;
+}
+
+/* Copy the NREGS match offsets in PMATCH into REGS, allocating or growing
+   REGS->start and REGS->end as REGS_ALLOCATED permits, and fill any
+   remaining slots of REGS with -1.
+   Returns the new allocation state: REGS_REALLOCATE or REGS_FIXED on
+   success, REGS_UNALLOCATED if memory could not be obtained.  On failure
+   nothing is leaked and REGS never holds a pointer to freed memory.  */
+static unsigned
+re_copy_regs (regs, pmatch, nregs, regs_allocated)
+ struct re_registers *regs;
+ regmatch_t *pmatch;
+ int nregs, regs_allocated;
+{
+  int rval = REGS_REALLOCATE;
+  int i;
+  int need_regs = nregs + 1;
+  /* We need one extra element beyond `num_regs' for the `-1' marker GNU code
+     uses.  */
+
+  /* Have the register data arrays been allocated?  */
+  if (regs_allocated == REGS_UNALLOCATED)
+    { /* No.  So allocate them with malloc.  */
+      regs->start = re_malloc (regoff_t, need_regs);
+      if (BE (regs->start == NULL, 0))
+        return REGS_UNALLOCATED;
+      regs->end = re_malloc (regoff_t, need_regs);
+      if (BE (regs->end == NULL, 0))
+        {
+          /* Do not leak the first array when the second cannot be had.  */
+          re_free (regs->start);
+          regs->start = NULL;
+          return REGS_UNALLOCATED;
+        }
+      regs->num_regs = need_regs;
+    }
+  else if (regs_allocated == REGS_REALLOCATE)
+    { /* Yes.  If we need more elements than were already
+         allocated, reallocate them.  If we need fewer, just
+         leave it alone.  */
+      if (BE (need_regs > regs->num_regs, 0))
+        {
+          regoff_t *new_start;
+          regoff_t *new_end;
+
+          new_start = re_realloc (regs->start, regoff_t, need_regs);
+          if (BE (new_start == NULL, 0))
+            return REGS_UNALLOCATED;
+          /* Record the grown block at once: realloc may have moved it, so
+             the old REGS->start must not survive a later failure.  */
+          regs->start = new_start;
+
+          new_end = re_realloc (regs->end, regoff_t, need_regs);
+          if (BE (new_end == NULL, 0))
+            return REGS_UNALLOCATED;
+          regs->end = new_end;
+          regs->num_regs = need_regs;
+        }
+    }
+  else
+    {
+      assert (regs_allocated == REGS_FIXED);
+      /* This function may not be called with REGS_FIXED and nregs too big.  */
+      assert (regs->num_regs >= nregs);
+      rval = REGS_FIXED;
+    }
+
+  /* Copy the regs.  */
+  for (i = 0; i < nregs; ++i)
+    {
+      regs->start[i] = pmatch[i].rm_so;
+      regs->end[i] = pmatch[i].rm_eo;
+    }
+  /* Mark every slot past the copied ones as unused.  */
+  for ( ; i < regs->num_regs; ++i)
+    regs->start[i] = regs->end[i] = -1;
+
+  return rval;
+}
+
+/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
+ ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
+ this memory for recording register information. STARTS and ENDS
+ must be allocated using the malloc library routine, and must each
+ be at least NUM_REGS * sizeof (regoff_t) bytes long.
+
+ If NUM_REGS == 0, then subsequent matches should allocate their own
+ register data.
+
+ Unless this function is called, the first search or match using
+ PATTERN_BUFFER will allocate its own register data, without
+ freeing the old data. */
+
+void
+re_set_registers (bufp, regs, num_regs, starts, ends)
+ struct re_pattern_buffer *bufp;
+ struct re_registers *regs;
+ unsigned num_regs;
+ regoff_t *starts, *ends;
+{
+  if (num_regs == 0)
+    {
+      /* No caller-supplied storage: reset REGS and mark the buffer as
+         having no register arrays.  */
+      bufp->regs_allocated = REGS_UNALLOCATED;
+      regs->num_regs = 0;
+      regs->end = (regoff_t *) 0;
+      regs->start = (regoff_t *) 0;
+      return;
+    }
+
+  /* Install the caller's arrays and mark them as reallocatable.  */
+  bufp->regs_allocated = REGS_REALLOCATE;
+  regs->num_regs = num_regs;
+  regs->start = starts;
+  regs->end = ends;
+}
+#ifdef _LIBC
+weak_alias (__re_set_registers, re_set_registers)
+#endif
+
+/* Entry points compatible with 4.2 BSD regex library. We don't define
+ them unless specifically requested. */
+
+#if defined _REGEX_RE_COMP || defined _LIBC
+int
+# ifdef _LIBC
+weak_function
+# endif
+re_exec (s)
+ const char *s;
+{
+  /* Match S against the pattern held in re_comp_buf, requesting no
+     sub-match offsets; return 1 when regexec reports 0, else 0.  */
+  const int status = regexec (&re_comp_buf, s, 0, NULL, 0);
+
+  return status == 0;
+}
+#endif /* _REGEX_RE_COMP */
+
+/* Internal entry point. */
+
+/* Searches for a compiled pattern PREG in the string STRING, whose
+ length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same
+ meanings as in regexec. START and RANGE have the same meanings
+ as in re_search. STOP is stored as the `stop' and `raw_stop' limits
+ of the input string.
+ Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
+ otherwise return the error code.
+ On success with NMATCH > 0, PMATCH[0] receives the bounds of the whole
+ match, and every PMATCH entry whose index exceeds PREG->re_nsub is
+ set to -1.
+ Note: We assume front end functions already check ranges.
+ (START + RANGE >= 0 && START + RANGE <= LENGTH) */
+
+static reg_errcode_t
+re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
+ eflags)
+ const regex_t *preg;
+ const char *string;
+ int length, start, range, stop, eflags;
+ size_t nmatch;
+ regmatch_t pmatch[];
+{
+ reg_errcode_t err;
+ const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
+ int left_lim, right_lim, incr;
+ int fl_longest_match, match_first, match_kind, match_last = -1;
+ int extra_nmatch;
+ int sb, ch;
+#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
+ re_match_context_t mctx = { .dfa = dfa };
+#else
+ re_match_context_t mctx;
+#endif
+ char *fastmap = (preg->fastmap != NULL && preg->fastmap_accurate
+ && range && !preg->can_be_null) ? preg->fastmap : NULL;
+ RE_TRANSLATE_TYPE t = preg->translate;
+
+#if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
+ memset (&mctx, '\0', sizeof (re_match_context_t));
+ mctx.dfa = dfa;
+#endif
+
+ extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
+ nmatch -= extra_nmatch;
+
+ /* Check whether the DFA has not been compiled. */
+ if (BE (preg->used == 0 || dfa->init_state == NULL
+ || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
+ || dfa->init_state_begbuf == NULL, 0))
+ return REG_NOMATCH;
+
+#ifdef DEBUG
+ /* We assume front-end functions already check them. */
+ assert (start + range >= 0 && start + range <= length);
+#endif
+
+ /* If initial states with non-begbuf contexts have no elements,
+ the regex must be anchored. init_state_nl is only taken into
+ account when preg->newline_anchor is set. */
+ if (dfa->init_state->nodes.nelem == 0
+ && dfa->init_state_word->nodes.nelem == 0
+ && (dfa->init_state_nl->nodes.nelem == 0
+ || !preg->newline_anchor))
+ {
+ if (start != 0 && start + range != 0)
+ return REG_NOMATCH;
+ start = range = 0;
+ }
+
+ /* We must check the longest matching, if nmatch > 0. */
+ fl_longest_match = (nmatch != 0 || dfa->nbackref);
+
+ err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
+ preg->translate, preg->syntax & RE_ICASE, dfa);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+ mctx.input.stop = stop;
+ mctx.input.raw_stop = stop;
+ mctx.input.newline_anchor = preg->newline_anchor;
+
+ err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+
+ /* We will log all the DFA states through which the dfa pass,
+ if nmatch > 1, or this dfa has "multibyte node", which is a
+ back-reference or a node which can accept multibyte character or
+ multi character collating element. */
+ if (nmatch > 1 || dfa->has_mb_node)
+ {
+ mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
+ if (BE (mctx.state_log == NULL, 0))
+ {
+ err = REG_ESPACE;
+ goto free_return;
+ }
+ }
+ else
+ mctx.state_log = NULL;
+
+ match_first = start;
+ mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
+ : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
+
+ /* Check incrementally whether or not the input string matches. */
+ incr = (range < 0) ? -1 : 1;
+ left_lim = (range < 0) ? start + range : start;
+ right_lim = (range < 0) ? start : start + range;
+ sb = dfa->mb_cur_max == 1;
+ match_kind =
+ (fastmap
+ ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
+ | (range >= 0 ? 2 : 0)
+ | (t != NULL ? 1 : 0))
+ : 8); /* 8: no fastmap. Otherwise a bit mask: 4 = single-byte locale, or neither RE_ICASE nor a translate table; 2 = forward search; 1 = translate table present. */
+
+ for (;; match_first += incr)
+ {
+ err = REG_NOMATCH;
+ if (match_first < left_lim || right_lim < match_first)
+ goto free_return;
+
+ /* Advance as rapidly as possible through the string, until we
+ find a plausible place to start matching. This may be done
+ with varying efficiency, so there are various possibilities:
+ only the most common of them are specialized, in order to
+ save on code size. We use a switch statement for speed. */
+ switch (match_kind)
+ {
+ case 8:
+ /* No fastmap. */
+ break;
+
+ case 7:
+ /* Fastmap with single-byte translation, match forward. */
+ while (BE (match_first < right_lim, 1)
+ && !fastmap[t[(unsigned char) string[match_first]]])
+ ++match_first;
+ goto forward_match_found_start_or_reached_end;
+
+ case 6:
+ /* Fastmap without translation, match forward. */
+ while (BE (match_first < right_lim, 1)
+ && !fastmap[(unsigned char) string[match_first]])
+ ++match_first;
+
+ forward_match_found_start_or_reached_end:
+ if (BE (match_first == right_lim, 0))
+ {
+ ch = match_first >= length
+ ? 0 : (unsigned char) string[match_first];
+ if (!fastmap[t ? t[ch] : ch])
+ goto free_return;
+ }
+ break;
+
+ case 4:
+ case 5:
+ /* Fastmap without multi-byte translation, match backwards. */
+ while (match_first >= left_lim)
+ {
+ ch = match_first >= length
+ ? 0 : (unsigned char) string[match_first];
+ if (fastmap[t ? t[ch] : ch])
+ break;
+ --match_first;
+ }
+ if (match_first < left_lim)
+ goto free_return;
+ break;
+
+ default:
+ /* In this case, we can't determine easily the current byte,
+ since it might be a component byte of a multibyte
+ character. Then we use the constructed buffer instead. */
+ for (;;)
+ {
+ /* If MATCH_FIRST is out of the valid range, reconstruct the
+ buffers. */
+ unsigned int offset = match_first - mctx.input.raw_mbs_idx;
+ if (BE (offset >= (unsigned int) mctx.input.valid_raw_len, 0))
+ {
+ err = re_string_reconstruct (&mctx.input, match_first,
+ eflags);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+
+ offset = match_first - mctx.input.raw_mbs_idx;
+ }
+ /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
+ Note that MATCH_FIRST must not be smaller than 0. */
+ ch = (match_first >= length
+ ? 0 : re_string_byte_at (&mctx.input, offset));
+ if (fastmap[ch])
+ break;
+ match_first += incr;
+ if (match_first < left_lim || match_first > right_lim)
+ {
+ err = REG_NOMATCH;
+ goto free_return;
+ }
+ }
+ break;
+ }
+
+ /* Reconstruct the buffers so that the matcher can assume that
+ the matching starts from the beginning of the buffer. */
+ err = re_string_reconstruct (&mctx.input, match_first, eflags);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+
+#ifdef RE_ENABLE_I18N
+ /* Don't consider this char as a possible match start if it is part,
+ yet isn't the head, of a multibyte character. */
+ if (!sb && !re_string_first_byte (&mctx.input, 0))
+ continue;
+#endif
+
+ /* It seems to be appropriate one, then use the matcher. */
+ /* We assume that the matching starts from 0. */
+ mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
+ match_last = check_matching (&mctx, fl_longest_match,
+ range >= 0 ? &match_first : NULL);
+ if (match_last != -1)
+ {
+ if (BE (match_last == -2, 0))
+ {
+ err = REG_ESPACE;
+ goto free_return;
+ }
+ else
+ {
+ mctx.match_last = match_last;
+ if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
+ {
+ re_dfastate_t *pstate = mctx.state_log[match_last];
+ mctx.last_node = check_halt_state_context (&mctx, pstate,
+ match_last);
+ }
+ if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
+ || dfa->nbackref)
+ {
+ err = prune_impossible_nodes (&mctx);
+ if (err == REG_NOERROR)
+ break;
+ if (BE (err != REG_NOMATCH, 0))
+ goto free_return;
+ match_last = -1;
+ }
+ else
+ break; /* We found a match. */
+ }
+ }
+
+ match_ctx_clean (&mctx);
+ }
+
+#ifdef DEBUG
+ assert (match_last != -1);
+ assert (err == REG_NOERROR);
+#endif
+
+ /* Set pmatch[] if we need. */
+ if (nmatch > 0)
+ {
+ int reg_idx;
+
+ /* Initialize registers. */
+ for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
+ pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
+
+ /* Set the points where matching start/end. */
+ pmatch[0].rm_so = 0;
+ pmatch[0].rm_eo = mctx.match_last;
+
+ if (!preg->no_sub && nmatch > 1)
+ {
+ err = set_regs (preg, &mctx, nmatch, pmatch,
+ dfa->has_plural_match && dfa->nbackref > 0);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+ }
+
+ /* At last, add the offset to each register, since we slid
+ the buffers so that we could assume that the matching starts
+ from 0. */
+ for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
+ if (pmatch[reg_idx].rm_so != -1)
+ {
+#ifdef RE_ENABLE_I18N
+ if (BE (mctx.input.offsets_needed != 0, 0))
+ {
+ pmatch[reg_idx].rm_so =
+ (pmatch[reg_idx].rm_so == mctx.input.valid_len
+ ? mctx.input.valid_raw_len
+ : mctx.input.offsets[pmatch[reg_idx].rm_so]);
+ pmatch[reg_idx].rm_eo =
+ (pmatch[reg_idx].rm_eo == mctx.input.valid_len
+ ? mctx.input.valid_raw_len
+ : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
+ }
+#else
+ assert (mctx.input.offsets_needed == 0);
+#endif
+ pmatch[reg_idx].rm_so += match_first;
+ pmatch[reg_idx].rm_eo += match_first;
+ }
+ for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
+ {
+ pmatch[nmatch + reg_idx].rm_so = -1;
+ pmatch[nmatch + reg_idx].rm_eo = -1;
+ }
+
+ if (dfa->subexp_map)
+ for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
+ if (dfa->subexp_map[reg_idx] != reg_idx)
+ {
+ pmatch[reg_idx + 1].rm_so
+ = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
+ pmatch[reg_idx + 1].rm_eo
+ = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
+ }
+ }
+
+ free_return:
+ re_free (mctx.state_log);
+ if (dfa->nbackref)
+ match_ctx_free (&mctx);
+ re_string_destruct (&mctx.input);
+ return err;
+}
+
+static reg_errcode_t
+prune_impossible_nodes (mctx)
+ re_match_context_t *mctx;
+{ /* Re-check the match recorded in MCTX (match_last / last_node) by running
+ sift_states_backward over a freshly allocated state array.
+ On success the array replaces MCTX->state_log, MCTX->last_node and
+ MCTX->match_last are updated, and REG_NOERROR is returned.
+ Returns REG_NOMATCH when entry 0 of the sifted array(s) stays NULL and
+ no earlier halting position remains, REG_ESPACE when an allocation
+ fails, or the error code reported by a helper. */
+ const re_dfa_t *const dfa = mctx->dfa;
+ int halt_node, match_last;
+ reg_errcode_t ret;
+ re_dfastate_t **sifted_states;
+ re_dfastate_t **lim_states = NULL;
+ re_sift_context_t sctx;
+#ifdef DEBUG
+ assert (mctx->state_log != NULL);
+#endif
+ match_last = mctx->match_last;
+ halt_node = mctx->last_node;
+ sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
+ if (BE (sifted_states == NULL, 0))
+ {
+ ret = REG_ESPACE;
+ goto free_return;
+ }
+ if (dfa->nbackref)
+ {
+ lim_states = re_malloc (re_dfastate_t *, match_last + 1);
+ if (BE (lim_states == NULL, 0))
+ {
+ ret = REG_ESPACE;
+ goto free_return;
+ }
+ while (1) /* Retry with ever shorter candidate match ends. */
+ {
+ memset (lim_states, '\0',
+ sizeof (re_dfastate_t *) * (match_last + 1));
+ sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
+ match_last);
+ ret = sift_states_backward (mctx, &sctx);
+ re_node_set_free (&sctx.limits);
+ if (BE (ret != REG_NOERROR, 0))
+ goto free_return;
+ if (sifted_states[0] != NULL || lim_states[0] != NULL)
+ break;
+ do /* Step back to the previous logged state that has `halt' set. */
+ {
+ --match_last;
+ if (match_last < 0)
+ {
+ ret = REG_NOMATCH;
+ goto free_return;
+ }
+ } while (mctx->state_log[match_last] == NULL
+ || !mctx->state_log[match_last]->halt);
+ halt_node = check_halt_state_context (mctx,
+ mctx->state_log[match_last],
+ match_last);
+ }
+ ret = merge_state_array (dfa, sifted_states, lim_states,
+ match_last + 1);
+ re_free (lim_states);
+ lim_states = NULL; /* Already released; keeps the cleanup below from freeing it twice. */
+ if (BE (ret != REG_NOERROR, 0))
+ goto free_return;
+ }
+ else
+ {
+ sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
+ ret = sift_states_backward (mctx, &sctx);
+ re_node_set_free (&sctx.limits);
+ if (BE (ret != REG_NOERROR, 0))
+ goto free_return;
+ if (sifted_states[0] == NULL)
+ {
+ ret = REG_NOMATCH;
+ goto free_return;
+ }
+ }
+ re_free (mctx->state_log);
+ mctx->state_log = sifted_states;
+ sifted_states = NULL; /* Now referenced by MCTX; must not be freed below. */
+ mctx->last_node = halt_node;
+ mctx->match_last = match_last;
+ ret = REG_NOERROR;
+ free_return:
+ re_free (sifted_states);
+ re_free (lim_states);
+ return ret;
+}
+
+/* Acquire an initial state and return it.
+ We must select appropriate initial state depending on the context,
+ since initial states may have constraints like "\<", "^", etc.. */
+
+static inline re_dfastate_t *
+__attribute ((always_inline)) internal_function
+acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
+                            int idx)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  unsigned int ctx;
+
+  /* Without constraints in the initial state the context is irrelevant.  */
+  if (!dfa->init_state->has_constraint)
+    return dfa->init_state;
+
+  /* Classify the context of the position just before IDX.  */
+  ctx = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
+
+  if (IS_WORD_CONTEXT (ctx))
+    return dfa->init_state_word;
+  if (IS_ORDINARY_CONTEXT (ctx))
+    return dfa->init_state;
+  if (IS_NEWLINE_CONTEXT (ctx))
+    return (IS_BEGBUF_CONTEXT (ctx)
+            ? dfa->init_state_begbuf : dfa->init_state_nl);
+  if (IS_BEGBUF_CONTEXT (ctx))
+    /* Buffer start without newline context is comparatively rare, so
+       that state is computed on demand; ERR is passed on to the callee.  */
+    return re_acquire_state_context (err, dfa,
+                                     dfa->init_state->entrance_nodes,
+                                     ctx);
+
+  /* Must not happen?  */
+  return dfa->init_state;
+}
+
+/* Check whether the regular expression matches the input string held in
+   MCTX, and return the index where the match ends.  Return -1 if there
+   is no match, or -2 in case of an error (every error path reports -2;
+   a reg_errcode_t value is never returned, because the caller would take
+   a positive value for a match end index).
+   FL_LONGEST_MATCH means we want the POSIX longest matching.
+   If P_MATCH_FIRST is not NULL, and the match fails, it is advanced to the
+   next place where we may want to try matching.
+   Note that the matcher assumes that the matching starts from the current
+   index of the buffer.  */
+
+static int
+internal_function
+check_matching (re_match_context_t *mctx, int fl_longest_match,
+                int *p_match_first)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  reg_errcode_t err;
+  int match = 0;
+  int match_last = -1;
+  int cur_str_idx = re_string_cur_idx (&mctx->input);
+  re_dfastate_t *cur_state;
+  /* While the automaton stays in its initial state, the start position
+     of the next attempt can be moved forward.  */
+  int at_init_state = p_match_first != NULL;
+  int next_start_idx = cur_str_idx;
+
+  err = REG_NOERROR;
+  cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
+  /* An initial state must not be NULL (invalid).  */
+  if (BE (cur_state == NULL, 0))
+    {
+      assert (err == REG_ESPACE);
+      return -2;
+    }
+
+  if (mctx->state_log != NULL)
+    {
+      mctx->state_log[cur_str_idx] = cur_state;
+
+      /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
+         later.  E.g. Processing back references.  */
+      if (BE (dfa->nbackref, 0))
+        {
+          at_init_state = 0;
+          err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
+          if (BE (err != REG_NOERROR, 0))
+            return -2;
+
+          if (cur_state->has_backref)
+            {
+              err = transit_state_bkref (mctx, &cur_state->nodes);
+              if (BE (err != REG_NOERROR, 0))
+                return -2;
+            }
+        }
+    }
+
+  /* If the RE accepts NULL string.  */
+  if (BE (cur_state->halt, 0))
+    {
+      if (!cur_state->has_constraint
+          || check_halt_state_context (mctx, cur_state, cur_str_idx))
+        {
+          if (!fl_longest_match)
+            return cur_str_idx;
+          else
+            {
+              match_last = cur_str_idx;
+              match = 1;
+            }
+        }
+    }
+
+  while (!re_string_eoi (&mctx->input))
+    {
+      re_dfastate_t *old_state = cur_state;
+      int next_char_idx = re_string_cur_idx (&mctx->input) + 1;
+
+      if (BE (next_char_idx >= mctx->input.bufs_len, 0)
+          || (BE (next_char_idx >= mctx->input.valid_len, 0)
+              && mctx->input.valid_len < mctx->input.len))
+        {
+          err = extend_buffers (mctx);
+          if (BE (err != REG_NOERROR, 0))
+            {
+              assert (err == REG_ESPACE);
+              return -2;
+            }
+        }
+
+      cur_state = transit_state (&err, mctx, cur_state);
+      if (mctx->state_log != NULL)
+        cur_state = merge_state_with_log (&err, mctx, cur_state);
+
+      if (cur_state == NULL)
+        {
+          /* Reached the invalid state or an error.  Try to recover a valid
+             state using the state log, if available and if we have not
+             already found a valid (even if not the longest) match.  */
+          if (BE (err != REG_NOERROR, 0))
+            return -2;
+
+          if (mctx->state_log == NULL
+              || (match && !fl_longest_match))
+            break;
+
+          cur_state = find_recover_state (&err, mctx);
+          if (cur_state == NULL)
+            {
+              /* ERR was REG_NOERROR before the call, so a different value
+                 now means the recovery itself failed; do not mistake that
+                 for "no further state".  */
+              if (BE (err != REG_NOERROR, 0))
+                return -2;
+              break;
+            }
+        }
+
+      if (BE (at_init_state, 0))
+        {
+          if (old_state == cur_state)
+            next_start_idx = next_char_idx;
+          else
+            at_init_state = 0;
+        }
+
+      if (cur_state->halt)
+        {
+          /* Reached a halt state.
+             Check the halt state can satisfy the current context.  */
+          if (!cur_state->has_constraint
+              || check_halt_state_context (mctx, cur_state,
+                                           re_string_cur_idx (&mctx->input)))
+            {
+              /* We found an appropriate halt state.  */
+              match_last = re_string_cur_idx (&mctx->input);
+              match = 1;
+
+              /* We found a match, do not modify match_first below.  */
+              p_match_first = NULL;
+              if (!fl_longest_match)
+                break;
+            }
+        }
+    }
+
+  if (p_match_first)
+    *p_match_first += next_start_idx;
+
+  return match_last;
+}
+
+/* Tell whether NODE is an END_OF_RE node whose constraint, if it has one,
+   is satisfied by CONTEXT.  Return 1 if so and 0 otherwise.  */
+
+static int
+internal_function
+check_halt_node_context (const re_dfa_t *dfa, int node, unsigned int context)
+{
+  unsigned int constraint;
+
+  /* Only an END_OF_RE node can terminate a match.  */
+  if (dfa->nodes[node].type != END_OF_RE)
+    return 0;
+
+  constraint = dfa->nodes[node].constraint;
+  if (constraint && (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context)))
+    return 0;
+  return 1;
+}
+
+/* Check whether the halt state STATE matches the context of the input at
+   index IDX.
+   Return the first node of STATE that is a halt node accepted in that
+   context, or 0 if there is none.  */
+
+static int
+internal_function
+check_halt_state_context (const re_match_context_t *mctx,
+                          const re_dfastate_t *state, int idx)
+{
+  const re_node_set *halt_candidates = &state->nodes;
+  unsigned int context;
+  int pos = 0;
+#ifdef DEBUG
+  assert (state->halt);
+#endif
+  context = re_string_context_at (&mctx->input, idx, mctx->eflags);
+  while (pos < halt_candidates->nelem)
+    {
+      int node = halt_candidates->elems[pos++];
+      if (check_halt_node_context (mctx->dfa, node, context))
+        return node;
+    }
+  return 0;
+}
+
+/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
+   corresponding to the DFA).
+   *PIDX is the current string index; it is advanced when NODE consumes
+   input.  REGS holds NREGS registers and is consulted for back
+   references.  When FS is not NULL, alternative epsilon transitions are
+   pushed on it for backtracking.
+   Return the destination node and update EPS_VIA_NODES.  Return -1 if
+   there is no valid destination, and -2 if memory is exhausted.  */
+
+static int
+internal_function
+proceed_next_node (const re_match_context_t *mctx, int nregs, regmatch_t *regs,
+                   int *pidx, int node, re_node_set *eps_via_nodes,
+                   struct re_fail_stack_t *fs)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  int i, err;
+  if (IS_EPSILON_NODE (dfa->nodes[node].type))
+    {
+      re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
+      re_node_set *edests = &dfa->edests[node];
+      int dest_node;
+      err = re_node_set_insert (eps_via_nodes, node);
+      if (BE (err < 0, 0))
+        return -2;
+      /* Pick up a valid destination, or return -1 if none is found.  */
+      for (dest_node = -1, i = 0; i < edests->nelem; ++i)
+        {
+          int candidate = edests->elems[i];
+          if (!re_node_set_contains (cur_nodes, candidate))
+            continue;
+          if (dest_node == -1)
+            dest_node = candidate;
+
+          else
+            {
+              /* In order to avoid infinite loop like "(a*)*", return the second
+                 epsilon-transition if the first was already considered.  */
+              if (re_node_set_contains (eps_via_nodes, dest_node))
+                return candidate;
+
+              /* Otherwise, push the second epsilon-transition on the fail stack.  */
+              else if (fs != NULL
+                       && push_fail_stack (fs, *pidx, candidate, nregs, regs,
+                                           eps_via_nodes))
+                return -2;
+
+              /* We know we are going to exit.  */
+              break;
+            }
+        }
+      return dest_node;
+    }
+  else
+    {
+      int naccepted = 0;
+      re_token_type_t type = dfa->nodes[node].type;
+
+#ifdef RE_ENABLE_I18N
+      if (dfa->nodes[node].accept_mb)
+        naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
+      else
+#endif /* RE_ENABLE_I18N */
+      if (type == OP_BACK_REF)
+        {
+          int subexp_idx = dfa->nodes[node].opr.idx + 1;
+          /* The caller may have supplied fewer registers than the pattern
+             has subexpressions; never index REGS out of bounds.  */
+          if (subexp_idx < nregs)
+            naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
+          if (fs != NULL)
+            {
+              if (subexp_idx >= nregs
+                  || regs[subexp_idx].rm_so == -1
+                  || regs[subexp_idx].rm_eo == -1)
+                return -1;
+              else if (naccepted)
+                {
+                  char *buf = (char *) re_string_get_buffer (&mctx->input);
+                  /* Do not compare past the valid part of the input.  */
+                  if (mctx->input.valid_len - *pidx < naccepted
+                      || memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
+                                 naccepted) != 0)
+                    return -1;
+                }
+            }
+
+          if (naccepted == 0)
+            {
+              /* An empty back reference behaves like an epsilon
+                 transition.  */
+              int dest_node;
+              err = re_node_set_insert (eps_via_nodes, node);
+              if (BE (err < 0, 0))
+                return -2;
+              dest_node = dfa->edests[node].elems[0];
+              if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
+                                        dest_node))
+                return dest_node;
+            }
+        }
+
+      if (naccepted != 0
+          || check_node_accept (mctx, dfa->nodes + node, *pidx))
+        {
+          int dest_node = dfa->nexts[node];
+          *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
+          if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
+                     || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
+                                               dest_node)))
+            return -1;
+          re_node_set_empty (eps_via_nodes);
+          return dest_node;
+        }
+    }
+  return -1;
+}
+
+/* Push a backtracking point onto the fail stack FS: string index STR_IDX,
+   destination node DEST_NODE, a copy of the NREGS registers in REGS and a
+   copy of EPS_VIA_NODES.
+   Return REG_NOERROR on success.  On failure an error code is returned
+   and FS->num is left unchanged, so the stack never counts an entry that
+   was not fully initialized (free_fail_stack_return releases every
+   counted entry).  */
+
+static reg_errcode_t
+internal_function
+push_fail_stack (struct re_fail_stack_t *fs, int str_idx, int dest_node,
+                 int nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
+{
+  reg_errcode_t err;
+  int num = fs->num;
+  /* Grow the array as soon as the new entry would fill it.  */
+  if (num + 1 == fs->alloc)
+    {
+      struct re_fail_stack_ent_t *new_array;
+      new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
+                                       * fs->alloc * 2));
+      if (new_array == NULL)
+        return REG_ESPACE;
+      fs->alloc *= 2;
+      fs->stack = new_array;
+    }
+  fs->stack[num].idx = str_idx;
+  fs->stack[num].node = dest_node;
+  fs->stack[num].regs = re_malloc (regmatch_t, nregs);
+  if (fs->stack[num].regs == NULL)
+    return REG_ESPACE;
+  memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
+  err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
+  if (BE (err != REG_NOERROR, 0))
+    {
+      /* The entry is not committed, so release its register copy here.  */
+      re_free (fs->stack[num].regs);
+      return err;
+    }
+  /* Commit the entry only now that it is complete.  */
+  fs->num = num + 1;
+  return REG_NOERROR;
+}
+
+/* Pop the most recent backtracking point from FS.
+   The saved string index is stored in *PIDX, the saved registers are
+   copied into REGS (NREGS entries), and the saved node set replaces the
+   contents of EPS_VIA_NODES, whose previous elements are freed.
+   Return the saved destination node.  */
+
+static int
+internal_function
+pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs,
+                regmatch_t *regs, re_node_set *eps_via_nodes)
+{
+  struct re_fail_stack_ent_t *top;
+
+  fs->num -= 1;
+  assert (fs->num >= 0);
+  top = fs->stack + fs->num;
+
+  *pidx = top->idx;
+  memcpy (regs, top->regs, sizeof (regmatch_t) * nregs);
+  re_node_set_free (eps_via_nodes);
+  re_free (top->regs);
+  /* Ownership of the saved node set moves to the caller.  */
+  *eps_via_nodes = top->eps_via_nodes;
+  return top->node;
+}
+
+/* Set the positions where the subexpressions start and end to the
+   registers PMATCH (NMATCH entries).
+   Note: We assume that pmatch[0] is already set, and
+   pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch.
+   If FL_BACKTRACK is nonzero a fail stack is used so that alternative
+   epsilon transitions can be retried.
+   Return REG_NOERROR on success, REG_NOMATCH if no path through the
+   state log reaches the last node (including when the fail stack runs
+   empty), and REG_ESPACE if memory is exhausted.  */
+
+static reg_errcode_t
+internal_function
+set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
+          regmatch_t *pmatch, int fl_backtrack)
+{
+  const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
+  int idx, cur_node;
+  reg_errcode_t ret = REG_NOERROR;
+  re_node_set eps_via_nodes;
+  struct re_fail_stack_t *fs;
+  struct re_fail_stack_t fs_body = { 0, 2, NULL };
+  regmatch_t *prev_idx_match;
+  int prev_idx_match_malloced = 0;
+
+#ifdef DEBUG
+  assert (nmatch > 1);
+  assert (mctx->state_log != NULL);
+#endif
+  if (fl_backtrack)
+    {
+      fs = &fs_body;
+      fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
+      if (fs->stack == NULL)
+        return REG_ESPACE;
+    }
+  else
+    fs = NULL;
+
+  cur_node = dfa->init_node;
+  re_node_set_init_empty (&eps_via_nodes);
+
+  if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
+    prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
+  else
+    {
+      prev_idx_match = re_malloc (regmatch_t, nmatch);
+      if (prev_idx_match == NULL)
+        {
+          ret = REG_ESPACE;
+          goto done;
+        }
+      prev_idx_match_malloced = 1;
+    }
+  memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
+
+  for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
+    {
+      update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
+
+      if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
+        {
+          size_t reg_idx;
+
+          /* Without backtracking the first arrival is final.  */
+          if (fs == NULL)
+            goto done;
+
+          /* With backtracking, accept only if no subexpression was
+             opened without being closed.  */
+          for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
+            if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
+              break;
+          if (reg_idx == nmatch)
+            goto done;
+
+          /* Nothing left to retry: report failure rather than popping
+             from an empty stack.  */
+          if (fs->num == 0)
+            {
+              ret = REG_NOMATCH;
+              goto done;
+            }
+          cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
+                                     &eps_via_nodes);
+        }
+
+      /* Proceed to next node.  */
+      cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
+                                    &eps_via_nodes, fs);
+
+      if (BE (cur_node < 0, 0))
+        {
+          if (BE (cur_node == -2, 0))
+            {
+              ret = REG_ESPACE;
+              goto done;
+            }
+          if (fs == NULL || fs->num == 0)
+            {
+              ret = REG_NOMATCH;
+              goto done;
+            }
+          cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
+                                     &eps_via_nodes);
+        }
+    }
+
+ done:
+  /* Single exit path: release everything this function owns.
+     free_fail_stack_return accepts a NULL stack.  */
+  re_node_set_free (&eps_via_nodes);
+  if (prev_idx_match_malloced)
+    re_free (prev_idx_match);
+  free_fail_stack_return (fs);
+  return ret;
+}
+
+/* Release every entry still counted on the fail stack FS and then the
+   entry array itself.  FS may be NULL, in which case nothing is done.
+   Always return REG_NOERROR, so callers can write
+   `return free_fail_stack_return (fs);'.  */
+
+static reg_errcode_t
+internal_function
+free_fail_stack_return (struct re_fail_stack_t *fs)
+{
+  int fs_idx = 0;
+
+  if (fs == NULL)
+    return REG_NOERROR;
+
+  while (fs_idx < fs->num)
+    {
+      re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
+      re_free (fs->stack[fs_idx].regs);
+      ++fs_idx;
+    }
+  re_free (fs->stack);
+  return REG_NOERROR;
+}
+
+/* Update the registers PMATCH (NMATCH entries) for node CUR_NODE reached
+   at string index CUR_IDX.  Only OP_OPEN_SUBEXP and OP_CLOSE_SUBEXP nodes
+   whose register number is below NMATCH have any effect.
+   PREV_IDX_MATCH keeps the last accepted snapshot of the registers, used
+   to undo empty iterations of optional subexpressions.  */
+
+static void
+internal_function
+update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
+             regmatch_t *prev_idx_match, int cur_node, int cur_idx, int nmatch)
+{
+  const int type = dfa->nodes[cur_node].type;
+  int reg_num;
+
+  if (type != OP_OPEN_SUBEXP && type != OP_CLOSE_SUBEXP)
+    return;
+
+  reg_num = dfa->nodes[cur_node].opr.idx + 1;
+  if (!(reg_num < nmatch))
+    return;
+
+  if (type == OP_OPEN_SUBEXP)
+    {
+      /* We are at the first node of this sub expression.  */
+      pmatch[reg_num].rm_so = cur_idx;
+      pmatch[reg_num].rm_eo = -1;
+      return;
+    }
+
+  /* We are at the last node of this sub expression.  */
+  if (pmatch[reg_num].rm_so < cur_idx)
+    {
+      /* This is a non-empty match or we are not inside an optional
+         subexpression.  Accept this right away.  */
+      pmatch[reg_num].rm_eo = cur_idx;
+      memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
+    }
+  else if (dfa->nodes[cur_node].opt_subexp
+           && prev_idx_match[reg_num].rm_so != -1)
+    {
+      /* We transited through an empty match for an optional
+         subexpression, like (a?)*, and this is not the subexp's
+         first match.  Copy back the old content of the registers
+         so that matches of an inner subexpression are undone as
+         well, like in ((a?))*.  */
+      memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
+    }
+  else
+    {
+      /* We completed a subexpression, but it may be part of
+         an optional one, so do not update PREV_IDX_MATCH.  */
+      pmatch[reg_num].rm_eo = cur_idx;
+    }
+}
+
+/* This function walks the STATE_LOG backwards from SCTX->last_str_idx
+ down to 0 and sifts the nodes of each state according to the rules
+ below.  The sifted states are written to SCTX->sifted_states.
+ Returns REG_NOERROR on success, otherwise the error code reported by
+ one of the helper functions.
+
+ Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
+ 1. When STR_IDX == MATCH_LAST(the last index in the state_log):
+ If `a' isn't the LAST_NODE and `a' can't epsilon transit to
+ the LAST_NODE, we throw away the node `a'.
+ 2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
+ string `s' and transit to `b':
+ i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
+ away the node `a'.
+ ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
+ thrown away, we throw away the node `a'.
+ 3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
+ i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
+ node `a'.
+ ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
+ we throw away the node `a'. */
+
+#define STATE_NODE_CONTAINS(state,node) \
+ ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
+
+static reg_errcode_t
+internal_function
+sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
+{
+ reg_errcode_t err;
+ int null_cnt = 0; /* Number of consecutive positions whose sifted state is NULL. */
+ int str_idx = sctx->last_str_idx;
+ re_node_set cur_dest; /* Node set being built for the position under examination. */
+
+#ifdef DEBUG
+ assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
+#endif
+
+ /* Build sifted state_log[str_idx]. It has the nodes which can epsilon
+ transit to the last_node and the last_node itself. */
+ err = re_node_set_init_1 (&cur_dest, sctx->last_node);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+
+ /* Then check each states in the state_log. */
+ while (str_idx > 0)
+ {
+ /* Update counters: the run of NULL sifted states is extended or
+ reset depending on the entry at the current position. */
+ null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
+ if (null_cnt > mctx->max_mb_elem_len) /* Run of empty positions is longer than max_mb_elem_len. */
+ {
+ memset (sctx->sifted_states, '\0',
+ sizeof (re_dfastate_t *) * str_idx); /* Clear every entry below STR_IDX and stop early. */
+ re_node_set_free (&cur_dest);
+ return REG_NOERROR;
+ }
+ re_node_set_empty (&cur_dest);
+ --str_idx;
+
+ if (mctx->state_log[str_idx]) /* Only positions that have a logged state can contribute nodes. */
+ {
+ err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+ }
+
+ /* Add all the nodes which satisfy the following conditions:
+ - It can epsilon transit to a node in CUR_DEST.
+ - It is in CUR_SRC.
+ And update state_log. */
+ err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+ }
+ err = REG_NOERROR;
+ free_return:
+ re_node_set_free (&cur_dest); /* Common cleanup for both the success and the error paths. */
+ return err;
+}
+/* Collect into CUR_DEST the non-epsilon nodes of state_log[STR_IDX] that
+ survive sifting: a node is kept when it accepts the input at STR_IDX,
+ its successor is still present in the sifted state it leads to, and
+ check_dst_limits does not reject the transition.
+ Returns REG_NOERROR on success and REG_ESPACE if a node cannot be
+ inserted into CUR_DEST. */
+static reg_errcode_t
+internal_function
+build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
+ int str_idx, re_node_set *cur_dest)
+{
+ const re_dfa_t *const dfa = mctx->dfa;
+ const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
+ int i;
+
+ /* Then build the next sifted state.
+ We build the next sifted state on `cur_dest', and update
+ `sifted_states[str_idx]' with `cur_dest'.
+ Note:
+ `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
+ `cur_src' points the node_set of the old `state_log[str_idx]'
+ (with the epsilon nodes pre-filtered out). */
+ for (i = 0; i < cur_src->nelem; i++)
+ {
+ int prev_node = cur_src->elems[i];
+ int naccepted = 0; /* Stays 0 while PREV_NODE has not been shown to accept the input. */
+ int ret;
+
+#ifdef DEBUG
+ re_token_type_t type = dfa->nodes[prev_node].type;
+ assert (!IS_EPSILON_NODE (type));
+#endif
+#ifdef RE_ENABLE_I18N
+ /* If the node may accept `multi byte'. */
+ if (dfa->nodes[prev_node].accept_mb)
+ naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
+ str_idx, sctx->last_str_idx);
+#endif /* RE_ENABLE_I18N */
+
+ /* We don't check backreferences here.
+ See update_cur_sifted_state(). */
+ if (!naccepted
+ && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
+ && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
+ dfa->nexts[prev_node]))
+ naccepted = 1; /* Single-position transition into the next sifted state. */
+
+ if (naccepted == 0)
+ continue;
+
+ if (sctx->limits.nelem) /* Limits are only consulted when the sift context has some. */
+ {
+ int to_idx = str_idx + naccepted;
+ if (check_dst_limits (mctx, &sctx->limits,
+ dfa->nexts[prev_node], to_idx,
+ prev_node, str_idx))
+ continue;
+ }
+ ret = re_node_set_insert (cur_dest, prev_node);
+ if (BE (ret == -1, 0))
+ return REG_ESPACE;
+ }
+
+ return REG_NOERROR;
+}
+
+/* Helper functions.  */
+
+/* Make sure the state log of MCTX can be used up to index
+   NEXT_STATE_LOG_IDX: extend the input buffers when that index lies
+   beyond them, and clear the log entries between the current top and the
+   new index, which then becomes the top.
+   Return REG_NOERROR, or the error reported by extend_buffers.  */
+
+static reg_errcode_t
+internal_function
+clean_state_log_if_needed (re_match_context_t *mctx, int next_state_log_idx)
+{
+  const int top = mctx->state_log_top;
+  const int beyond_buffer = next_state_log_idx >= mctx->input.bufs_len;
+  const int beyond_valid = (next_state_log_idx >= mctx->input.valid_len
+                            && mctx->input.valid_len < mctx->input.len);
+
+  if (beyond_buffer || beyond_valid)
+    {
+      reg_errcode_t err = extend_buffers (mctx);
+      if (BE (err != REG_NOERROR, 0))
+        return err;
+    }
+
+  if (top >= next_state_log_idx)
+    return REG_NOERROR;
+
+  /* Entries TOP + 1 .. NEXT_STATE_LOG_IDX have not been used yet.  */
+  memset (mctx->state_log + top + 1, '\0',
+          sizeof (re_dfastate_t *) * (next_state_log_idx - top));
+  mctx->state_log_top = next_state_log_idx;
+  return REG_NOERROR;
+}
+
+/* Merge the first NUM entries of the state array SRC into DST.
+   An empty DST slot takes over the SRC entry; when both slots are
+   occupied, DST receives the state acquired for the union of the two
+   node sets.
+   Return REG_NOERROR, or the error code of the failing helper.  */
+
+static reg_errcode_t
+internal_function
+merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
+                   re_dfastate_t **src, int num)
+{
+  int st_idx = 0;
+
+  while (st_idx < num)
+    {
+      if (dst[st_idx] == NULL)
+        dst[st_idx] = src[st_idx];
+      else if (src[st_idx] != NULL)
+        {
+          reg_errcode_t err;
+          re_node_set merged_set;
+
+          err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
+                                        &src[st_idx]->nodes);
+          if (BE (err != REG_NOERROR, 0))
+            return err;
+          dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
+          /* The temporary union is not needed once the state is
+             acquired (or the acquisition has failed).  */
+          re_node_set_free (&merged_set);
+          if (BE (err != REG_NOERROR, 0))
+            return err;
+        }
+      ++st_idx;
+    }
+  return REG_NOERROR;
+}
+
+/* Finish the sifted state for string index STR_IDX from DEST_NODES and
+   store it in SCTX->sifted_states[STR_IDX] (NULL if DEST_NODES is empty).
+   When a state is logged at STR_IDX, DEST_NODES is first completed with
+   the logged nodes that can epsilon transit into it and then pruned by
+   the limits of SCTX; finally back references of the logged state are
+   sifted.
+   Return REG_NOERROR, or the error code of the failing helper.  */
+
+static reg_errcode_t
+internal_function
+update_cur_sifted_state (const re_match_context_t *mctx,
+                         re_sift_context_t *sctx, int str_idx,
+                         re_node_set *dest_nodes)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  reg_errcode_t err = REG_NOERROR;
+  const re_node_set *candidates = NULL;
+
+  if (mctx->state_log[str_idx] != NULL)
+    candidates = &mctx->state_log[str_idx]->nodes;
+
+  if (dest_nodes->nelem != 0)
+    {
+      if (candidates != NULL)
+        {
+          /* At first, add the nodes which can epsilon transit to a node
+             in DEST_NODE.  */
+          err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
+          if (BE (err != REG_NOERROR, 0))
+            return err;
+
+          /* Then, check the limitations in the current sift_context.  */
+          if (sctx->limits.nelem)
+            {
+              err = check_subexp_limits (dfa, dest_nodes, candidates,
+                                         &sctx->limits, mctx->bkref_ents,
+                                         str_idx);
+              if (BE (err != REG_NOERROR, 0))
+                return err;
+            }
+        }
+
+      sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
+      if (BE (err != REG_NOERROR, 0))
+        return err;
+    }
+  else
+    sctx->sifted_states[str_idx] = NULL;
+
+  if (candidates != NULL && mctx->state_log[str_idx]->has_backref)
+    {
+      err = sift_states_bkref (mctx, sctx, str_idx, candidates);
+      if (BE (err != REG_NOERROR, 0))
+        return err;
+    }
+  return REG_NOERROR;
+}
+
+/* Add to DEST_NODES every node of CANDIDATES that lies in the inverse
+   epsilon closure of DEST_NODES.
+   The inverse closure is computed once per DFA state and cached in
+   STATE->inveclosure; a nonzero `alloc' marks the cache as filled.
+   Return REG_NOERROR on success and REG_ESPACE (or the error reported by
+   re_acquire_state / re_node_set_add_intersect) on failure.
+   NOTE(review): the visible caller only passes a non-empty DEST_NODES;
+   the acquired state is dereferenced without a NULL check.  */
+
+static reg_errcode_t
+internal_function
+add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
+                       const re_node_set *candidates)
+{
+  reg_errcode_t err = REG_NOERROR;
+  int i;
+
+  re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
+  if (BE (err != REG_NOERROR, 0))
+    return err;
+
+  if (!state->inveclosure.alloc)
+    {
+      err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
+      if (BE (err != REG_NOERROR, 0))
+        return REG_ESPACE;
+      for (i = 0; i < dest_nodes->nelem; i++)
+        {
+          err = re_node_set_merge (&state->inveclosure,
+                                   dfa->inveclosures + dest_nodes->elems[i]);
+          if (BE (err != REG_NOERROR, 0))
+            {
+              /* Do not leave a partially built closure behind: it would
+                 be taken for a complete one by the next call.  */
+              re_node_set_free (&state->inveclosure);
+              re_node_set_init_empty (&state->inveclosure);
+              return REG_ESPACE;
+            }
+        }
+    }
+  return re_node_set_add_intersect (dest_nodes, candidates,
+                                    &state->inveclosure);
+}
+/* Remove from DEST_NODES the nodes in the inverse epsilon closure of NODE,
+ except those collected in a temporary exception set: for every epsilon
+ node of that closure (other than NODE) having one of its first two
+ epsilon destinations outside the closure but inside DEST_NODES, the
+ CANDIDATES found in that node's own inverse closure are exempted.
+ Returns REG_NOERROR, or the error from re_node_set_add_intersect. */
+static reg_errcode_t
+internal_function
+sub_epsilon_src_nodes (const re_dfa_t *dfa, int node, re_node_set *dest_nodes,
+ const re_node_set *candidates)
+{
+ int ecl_idx;
+ reg_errcode_t err;
+ re_node_set *inv_eclosure = dfa->inveclosures + node;
+ re_node_set except_nodes; /* Nodes that must stay in DEST_NODES. */
+ re_node_set_init_empty (&except_nodes);
+ for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx) /* Pass 1: build the exception set. */
+ {
+ int cur_node = inv_eclosure->elems[ecl_idx];
+ if (cur_node == node)
+ continue;
+ if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
+ {
+ int edst1 = dfa->edests[cur_node].elems[0];
+ int edst2 = ((dfa->edests[cur_node].nelem > 1)
+ ? dfa->edests[cur_node].elems[1] : -1); /* -1 when there is no second destination. */
+ if ((!re_node_set_contains (inv_eclosure, edst1)
+ && re_node_set_contains (dest_nodes, edst1))
+ || (edst2 > 0
+ && !re_node_set_contains (inv_eclosure, edst2)
+ && re_node_set_contains (dest_nodes, edst2)))
+ {
+ err = re_node_set_add_intersect (&except_nodes, candidates,
+ dfa->inveclosures + cur_node);
+ if (BE (err != REG_NOERROR, 0))
+ {
+ re_node_set_free (&except_nodes);
+ return err;
+ }
+ }
+ }
+ }
+ for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx) /* Pass 2: drop everything not exempted. */
+ {
+ int cur_node = inv_eclosure->elems[ecl_idx];
+ if (!re_node_set_contains (&except_nodes, cur_node))
+ {
+ int idx = re_node_set_contains (dest_nodes, cur_node) - 1; /* NOTE(review): presumably re_node_set_contains yields position + 1, or 0 if absent -- confirm. */
+ re_node_set_remove_at (dest_nodes, idx);
+ }
+ }
+ re_node_set_free (&except_nodes);
+ return REG_NOERROR;
+}
+
+/* Check the transition between SRC_NODE at string index SRC_IDX and
+   DST_NODE at string index DST_IDX against the back-reference limits
+   LIMITS (indices into MCTX->bkref_ents).
+   Return 1 as soon as one limit places the two ends at different
+   positions relative to its subexpression, 0 if no limit does.  */
+
+static int
+internal_function
+check_dst_limits (const re_match_context_t *mctx, re_node_set *limits,
+                  int dst_node, int dst_idx, int src_node, int src_idx)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  int lim_idx;
+
+  int dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
+  int src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
+
+  for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
+    {
+      const int limit = limits->elems[lim_idx];
+      const int subexp_idx
+        = dfa->nodes[mctx->bkref_ents[limit].node].opr.idx;
+      int dst_pos, src_pos;
+
+      dst_pos = check_dst_limits_calc_pos (mctx, limit, subexp_idx,
+                                           dst_node, dst_idx, dst_bkref_idx);
+      src_pos = check_dst_limits_calc_pos (mctx, limit, subexp_idx,
+                                           src_node, src_idx, src_bkref_idx);
+
+      /* In case of:
+         <src> <dst> ( <subexp> )
+         ( <subexp> ) <src> <dst>
+         ( <subexp1> <src> <subexp2> <dst> <subexp3> )
+         both ends lie on the same side and this limitation is
+         unrelated.  */
+      if (src_pos != dst_pos)
+        return 1;
+    }
+  return 0;
+}
+/* Helper of check_dst_limits_calc_pos for a string index that lies on a
+ boundary of the subexpression SUBEXP_IDX. BOUNDARIES has bit value 1
+ set when the index is the start of the subexpression and bit value 2
+ set when it is the end. The epsilon closure of FROM_NODE is scanned:
+ -1 is returned when the matching OP_OPEN_SUBEXP is found on a start
+ boundary, 0 when the matching OP_CLOSE_SUBEXP is found on an end
+ boundary; OP_BACK_REF nodes are followed recursively through the
+ back-reference cache entries starting at BKREF_IDX (-1 for none).
+ If nothing decides, 1 is returned on an end boundary and 0 otherwise. */
+static int
+internal_function
+check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
+ int subexp_idx, int from_node, int bkref_idx)
+{
+ const re_dfa_t *const dfa = mctx->dfa;
+ const re_node_set *eclosures = dfa->eclosures + from_node;
+ int node_idx;
+
+ /* Else, we are on the boundary: examine the nodes on the epsilon
+ closure. */
+ for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
+ {
+ int node = eclosures->elems[node_idx];
+ switch (dfa->nodes[node].type)
+ {
+ case OP_BACK_REF:
+ if (bkref_idx != -1)
+ {
+ struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
+ do
+ {
+ int dst, cpos;
+
+ if (ent->node != node)
+ continue; /* `continue' jumps to the loop condition, so ENT still advances. */
+
+ if (subexp_idx < BITSET_WORD_BITS
+ && !(ent->eps_reachable_subexps_map
+ & ((bitset_word_t) 1 << subexp_idx)))
+ continue; /* The map bit for SUBEXP_IDX is already cleared for this entry. */
+
+ /* Recurse trying to reach the OP_OPEN_SUBEXP and
+ OP_CLOSE_SUBEXP cases below. But, if the
+ destination node is the same node as the source
+ node, don't recurse because it would cause an
+ infinite loop: a regex that exhibits this behavior
+ is ()\1*\1* */
+ dst = dfa->edests[node].elems[0];
+ if (dst == from_node)
+ {
+ if (boundaries & 1)
+ return -1;
+ else /* if (boundaries & 2) */
+ return 0;
+ }
+
+ cpos =
+ check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
+ dst, bkref_idx);
+ if (cpos == -1 /* && (boundaries & 1) */)
+ return -1;
+ if (cpos == 0 && (boundaries & 2))
+ return 0;
+
+ if (subexp_idx < BITSET_WORD_BITS)
+ ent->eps_reachable_subexps_map
+ &= ~((bitset_word_t) 1 << subexp_idx); /* Clear the bit so this entry is skipped for SUBEXP_IDX next time. */
+ }
+ while (ent++->more); /* Entries are chained through their `more' flag. */
+ }
+ break;
+
+ case OP_OPEN_SUBEXP:
+ if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
+ return -1;
+ break;
+
+ case OP_CLOSE_SUBEXP:
+ if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
+ return 0;
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ return (boundaries & 2) ? 1 : 0;
+}
+
+/* Classify the position of FROM_NODE at string index STR_IDX relative to
+   the subexpression of the back-reference cache entry LIMIT:
+   -1 means before the subexpression, 0 inside it and 1 after it.
+   When STR_IDX lies on a boundary of the subexpression, the epsilon
+   closure of FROM_NODE decides (see check_dst_limits_calc_pos_1).  */
+
+static int
+internal_function
+check_dst_limits_calc_pos (const re_match_context_t *mctx, int limit,
+                           int subexp_idx, int from_node, int str_idx,
+                           int bkref_idx)
+{
+  const struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
+  int at_start, at_end;
+
+  /* If we are outside the range of the subexpression, return -1 or 1.  */
+  if (str_idx < lim->subexp_from)
+    return -1;
+  if (lim->subexp_to < str_idx)
+    return 1;
+
+  /* If we are strictly within the subexpression, return 0.  */
+  at_start = (str_idx == lim->subexp_from);
+  at_end = (str_idx == lim->subexp_to);
+  if (!at_start && !at_end)
+    return 0;
+
+  /* Else, examine epsilon closure.  Bit 0 flags the start boundary and
+     bit 1 the end boundary.  */
+  return check_dst_limits_calc_pos_1 (mctx, at_start | (at_end << 1),
+                                      subexp_idx, from_node, bkref_idx);
+}
+
+/* Check the limitations of sub expressions LIMITS, and remove the nodes
+ which are against limitations from DEST_NODES.
+ LIMITS holds indices into BKREF_ENTS; STR_IDX is the string index the
+ node set DEST_NODES belongs to and CANDIDATES is passed through to
+ sub_epsilon_src_nodes, which performs the removals.
+ Returns REG_NOERROR, or the error from sub_epsilon_src_nodes. */
+
+static reg_errcode_t
+internal_function
+check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
+ const re_node_set *candidates, re_node_set *limits,
+ struct re_backref_cache_entry *bkref_ents, int str_idx)
+{
+ reg_errcode_t err;
+ int node_idx, lim_idx;
+
+ for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
+ {
+ int subexp_idx;
+ struct re_backref_cache_entry *ent;
+ ent = bkref_ents + limits->elems[lim_idx];
+
+ if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
+ continue; /* This is unrelated limitation. */
+
+ subexp_idx = dfa->nodes[ent->node].opr.idx;
+ if (ent->subexp_to == str_idx)
+ {
+ int ops_node = -1; /* OP_OPEN_SUBEXP node of this subexpression found in DEST_NODES, if any. */
+ int cls_node = -1; /* OP_CLOSE_SUBEXP node of this subexpression found in DEST_NODES, if any. */
+ for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
+ {
+ int node = dest_nodes->elems[node_idx];
+ re_token_type_t type = dfa->nodes[node].type;
+ if (type == OP_OPEN_SUBEXP
+ && subexp_idx == dfa->nodes[node].opr.idx)
+ ops_node = node;
+ else if (type == OP_CLOSE_SUBEXP
+ && subexp_idx == dfa->nodes[node].opr.idx)
+ cls_node = node;
+ }
+
+ /* Check the limitation of the open subexpression. */
+ /* Note that (ent->subexp_to = str_idx != ent->subexp_from). */
+ if (ops_node >= 0)
+ {
+ err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
+ candidates);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ }
+
+ /* Check the limitation of the close subexpression. */
+ if (cls_node >= 0)
+ for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
+ {
+ int node = dest_nodes->elems[node_idx];
+ if (!re_node_set_contains (dfa->inveclosures + node,
+ cls_node)
+ && !re_node_set_contains (dfa->eclosures + node,
+ cls_node))
+ {
+ /* It is against this limitation.
+ Remove it from the current sifted state. */
+ err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
+ candidates);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ --node_idx; /* DEST_NODES may have shrunk; step back before the loop increments again. */
+ }
+ }
+ }
+ else /* (ent->subexp_to != str_idx) */
+ {
+ for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
+ {
+ int node = dest_nodes->elems[node_idx];
+ re_token_type_t type = dfa->nodes[node].type;
+ if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
+ {
+ if (subexp_idx != dfa->nodes[node].opr.idx)
+ continue;
+ /* It is against this limitation.
+ Remove it from the current sifted state. */
+ err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
+ candidates);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ }
+ }
+ }
+ }
+ return REG_NOERROR;
+}
+/* Sift the states reachable through the back references found among
+ CANDIDATES at string index STR_IDX. For every cached back-reference
+ entry of such a node whose destination survives in
+ SCTX->sifted_states, sift_states_backward is run again on a local copy
+ of SCTX that ends at this node and carries the entry as an extra
+ limit; the result is merged into SCTX->limited_states when that array
+ is present.
+ Returns REG_NOERROR on success, otherwise the first error met. */
+static reg_errcode_t
+internal_function
+sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
+ int str_idx, const re_node_set *candidates)
+{
+ const re_dfa_t *const dfa = mctx->dfa;
+ reg_errcode_t err;
+ int node_idx, node;
+ re_sift_context_t local_sctx;
+ int first_idx = search_cur_bkref_entry (mctx, str_idx);
+
+ if (first_idx == -1) /* No back-reference cache entry for this string index. */
+ return REG_NOERROR;
+
+ local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized. */
+
+ for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
+ {
+ int enabled_idx;
+ re_token_type_t type;
+ struct re_backref_cache_entry *entry;
+ node = candidates->elems[node_idx];
+ type = dfa->nodes[node].type;
+ /* Avoid infinite loop for the REs like "()\1+". */
+ if (node == sctx->last_node && str_idx == sctx->last_str_idx)
+ continue;
+ if (type != OP_BACK_REF)
+ continue;
+
+ entry = mctx->bkref_ents + first_idx;
+ enabled_idx = first_idx; /* Index of ENTRY within mctx->bkref_ents; advanced together with it. */
+ do
+ {
+ int subexp_len;
+ int to_idx;
+ int dst_node;
+ int ret;
+ re_dfastate_t *cur_state;
+
+ if (entry->node != node)
+ continue; /* `continue' jumps to the loop condition, which advances both cursors. */
+ subexp_len = entry->subexp_to - entry->subexp_from;
+ to_idx = str_idx + subexp_len;
+ dst_node = (subexp_len ? dfa->nexts[node]
+ : dfa->edests[node].elems[0]); /* An empty back reference continues through its epsilon destination. */
+
+ if (to_idx > sctx->last_str_idx
+ || sctx->sifted_states[to_idx] == NULL
+ || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
+ || check_dst_limits (mctx, &sctx->limits, node,
+ str_idx, dst_node, to_idx))
+ continue;
+
+ if (local_sctx.sifted_states == NULL) /* First use: set up the local copy lazily. */
+ {
+ local_sctx = *sctx;
+ err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+ }
+ local_sctx.last_node = node;
+ local_sctx.last_str_idx = str_idx;
+ ret = re_node_set_insert (&local_sctx.limits, enabled_idx);
+ if (BE (ret < 0, 0))
+ {
+ err = REG_ESPACE;
+ goto free_return;
+ }
+ cur_state = local_sctx.sifted_states[str_idx]; /* Saved so the entry can be restored after the recursive sift. */
+ err = sift_states_backward (mctx, &local_sctx);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+ if (sctx->limited_states != NULL)
+ {
+ err = merge_state_array (dfa, sctx->limited_states,
+ local_sctx.sifted_states,
+ str_idx + 1);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+ }
+ local_sctx.sifted_states[str_idx] = cur_state;
+ re_node_set_remove (&local_sctx.limits, enabled_idx);
+
+ /* mctx->bkref_ents may have changed, reload the pointer. */
+ entry = mctx->bkref_ents + enabled_idx;
+ }
+ while (enabled_idx++, entry++->more);
+ }
+ err = REG_NOERROR;
+ free_return:
+ if (local_sctx.sifted_states != NULL) /* The local limits set exists only if the copy was made. */
+ {
+ re_node_set_free (&local_sctx.limits);
+ }
+
+ return err;
+}
+
+
+#ifdef RE_ENABLE_I18N
+/* Return the number of bytes the node NODE_IDX accepts at string index
+   STR_IDX, as reported by check_node_accept_bytes.
+   The result is forced to 0 when the node accepts a positive number of
+   bytes ending no later than MAX_STR_IDX but its successor is no longer
+   present in the sifted state at the end of those bytes.  */
+
+static int
+internal_function
+sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
+                     int node_idx, int str_idx, int max_str_idx)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  /* Check the node can accept `multi byte'.  */
+  const int naccepted
+    = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
+
+  /* Nothing accepted, or the end lies beyond the range being sifted:
+     report the raw count.  */
+  if (naccepted <= 0 || str_idx + naccepted > max_str_idx)
+    return naccepted;
+
+  /* It is sure that the node could accept `naccepted' bytes of input
+     only if the destination was not thrown away already.  */
+  if (STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
+                           dfa->nexts[node_idx]))
+    return naccepted;
+
+  return 0;
+}
+#endif /* RE_ENABLE_I18N */
+
+
+/* Functions for state transition. */
+
+ /* Advance from STATE over the current input byte and return the state
+    stored for that byte in STATE's transition table.  The table is built
+    the first time it is needed.  When multibyte support is compiled in and
+    STATE can accept a multibyte element, transit_state_mb() runs first so
+    that it can update the state log.
+    On failure NULL is returned with *ERR set.  */
+
+ static re_dfastate_t *
+ internal_function
+ transit_state (reg_errcode_t *err, re_match_context_t *mctx,
+                re_dfastate_t *state)
+ {
+   unsigned char ch;
+
+ #ifdef RE_ENABLE_I18N
+   /* Let the multibyte transitions update the state log first.  */
+   if (BE (state->accept_mb, 0))
+     {
+       *err = transit_state_mb (mctx, state);
+       if (BE (*err != REG_NOERROR, 0))
+         return NULL;
+     }
+ #endif /* RE_ENABLE_I18N */
+
+   /* Fetch the byte that selects the transition table slot.  */
+   ch = re_string_fetch_byte (&mctx->input);
+
+   /* Build the transition table lazily: keep trying until one of the two
+      table flavours exists, giving up when the builder fails.  */
+   while (state->trtable == NULL && state->word_trtable == NULL)
+     {
+       if (!build_trtable (mctx->dfa, state))
+         {
+           *err = REG_ESPACE;
+           return NULL;
+         }
+     }
+
+   /* The plain table takes precedence when it exists.  */
+   if (BE (state->trtable != NULL, 1))
+     return state->trtable[ch];
+   else
+     {
+       /* The word table has a second half, SBC_MAX entries further on,
+          which is used when the context before the current index is a
+          word context.  */
+       re_dfastate_t **word_table = state->word_trtable;
+       unsigned int context
+         = re_string_context_at (&mctx->input,
+                                 re_string_cur_idx (&mctx->input) - 1,
+                                 mctx->eflags);
+       return (IS_WORD_CONTEXT (context)
+               ? word_table[ch + SBC_MAX]
+               : word_table[ch]);
+     }
+ }
+
+ /* Update the state_log if we need.
+ Record NEXT_STATE in MCTX->state_log at the current input index. If the
+ log already holds a state there, the entrance nodes of the logged state
+ are combined with those of NEXT_STATE (when NEXT_STATE is not NULL) and
+ the state acquired for the combined node set replaces both.
+ When the DFA has back references and the resulting state is not NULL,
+ the state is passed to check_subexp_matching_top() and, if it contains
+ back references itself, to transit_state_bkref().
+ Returns the state now associated with the current index. Returns NULL
+ with *ERR set when one of the helpers reports an error. */
+ re_dfastate_t *
+ internal_function
+ merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
+ re_dfastate_t *next_state)
+ {
+ const re_dfa_t *const dfa = mctx->dfa;
+ int cur_idx = re_string_cur_idx (&mctx->input);
+
+ if (cur_idx > mctx->state_log_top) /* beyond the current top of the log */
+ {
+ mctx->state_log[cur_idx] = next_state;
+ mctx->state_log_top = cur_idx;
+ }
+ else if (mctx->state_log[cur_idx] == 0) /* slot inside the log, still empty */
+ {
+ mctx->state_log[cur_idx] = next_state;
+ }
+ else
+ {
+ re_dfastate_t *pstate;
+ unsigned int context;
+ re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
+ /* If (state_log[cur_idx] != 0), it implies that cur_idx is
+ the destination of a multibyte char/collating element/
+ back reference. Then the next state is the union set of
+ these destinations and the results of the transition table. */
+ pstate = mctx->state_log[cur_idx];
+ log_nodes = pstate->entrance_nodes;
+ if (next_state != NULL)
+ {
+ table_nodes = next_state->entrance_nodes;
+ *err = re_node_set_init_union (&next_nodes, table_nodes,
+ log_nodes);
+ if (BE (*err != REG_NOERROR, 0))
+ return NULL;
+ }
+ else
+ next_nodes = *log_nodes; /* shallow copy, must not be freed below */
+ /* Note: We already add the nodes of the initial state,
+ then we don't need to add them here. */
+
+ context = re_string_context_at (&mctx->input,
+ re_string_cur_idx (&mctx->input) - 1,
+ mctx->eflags);
+ next_state = mctx->state_log[cur_idx]
+ = re_acquire_state_context (err, dfa, &next_nodes, context);
+ /* We don't need to check errors here, since the return value of
+ this function is next_state and ERR is already set. */
+
+ if (table_nodes != NULL) /* NEXT_NODES was allocated only by the union */
+ re_node_set_free (&next_nodes);
+ }
+
+ if (BE (dfa->nbackref, 0) && next_state != NULL)
+ {
+ /* Check OP_OPEN_SUBEXP in the current state in case that we use them
+ later. We must check them here, since the back references in the
+ next state might use them. */
+ *err = check_subexp_matching_top (mctx, &next_state->nodes,
+ cur_idx);
+ if (BE (*err != REG_NOERROR, 0))
+ return NULL;
+
+ /* If the next state has back references. */
+ if (next_state->has_backref)
+ {
+ *err = transit_state_bkref (mctx, &next_state->nodes);
+ if (BE (*err != REG_NOERROR, 0))
+ return NULL;
+ /* transit_state_bkref() may have replaced the log entry at the
+ current index, so reload it. */
+ next_state = mctx->state_log[cur_idx];
+ }
+ }
+
+ return next_state;
+ }
+
+ /* Step forward through the input, one byte at a time, until an index is
+    reached whose state_log entry is set, then merge that entry through
+    merge_state_with_log() and use the result as the state to restart from.
+    The search is repeated while the merge yields NULL without an error.
+    Returns NULL when the top of the state log is passed without finding a
+    usable entry, or when the merge fails (in which case *ERR is set).  */
+ re_dfastate_t *
+ internal_function
+ find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
+ {
+   for (;;)
+     {
+       const int log_top = mctx->state_log_top;
+       int idx = re_string_cur_idx (&mctx->input);
+       re_dfastate_t *recovered;
+
+       /* Advance to the next logged position, giving up past the top.  */
+       for (;;)
+         {
+           ++idx;
+           if (idx > log_top)
+             return NULL;
+           re_string_skip_bytes (&mctx->input, 1);
+           if (mctx->state_log[idx] != NULL)
+             break;
+         }
+
+       recovered = merge_state_with_log (err, mctx, NULL);
+       if (*err != REG_NOERROR || recovered != NULL)
+         return recovered;
+     }
+ }
+
+/* Helper functions for transit_state. */
+
+ /* Scan the node set CUR_NODES for OP_OPEN_SUBEXP nodes whose sub
+    expression is referenced by a back reference (according to
+    DFA->used_bkref_map) and register each of them, together with STR_IDX,
+    through match_ctx_add_subtop() so that the corresponding back
+    references can be evaluated later.
+    Returns REG_NOERROR, or the error reported by match_ctx_add_subtop().  */
+
+ static reg_errcode_t
+ internal_function
+ check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
+                            int str_idx)
+ {
+   const re_dfa_t *const dfa = mctx->dfa;
+   int pos;
+
+   /* TODO: This isn't efficient.
+      Because there might be more than one nodes whose types are
+      OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
+      nodes.
+      E.g. RE: (a){2}  */
+   for (pos = 0; pos < cur_nodes->nelem; pos++)
+     {
+       const int node = cur_nodes->elems[pos];
+       const re_token_t *tok = dfa->nodes + node;
+       reg_errcode_t status;
+
+       if (tok->type != OP_OPEN_SUBEXP)
+         continue;
+       /* Only indices that fit into the bitmap word can be looked up.  */
+       if (!(tok->opr.idx < BITSET_WORD_BITS))
+         continue;
+       if (!(dfa->used_bkref_map & ((bitset_word_t) 1 << tok->opr.idx)))
+         continue;
+
+       status = match_ctx_add_subtop (mctx, node, str_idx);
+       if (BE (status != REG_NOERROR, 0))
+         return status;
+     }
+   return REG_NOERROR;
+ }
+
+ #if 0
+ /* Return the next state to which the current state STATE will transit by
+ accepting the current input byte.
+ This variant computes the destination without a transition table and is
+ compiled out by the enclosing "#if 0". It merges the epsilon closures of
+ the successors of every node in STATE that check_node_accept() accepts
+ at the current index, acquires the state for that node set, and skips
+ one input byte. NULL is returned with *ERR set when allocating or
+ merging the node set fails. */
+
+ static re_dfastate_t *
+ transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
+ re_dfastate_t *state)
+ {
+ const re_dfa_t *const dfa = mctx->dfa;
+ re_node_set next_nodes;
+ re_dfastate_t *next_state;
+ int node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
+ unsigned int context;
+
+ *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
+ if (BE (*err != REG_NOERROR, 0))
+ return NULL;
+ for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
+ {
+ int cur_node = state->nodes.elems[node_cnt];
+ if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
+ {
+ *err = re_node_set_merge (&next_nodes,
+ dfa->eclosures + dfa->nexts[cur_node]);
+ if (BE (*err != REG_NOERROR, 0))
+ {
+ re_node_set_free (&next_nodes);
+ return NULL;
+ }
+ }
+ }
+ context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
+ next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
+ /* We don't need to check errors here, since the return value of
+ this function is next_state and ERR is already set. */
+
+ re_node_set_free (&next_nodes);
+ re_string_skip_bytes (&mctx->input, 1);
+ return next_state;
+ }
+ #endif
+ /* For each node of PSTATE that can accept a multibyte element
+ (accept_mb) at the current input index, record the transition in
+ MCTX->state_log: the entry at (current index + number of accepted bytes)
+ becomes the state acquired for the epsilon closure of the node's
+ successor, combined with the entrance nodes of any state already logged
+ there. Nodes whose constraint is not satisfied by the current context
+ are skipped. MCTX->max_mb_elem_len is raised to the longest accepted
+ length seen.
+ Returns REG_NOERROR, or the first error reported by a helper. */
+ #ifdef RE_ENABLE_I18N
+ static reg_errcode_t
+ internal_function
+ transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
+ {
+ const re_dfa_t *const dfa = mctx->dfa;
+ reg_errcode_t err;
+ int i;
+
+ for (i = 0; i < pstate->nodes.nelem; ++i)
+ {
+ re_node_set dest_nodes, *new_nodes;
+ int cur_node_idx = pstate->nodes.elems[i];
+ int naccepted, dest_idx;
+ unsigned int context;
+ re_dfastate_t *dest_state;
+
+ if (!dfa->nodes[cur_node_idx].accept_mb)
+ continue;
+
+ if (dfa->nodes[cur_node_idx].constraint)
+ {
+ context = re_string_context_at (&mctx->input,
+ re_string_cur_idx (&mctx->input),
+ mctx->eflags);
+ if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
+ context))
+ continue;
+ }
+
+ /* How many bytes the node can accept? */
+ naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
+ re_string_cur_idx (&mctx->input));
+ if (naccepted == 0)
+ continue;
+
+ /* The node can accept `naccepted' bytes. */
+ dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
+ mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
+ : mctx->max_mb_elem_len);
+ err = clean_state_log_if_needed (mctx, dest_idx);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ #ifdef DEBUG
+ assert (dfa->nexts[cur_node_idx] != -1);
+ #endif
+ new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
+
+ dest_state = mctx->state_log[dest_idx];
+ if (dest_state == NULL)
+ dest_nodes = *new_nodes; /* shallow copy, must not be freed below */
+ else
+ {
+ err = re_node_set_init_union (&dest_nodes,
+ dest_state->entrance_nodes, new_nodes);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ }
+ context = re_string_context_at (&mctx->input, dest_idx - 1,
+ mctx->eflags);
+ mctx->state_log[dest_idx]
+ = re_acquire_state_context (&err, dfa, &dest_nodes, context);
+ if (dest_state != NULL) /* DEST_NODES was allocated only by the union */
+ re_node_set_free (&dest_nodes);
+ if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
+ return err;
+ }
+ return REG_NOERROR;
+ }
+ #endif /* RE_ENABLE_I18N */
+ /* For every OP_BACK_REF node in NODES whose constraint is satisfied at
+ the current input index, collect the candidate matches with get_subexp()
+ and, for each back reference cache entry that get_subexp() appended for
+ this node and index, add the appropriate epsilon closure to the
+ state_log entry at the index where the back reference ends. A back
+ reference that matched the empty string ends at the current index; if
+ that enlarged the state logged there, the newly added nodes are examined
+ recursively.
+ Returns REG_NOERROR, or the first error reported by a helper. */
+ static reg_errcode_t
+ internal_function
+ transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
+ {
+ const re_dfa_t *const dfa = mctx->dfa;
+ reg_errcode_t err;
+ int i;
+ int cur_str_idx = re_string_cur_idx (&mctx->input);
+
+ for (i = 0; i < nodes->nelem; ++i)
+ {
+ int dest_str_idx, prev_nelem, bkc_idx;
+ int node_idx = nodes->elems[i];
+ unsigned int context;
+ const re_token_t *node = dfa->nodes + node_idx;
+ re_node_set *new_dest_nodes;
+
+ /* Check whether `node' is a backreference or not. */
+ if (node->type != OP_BACK_REF)
+ continue;
+
+ if (node->constraint)
+ {
+ context = re_string_context_at (&mctx->input, cur_str_idx,
+ mctx->eflags);
+ if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
+ continue;
+ }
+
+ /* `node' is a backreference.
+ Check the substring which the sub expression matched. Remember
+ the cache size first so that only the entries appended by
+ get_subexp() are visited below. */
+ bkc_idx = mctx->nbkref_ents;
+ err = get_subexp (mctx, node_idx, cur_str_idx);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+
+ /* And add the epsilon closures (which is `new_dest_nodes') of
+ the backreference to appropriate state_log. */
+ #ifdef DEBUG
+ assert (dfa->nexts[node_idx] != -1);
+ #endif
+ for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
+ {
+ int subexp_len;
+ re_dfastate_t *dest_state;
+ struct re_backref_cache_entry *bkref_ent;
+ bkref_ent = mctx->bkref_ents + bkc_idx;
+ if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
+ continue;
+ subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
+ /* An empty match follows the epsilon destination of the node,
+ a non-empty one follows its regular successor. */
+ new_dest_nodes = (subexp_len == 0
+ ? dfa->eclosures + dfa->edests[node_idx].elems[0]
+ : dfa->eclosures + dfa->nexts[node_idx]);
+ dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
+ - bkref_ent->subexp_from);
+ context = re_string_context_at (&mctx->input, dest_str_idx - 1,
+ mctx->eflags);
+ dest_state = mctx->state_log[dest_str_idx];
+ prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
+ : mctx->state_log[cur_str_idx]->nodes.nelem);
+ /* Add `new_dest_node' to state_log. */
+ if (dest_state == NULL)
+ {
+ mctx->state_log[dest_str_idx]
+ = re_acquire_state_context (&err, dfa, new_dest_nodes,
+ context);
+ if (BE (mctx->state_log[dest_str_idx] == NULL
+ && err != REG_NOERROR, 0))
+ goto free_return;
+ }
+ else
+ {
+ re_node_set dest_nodes;
+ err = re_node_set_init_union (&dest_nodes,
+ dest_state->entrance_nodes,
+ new_dest_nodes);
+ if (BE (err != REG_NOERROR, 0))
+ {
+ re_node_set_free (&dest_nodes);
+ goto free_return;
+ }
+ mctx->state_log[dest_str_idx]
+ = re_acquire_state_context (&err, dfa, &dest_nodes, context);
+ re_node_set_free (&dest_nodes);
+ if (BE (mctx->state_log[dest_str_idx] == NULL
+ && err != REG_NOERROR, 0))
+ goto free_return;
+ }
+ /* We need to check recursively if the backreference can epsilon
+ transit. */
+ if (subexp_len == 0
+ && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
+ {
+ err = check_subexp_matching_top (mctx, new_dest_nodes,
+ cur_str_idx);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+ err = transit_state_bkref (mctx, new_dest_nodes);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+ }
+ }
+ }
+ err = REG_NOERROR;
+ free_return:
+ return err;
+ }
+
+ /* Enumerate all the candidates which the backreference BKREF_NODE can match
+    at BKREF_STR_IDX, and register them by match_ctx_add_entry().
+    Note that we might collect inappropriate candidates here.
+    However, the cost of checking them strictly here is too high, then we
+    delay these checking for prune_impossible_nodes().
+
+    For every registered '(' in MCTX->sub_tops that belongs to the
+    referenced sub expression, the ')' positions already recorded for it are
+    tried first; then the remaining positions up to BKREF_STR_IDX are
+    scanned for further ')' nodes.  At each step the text matched by the sub
+    expression is compared with the text at the back reference, and the scan
+    for that '(' stops at the first difference.
+
+    Returns REG_NOERROR once the enumeration is complete (whether or not a
+    candidate was registered), or the error reported by a helper.  */
+
+ static reg_errcode_t
+ internal_function
+ get_subexp (re_match_context_t *mctx, int bkref_node, int bkref_str_idx)
+ {
+   const re_dfa_t *const dfa = mctx->dfa;
+   int subexp_num, sub_top_idx;
+   const char *buf = (const char *) re_string_get_buffer (&mctx->input);
+   /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX.  */
+   int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
+   if (cache_idx != -1)
+     {
+       const struct re_backref_cache_entry *entry
+         = mctx->bkref_ents + cache_idx;
+       do
+         if (entry->node == bkref_node)
+           return REG_NOERROR; /* We already checked it.  */
+       while (entry++->more);
+     }
+
+   subexp_num = dfa->nodes[bkref_node].opr.idx;
+
+   /* For each sub expression  */
+   for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
+     {
+       reg_errcode_t err;
+       re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
+       re_sub_match_last_t *sub_last;
+       int sub_last_idx, sl_str, bkref_str_off;
+
+       if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
+         continue; /* It isn't related.  */
+
+       sl_str = sub_top->str_idx;
+       bkref_str_off = bkref_str_idx;
+       /* At first, check the last node of sub expressions we already
+          evaluated.  */
+       for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
+         {
+           int sl_str_diff;
+           sub_last = sub_top->lasts[sub_last_idx];
+           sl_str_diff = sub_last->str_idx - sl_str;
+           /* The matched string by the sub expression match with the substring
+              at the back reference?  */
+           if (sl_str_diff > 0)
+             {
+               if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))
+                 {
+                   /* Not enough chars for a successful match.  */
+                   if (bkref_str_off + sl_str_diff > mctx->input.len)
+                     break;
+
+                   err = clean_state_log_if_needed (mctx,
+                                                    bkref_str_off
+                                                    + sl_str_diff);
+                   if (BE (err != REG_NOERROR, 0))
+                     return err;
+                   buf = (const char *) re_string_get_buffer (&mctx->input);
+                 }
+               if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
+                 /* We don't need to search this sub expression any more.  */
+                 break;
+             }
+           bkref_str_off += sl_str_diff;
+           sl_str += sl_str_diff;
+           err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
+                                 bkref_str_idx);
+
+           /* Reload buf, since the preceding call might have reallocated
+              the buffer.  */
+           buf = (const char *) re_string_get_buffer (&mctx->input);
+
+           if (err == REG_NOMATCH)
+             continue;
+           if (BE (err != REG_NOERROR, 0))
+             return err;
+         }
+
+       if (sub_last_idx < sub_top->nlasts)
+         continue;
+       if (sub_last_idx > 0)
+         ++sl_str;
+       /* Then, search for the other last nodes of the sub expression.  */
+       for (; sl_str <= bkref_str_idx; ++sl_str)
+         {
+           int cls_node, sl_str_off;
+           const re_node_set *nodes;
+           sl_str_off = sl_str - sub_top->str_idx;
+           /* The matched string by the sub expression match with the substring
+              at the back reference?  */
+           if (sl_str_off > 0)
+             {
+               if (BE (bkref_str_off >= mctx->input.valid_len, 0))
+                 {
+                   /* If we are at the end of the input, we cannot match.  */
+                   if (bkref_str_off >= mctx->input.len)
+                     break;
+
+                   err = extend_buffers (mctx);
+                   if (BE (err != REG_NOERROR, 0))
+                     return err;
+
+                   buf = (const char *) re_string_get_buffer (&mctx->input);
+                 }
+               if (buf [bkref_str_off++] != buf[sl_str - 1])
+                 break; /* We don't need to search this sub expression
+                           any more.  */
+             }
+           if (mctx->state_log[sl_str] == NULL)
+             continue;
+           /* Does this state have a ')' of the sub expression?  */
+           nodes = &mctx->state_log[sl_str]->nodes;
+           cls_node = find_subexp_node (dfa, nodes, subexp_num,
+                                        OP_CLOSE_SUBEXP);
+           if (cls_node == -1)
+             continue; /* No.  */
+           if (sub_top->path == NULL)
+             {
+               sub_top->path = calloc (sl_str - sub_top->str_idx + 1,
+                                       sizeof (state_array_t));
+               if (sub_top->path == NULL)
+                 return REG_ESPACE;
+             }
+           /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
+              in the current context?  */
+           err = check_arrival (mctx, sub_top->path, sub_top->node,
+                                sub_top->str_idx, cls_node, sl_str,
+                                OP_CLOSE_SUBEXP);
+           if (err == REG_NOMATCH)
+             continue;
+           if (BE (err != REG_NOERROR, 0))
+             return err;
+           sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
+           if (BE (sub_last == NULL, 0))
+             return REG_ESPACE;
+           err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
+                                 bkref_str_idx);
+
+           /* Reload buf here as well: the call above may have reallocated
+              the input buffer, and the next iteration reads through buf.  */
+           buf = (const char *) re_string_get_buffer (&mctx->input);
+
+           /* REG_NOMATCH only means this candidate is unusable; any other
+              error must be reported instead of being dropped.  */
+           if (BE (err != REG_NOERROR && err != REG_NOMATCH, 0))
+             return err;
+         }
+     }
+   return REG_NOERROR;
+ }
+
+/* Helper functions for get_subexp(). */
+
+ /* Test whether the end of a sub expression match, SUB_LAST, can reach
+    the back reference BKREF_NODE at BKREF_STR.  If it can, register the
+    match delimited by SUB_TOP and SUB_LAST as a back reference cache entry
+    and make sure the state log covers the index at which the back
+    reference would end.
+    Returns whatever check_arrival() reports when it is not REG_NOERROR
+    (including REG_NOMATCH), otherwise the result of the registration and
+    of clean_state_log_if_needed().  */
+
+ static reg_errcode_t
+ internal_function
+ get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
+                 re_sub_match_last_t *sub_last, int bkref_node, int bkref_str)
+ {
+   reg_errcode_t status;
+
+   /* Can the subexpression arrive the back reference?  */
+   status = check_arrival (mctx, &sub_last->path, sub_last->node,
+                           sub_last->str_idx, bkref_node, bkref_str,
+                           OP_OPEN_SUBEXP);
+   if (status == REG_NOERROR)
+     status = match_ctx_add_entry (mctx, bkref_node, bkref_str,
+                                   sub_top->str_idx, sub_last->str_idx);
+   if (status != REG_NOERROR)
+     return status;
+
+   /* The back reference ends as many bytes after BKREF_STR as the sub
+      expression match is long.  */
+   return clean_state_log_if_needed (mctx,
+                                     bkref_str + sub_last->str_idx
+                                     - sub_top->str_idx);
+ }
+
+ /* Return the first node in NODES whose token type equals TYPE and whose
+    sub expression index equals SUBEXP_IDX, or -1 when NODES holds no such
+    node.  Callers pass OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP as TYPE to look
+    for '(' or ')'.
+    TODO: This function isn't efficient...
+          Because there might be more than one nodes whose types are
+          OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
+          nodes.
+          E.g. RE: (a){2}  */
+
+ static int
+ internal_function
+ find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
+                   int subexp_idx, int type)
+ {
+   int pos = 0;
+
+   while (pos < nodes->nelem)
+     {
+       const int candidate = nodes->elems[pos++];
+
+       if (dfa->nodes[candidate].type == type
+           && dfa->nodes[candidate].opr.idx == subexp_idx)
+         return candidate;
+     }
+   return -1;
+ }
+
+ /* Check whether the node TOP_NODE at TOP_STR can arrive to the node
+    LAST_NODE at LAST_STR.  We record the path onto PATH since it will be
+    heavily reused: PATH->array serves as the state log during the walk and
+    PATH->next_idx remembers the index at which the walk stopped, so that a
+    later call with the same PATH resumes there instead of starting over.
+    MCTX->state_log and MCTX->input.cur_idx are replaced for the duration of
+    the walk and are put back on every return path, including errors.
+    Return REG_NOERROR if it can arrive, REG_NOMATCH if it cannot, or the
+    error reported by a helper (for example REG_ESPACE).  */
+
+ static reg_errcode_t
+ internal_function
+ check_arrival (re_match_context_t *mctx, state_array_t *path, int top_node,
+                int top_str, int last_node, int last_str, int type)
+ {
+   const re_dfa_t *const dfa = mctx->dfa;
+   reg_errcode_t err = REG_NOERROR;
+   int subexp_num, backup_cur_idx, str_idx, null_cnt;
+   re_dfastate_t *cur_state = NULL;
+   re_node_set *cur_nodes, next_nodes;
+   re_dfastate_t **backup_state_log;
+   unsigned int context;
+
+   subexp_num = dfa->nodes[top_node].opr.idx;
+   /* Extend the buffer if we need.  */
+   if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
+     {
+       re_dfastate_t **new_array;
+       int old_alloc = path->alloc;
+       path->alloc += last_str + mctx->max_mb_elem_len + 1;
+       new_array = re_realloc (path->array, re_dfastate_t *, path->alloc);
+       if (BE (new_array == NULL, 0))
+         {
+           path->alloc = old_alloc;
+           return REG_ESPACE;
+         }
+       path->array = new_array;
+       memset (new_array + old_alloc, '\0',
+               sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
+     }
+
+   /* Resume where the previous walk over PATH stopped; a PATH that has not
+      been walked yet starts at TOP_STR.  */
+   str_idx = path->next_idx ? path->next_idx : top_str;
+
+   /* Temporary modify MCTX.  */
+   backup_state_log = mctx->state_log;
+   backup_cur_idx = mctx->input.cur_idx;
+   mctx->state_log = path->array;
+   mctx->input.cur_idx = str_idx;
+
+   /* Setup initial node set.  */
+   context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
+   if (str_idx == top_str)
+     {
+       err = re_node_set_init_1 (&next_nodes, top_node);
+       if (BE (err != REG_NOERROR, 0))
+         goto restore_return;
+       err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
+       if (BE (err != REG_NOERROR, 0))
+         goto free_return;
+     }
+   else
+     {
+       cur_state = mctx->state_log[str_idx];
+       if (cur_state && cur_state->has_backref)
+         {
+           err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
+           if (BE (err != REG_NOERROR, 0))
+             goto restore_return;
+         }
+       else
+         re_node_set_init_empty (&next_nodes);
+     }
+   if (str_idx == top_str || (cur_state && cur_state->has_backref))
+     {
+       if (next_nodes.nelem)
+         {
+           err = expand_bkref_cache (mctx, &next_nodes, str_idx,
+                                     subexp_num, type);
+           if (BE (err != REG_NOERROR, 0))
+             goto free_return;
+         }
+       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
+       if (BE (cur_state == NULL && err != REG_NOERROR, 0))
+         goto free_return;
+       mctx->state_log[str_idx] = cur_state;
+     }
+
+   /* Walk forward one index at a time.  The walk is abandoned once more
+      than MAX_MB_ELEM_LEN consecutive indices produced no state.  */
+   for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
+     {
+       re_node_set_empty (&next_nodes);
+       if (mctx->state_log[str_idx + 1])
+         {
+           err = re_node_set_merge (&next_nodes,
+                                    &mctx->state_log[str_idx + 1]->nodes);
+           if (BE (err != REG_NOERROR, 0))
+             goto free_return;
+         }
+       if (cur_state)
+         {
+           err = check_arrival_add_next_nodes (mctx, str_idx,
+                                               &cur_state->non_eps_nodes,
+                                               &next_nodes);
+           if (BE (err != REG_NOERROR, 0))
+             goto free_return;
+         }
+       ++str_idx;
+       if (next_nodes.nelem)
+         {
+           err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
+           if (BE (err != REG_NOERROR, 0))
+             goto free_return;
+           err = expand_bkref_cache (mctx, &next_nodes, str_idx,
+                                     subexp_num, type);
+           if (BE (err != REG_NOERROR, 0))
+             goto free_return;
+         }
+       context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
+       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
+       if (BE (cur_state == NULL && err != REG_NOERROR, 0))
+         goto free_return;
+       mctx->state_log[str_idx] = cur_state;
+       null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
+     }
+   re_node_set_free (&next_nodes);
+   cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
+                : &mctx->state_log[last_str]->nodes);
+   path->next_idx = str_idx;
+
+   /* Fix MCTX.  */
+   mctx->state_log = backup_state_log;
+   mctx->input.cur_idx = backup_cur_idx;
+
+   /* Then check the current node set has the node LAST_NODE.  */
+   if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
+     return REG_NOERROR;
+
+   return REG_NOMATCH;
+
+  free_return:
+   re_node_set_free (&next_nodes);
+  restore_return:
+   /* Error exit: MCTX must not be left pointing at PATH's array.  */
+   mctx->state_log = backup_state_log;
+   mctx->input.cur_idx = backup_cur_idx;
+   return err;
+ }
+
+/* Helper functions for check_arrival. */
+
+ /* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
+ to NEXT_NODES.
+ With RE_ENABLE_I18N, a node for which check_node_accept_bytes() reports
+ more than one byte additionally gets its successor added to the state
+ logged at STR_IDX plus that byte count.
+ Returns REG_NOERROR, REG_ESPACE when an insertion fails, or the error
+ reported by a helper.
+ TODO: This function is similar to the functions transit_state*(),
+ however this function has many additional works.
+ Can't we unify them? */
+
+ static reg_errcode_t
+ internal_function
+ check_arrival_add_next_nodes (re_match_context_t *mctx, int str_idx,
+ re_node_set *cur_nodes, re_node_set *next_nodes)
+ {
+ const re_dfa_t *const dfa = mctx->dfa;
+ int result;
+ int cur_idx;
+ reg_errcode_t err = REG_NOERROR;
+ re_node_set union_set; /* scratch set, emptied and reused for each node */
+ re_node_set_init_empty (&union_set);
+ for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
+ {
+ int naccepted = 0;
+ int cur_node = cur_nodes->elems[cur_idx];
+ #ifdef DEBUG
+ re_token_type_t type = dfa->nodes[cur_node].type;
+ assert (!IS_EPSILON_NODE (type));
+ #endif
+ #ifdef RE_ENABLE_I18N
+ /* If the node may accept `multi byte'. */
+ if (dfa->nodes[cur_node].accept_mb)
+ {
+ naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
+ str_idx);
+ if (naccepted > 1)
+ {
+ re_dfastate_t *dest_state;
+ int next_node = dfa->nexts[cur_node];
+ int next_idx = str_idx + naccepted;
+ dest_state = mctx->state_log[next_idx];
+ re_node_set_empty (&union_set);
+ if (dest_state)
+ {
+ err = re_node_set_merge (&union_set, &dest_state->nodes);
+ if (BE (err != REG_NOERROR, 0))
+ {
+ re_node_set_free (&union_set);
+ return err;
+ }
+ }
+ result = re_node_set_insert (&union_set, next_node);
+ if (BE (result < 0, 0))
+ {
+ re_node_set_free (&union_set);
+ return REG_ESPACE;
+ }
+ mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
+ &union_set);
+ if (BE (mctx->state_log[next_idx] == NULL
+ && err != REG_NOERROR, 0))
+ {
+ re_node_set_free (&union_set);
+ return err;
+ }
+ }
+ }
+ #endif /* RE_ENABLE_I18N */
+ /* The successor goes into NEXT_NODES when the multibyte check reported
+ a non-zero count or when check_node_accept() accepts the node. */
+ if (naccepted
+ || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
+ {
+ result = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
+ if (BE (result < 0, 0))
+ {
+ re_node_set_free (&union_set);
+ return REG_ESPACE;
+ }
+ }
+ }
+ re_node_set_free (&union_set);
+ return REG_NOERROR;
+ }
+
+ /* Replace CUR_NODES by the union of the epsilon closures of its nodes.
+    A closure that contains a node of type TYPE belonging to sub expression
+    EX_SUBEXP is not merged wholesale; it is rebuilt node by node through
+    check_arrival_expand_ecl_sub(), which stops at such nodes.
+    On success the old contents of CUR_NODES are freed and REG_NOERROR is
+    returned.  On failure CUR_NODES is left untouched and the error of the
+    failing helper is returned.  */
+
+ static reg_errcode_t
+ internal_function
+ check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
+                           int ex_subexp, int type)
+ {
+   reg_errcode_t status;
+   re_node_set expanded;
+   int pos;
+ #ifdef DEBUG
+   assert (cur_nodes->nelem);
+ #endif
+   status = re_node_set_alloc (&expanded, cur_nodes->nelem);
+   if (BE (status != REG_NOERROR, 0))
+     return status;
+
+   /* Collect into EXPANDED the epsilon closure of every node of
+      CUR_NODES.  */
+   for (pos = 0; pos < cur_nodes->nelem; pos++)
+     {
+       const int node = cur_nodes->elems[pos];
+       const re_node_set *closure = dfa->eclosures + node;
+
+       if (find_subexp_node (dfa, closure, ex_subexp, type) == -1)
+         /* There are no problematic nodes, just merge them.  */
+         status = re_node_set_merge (&expanded, closure);
+       else
+         /* There are problematic nodes, re-calculate incrementally.  */
+         status = check_arrival_expand_ecl_sub (dfa, &expanded, node,
+                                                ex_subexp, type);
+
+       if (BE (status != REG_NOERROR, 0))
+         {
+           re_node_set_free (&expanded);
+           return status;
+         }
+     }
+
+   re_node_set_free (cur_nodes);
+   *cur_nodes = expanded;
+   return REG_NOERROR;
+ }
+
+ /* Helper function for check_arrival_expand_ecl.
+    Follow the epsilon transitions that start at TARGET and add the visited
+    nodes to DST_NODES.  The walk stops at a node already present in
+    DST_NODES, at a node without epsilon destinations, and at a node of type
+    TYPE that belongs to sub expression EX_SUBEXP; such a boundary node is
+    itself added only when TYPE is OP_CLOSE_SUBEXP.  A second epsilon
+    destination is explored recursively.
+    Returns REG_NOERROR, or REG_ESPACE when an insertion fails.  */
+
+ static reg_errcode_t
+ internal_function
+ check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
+                               int target, int ex_subexp, int type)
+ {
+   int node = target;
+
+   while (!re_node_set_contains (dst_nodes, node))
+     {
+       const re_token_t *tok = dfa->nodes + node;
+       const re_node_set *eps = dfa->edests + node;
+       const int at_boundary = (tok->type == type
+                                && tok->opr.idx == ex_subexp);
+
+       /* A boundary node that is not a closing one is left out.  */
+       if (at_boundary && type != OP_CLOSE_SUBEXP)
+         break;
+
+       if (BE (re_node_set_insert (dst_nodes, node) == -1, 0))
+         return REG_ESPACE;
+
+       if (at_boundary || eps->nelem == 0)
+         break;
+
+       if (eps->nelem == 2)
+         {
+           reg_errcode_t status
+             = check_arrival_expand_ecl_sub (dfa, dst_nodes, eps->elems[1],
+                                             ex_subexp, type);
+           if (BE (status != REG_NOERROR, 0))
+             return status;
+         }
+       node = eps->elems[0];
+     }
+   return REG_NOERROR;
+ }
+
+
+ /* For all the back references in the current state, calculate the
+ destination of the back references by the appropriate entry
+ in MCTX->BKREF_ENTS.
+ The walk starts at the first cache entry found for CUR_STR by
+ search_cur_bkref_entry() and follows the entries while their `more'
+ flag is set. Entries whose node is not in CUR_NODES are skipped.
+ A back reference that matched the empty string adds the expanded
+ epsilon destination to CUR_NODES and the walk restarts from the first
+ entry; any other back reference adds its successor to the state logged
+ at the index where the back reference ends.
+ SUBEXP_NUM and TYPE are passed through to check_arrival_expand_ecl().
+ Returns REG_NOERROR, or the error of the failing helper. */
+
+ static reg_errcode_t
+ internal_function
+ expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
+ int cur_str, int subexp_num, int type)
+ {
+ const re_dfa_t *const dfa = mctx->dfa;
+ reg_errcode_t err;
+ int cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
+ struct re_backref_cache_entry *ent;
+
+ if (cache_idx_start == -1)
+ return REG_NOERROR;
+
+ restart:
+ ent = mctx->bkref_ents + cache_idx_start;
+ do
+ {
+ int to_idx, next_node;
+
+ /* Is this entry ENT appropriate? Note that `continue' in this
+ do-while jumps to the loop condition, which advances ENT. */
+ if (!re_node_set_contains (cur_nodes, ent->node))
+ continue; /* No. */
+
+ to_idx = cur_str + ent->subexp_to - ent->subexp_from;
+ /* Calculate the destination of the back reference, and append it
+ to MCTX->STATE_LOG. */
+ if (to_idx == cur_str)
+ {
+ /* The backreference did epsilon transit, we must re-check all the
+ node in the current state. */
+ re_node_set new_dests;
+ reg_errcode_t err2, err3;
+ next_node = dfa->edests[ent->node].elems[0];
+ if (re_node_set_contains (cur_nodes, next_node))
+ continue;
+ /* The three steps run unconditionally; their results are
+ checked together after NEW_DESTS has been freed. */
+ err = re_node_set_init_1 (&new_dests, next_node);
+ err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
+ err3 = re_node_set_merge (cur_nodes, &new_dests);
+ re_node_set_free (&new_dests);
+ if (BE (err != REG_NOERROR || err2 != REG_NOERROR
+ || err3 != REG_NOERROR, 0))
+ {
+ err = (err != REG_NOERROR ? err
+ : (err2 != REG_NOERROR ? err2 : err3));
+ return err;
+ }
+ /* TODO: It is still inefficient... */
+ goto restart;
+ }
+ else
+ {
+ re_node_set union_set;
+ next_node = dfa->nexts[ent->node];
+ if (mctx->state_log[to_idx])
+ {
+ int ret;
+ if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
+ next_node))
+ continue;
+ err = re_node_set_init_copy (&union_set,
+ &mctx->state_log[to_idx]->nodes);
+ ret = re_node_set_insert (&union_set, next_node);
+ if (BE (err != REG_NOERROR || ret < 0, 0))
+ {
+ re_node_set_free (&union_set);
+ err = err != REG_NOERROR ? err : REG_ESPACE;
+ return err;
+ }
+ }
+ else
+ {
+ err = re_node_set_init_1 (&union_set, next_node);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ }
+ mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
+ re_node_set_free (&union_set);
+ if (BE (mctx->state_log[to_idx] == NULL
+ && err != REG_NOERROR, 0))
+ return err;
+ }
+ }
+ while (ent++->more);
+ return REG_NOERROR;
+ }
+
+ /* Build transition table for the state.
+ Return 1 if succeeded, otherwise return 0. */
+
+static int
+internal_function
+build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
+{
+ reg_errcode_t err;
+ int i, j, ch, need_word_trtable = 0;
+ bitset_word_t elem, mask;
+ bool dests_node_malloced = false;
+ bool dest_states_malloced = false;
+ int ndests; /* Number of the destination states from `state'. */
+ re_dfastate_t **trtable;
+ re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
+ re_node_set follows, *dests_node;
+ bitset_t *dests_ch;
+ bitset_t acceptable;
+
+ struct dests_alloc
+ {
+ re_node_set dests_node[SBC_MAX];
+ bitset_t dests_ch[SBC_MAX];
+ } *dests_alloc;
+
+ /* We build DFA states which corresponds to the destination nodes
+ from `state'. `dests_node[i]' represents the nodes which i-th
+ destination state contains, and `dests_ch[i]' represents the
+ characters which i-th destination state accepts. */
+ if (__libc_use_alloca (sizeof (struct dests_alloc)))
+ dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
+ else
+ {
+ dests_alloc = re_malloc (struct dests_alloc, 1);
+ if (BE (dests_alloc == NULL, 0))
+ return 0;
+ dests_node_malloced = true;
+ }
+ dests_node = dests_alloc->dests_node;
+ dests_ch = dests_alloc->dests_ch;
+
+ /* Initialize transiton table. */
+ state->word_trtable = state->trtable = NULL;
+
+ /* At first, group all nodes belonging to `state' into several
+ destinations. */
+ ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
+ if (BE (ndests <= 0, 0))
+ {
+ if (dests_node_malloced)
+ free (dests_alloc);
+ /* Return 0 in case of an error, 1 otherwise. */
+ if (ndests == 0)
+ {
+ state->trtable = (re_dfastate_t **)
+ calloc (sizeof (re_dfastate_t *), SBC_MAX);
+ return 1;
+ }
+ return 0;
+ }
+
+ err = re_node_set_alloc (&follows, ndests + 1);
+ if (BE (err != REG_NOERROR, 0))
+ goto out_free;
+
+ if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
+ + ndests * 3 * sizeof (re_dfastate_t *)))
+ dest_states = (re_dfastate_t **)
+ alloca (ndests * 3 * sizeof (re_dfastate_t *));
+ else
+ {
+ dest_states = (re_dfastate_t **)
+ malloc (ndests * 3 * sizeof (re_dfastate_t *));
+ if (BE (dest_states == NULL, 0))
+ {
+out_free:
+ if (dest_states_malloced)
+ free (dest_states);
+ re_node_set_free (&follows);
+ for (i = 0; i < ndests; ++i)
+ re_node_set_free (dests_node + i);
+ if (dests_node_malloced)
+ free (dests_alloc);
+ return 0;
+ }
+ dest_states_malloced = true;
+ }
+ dest_states_word = dest_states + ndests;
+ dest_states_nl = dest_states_word + ndests;
+ bitset_empty (acceptable);
+
+ /* Then build the states for all destinations. */
+ for (i = 0; i < ndests; ++i)
+ {
+ int next_node;
+ re_node_set_empty (&follows);
+ /* Merge the follows of this destination states. */
+ for (j = 0; j < dests_node[i].nelem; ++j)
+ {
+ next_node = dfa->nexts[dests_node[i].elems[j]];
+ if (next_node != -1)
+ {
+ err = re_node_set_merge (&follows, dfa->eclosures + next_node);
+ if (BE (err != REG_NOERROR, 0))
+ goto out_free;
+ }
+ }
+ dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
+ if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
+ goto out_free;
+ /* If the new state has context constraint,
+ build appropriate states for these contexts. */
+ if (dest_states[i]->has_constraint)
+ {
+ dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
+ CONTEXT_WORD);
+ if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
+ goto out_free;
+
+ if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
+ need_word_trtable = 1;
+
+ dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
+ CONTEXT_NEWLINE);
+ if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
+ goto out_free;
+ }
+ else
+ {
+ dest_states_word[i] = dest_states[i];
+ dest_states_nl[i] = dest_states[i];
+ }
+ bitset_merge (acceptable, dests_ch[i]);
+ }
+
+ if (!BE (need_word_trtable, 0))
+ {
+ /* We don't care about whether the following character is a word
+ character, or we are in a single-byte character set so we can
+ discern by looking at the character code: allocate a
+ 256-entry transition table. */
+ trtable = state->trtable =
+ (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
+ if (BE (trtable == NULL, 0))
+ goto out_free;
+
+ /* For all characters ch...: */
+ for (i = 0; i < BITSET_WORDS; ++i)
+ for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
+ elem;
+ mask <<= 1, elem >>= 1, ++ch)
+ if (BE (elem & 1, 0))
+ {
+ /* There must be exactly one destination which accepts
+ character ch. See group_nodes_into_DFAstates. */
+ for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
+ ;
+
+ /* j-th destination accepts the word character ch. */
+ if (dfa->word_char[i] & mask)
+ trtable[ch] = dest_states_word[j];
+ else
+ trtable[ch] = dest_states[j];
+ }
+ }
+ else
+ {
+ /* We care about whether the following character is a word
+ character, and we are in a multi-byte character set: discern
+ by looking at the character code: build two 256-entry
+ transition tables, one starting at trtable[0] and one
+ starting at trtable[SBC_MAX]. */
+ trtable = state->word_trtable =
+ (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
+ if (BE (trtable == NULL, 0))
+ goto out_free;
+
+ /* For all characters ch...: */
+ for (i = 0; i < BITSET_WORDS; ++i)
+ for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
+ elem;
+ mask <<= 1, elem >>= 1, ++ch)
+ if (BE (elem & 1, 0))
+ {
+ /* There must be exactly one destination which accepts
+ character ch. See group_nodes_into_DFAstates. */
+ for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
+ ;
+
+ /* j-th destination accepts the word character ch. */
+ trtable[ch] = dest_states[j];
+ trtable[ch + SBC_MAX] = dest_states_word[j];
+ }
+ }
+
+ /* new line */
+ if (bitset_contain (acceptable, NEWLINE_CHAR))
+ {
+ /* The current state accepts newline character. */
+ for (j = 0; j < ndests; ++j)
+ if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
+ {
+ /* j-th destination accepts newline character. */
+ trtable[NEWLINE_CHAR] = dest_states_nl[j];
+ if (need_word_trtable)
+ trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
+ /* There must be only one destination which accepts
+ newline. See group_nodes_into_DFAstates. */
+ break;
+ }
+ }
+
+ if (dest_states_malloced)
+ free (dest_states);
+
+ re_node_set_free (&follows);
+ for (i = 0; i < ndests; ++i)
+ re_node_set_free (dests_node + i);
+
+ if (dests_node_malloced)
+ free (dests_alloc);
+
+ return 1;
+}
+
+/* Group all nodes belonging to STATE into several destinations.
+ Then for all destinations, set the nodes belonging to the destination
+ to DESTS_NODE[i] and set the characters accepted by the destination
+ to DESTS_CH[i]. Only CHARACTER, SIMPLE_BRACKET, OP_PERIOD and (when
+ RE_ENABLE_I18N is defined) OP_UTF8_PERIOD nodes contribute; nodes of
+ any other type are skipped, as are nodes whose context constraint
+ leaves no acceptable character.
+
+ Returns the number of destinations, or -1 if building a node set
+ fails; in that case the first `ndests' DESTS_NODE entries are freed
+ before returning. */
+
+static int
+internal_function
+group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
+ re_node_set *dests_node, bitset_t *dests_ch)
+{
+ reg_errcode_t err;
+ int result;
+ int i, j, k;
+ int ndests; /* Number of the destinations from `state'. */
+ bitset_t accepts; /* Characters a node can accept. */
+ const re_node_set *cur_nodes = &state->nodes;
+ bitset_empty (accepts);
+ ndests = 0;
+
+ /* For all the nodes belonging to `state', */
+ for (i = 0; i < cur_nodes->nelem; ++i)
+ {
+ re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
+ re_token_type_t type = node->type;
+ unsigned int constraint = node->constraint;
+
+ /* Enumerate all single byte character this node can accept. */
+ if (type == CHARACTER)
+ bitset_set (accepts, node->opr.c);
+ else if (type == SIMPLE_BRACKET)
+ {
+ bitset_merge (accepts, node->opr.sbcset);
+ }
+ else if (type == OP_PERIOD)
+ {
+#ifdef RE_ENABLE_I18N
+ if (dfa->mb_cur_max > 1)
+ bitset_merge (accepts, dfa->sb_char);
+ else
+#endif
+ bitset_set_all (accepts);
+ if (!(dfa->syntax & RE_DOT_NEWLINE))
+ bitset_clear (accepts, '\n');
+ if (dfa->syntax & RE_DOT_NOT_NULL)
+ bitset_clear (accepts, '\0');
+ }
+#ifdef RE_ENABLE_I18N
+ else if (type == OP_UTF8_PERIOD)
+ {
+ memset (accepts, '\xff', sizeof (bitset_t) / 2); /* Sets every bit in
+ the first half of the bitset; presumably the byte values 0x00-0x7f --
+ TODO confirm against the bitset_t layout. */
+ if (!(dfa->syntax & RE_DOT_NEWLINE))
+ bitset_clear (accepts, '\n');
+ if (dfa->syntax & RE_DOT_NOT_NULL)
+ bitset_clear (accepts, '\0');
+ }
+#endif
+ else
+ continue;
+
+ /* Restrict `accepts' to the characters that can satisfy the node's
+ context constraint; every `continue' below leaves `accepts' empty
+ for the next node. */
+ if (constraint)
+ {
+ if (constraint & NEXT_NEWLINE_CONSTRAINT)
+ {
+ bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
+ bitset_empty (accepts);
+ if (accepts_newline)
+ bitset_set (accepts, NEWLINE_CHAR);
+ else
+ continue;
+ }
+ if (constraint & NEXT_ENDBUF_CONSTRAINT)
+ {
+ bitset_empty (accepts);
+ continue;
+ }
+
+ if (constraint & NEXT_WORD_CONSTRAINT)
+ {
+ bitset_word_t any_set = 0;
+ if (type == CHARACTER && !node->word_char)
+ {
+ bitset_empty (accepts);
+ continue;
+ }
+#ifdef RE_ENABLE_I18N
+ if (dfa->mb_cur_max > 1)
+ for (j = 0; j < BITSET_WORDS; ++j)
+ any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
+ else
+#endif
+ for (j = 0; j < BITSET_WORDS; ++j)
+ any_set |= (accepts[j] &= dfa->word_char[j]);
+ if (!any_set)
+ continue;
+ }
+ if (constraint & NEXT_NOTWORD_CONSTRAINT)
+ {
+ bitset_word_t any_set = 0;
+ if (type == CHARACTER && node->word_char)
+ {
+ bitset_empty (accepts);
+ continue;
+ }
+#ifdef RE_ENABLE_I18N
+ if (dfa->mb_cur_max > 1)
+ for (j = 0; j < BITSET_WORDS; ++j)
+ any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
+ else
+#endif
+ for (j = 0; j < BITSET_WORDS; ++j)
+ any_set |= (accepts[j] &= ~dfa->word_char[j]);
+ if (!any_set)
+ continue;
+ }
+ }
+
+ /* Then divide `accepts' into DFA states, or create a new
+ state. Above, we make sure that accepts is not empty. */
+ for (j = 0; j < ndests; ++j)
+ {
+ bitset_t intersec; /* Intersection sets, see below. */
+ bitset_t remains;
+ /* Flags, see below. */
+ bitset_word_t has_intersec, not_subset, not_consumed;
+
+ /* Optimization, skip if this state doesn't accept the character. */
+ if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
+ continue;
+
+ /* Enumerate the intersection set of this state and `accepts'. */
+ has_intersec = 0;
+ for (k = 0; k < BITSET_WORDS; ++k)
+ has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
+ /* And skip if the intersection set is empty. */
+ if (!has_intersec)
+ continue;
+
+ /* Then check if this state is a subset of `accepts'. `remains'
+ collects the characters of destination j that are outside `accepts';
+ `accepts' itself is reduced to the characters destination j does
+ not cover. */
+ not_subset = not_consumed = 0;
+ for (k = 0; k < BITSET_WORDS; ++k)
+ {
+ not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
+ not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
+ }
+
+ /* If this state isn't a subset of `accepts', create a
+ new group state, which has the `remains'. Destination j keeps
+ only the intersection; the new destination starts as a copy of
+ destination j's node set. */
+ if (not_subset)
+ {
+ bitset_copy (dests_ch[ndests], remains);
+ bitset_copy (dests_ch[j], intersec);
+ err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
+ if (BE (err != REG_NOERROR, 0))
+ goto error_return;
+ ++ndests;
+ }
+
+ /* Put the position in the current group. */
+ result = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
+ if (BE (result < 0, 0))
+ goto error_return;
+
+ /* If all characters are consumed, go to next node. */
+ if (!not_consumed)
+ break;
+ }
+ /* Some characters remain, create a new group. */
+ if (j == ndests) /* The loop above ended without a `break'. */
+ {
+ bitset_copy (dests_ch[ndests], accepts);
+ err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
+ if (BE (err != REG_NOERROR, 0))
+ goto error_return;
+ ++ndests;
+ bitset_empty (accepts);
+ }
+ }
+ return ndests;
+ error_return:
+ for (j = 0; j < ndests; ++j)
+ re_node_set_free (dests_node + j);
+ return -1;
+}
+
+#ifdef RE_ENABLE_I18N
+/* Check how many bytes of INPUT, starting at STR_IDX, the node
+ `dfa->nodes[node_idx]' accepts, and return that count; 0 means the
+ node does not accept the input at this position.
+
+ This function handles the nodes which can accept one multi-byte
+ character or one collating element, like '.' and '[a-z]', as opposed
+ to the other nodes, which can only accept one byte. Node types other
+ than OP_UTF8_PERIOD, OP_PERIOD and COMPLEX_BRACKET always yield 0. */
+
+static int
+internal_function
+check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
+ const re_string_t *input, int str_idx)
+{
+ const re_token_t *node = dfa->nodes + node_idx;
+ int char_len, elem_len;
+ int i;
+
+ if (BE (node->type == OP_UTF8_PERIOD, 0))
+ {
+ unsigned char c = re_string_byte_at (input, str_idx), d;
+ if (BE (c < 0xc2, 1)) /* Lead bytes below 0xc2 are never accepted here. */
+ return 0;
+
+ if (str_idx + 2 > input->len) /* At least two bytes must be available. */
+ return 0;
+
+ d = re_string_byte_at (input, str_idx + 1);
+ if (c < 0xe0)
+ return (d < 0x80 || d > 0xbf) ? 0 : 2;
+ else if (c < 0xf0)
+ {
+ char_len = 3;
+ if (c == 0xe0 && d < 0xa0)
+ return 0;
+ }
+ else if (c < 0xf8)
+ {
+ char_len = 4;
+ if (c == 0xf0 && d < 0x90)
+ return 0;
+ }
+ else if (c < 0xfc)
+ {
+ char_len = 5;
+ if (c == 0xf8 && d < 0x88)
+ return 0;
+ }
+ else if (c < 0xfe)
+ {
+ char_len = 6;
+ if (c == 0xfc && d < 0x84)
+ return 0;
+ }
+ else
+ return 0;
+
+ if (str_idx + char_len > input->len)
+ return 0;
+
+ for (i = 1; i < char_len; ++i) /* Every trailing byte must be 0x80..0xbf. */
+ {
+ d = re_string_byte_at (input, str_idx + i);
+ if (d < 0x80 || d > 0xbf)
+ return 0;
+ }
+ return char_len;
+ }
+
+ char_len = re_string_char_size_at (input, str_idx);
+ if (node->type == OP_PERIOD)
+ {
+ if (char_len <= 1) /* Only multi-byte characters are accepted here. */
+ return 0;
+ /* FIXME: I don't think this if is needed, as both '\n'
+ and '\0' are char_len == 1. */
+ /* '.' accepts any one character except the following two cases. */
+ if ((!(dfa->syntax & RE_DOT_NEWLINE) &&
+ re_string_byte_at (input, str_idx) == '\n') ||
+ ((dfa->syntax & RE_DOT_NOT_NULL) &&
+ re_string_byte_at (input, str_idx) == '\0'))
+ return 0;
+ return char_len;
+ }
+
+ elem_len = re_string_elem_size_at (input, str_idx);
+ if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
+ return 0;
+
+ if (node->type == COMPLEX_BRACKET)
+ {
+ const re_charset_t *cset = node->opr.mbcset;
+# ifdef _LIBC
+ const unsigned char *pin
+ = ((const unsigned char *) re_string_get_buffer (input) + str_idx);
+ int j;
+ uint32_t nrules;
+# endif /* _LIBC */
+ int match_len = 0; /* Stays 0 unless one of the checks below matches. */
+ wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
+ ? re_string_wchar_at (input, str_idx) : 0);
+
+ /* match with multibyte character? */
+ for (i = 0; i < cset->nmbchars; ++i)
+ if (wc == cset->mbchars[i])
+ {
+ match_len = char_len;
+ goto check_node_accept_bytes_match;
+ }
+ /* match with character_class? */
+ for (i = 0; i < cset->nchar_classes; ++i)
+ {
+ wctype_t wt = cset->char_classes[i];
+ if (__iswctype (wc, wt))
+ {
+ match_len = char_len;
+ goto check_node_accept_bytes_match;
+ }
+ }
+
+# ifdef _LIBC
+ nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+ if (nrules != 0)
+ {
+ unsigned int in_collseq = 0;
+ const int32_t *table, *indirect;
+ const unsigned char *weights, *extra;
+ const char *collseqwc;
+ /* This #include defines a local function! */
+# include <locale/weight.h>
+
+ /* match with collating_symbol? */
+ if (cset->ncoll_syms)
+ extra = (const unsigned char *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
+ for (i = 0; i < cset->ncoll_syms; ++i)
+ {
+ const unsigned char *coll_sym = extra + cset->coll_syms[i];
+ /* Compare the length of input collating element and
+ the length of current collating element. */
+ if (*coll_sym != elem_len)
+ continue;
+ /* Compare each bytes. */
+ for (j = 0; j < *coll_sym; j++)
+ if (pin[j] != coll_sym[1 + j])
+ break;
+ if (j == *coll_sym)
+ {
+ /* Match if every bytes is equal. */
+ match_len = j;
+ goto check_node_accept_bytes_match;
+ }
+ }
+
+ if (cset->nranges)
+ {
+ if (elem_len <= char_len)
+ {
+ collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
+ in_collseq = __collseq_table_lookup (collseqwc, wc);
+ }
+ else
+ in_collseq = find_collation_sequence_value (pin, elem_len);
+ }
+ /* match with range expression? */
+ for (i = 0; i < cset->nranges; ++i)
+ if (cset->range_starts[i] <= in_collseq
+ && in_collseq <= cset->range_ends[i])
+ {
+ match_len = elem_len;
+ goto check_node_accept_bytes_match;
+ }
+
+ /* match with equivalence_class? */
+ if (cset->nequiv_classes)
+ {
+ const unsigned char *cp = pin;
+ table = (const int32_t *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
+ weights = (const unsigned char *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
+ extra = (const unsigned char *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
+ indirect = (const int32_t *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
+ int32_t idx = findidx (&cp);
+ if (idx > 0)
+ for (i = 0; i < cset->nequiv_classes; ++i)
+ {
+ int32_t equiv_class_idx = cset->equiv_classes[i];
+ size_t weight_len = weights[idx & 0xffffff];
+ if (weight_len == weights[equiv_class_idx & 0xffffff]
+ && (idx >> 24) == (equiv_class_idx >> 24))
+ {
+ int cnt = 0;
+
+ idx &= 0xffffff; /* NOTE(review): idx is masked in place, so on
+ later iterations of the enclosing loop (idx >> 24) is taken from
+ the already-masked value -- confirm this is intended. */
+ equiv_class_idx &= 0xffffff;
+
+ while (cnt <= weight_len /* NOTE(review): a match needs
+ weight_len + 1 equal bytes starting at offset 1 -- confirm the
+ bound against the weight table layout. */
+ && (weights[equiv_class_idx + 1 + cnt]
+ == weights[idx + 1 + cnt]))
+ ++cnt;
+ if (cnt > weight_len)
+ {
+ match_len = elem_len;
+ goto check_node_accept_bytes_match;
+ }
+ }
+ }
+ }
+ }
+ else
+# endif /* _LIBC */
+ {
+ /* match with range expression? The three wide strings compared
+ by wcscoll live in one buffer: range start at cmp_buf[0], WC at
+ cmp_buf[2], range end at cmp_buf[4], each followed by L'\0'. */
+#if __GNUC__ >= 2
+ wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
+#else
+ wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
+ cmp_buf[2] = wc;
+#endif
+ for (i = 0; i < cset->nranges; ++i)
+ {
+ cmp_buf[0] = cset->range_starts[i];
+ cmp_buf[4] = cset->range_ends[i];
+ if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
+ && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
+ {
+ match_len = char_len;
+ goto check_node_accept_bytes_match;
+ }
+ }
+ }
+ check_node_accept_bytes_match:
+ if (!cset->non_match)
+ return match_len;
+ else
+ {
+ /* Negated set: a match above means rejection, and no match means
+ the longer of the element and character lengths is accepted. */
+ if (match_len > 0)
+ return 0;
+ else
+ return (elem_len > char_len) ? elem_len : char_len;
+ }
+ }
+ return 0;
+}
+
+# ifdef _LIBC
+/* Look up the collation sequence value of the MBS_LEN-byte collating
+   element stored at MBS.  With no collation rules in the current locale,
+   a single byte is looked up in the byte collation table and anything
+   longer yields UINT_MAX.  Otherwise the collating-symbol table is
+   scanned record by record; UINT_MAX is returned when no record holds
+   the same byte sequence.  */
+static unsigned int
+internal_function
+find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
+{
+  uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+  const unsigned char *extra;
+  int32_t extrasize;
+  int32_t idx;
+
+  if (nrules == 0)
+    {
+      const unsigned char *collseq;
+
+      if (mbs_len != 1)
+        return UINT_MAX;
+      /* No valid character.  Match it as a single byte character.  */
+      collseq = (const unsigned char *)
+        _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
+      return collseq[mbs[0]];
+    }
+
+  extra = (const unsigned char *)
+    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
+  extrasize = (const unsigned char *)
+    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
+
+  idx = 0;
+  while (idx < extrasize)
+    {
+      int32_t elem_mbs_len;
+      int matched = 0;
+
+      /* Step over the name of the collating element.  */
+      idx = idx + extra[idx] + 1;
+      elem_mbs_len = extra[idx++];
+      if (mbs_len == elem_mbs_len)
+        {
+          int pos = 0;
+
+          while (pos < elem_mbs_len && extra[idx + pos] == mbs[pos])
+            ++pos;
+          /* Every byte agreed: this is the record we are looking for.  */
+          if (pos == elem_mbs_len)
+            matched = 1;
+        }
+      /* Step over the byte sequence of the collating element.  */
+      idx += elem_mbs_len;
+      /* Round up to the next 4-byte boundary.  */
+      idx = (idx + 3) & ~3;
+      /* Step over the collation sequence value.  */
+      idx += sizeof (uint32_t);
+      /* Step over the wide char sequence of the collating element.  */
+      idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
+      if (matched)
+        return *(uint32_t *) (extra + idx);
+      /* Not this record: step over its collation sequence value.  */
+      idx += sizeof (uint32_t);
+    }
+  return UINT_MAX;
+}
+# endif /* _LIBC */
+#endif /* RE_ENABLE_I18N */
+
+/* Return 1 if NODE accepts the byte at index IDX of MCTX's input and the
+   context at that position satisfies NODE's constraint (when it has one);
+   return 0 otherwise.  Only CHARACTER, SIMPLE_BRACKET, OP_PERIOD and (with
+   RE_ENABLE_I18N) OP_UTF8_PERIOD nodes can accept a byte here.  */
+
+static int
+internal_function
+check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
+                   int idx)
+{
+  unsigned char byte = re_string_byte_at (&mctx->input, idx);
+
+  switch (node->type)
+    {
+    case CHARACTER:
+      if (byte != node->opr.c)
+        return 0;
+      break;
+
+    case SIMPLE_BRACKET:
+      if (!bitset_contain (node->opr.sbcset, byte))
+        return 0;
+      break;
+
+#ifdef RE_ENABLE_I18N
+    case OP_UTF8_PERIOD:
+      /* Bytes with the high bit set are not handled by this function.  */
+      if (byte >= 0x80)
+        return 0;
+      /* FALLTHROUGH */
+#endif
+    case OP_PERIOD:
+      /* '.' rejects newline unless RE_DOT_NEWLINE is set ...  */
+      if (byte == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
+        return 0;
+      /* ... and rejects NUL when RE_DOT_NOT_NULL is set.  */
+      if (byte == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL))
+        return 0;
+      break;
+
+    default:
+      return 0;
+    }
+
+  /* The byte itself is acceptable; what remains is the context check.  */
+  if (!node->constraint)
+    return 1;
+
+  {
+    unsigned int context = re_string_context_at (&mctx->input, idx,
+                                                 mctx->eflags);
+    if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
+      return 0;
+  }
+  return 1;
+}
+
+/* Grow the input buffers of MCTX to twice their current length, grow the
+   state log (when one exists) to match, and then rebuild the derived
+   buffers.  Returns REG_NOERROR on success, or the error reported by the
+   failing step (REG_ESPACE when the state log cannot be grown).  */
+
+static reg_errcode_t
+internal_function
+extend_buffers (re_match_context_t *mctx)
+{
+  re_string_t *input = &mctx->input;
+  reg_errcode_t status;
+
+  /* Double the lengths of the buffers.  */
+  status = re_string_realloc_buffers (input, input->bufs_len * 2);
+  if (BE (status != REG_NOERROR, 0))
+    return status;
+
+  if (mctx->state_log != NULL)
+    {
+      /* The state log is resized to one slot more than the new buffer
+         length.
+         XXX We have no indication of the size of this buffer.  If this
+         allocation fails we have no indication that the state_log array
+         does not have the right size.  */
+      re_dfastate_t **grown = re_realloc (mctx->state_log, re_dfastate_t *,
+                                          input->bufs_len + 1);
+      if (BE (grown == NULL, 0))
+        return REG_ESPACE;
+      mctx->state_log = grown;
+    }
+
+  /* Then reconstruct the buffers.  */
+#ifdef RE_ENABLE_I18N
+  if (input->mb_cur_max > 1)
+    {
+      if (input->icase)
+        {
+          status = build_wcs_upper_buffer (input);
+          return status;
+        }
+      build_wcs_buffer (input);
+      return REG_NOERROR;
+    }
+#endif /* RE_ENABLE_I18N */
+  if (input->icase)
+    build_upper_buffer (input);
+  else if (input->trans != NULL)
+    re_string_translate_buffer (input);
+  return REG_NOERROR;
+}
+
+
+/* Functions for matching context. */
+
+/* Initialize MCTX with the execution flags EFLAGS and room for N
+   back-reference cache entries and N sub-match tops.  Returns REG_ESPACE
+   if either array cannot be allocated, REG_NOERROR otherwise.  */
+
+static reg_errcode_t
+internal_function
+match_ctx_init (re_match_context_t *mctx, int eflags, int n)
+{
+  mctx->eflags = eflags;
+  mctx->match_last = -1;
+
+  if (n > 0)
+    {
+      mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
+      mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
+      if (BE (mctx->bkref_ents == NULL, 0))
+        return REG_ESPACE;
+      if (BE (mctx->sub_tops == NULL, 0))
+        return REG_ESPACE;
+    }
+  /* When N <= 0 nothing is allocated here.  Per the original note, the
+     caller has already zeroed MCTX, so bkref_ents, nbkref_ents and
+     nsub_tops need no explicit reset.  */
+
+  mctx->asub_tops = n;
+  mctx->abkref_ents = n;
+  mctx->max_mb_elem_len = 1;
+  return REG_NOERROR;
+}
+
+/* Release everything in MCTX that depends on the current input: each
+   registered sub-match top together with its `lasts' entries and paths.
+   Afterwards the sub-match top and back-reference entry counts are zero;
+   the arrays holding them stay allocated.  This must be called whenever
+   the matcher changes the start index of the input or the input string.  */
+
+static void
+internal_function
+match_ctx_clean (re_match_context_t *mctx)
+{
+  int top_no = 0;
+
+  while (top_no < mctx->nsub_tops)
+    {
+      re_sub_match_top_t *top = mctx->sub_tops[top_no++];
+      int last_no;
+
+      /* Each `last' owns its path array.  */
+      for (last_no = 0; last_no < top->nlasts; ++last_no)
+        {
+          re_sub_match_last_t *last = top->lasts[last_no];
+          re_free (last->path.array);
+          re_free (last);
+        }
+      re_free (top->lasts);
+
+      if (top->path != NULL)
+        {
+          re_free (top->path->array);
+          re_free (top->path);
+        }
+      free (top);
+    }
+
+  mctx->nbkref_ents = 0;
+  mctx->nsub_tops = 0;
+}
+
+/* Release all memory owned by MCTX: first the per-input entries (via
+   match_ctx_clean, which still needs MCTX->SUB_TOPS), then the two
+   arrays themselves.  */
+
+static void
+internal_function
+match_ctx_free (re_match_context_t *mctx)
+{
+  match_ctx_clean (mctx);
+  re_free (mctx->bkref_ents);
+  re_free (mctx->sub_tops);
+}
+
+/* Add a new backreference entry (NODE, STR_IDX, FROM, TO) to MCTX.
+   The caller must not add a duplicate entry, and STR_IDX must not be
+   smaller than the STR_IDX of any existing entry.
+
+   Returns REG_NOERROR on success.  Returns REG_ESPACE if the entry array
+   cannot be grown; the existing array is then left untouched and still
+   owned by MCTX, so that match_ctx_free releases it exactly once.  */
+
+static reg_errcode_t
+internal_function
+match_ctx_add_entry (re_match_context_t *mctx, int node, int str_idx, int from,
+                     int to)
+{
+  struct re_backref_cache_entry *entry;
+
+  if (mctx->nbkref_ents >= mctx->abkref_ents)
+    {
+      struct re_backref_cache_entry *new_entry;
+      new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
+                              mctx->abkref_ents * 2);
+      if (BE (new_entry == NULL, 0))
+        /* The old array is still allocated after a failed reallocation.
+           Do not free it here: MCTX->BKREF_ENTS would be left dangling
+           and match_ctx_free would free it a second time.  */
+        return REG_ESPACE;
+      mctx->bkref_ents = new_entry;
+      /* Zero ABKREF_ENTS entries starting at the first unused slot,
+         i.e. the part of the array added by the growth.  */
+      memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
+              sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
+      mctx->abkref_ents *= 2;
+    }
+
+  /* Entries sharing a STR_IDX are chained: flag the previous entry when
+     the new one has the same STR_IDX.  */
+  if (mctx->nbkref_ents > 0
+      && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
+    mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
+
+  entry = mctx->bkref_ents + mctx->nbkref_ents;
+  entry->node = node;
+  entry->str_idx = str_idx;
+  entry->subexp_from = from;
+  entry->subexp_to = to;
+
+  /* This is a cache that saves negative results of check_dst_limits_calc_pos.
+     If bit N is clear, means that this entry won't epsilon-transition to
+     an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression.  If
+     it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
+     such node.
+
+     A backreference does not epsilon-transition unless it is empty, so set
+     to all zeros if FROM != TO.  */
+  entry->eps_reachable_subexps_map = (from == to ? ~0 : 0);
+
+  entry->more = 0;
+  ++mctx->nbkref_ents;
+
+  /* Track the longest back-referenced span seen so far.  */
+  if (mctx->max_mb_elem_len < to - from)
+    mctx->max_mb_elem_len = to - from;
+  return REG_NOERROR;
+}
+
+/* Binary-search MCTX->BKREF_ENTS, which is sorted by str_idx, for the
+   first entry whose str_idx equals STR_IDX.  Returns the index of that
+   entry, or -1 if there is none.  */
+
+static int
+internal_function
+search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx)
+{
+  int left, right, mid, last;
+  last = right = mctx->nbkref_ents;
+  for (left = 0; left < right;)
+    {
+      /* 0 <= left < right here, so this form yields the same midpoint as
+         (left + right) / 2 without the risk of signed overflow.  */
+      mid = left + (right - left) / 2;
+      if (mctx->bkref_ents[mid].str_idx < str_idx)
+        left = mid + 1;
+      else
+        right = mid;
+    }
+  /* LEFT is now the first entry whose str_idx is not below STR_IDX.  */
+  if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
+    return left;
+  else
+    return -1;
+}
+
+/* Record in MCTX that NODE, an OP_OPEN_SUBEXP node, matched at STR_IDX.
+   The array of sub-match tops is doubled when it is full.  Returns
+   REG_ESPACE if growing the array or allocating the new record fails,
+   REG_NOERROR otherwise.  */
+
+static reg_errcode_t
+internal_function
+match_ctx_add_subtop (re_match_context_t *mctx, int node, int str_idx)
+{
+  re_sub_match_top_t *top;
+
+#ifdef DEBUG
+  assert (mctx->sub_tops != NULL);
+  assert (mctx->asub_tops > 0);
+#endif
+  if (BE (mctx->nsub_tops == mctx->asub_tops, 0))
+    {
+      int grown_cap = mctx->asub_tops * 2;
+      re_sub_match_top_t **grown = re_realloc (mctx->sub_tops,
+                                               re_sub_match_top_t *,
+                                               grown_cap);
+      if (BE (grown == NULL, 0))
+        return REG_ESPACE;
+      mctx->sub_tops = grown;
+      mctx->asub_tops = grown_cap;
+    }
+
+  /* The slot is written even when the allocation fails; the count is
+     only advanced once the record is filled in.  */
+  top = calloc (1, sizeof (re_sub_match_top_t));
+  mctx->sub_tops[mctx->nsub_tops] = top;
+  if (BE (top == NULL, 0))
+    return REG_ESPACE;
+
+  top->node = node;
+  top->str_idx = str_idx;
+  ++mctx->nsub_tops;
+  return REG_NOERROR;
+}
+
+/* Record under SUBTOP (the matching OP_OPEN_SUBEXP entry) that NODE, an
+   OP_CLOSE_SUBEXP node, matched at STR_IDX.  The `lasts' array grows to
+   2 * alasts + 1 slots when it is full.  Returns the new record, or NULL
+   if growing the array or allocating the record fails.  */
+
+static re_sub_match_last_t *
+internal_function
+match_ctx_add_sublast (re_sub_match_top_t *subtop, int node, int str_idx)
+{
+  re_sub_match_last_t *entry;
+
+  if (BE (subtop->nlasts == subtop->alasts, 0))
+    {
+      int grown_cap = 2 * subtop->alasts + 1;
+      re_sub_match_last_t **grown = re_realloc (subtop->lasts,
+                                                re_sub_match_last_t *,
+                                                grown_cap);
+      if (BE (grown == NULL, 0))
+        return NULL;
+      subtop->lasts = grown;
+      subtop->alasts = grown_cap;
+    }
+
+  entry = calloc (1, sizeof (re_sub_match_last_t));
+  if (BE (entry == NULL, 0))
+    return NULL;
+
+  entry->node = node;
+  entry->str_idx = str_idx;
+  subtop->lasts[subtop->nlasts] = entry;
+  ++subtop->nlasts;
+  return entry;
+}
+
+/* Fill in SCTX: store the two state arrays SIFTED_STS and LIMITED_STS,
+   the last node and last string index, and start with an empty `limits'
+   node set.  */
+static void
+internal_function
+sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
+               re_dfastate_t **limited_sts, int last_node, int last_str_idx)
+{
+  re_node_set_init_empty (&sctx->limits);
+  sctx->last_str_idx = last_str_idx;
+  sctx->last_node = last_node;
+  sctx->limited_states = limited_sts;
+  sctx->sifted_states = sifted_sts;
+}
diff --git a/gnu_regex/README.txt b/gnu_regex/README.txt
new file mode 100644
index 0000000..8fccbea
--- /dev/null
+++ b/gnu_regex/README.txt
@@ -0,0 +1,5 @@
+These source files were taken from the GNU glibc-2.10.1 package.
+
+ ftp://ftp.gnu.org/gnu/glibc/glibc-2.10.1.tar.bz2
+
+Minor changes were made to eliminate compiler errors and warnings.
diff --git a/gnu_regex/regcomp.c b/gnu_regex/regcomp.c
new file mode 100644
index 0000000..1f3daf2
--- /dev/null
+++ b/gnu_regex/regcomp.c
@@ -0,0 +1,3818 @@
+/* Extended regular expression matching and search library.
+ Copyright (C) 2002,2003,2004,2005,2006,2007,2009
+ Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern,
+ size_t length, reg_syntax_t syntax);
+static void re_compile_fastmap_iter (regex_t *bufp,
+ const re_dfastate_t *init_state,
+ char *fastmap);
+static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len);
+#ifdef RE_ENABLE_I18N
+static void free_charset (re_charset_t *cset);
+#endif /* RE_ENABLE_I18N */
+static void free_workarea_compile (regex_t *preg);
+static reg_errcode_t create_initial_state (re_dfa_t *dfa);
+#ifdef RE_ENABLE_I18N
+static void optimize_utf8 (re_dfa_t *dfa);
+#endif
+static reg_errcode_t analyze (regex_t *preg);
+static reg_errcode_t preorder (bin_tree_t *root,
+ reg_errcode_t (fn (void *, bin_tree_t *)),
+ void *extra);
+static reg_errcode_t postorder (bin_tree_t *root,
+ reg_errcode_t (fn (void *, bin_tree_t *)),
+ void *extra);
+static reg_errcode_t optimize_subexps (void *extra, bin_tree_t *node);
+static reg_errcode_t lower_subexps (void *extra, bin_tree_t *node);
+static bin_tree_t *lower_subexp (reg_errcode_t *err, regex_t *preg,
+ bin_tree_t *node);
+static reg_errcode_t calc_first (void *extra, bin_tree_t *node);
+static reg_errcode_t calc_next (void *extra, bin_tree_t *node);
+static reg_errcode_t link_nfa_nodes (void *extra, bin_tree_t *node);
+static int duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint);
+static int search_duplicated_node (const re_dfa_t *dfa, int org_node,
+ unsigned int constraint);
+static reg_errcode_t calc_eclosure (re_dfa_t *dfa);
+static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa,
+ int node, int root);
+static reg_errcode_t calc_inveclosure (re_dfa_t *dfa);
+static int fetch_number (re_string_t *input, re_token_t *token,
+ reg_syntax_t syntax);
+static int peek_token (re_token_t *token, re_string_t *input,
+ reg_syntax_t syntax) internal_function;
+static bin_tree_t *parse (re_string_t *regexp, regex_t *preg,
+ reg_syntax_t syntax, reg_errcode_t *err);
+static bin_tree_t *parse_reg_exp (re_string_t *regexp, regex_t *preg,
+ re_token_t *token, reg_syntax_t syntax,
+ int nest, reg_errcode_t *err);
+static bin_tree_t *parse_branch (re_string_t *regexp, regex_t *preg,
+ re_token_t *token, reg_syntax_t syntax,
+ int nest, reg_errcode_t *err);
+static bin_tree_t *parse_expression (re_string_t *regexp, regex_t *preg,
+ re_token_t *token, reg_syntax_t syntax,
+ int nest, reg_errcode_t *err);
+static bin_tree_t *parse_sub_exp (re_string_t *regexp, regex_t *preg,
+ re_token_t *token, reg_syntax_t syntax,
+ int nest, reg_errcode_t *err);
+static bin_tree_t *parse_dup_op (bin_tree_t *dup_elem, re_string_t *regexp,
+ re_dfa_t *dfa, re_token_t *token,
+ reg_syntax_t syntax, reg_errcode_t *err);
+static bin_tree_t *parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa,
+ re_token_t *token, reg_syntax_t syntax,
+ reg_errcode_t *err);
+static reg_errcode_t parse_bracket_element (bracket_elem_t *elem,
+ re_string_t *regexp,
+ re_token_t *token, int token_len,
+ re_dfa_t *dfa,
+ reg_syntax_t syntax,
+ int accept_hyphen);
+static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem,
+ re_string_t *regexp,
+ re_token_t *token);
+#ifdef RE_ENABLE_I18N
+static reg_errcode_t build_equiv_class (bitset_t sbcset,
+ re_charset_t *mbcset,
+ int *equiv_class_alloc,
+ const unsigned char *name);
+static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
+ bitset_t sbcset,
+ re_charset_t *mbcset,
+ int *char_class_alloc,
+ const unsigned char *class_name,
+ reg_syntax_t syntax);
+#else /* not RE_ENABLE_I18N */
+static reg_errcode_t build_equiv_class (bitset_t sbcset,
+ const unsigned char *name);
+static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
+ bitset_t sbcset,
+ const unsigned char *class_name,
+ reg_syntax_t syntax);
+#endif /* not RE_ENABLE_I18N */
+static bin_tree_t *build_charclass_op (re_dfa_t *dfa,
+ RE_TRANSLATE_TYPE trans,
+ const unsigned char *class_name,
+ const unsigned char *extra,
+ int non_match, reg_errcode_t *err);
+static bin_tree_t *create_tree (re_dfa_t *dfa,
+ bin_tree_t *left, bin_tree_t *right,
+ re_token_type_t type);
+static bin_tree_t *create_token_tree (re_dfa_t *dfa,
+ bin_tree_t *left, bin_tree_t *right,
+ const re_token_t *token);
+static bin_tree_t *duplicate_tree (const bin_tree_t *src, re_dfa_t *dfa);
+static void free_token (re_token_t *node);
+static reg_errcode_t free_tree (void *extra, bin_tree_t *node);
+static reg_errcode_t mark_opt_subexp (void *extra, bin_tree_t *node);
+
+/* This table gives an error message for each of the error codes listed
+ in regex.h. Obviously the order here has to be same as there.
+ POSIX doesn't require that we do anything for REG_NOERROR,
+ but why not be nice? */
+
+/* All messages live back to back in one char array, each followed by an
+ explicit "\0" separator. Every REG_*_IDX macro is the byte offset of its
+ message: the previous offset plus sizeof of the previous message literal
+ (sizeof counts that literal's terminating NUL, which accounts for the
+ separator). The last message relies on the array's own terminator. */
+const char __re_error_msgid[] attribute_hidden =
+ {
+#define REG_NOERROR_IDX 0
+ gettext_noop ("Success") /* REG_NOERROR */
+ "\0"
+#define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
+ gettext_noop ("No match") /* REG_NOMATCH */
+ "\0"
+#define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match")
+ gettext_noop ("Invalid regular expression") /* REG_BADPAT */
+ "\0"
+#define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
+ gettext_noop ("Invalid collation character") /* REG_ECOLLATE */
+ "\0"
+#define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character")
+ gettext_noop ("Invalid character class name") /* REG_ECTYPE */
+ "\0"
+#define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name")
+ gettext_noop ("Trailing backslash") /* REG_EESCAPE */
+ "\0"
+#define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash")
+ gettext_noop ("Invalid back reference") /* REG_ESUBREG */
+ "\0"
+#define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference")
+ gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */
+ "\0"
+#define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
+ gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */
+ "\0"
+#define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
+ gettext_noop ("Unmatched \\{") /* REG_EBRACE */
+ "\0"
+#define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{")
+ gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */
+ "\0"
+#define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
+ gettext_noop ("Invalid range end") /* REG_ERANGE */
+ "\0"
+#define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end")
+ gettext_noop ("Memory exhausted") /* REG_ESPACE */
+ "\0"
+#define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted")
+ gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */
+ "\0"
+#define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
+ gettext_noop ("Premature end of regular expression") /* REG_EEND */
+ "\0"
+#define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression")
+ gettext_noop ("Regular expression too big") /* REG_ESIZE */
+ "\0"
+#define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big")
+ gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
+ };
+
+/* Byte offsets into __re_error_msgid, one per message and in the same
+ order. Callers index this array with an error code and add the result
+ to __re_error_msgid to reach the message text. */
+const size_t __re_error_msgid_idx[] attribute_hidden =
+ {
+ REG_NOERROR_IDX,
+ REG_NOMATCH_IDX,
+ REG_BADPAT_IDX,
+ REG_ECOLLATE_IDX,
+ REG_ECTYPE_IDX,
+ REG_EESCAPE_IDX,
+ REG_ESUBREG_IDX,
+ REG_EBRACK_IDX,
+ REG_EPAREN_IDX,
+ REG_EBRACE_IDX,
+ REG_BADBR_IDX,
+ REG_ERANGE_IDX,
+ REG_ESPACE_IDX,
+ REG_BADRPT_IDX,
+ REG_EEND_IDX,
+ REG_ESIZE_IDX,
+ REG_ERPAREN_IDX
+ };
+
+/* Entry points for GNU code. */
+
+/* re_compile_pattern is the GNU regular expression compiler: it
+ compiles PATTERN (of length LENGTH) and puts the result in BUFP.
+ Returns 0 if the pattern was valid, otherwise an error string.
+
+ Assumes the `allocated' (and perhaps `buffer') and `translate' fields
+ are set in BUFP on entry. */
+
+const char *
+re_compile_pattern (pattern, length, bufp)
+ const char *pattern;
+ size_t length;
+ struct re_pattern_buffer *bufp;
+{
+  reg_errcode_t status;
+
+  /* GNU callers request register information by passing a non-null REGS
+     argument to re_match and friends, so no_sub is driven purely by the
+     RE_NO_SUB bit of the global syntax options.  */
+  bufp->no_sub = (re_syntax_options & RE_NO_SUB) ? 1 : 0;
+
+  /* Anchors match at newlines for the GNU entry point.  */
+  bufp->newline_anchor = 1;
+
+  status = re_compile_internal (bufp, pattern, length, re_syntax_options);
+  if (status == REG_NOERROR)
+    return NULL;
+
+  /* On failure hand back the (translated) message for the error code.  */
+  return gettext (__re_error_msgid + __re_error_msgid_idx[(int) status]);
+}
+#ifdef _LIBC
+weak_alias (__re_compile_pattern, re_compile_pattern)
+#endif
+
+/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
+ also be assigned to arbitrarily: each pattern buffer stores its own
+ syntax, so it can be changed between regex compilations. */
+/* This has no initializer because initialized variables in Emacs
+ become read-only after dumping. */
+reg_syntax_t re_syntax_options;
+
+
+/* Specify the precise syntax of regexps for compilation. This provides
+ for compatibility for various utilities which historically have
+ different, incompatible syntaxes.
+
+ The argument SYNTAX is a bit mask comprised of the various bits
+ defined in regex.h. We return the old syntax. */
+
+reg_syntax_t
+re_set_syntax (syntax)
+ reg_syntax_t syntax;
+{
+  /* Swap in the new global syntax and return the one it replaced.  */
+  const reg_syntax_t previous = re_syntax_options;
+
+  re_syntax_options = syntax;
+
+  return previous;
+}
+#ifdef _LIBC
+weak_alias (__re_set_syntax, re_set_syntax)
+#endif
+
+int
+re_compile_fastmap (bufp)
+ struct re_pattern_buffer *bufp;
+{
+  re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
+  char *map = bufp->fastmap;
+  const re_dfastate_t *contextual[3];
+  int k;
+
+  /* The context-specific initial states; each is folded in only when it
+     is a distinct state from the plain initial state.  */
+  contextual[0] = dfa->init_state_word;
+  contextual[1] = dfa->init_state_nl;
+  contextual[2] = dfa->init_state_begbuf;
+
+  /* Start from an all-zero map and accumulate the possible first bytes.  */
+  memset (map, '\0', sizeof (char) * SBC_MAX);
+  re_compile_fastmap_iter (bufp, dfa->init_state, map);
+  for (k = 0; k < 3; ++k)
+    if (contextual[k] != dfa->init_state)
+      re_compile_fastmap_iter (bufp, contextual[k], map);
+
+  bufp->fastmap_accurate = 1;
+  return 0;
+}
+#ifdef _LIBC
+weak_alias (__re_compile_fastmap, re_compile_fastmap)
+#endif
+
+static inline void
+__attribute ((always_inline))
+re_set_fastmap (char *fastmap, int icase, int ch)
+{
+  /* CH itself is always marked; when ICASE is nonzero the entry for
+     tolower (CH) is marked as well.  */
+  fastmap[ch] = 1;
+  if (!icase)
+    return;
+  fastmap[tolower (ch)] = 1;
+}
+
+/* Helper function for re_compile_fastmap.
+ Compile fastmap for the initial_state INIT_STATE. */
+
+/* Walk every node of INIT_STATE and mark in FASTMAP each byte value with
+ which a match starting in that state could begin. BUFP supplies the DFA
+ and the syntax bits; bufp->can_be_null is set when END_OF_RE is among
+ the nodes. */
+static void
+re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
+ char *fastmap)
+{
+ re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
+ int node_cnt;
+ /* Plain tolower folding is only applied when characters are single bytes. */
+ int icase = (dfa->mb_cur_max == 1 && (bufp->syntax & RE_ICASE));
+ for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt)
+ {
+ int node = init_state->nodes.elems[node_cnt];
+ re_token_type_t type = dfa->nodes[node].type;
+
+ if (type == CHARACTER)
+ {
+ re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c);
+#ifdef RE_ENABLE_I18N
+ if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
+ {
+ /* Multibyte case folding: gather this byte and the following
+ mb_partial CHARACTER nodes into BUF, decode them as one wide
+ character, lowercase it, re-encode it, and mark the first
+ byte of the lowercase encoding. */
+ unsigned char *buf = alloca (dfa->mb_cur_max), *p;
+ wchar_t wc;
+ mbstate_t state;
+
+ p = buf;
+ *p++ = dfa->nodes[node].opr.c;
+ while (++node < dfa->nodes_len
+ && dfa->nodes[node].type == CHARACTER
+ && dfa->nodes[node].mb_partial)
+ *p++ = dfa->nodes[node].opr.c;
+ memset (&state, '\0', sizeof (state));
+ if (__mbrtowc (&wc, (const char *) buf, p - buf,
+ &state) == p - buf
+ && (__wcrtomb ((char *) buf, towlower (wc), &state)
+ != (size_t) -1))
+ re_set_fastmap (fastmap, 0, buf[0]);
+ }
+#endif
+ }
+ else if (type == SIMPLE_BRACKET)
+ {
+ /* Mark every byte whose bit is set in the bracket's bitset; CH
+ counts bit positions across all words. */
+ int i, ch;
+ for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
+ {
+ int j;
+ bitset_word_t w = dfa->nodes[node].opr.sbcset[i];
+ for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
+ if (w & ((bitset_word_t) 1 << j))
+ re_set_fastmap (fastmap, icase, ch);
+ }
+ }
+#ifdef RE_ENABLE_I18N
+ else if (type == COMPLEX_BRACKET)
+ {
+ re_charset_t *cset = dfa->nodes[node].opr.mbcset;
+ int i;
+
+# ifdef _LIBC
+ /* See if we have to try all bytes which start multiple collation
+ elements.
+ e.g. In da_DK, we want to catch 'a' since "aa" is a valid
+ collation element, and don't catch 'b' since 'b' is
+ the only collation element which starts from 'b' (and
+ it is caught by SIMPLE_BRACKET). */
+ if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0
+ && (cset->ncoll_syms || cset->nranges))
+ {
+ const int32_t *table = (const int32_t *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
+ for (i = 0; i < SBC_MAX; ++i)
+ if (table[i] < 0)
+ re_set_fastmap (fastmap, icase, i);
+ }
+# endif /* _LIBC */
+
+ /* See if we have to start the match at all multibyte characters,
+ i.e. where we would not find an invalid sequence. This only
+ applies to multibyte character sets; for single byte character
+ sets, the SIMPLE_BRACKET again suffices. */
+ if (dfa->mb_cur_max > 1
+ && (cset->nchar_classes || cset->non_match
+# ifdef _LIBC
+ || cset->nequiv_classes
+# endif /* _LIBC */
+ ))
+ {
+ /* Try all 256 byte values; C wraps back to 0 to end the loop.
+ A result of (size_t) -2 means the byte is an incomplete
+ multibyte sequence, i.e. a possible lead byte. */
+ unsigned char c = 0;
+ do
+ {
+ mbstate_t mbs;
+ memset (&mbs, 0, sizeof (mbs));
+ if (__mbrtowc (NULL, (char *) &c, 1, &mbs) == (size_t) -2)
+ re_set_fastmap (fastmap, false, (int) c);
+ }
+ while (++c != 0);
+ }
+
+ else
+ {
+ /* ... Else catch all bytes which can start the mbchars. */
+ for (i = 0; i < cset->nmbchars; ++i)
+ {
+ char buf[256];
+ mbstate_t state;
+ memset (&state, '\0', sizeof (state));
+ if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1)
+ re_set_fastmap (fastmap, icase, *(unsigned char *) buf);
+ if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
+ {
+ if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state)
+ != (size_t) -1)
+ re_set_fastmap (fastmap, false, *(unsigned char *) buf);
+ }
+ }
+ }
+ }
+#endif /* RE_ENABLE_I18N */
+ else if (type == OP_PERIOD
+#ifdef RE_ENABLE_I18N
+ || type == OP_UTF8_PERIOD
+#endif /* RE_ENABLE_I18N */
+ || type == END_OF_RE)
+ {
+ /* Any byte may start a match here: fill the whole map and stop
+ scanning, since no further node can add anything. */
+ memset (fastmap, '\1', sizeof (char) * SBC_MAX);
+ if (type == END_OF_RE)
+ bufp->can_be_null = 1;
+ return;
+ }
+ }
+}
+
+/* Entry point for POSIX code. */
+/* regcomp takes a regular expression as a string and compiles it.
+
+ PREG is a regex_t *. We do not expect any fields to be initialized,
+ since POSIX says we shouldn't. Thus, we set
+
+ `buffer' to the compiled pattern;
+ `used' to the length of the compiled pattern;
+ `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
+ REG_EXTENDED bit in CFLAGS is set; otherwise, to
+ RE_SYNTAX_POSIX_BASIC;
+ `newline_anchor' to REG_NEWLINE being set in CFLAGS;
+ `fastmap' to an allocated space for the fastmap;
+ `fastmap_accurate' to zero;
+ `re_nsub' to the number of subexpressions in PATTERN.
+
+ PATTERN is the address of the pattern string.
+
+ CFLAGS is a series of bits which affect compilation.
+
+ If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
+ use POSIX basic syntax.
+
+ If REG_NEWLINE is set, then . and [^...] don't match newline.
+ Also, regexec will try a match beginning after every newline.
+
+ If REG_ICASE is set, then we consider upper- and lowercase
+ versions of letters to be equivalent when matching.
+
+ If REG_NOSUB is set, then when PREG is passed to regexec, that
+ routine will report only success or failure, and nothing about the
+ registers.
+
+ It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
+ the return codes and their meanings.) */
+
+int
+regcomp (preg, pattern, cflags)
+ regex_t *__restrict preg;
+ const char *__restrict pattern;
+ int cflags;
+{
+  reg_errcode_t status;
+  reg_syntax_t syntax;
+
+  /* Pick the base POSIX dialect from CFLAGS.  */
+  if (cflags & REG_EXTENDED)
+    syntax = RE_SYNTAX_POSIX_EXTENDED;
+  else
+    syntax = RE_SYNTAX_POSIX_BASIC;
+
+  preg->buffer = NULL;
+  preg->allocated = 0;
+  preg->used = 0;
+
+  /* The fastmap is allocated up front; without it we cannot continue.  */
+  preg->fastmap = re_malloc (char, SBC_MAX);
+  if (BE (preg->fastmap == NULL, 0))
+    return REG_ESPACE;
+
+  if (cflags & REG_ICASE)
+    syntax |= RE_ICASE;
+
+  /* REG_NEWLINE: neither . nor [^...] matches newline, and anchors
+     match at newlines.  */
+  if (cflags & REG_NEWLINE)
+    {
+      syntax &= ~RE_DOT_NEWLINE;
+      syntax |= RE_HAT_LISTS_NOT_NEWLINE;
+    }
+  preg->newline_anchor = (cflags & REG_NEWLINE) ? 1 : 0;
+  preg->no_sub = (cflags & REG_NOSUB) ? 1 : 0;
+  preg->translate = NULL;
+
+  status = re_compile_internal (preg, pattern, strlen (pattern), syntax);
+
+  /* POSIX has a single code for unmatched parentheses of either kind.  */
+  if (status == REG_ERPAREN)
+    status = REG_EPAREN;
+
+  if (BE (status != REG_NOERROR, 0))
+    {
+      /* Compilation failed: the fastmap is of no use, release it.  */
+      re_free (preg->fastmap);
+      preg->fastmap = NULL;
+      return (int) status;
+    }
+
+  /* Build the fastmap now, since regexec cannot modify the pattern
+     buffer.  preg->fastmap is known to be non-NULL here.  */
+  (void) re_compile_fastmap (preg);
+  return (int) status;
+}
+#ifdef _LIBC
+weak_alias (__regcomp, regcomp)
+#endif
+
+/* Returns a message corresponding to an error code, ERRCODE, returned
+ from either regcomp or regexec. We don't use PREG here. */
+
+size_t
+regerror (
+ int errcode,
+ const regex_t *__restrict preg,
+ char *__restrict errbuf,
+ size_t errbuf_size)
+{
+  const int nmsgs = (int) (sizeof (__re_error_msgid_idx)
+                           / sizeof (__re_error_msgid_idx[0]));
+  const char *text;
+  size_t needed;
+
+  /* An out-of-range code means a bug somewhere in the program; abort so
+     it can be found and fixed.  */
+  if (BE (errcode < 0 || errcode >= nmsgs, 0))
+    abort ();
+
+  text = gettext (__re_error_msgid + __re_error_msgid_idx[errcode]);
+  needed = strlen (text) + 1; /* Includes the null. */
+
+  if (BE (errbuf_size != 0, 1))
+    {
+      /* Copy as many message bytes as fit while leaving room for the
+         terminator, then terminate: the whole message if it fits,
+         otherwise its first ERRBUF_SIZE - 1 bytes.  */
+      size_t keep = (needed > errbuf_size ? errbuf_size : needed) - 1;
+      memcpy (errbuf, text, keep);
+      errbuf[keep] = '\0';
+    }
+
+  /* Always report the size needed for the full message.  */
+  return needed;
+}
+#ifdef _LIBC
+weak_alias (__regerror, regerror)
+#endif
+
+
+#ifdef RE_ENABLE_I18N
+/* This static array is used for the map to single-byte characters when
+ UTF-8 is used. Otherwise we would allocate memory just to initialize
+ it the same all the time. UTF-8 is the preferred encoding so this is
+ a worthwhile optimization. */
+/* The [first ... last] range designator below is a GNU C extension; it
+ assigns BITSET_WORD_MAX to every word from index 0 up to and including
+ 0x80 / BITSET_WORD_BITS - 1. The remaining words are implicitly zero. */
+static const bitset_t utf8_sb_map =
+{
+ /* Set the first 128 bits. */
+ [0 ... 0x80 / BITSET_WORD_BITS - 1] = BITSET_WORD_MAX
+};
+#endif
+
+
+static void
+free_dfa_content (re_dfa_t *dfa)
+{
+  int n, k;
+
+  /* Release DFA and everything it owns.  Per-token payloads go first,
+     while the node array itself is still alive.  */
+  if (dfa->nodes != NULL)
+    for (n = 0; n < dfa->nodes_len; ++n)
+      free_token (&dfa->nodes[n]);
+  re_free (dfa->nexts);
+
+  /* Each per-node set array is walked only if it was allocated.  */
+  for (n = 0; n < dfa->nodes_len; ++n)
+    {
+      if (dfa->eclosures != NULL)
+        re_node_set_free (&dfa->eclosures[n]);
+      if (dfa->inveclosures != NULL)
+        re_node_set_free (&dfa->inveclosures[n]);
+      if (dfa->edests != NULL)
+        re_node_set_free (&dfa->edests[n]);
+    }
+  re_free (dfa->edests);
+  re_free (dfa->eclosures);
+  re_free (dfa->inveclosures);
+  re_free (dfa->nodes);
+
+  /* The state hash table: free every state in every bucket, then the
+     bucket arrays, then the table.  */
+  if (dfa->state_table != NULL)
+    for (n = 0; n <= dfa->state_hash_mask; ++n)
+      {
+        struct re_state_table_entry *bucket = &dfa->state_table[n];
+        for (k = 0; k < bucket->num; ++k)
+          free_state (bucket->array[k]);
+        re_free (bucket->array);
+      }
+  re_free (dfa->state_table);
+#ifdef RE_ENABLE_I18N
+  /* sb_char may point at the shared static UTF-8 map, which must not
+     be freed.  */
+  if (dfa->sb_char != utf8_sb_map)
+    re_free (dfa->sb_char);
+#endif
+  re_free (dfa->subexp_map);
+#ifdef DEBUG
+  re_free (dfa->re_str);
+#endif
+
+  re_free (dfa);
+}
+
+
+/* Free dynamically allocated space used by PREG. */
+
+void
+regfree (preg)
+ regex_t *preg;
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+
+  /* Release the auxiliary tables and clear the pointers so PREG does
+     not keep dangling references.  */
+  re_free (preg->translate);
+  preg->translate = NULL;
+
+  re_free (preg->fastmap);
+  preg->fastmap = NULL;
+
+  /* Then the compiled DFA, if there is one.  */
+  if (BE (dfa != NULL, 1))
+    free_dfa_content (dfa);
+  preg->allocated = 0;
+  preg->buffer = NULL;
+}
+#ifdef _LIBC
+weak_alias (__regfree, regfree)
+#endif
+
+/* Entry points compatible with 4.2 BSD regex library. We don't define
+ them unless specifically requested. */
+
+#if defined _REGEX_RE_COMP || defined _LIBC
+
+/* BSD has one and only one pattern buffer. */
+static struct re_pattern_buffer re_comp_buf;
+
+char *
+# ifdef _LIBC
+/* Make these definitions weak in libc, so POSIX programs can redefine
+ these names if they don't use our functions, and still use
+ regcomp/regexec above without link errors. */
+weak_function
+# endif
+re_comp (s)
+ const char *s;
+{
+  reg_errcode_t status;
+
+  /* With no new pattern, succeed only if one was compiled earlier.  */
+  if (s == NULL)
+    {
+      if (re_comp_buf.buffer == NULL)
+        return gettext ("No previous regular expression");
+      return 0;
+    }
+
+  if (re_comp_buf.buffer != NULL)
+    {
+      /* Detach the fastmap so __regfree does not release it, wipe the
+         whole buffer, then reattach the fastmap for reuse.  */
+      char *saved_map = re_comp_buf.fastmap;
+      re_comp_buf.fastmap = NULL;
+      __regfree (&re_comp_buf);
+      memset (&re_comp_buf, '\0', sizeof (re_comp_buf));
+      re_comp_buf.fastmap = saved_map;
+    }
+
+  if (re_comp_buf.fastmap == NULL)
+    {
+      re_comp_buf.fastmap = (char *) malloc (SBC_MAX);
+      if (re_comp_buf.fastmap == NULL)
+        return (char *) gettext (__re_error_msgid
+                                 + __re_error_msgid_idx[(int) REG_ESPACE]);
+    }
+
+  /* Since `re_exec' always passes NULL for the `regs' argument, the
+     pattern buffer fields which affect it need no initialization.  */
+
+  /* Match anchors at newlines. */
+  re_comp_buf.newline_anchor = 1;
+
+  status = re_compile_internal (&re_comp_buf, s, strlen (s),
+                                re_syntax_options);
+  if (status == REG_NOERROR)
+    return NULL;
+
+  /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */
+  return (char *) gettext (__re_error_msgid
+                           + __re_error_msgid_idx[(int) status]);
+}
+
+#ifdef _LIBC
+/* Defined through the libc_freeres_fn macro (only when _LIBC is set):
+ releases whatever the single BSD pattern buffer re_comp_buf holds. */
+libc_freeres_fn (free_mem)
+{
+ __regfree (&re_comp_buf);
+}
+#endif
+
+#endif /* _REGEX_RE_COMP */
+
+/* Internal entry point.
+ Compile the regular expression PATTERN, whose length is LENGTH.
+ SYNTAX indicates the regular expression's syntax. */
+
+static reg_errcode_t
+re_compile_internal (regex_t *preg, const char * pattern, size_t length,
+                     reg_syntax_t syntax)
+{
+  /* Compile PATTERN (LENGTH bytes, not necessarily NUL-terminated) under
+     SYNTAX into PREG.  Returns REG_NOERROR on success.  On any failure
+     the DFA is released and preg->buffer / preg->allocated are reset, so
+     the caller never sees a half-built pattern buffer.  */
+  reg_errcode_t err = REG_NOERROR;
+  re_dfa_t *dfa;
+  re_string_t regexp;
+
+  /* Initialize the pattern buffer. */
+  preg->fastmap_accurate = 0;
+  preg->syntax = syntax;
+  preg->not_bol = preg->not_eol = 0;
+  preg->used = 0;
+  preg->re_nsub = 0;
+  preg->can_be_null = 0;
+  preg->regs_allocated = REGS_UNALLOCATED;
+
+  /* Initialize the dfa. */
+  dfa = (re_dfa_t *) preg->buffer;
+  if (BE (preg->allocated < sizeof (re_dfa_t), 0))
+    {
+      /* If zero allocated, but buffer is non-null, try to realloc
+         enough space. This loses if buffer's address is bogus, but
+         that is the user's responsibility. If ->buffer is NULL this
+         is a simple allocation. */
+      dfa = re_realloc (preg->buffer, re_dfa_t, 1);
+      if (dfa == NULL)
+        return REG_ESPACE;
+      preg->allocated = sizeof (re_dfa_t);
+      preg->buffer = (unsigned char *) dfa;
+    }
+  preg->used = sizeof (re_dfa_t);
+
+  err = init_dfa (dfa, length);
+  if (BE (err != REG_NOERROR, 0))
+    {
+      free_dfa_content (dfa);
+      preg->buffer = NULL;
+      preg->allocated = 0;
+      return err;
+    }
+#ifdef DEBUG
+  /* Keep a private copy of the pattern for debugging.
+     Note: length+1 will not overflow since it is checked in init_dfa. */
+  dfa->re_str = re_malloc (char, length + 1);
+  if (BE (dfa->re_str == NULL, 0))
+    {
+      /* Do not write through a failed allocation. */
+      free_dfa_content (dfa);
+      preg->buffer = NULL;
+      preg->allocated = 0;
+      return REG_ESPACE;
+    }
+  /* PATTERN is only guaranteed to hold LENGTH bytes: copy at most that
+     many and terminate explicitly, so we neither read past the caller's
+     buffer nor leave re_str unterminated. */
+  strncpy (dfa->re_str, pattern, length);
+  dfa->re_str[length] = '\0';
+#endif
+
+  __libc_lock_init (dfa->lock);
+
+  err = re_string_construct (&regexp, pattern, length, preg->translate,
+                             syntax & RE_ICASE, dfa);
+  if (BE (err != REG_NOERROR, 0))
+    {
+      /* Shared failure exit, also reached by goto from below: drop the
+         compile-time work areas, the input string and the DFA. */
+    re_compile_internal_free_return:
+      free_workarea_compile (preg);
+      re_string_destruct (&regexp);
+      free_dfa_content (dfa);
+      preg->buffer = NULL;
+      preg->allocated = 0;
+      return err;
+    }
+
+  /* Parse the regular expression, and build a structure tree. */
+  preg->re_nsub = 0;
+  dfa->str_tree = parse (&regexp, preg, syntax, &err);
+  if (BE (dfa->str_tree == NULL, 0))
+    goto re_compile_internal_free_return;
+
+  /* Analyze the tree and create the nfa. */
+  err = analyze (preg);
+  if (BE (err != REG_NOERROR, 0))
+    goto re_compile_internal_free_return;
+
+#ifdef RE_ENABLE_I18N
+  /* If possible, do searching in single byte encoding to speed things up. */
+  if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->translate == NULL)
+    optimize_utf8 (dfa);
+#endif
+
+  /* Then create the initial state of the dfa. */
+  err = create_initial_state (dfa);
+
+  /* Release work areas. */
+  free_workarea_compile (preg);
+  re_string_destruct (&regexp);
+
+  if (BE (err != REG_NOERROR, 0))
+    {
+      free_dfa_content (dfa);
+      preg->buffer = NULL;
+      preg->allocated = 0;
+    }
+
+  return err;
+}
+
+/* Initialize DFA. We use the length of the regular expression PAT_LEN
+ as the initial length of some arrays. */
+
+static reg_errcode_t
+init_dfa (re_dfa_t *dfa, size_t pat_len)
+{
+  /* Zero DFA and set up its node array, state hash table and locale
+     information for a pattern of PAT_LEN bytes.  Returns REG_NOERROR, or
+     REG_ESPACE when an allocation fails or PAT_LEN is too large.  On
+     failure DFA may hold partial allocations; the caller releases them.  */
+  unsigned int table_size;
+#ifndef _LIBC
+  char *codeset_name;
+#endif
+
+  memset (dfa, '\0', sizeof (re_dfa_t));
+
+  /* Force allocation of str_tree_storage the first time. */
+  dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
+
+  /* Avoid overflows. */
+  if (pat_len == SIZE_MAX)
+    return REG_ESPACE;
+
+  dfa->nodes_alloc = pat_len + 1;
+  dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc);
+
+  /* table_size = 2 ^ ceil(log pat_len)
+     TABLE_SIZE is narrower than PAT_LEN can be, so stop with REG_ESPACE
+     before a doubling would wrap it to zero; otherwise the loop would
+     never terminate for a huge pattern. */
+  for (table_size = 1; table_size <= pat_len; table_size <<= 1)
+    if (table_size > ((unsigned int) -1) / 2)
+      return REG_ESPACE;
+
+  dfa->state_table = calloc (sizeof (struct re_state_table_entry), table_size);
+  dfa->state_hash_mask = table_size - 1;
+
+  dfa->mb_cur_max = MB_CUR_MAX;
+#ifdef _LIBC
+  if (dfa->mb_cur_max == 6
+      && strcmp (_NL_CURRENT (LC_CTYPE, _NL_CTYPE_CODESET_NAME), "UTF-8") == 0)
+    dfa->is_utf8 = 1;
+  dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII)
+                       != 0);
+#else
+# ifdef HAVE_LANGINFO_CODESET
+  codeset_name = nl_langinfo (CODESET);
+# else
+  /* No nl_langinfo: take the codeset from the part after '.' in the
+     first non-empty of LC_ALL, LC_CTYPE and LANG. */
+  codeset_name = getenv ("LC_ALL");
+  if (codeset_name == NULL || codeset_name[0] == '\0')
+    codeset_name = getenv ("LC_CTYPE");
+  if (codeset_name == NULL || codeset_name[0] == '\0')
+    codeset_name = getenv ("LANG");
+  if (codeset_name == NULL)
+    codeset_name = "";
+  else if (strchr (codeset_name, '.') != NULL)
+    codeset_name = strchr (codeset_name, '.') + 1;
+# endif
+
+  if (strcasecmp (codeset_name, "UTF-8") == 0
+      || strcasecmp (codeset_name, "UTF8") == 0)
+    dfa->is_utf8 = 1;
+
+  /* We check exhaustively in the loop below if this charset is a
+     superset of ASCII. */
+  dfa->map_notascii = 0;
+#endif
+
+#ifdef RE_ENABLE_I18N
+  if (dfa->mb_cur_max > 1)
+    {
+      if (dfa->is_utf8)
+        dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map;
+      else
+        {
+          int i, j, ch;
+
+          dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
+          if (BE (dfa->sb_char == NULL, 0))
+            return REG_ESPACE;
+
+          /* Set the bits corresponding to single byte chars. */
+          for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
+            for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
+              {
+                wint_t wch = __btowc (ch);
+                if (wch != WEOF)
+                  dfa->sb_char[i] |= (bitset_word_t) 1 << j;
+# ifndef _LIBC
+                if (isascii (ch) && wch != ch)
+                  dfa->map_notascii = 1;
+# endif
+              }
+        }
+    }
+#endif
+
+  /* The two allocations made above are checked together here. */
+  if (BE (dfa->nodes == NULL || dfa->state_table == NULL, 0))
+    return REG_ESPACE;
+  return REG_NOERROR;
+}
+
+/* Initialize the WORD_CHAR table, which indicates which characters are
+ "word" characters. Here "word" means a word-constituent character, as
+ used by operators like "\<", "\>", etc. */
+
+static void
+internal_function
+init_word_char (re_dfa_t *dfa)
+{
+  const int nchars = BITSET_WORDS * BITSET_WORD_BITS;
+  int ch;
+
+  dfa->word_ops_used = 1;
+
+  /* Set the bit for every character value that is alphanumeric or '_'.
+     Bit CH lives in word CH / BITSET_WORD_BITS at position
+     CH % BITSET_WORD_BITS.  */
+  for (ch = 0; ch < nchars; ++ch)
+    if (ch == '_' || isalnum (ch))
+      dfa->word_char[ch / BITSET_WORD_BITS]
+        |= (bitset_word_t) 1 << (ch % BITSET_WORD_BITS);
+}
+
+/* Free the work areas which are only used while compiling. */
+
+static void
+free_workarea_compile (regex_t *preg)
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  bin_tree_storage_t *chunk = dfa->str_tree_storage;
+
+  /* Walk the linked list of tree storage chunks, reading each `next'
+     pointer before the chunk is freed.  */
+  while (chunk != NULL)
+    {
+      bin_tree_storage_t *following = chunk->next;
+      re_free (chunk);
+      chunk = following;
+    }
+
+  /* Reset the tree bookkeeping to its "nothing allocated" state.  */
+  dfa->str_tree = NULL;
+  dfa->str_tree_storage = NULL;
+  dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
+
+  re_free (dfa->org_indices);
+  dfa->org_indices = NULL;
+}
+
+/* Create initial states for all contexts. */
+
+static reg_errcode_t
+create_initial_state (re_dfa_t *dfa)
+{
+  /* Build dfa->init_state and its word / newline / buffer-start variants
+     from the epsilon closure of the first node of the expression.
+     Returns REG_NOERROR on success, otherwise the error reported by the
+     failing helper.  The temporary node set is released on every path.  */
+  int first, i;
+  reg_errcode_t err;
+  re_node_set init_nodes;
+
+  /* Initial states have the epsilon closure of the node which is
+     the first node of the regular expression. */
+  first = dfa->str_tree->first->node_idx;
+  dfa->init_node = first;
+  err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first);
+  if (BE (err != REG_NOERROR, 0))
+    return err;
+
+  /* The back-references which are in initial states can epsilon transit,
+     since in this case all of the subexpressions can be null.
+     Then we add epsilon closures of the nodes which are the next nodes of
+     the back-references. */
+  if (dfa->nbackref > 0)
+    for (i = 0; i < init_nodes.nelem; ++i)
+      {
+        int node_idx = init_nodes.elems[i];
+        int clexp_idx;
+        int dest_idx;
+
+        if (dfa->nodes[node_idx].type != OP_BACK_REF)
+          continue;
+
+        /* Only back-references whose closing subexpression node is also
+           in the initial set are of interest. */
+        for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx)
+          {
+            re_token_t *clexp_node;
+            clexp_node = dfa->nodes + init_nodes.elems[clexp_idx];
+            if (clexp_node->type == OP_CLOSE_SUBEXP
+                && clexp_node->opr.idx == dfa->nodes[node_idx].opr.idx)
+              break;
+          }
+        if (clexp_idx == init_nodes.nelem)
+          continue;
+
+        dest_idx = dfa->edests[node_idx].elems[0];
+        if (!re_node_set_contains (&init_nodes, dest_idx))
+          {
+            /* The merge can fail on memory exhaustion; do not ignore it. */
+            err = re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx);
+            if (BE (err != REG_NOERROR, 0))
+              goto free_return;
+            /* The set changed: rescan (the loop increment runs next). */
+            i = 0;
+          }
+      }
+
+  /* It must be the first time to invoke acquire_state. */
+  dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0);
+  /* We don't check ERR here, since the initial state must not be NULL. */
+  if (BE (dfa->init_state == NULL, 0))
+    goto free_return;
+  if (dfa->init_state->has_constraint)
+    {
+      dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes,
+                                                       CONTEXT_WORD);
+      dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes,
+                                                     CONTEXT_NEWLINE);
+      dfa->init_state_begbuf = re_acquire_state_context (&err, dfa,
+                                                         &init_nodes,
+                                                         CONTEXT_NEWLINE
+                                                         | CONTEXT_BEGBUF);
+      if (BE (dfa->init_state_word == NULL || dfa->init_state_nl == NULL
+              || dfa->init_state_begbuf == NULL, 0))
+        goto free_return;
+    }
+  else
+    dfa->init_state_word = dfa->init_state_nl
+      = dfa->init_state_begbuf = dfa->init_state;
+
+  re_node_set_free (&init_nodes);
+  return REG_NOERROR;
+
+ free_return:
+  /* INIT_NODES is a local working copy that nothing else references, so
+     it must be released here or it leaks. */
+  re_node_set_free (&init_nodes);
+  return err;
+}
+
+#ifdef RE_ENABLE_I18N
+/* If it is possible to do searching in single byte encoding instead of UTF-8
+ to speed things up, set dfa->mb_cur_max to 1, clear is_utf8 and change
+ DFA nodes where needed. */
+
+static void
+optimize_utf8 (re_dfa_t *dfa)
+{
+ int node, i, mb_chars = 0, has_period = 0;
+
+ /* First pass: inspect every node. Return early, leaving DFA untouched,
+ as soon as a node is found that rules the optimization out; otherwise
+ record whether any non-ASCII CHARACTER or any OP_PERIOD is present. */
+ for (node = 0; node < dfa->nodes_len; ++node)
+ switch (dfa->nodes[node].type)
+ {
+ case CHARACTER:
+ if (dfa->nodes[node].opr.c >= 0x80)
+ mb_chars = 1;
+ break;
+ case ANCHOR:
+ switch (dfa->nodes[node].opr.ctx_type)
+ {
+ case LINE_FIRST:
+ case LINE_LAST:
+ case BUF_FIRST:
+ case BUF_LAST:
+ break;
+ default:
+ /* Word anchors etc. cannot be handled. It's okay to test
+ opr.ctx_type since constraints (for all DFA nodes) are
+ created by ORing one or more opr.ctx_type values. */
+ return;
+ }
+ break;
+ case OP_PERIOD:
+ has_period = 1;
+ break;
+ case OP_BACK_REF:
+ case OP_ALT:
+ case END_OF_RE:
+ case OP_DUP_ASTERISK:
+ case OP_OPEN_SUBEXP:
+ case OP_CLOSE_SUBEXP:
+ break;
+ case COMPLEX_BRACKET:
+ return;
+ case SIMPLE_BRACKET:
+ /* Just double check. The non-ASCII range starts at 0x80. */
+ assert (0x80 % BITSET_WORD_BITS == 0);
+ /* Give up if the bracket contains any byte at or above 0x80. */
+ for (i = 0x80 / BITSET_WORD_BITS; i < BITSET_WORDS; ++i)
+ if (dfa->nodes[node].opr.sbcset[i])
+ return;
+ break;
+ default:
+ /* Any node type not listed above is treated as a fatal error. */
+ abort ();
+ }
+
+ /* Second pass, only needed when something must change: clear mb_partial
+ on non-ASCII CHARACTER nodes and turn OP_PERIOD into OP_UTF8_PERIOD. */
+ if (mb_chars || has_period)
+ for (node = 0; node < dfa->nodes_len; ++node)
+ {
+ if (dfa->nodes[node].type == CHARACTER
+ && dfa->nodes[node].opr.c >= 0x80)
+ dfa->nodes[node].mb_partial = 0;
+ else if (dfa->nodes[node].type == OP_PERIOD)
+ dfa->nodes[node].type = OP_UTF8_PERIOD;
+ }
+
+ /* The search can be in single byte locale. */
+ dfa->mb_cur_max = 1;
+ dfa->is_utf8 = 0;
+ dfa->has_mb_node = dfa->nbackref > 0 || has_period;
+}
+#endif
+
+ /* Analyze the structure tree, and calculate "first", "next", "edest",
+ "eclosure", and "inveclosure".
+
+ The DFA is taken from PREG->buffer and its tree from dfa->str_tree. The
+ passes run in this order: optimize_subexps (preorder, only when the
+ subexp_map array could be allocated), lower_subexps (postorder),
+ calc_first (postorder), calc_next (preorder), link_nfa_nodes (preorder),
+ calc_eclosure and, when needed, calc_inveclosure.
+
+ Returns REG_NOERROR on success, REG_ESPACE when an array cannot be
+ allocated, or the error returned by one of the passes. Nothing allocated
+ here is released on the error paths of this function.
+ NOTE(review): presumably the caller frees the DFA on failure -- confirm. */
+
+ static reg_errcode_t
+ analyze (regex_t *preg)
+ {
+ re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+ reg_errcode_t ret;
+
+ /* Allocate arrays. */
+ dfa->nexts = re_malloc (int, dfa->nodes_alloc);
+ dfa->org_indices = re_malloc (int, dfa->nodes_alloc);
+ dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc);
+ dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc);
+ if (BE (dfa->nexts == NULL || dfa->org_indices == NULL || dfa->edests == NULL
+ || dfa->eclosures == NULL, 0))
+ return REG_ESPACE;
+
+ dfa->subexp_map = re_malloc (int, preg->re_nsub);
+ if (dfa->subexp_map != NULL) /* Otherwise the optimization is skipped. */
+ {
+ int i;
+ for (i = 0; i < preg->re_nsub; i++)
+ dfa->subexp_map[i] = i; /* Start from the identity mapping. */
+ preorder (dfa->str_tree, optimize_subexps, dfa);
+ for (i = 0; i < preg->re_nsub; i++)
+ if (dfa->subexp_map[i] != i)
+ break;
+ if (i == preg->re_nsub) /* Still the identity: the map is not needed. */
+ {
+ free (dfa->subexp_map);
+ dfa->subexp_map = NULL;
+ }
+ }
+
+ ret = postorder (dfa->str_tree, lower_subexps, preg);
+ if (BE (ret != REG_NOERROR, 0))
+ return ret;
+ ret = postorder (dfa->str_tree, calc_first, dfa);
+ if (BE (ret != REG_NOERROR, 0))
+ return ret;
+ preorder (dfa->str_tree, calc_next, dfa);
+ ret = preorder (dfa->str_tree, link_nfa_nodes, dfa);
+ if (BE (ret != REG_NOERROR, 0))
+ return ret;
+ ret = calc_eclosure (dfa);
+ if (BE (ret != REG_NOERROR, 0))
+ return ret;
+
+ /* We only need this during the prune_impossible_nodes pass in regexec.c;
+ skip it if p_i_n will not run, as calc_inveclosure can be quadratic. */
+ if ((!preg->no_sub && preg->re_nsub > 0 && dfa->has_plural_match)
+ || dfa->nbackref)
+ {
+ dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len);
+ if (BE (dfa->inveclosures == NULL, 0))
+ return REG_ESPACE;
+ ret = calc_inveclosure (dfa);
+ }
+
+ return ret;
+ }
+
+ /* Our parse trees are very unbalanced, so we cannot use a stack to
+ implement parse tree visits. Instead, we use parent pointers and
+ some hairy code in these two functions.
+
+ postorder calls FN (EXTRA, node) for the nodes of the tree rooted at ROOT,
+ children before their parent and the left subtree before the right one.
+ It stops at the first call that does not return REG_NOERROR and returns
+ that value. Otherwise it returns REG_NOERROR once it has visited a node
+ whose parent pointer is NULL. */
+ static reg_errcode_t
+ postorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)),
+ void *extra)
+ {
+ bin_tree_t *node, *prev;
+
+ for (node = root; ; )
+ {
+ /* Descend down the tree, preferably to the left (or to the right
+ if that's the only child). */
+ while (node->left || node->right)
+ if (node->left)
+ node = node->left;
+ else
+ node = node->right;
+
+ do
+ {
+ reg_errcode_t err = fn (extra, node);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ if (node->parent == NULL)
+ return REG_NOERROR; /* The topmost node was just visited. */
+ prev = node;
+ node = node->parent;
+ }
+ /* Go up while we have a node that is reached from the right. */
+ while (node->right == prev || node->right == NULL);
+ node = node->right; /* Reached from the left: do the right subtree. */
+ }
+ }
+
+ /* Call FN (EXTRA, node) for the nodes of the tree rooted at ROOT, each
+    parent before its children and the left subtree before the right one.
+    No stack is used; the walk climbs back through the parent pointers.
+    Return the first value other than REG_NOERROR that FN returns, or
+    REG_NOERROR once the climb runs past a node without a parent.  */
+ static reg_errcode_t
+ preorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)),
+           void *extra)
+ {
+   bin_tree_t *cur = root;
+
+   while (1)
+     {
+       bin_tree_t *came_from;
+       reg_errcode_t status = fn (extra, cur);
+       if (BE (status != REG_NOERROR, 0))
+         return status;
+
+       /* A left child is always visited next.  */
+       if (cur->left)
+         {
+           cur = cur->left;
+           continue;
+         }
+
+       /* Otherwise climb until a right subtree shows up that we did not
+          just come out of, and continue there.  */
+       came_from = NULL;
+       while (cur->right == NULL || cur->right == came_from)
+         {
+           came_from = cur;
+           cur = cur->parent;
+           if (!cur)
+             return REG_NOERROR;
+         }
+       cur = cur->right;
+     }
+ }
+
+ /* Optimization pass (preorder visit).  When a SUBEXP has another SUBEXP as
+    its only content, the inner one is removed from the tree and
+    dfa->subexp_map records that the inner index maps to whatever the outer
+    index maps to.  Back references seen during the visit are rewritten
+    through the same map.  Always returns REG_NOERROR.  */
+ static reg_errcode_t
+ optimize_subexps (void *extra, bin_tree_t *node)
+ {
+   re_dfa_t *dfa = (re_dfa_t *) extra;
+   bin_tree_t *inner;
+   int inner_idx;
+
+   if (node->token.type == OP_BACK_REF && dfa->subexp_map)
+     {
+       /* Redirect the back reference and mark its new target as used.  */
+       int mapped = dfa->subexp_map[node->token.opr.idx];
+       node->token.opr.idx = mapped;
+       dfa->used_bkref_map |= 1 << mapped;
+       return REG_NOERROR;
+     }
+
+   if (node->token.type != SUBEXP)
+     return REG_NOERROR;
+   inner = node->left;
+   if (inner == NULL || inner->token.type != SUBEXP)
+     return REG_NOERROR;
+
+   /* Splice the inner SUBEXP out of the tree.  */
+   inner_idx = inner->token.opr.idx;
+   node->left = inner->left;
+   if (node->left)
+     node->left->parent = node;
+
+   dfa->subexp_map[inner_idx] = dfa->subexp_map[node->token.opr.idx];
+   if (inner_idx < BITSET_WORD_BITS)
+     dfa->used_bkref_map &= ~((bitset_word_t) 1 << inner_idx);
+
+   return REG_NOERROR;
+ }
+
+ /* Lowering pass (postorder visit).  Each child of NODE that is a SUBEXP is
+    replaced by what lower_subexp returns for it, and the replacement's
+    parent pointer is set to NODE.  Returns the error code that lower_subexp
+    stored, or REG_NOERROR.  */
+ static reg_errcode_t
+ lower_subexps (void *extra, bin_tree_t *node)
+ {
+   regex_t *preg = (regex_t *) extra;
+   reg_errcode_t status = REG_NOERROR;
+   bin_tree_t *child;
+
+   child = node->left;
+   if (child != NULL && child->token.type == SUBEXP)
+     {
+       child = lower_subexp (&status, preg, child);
+       node->left = child;
+       if (child != NULL)
+         child->parent = node;
+     }
+
+   child = node->right;
+   if (child != NULL && child->token.type == SUBEXP)
+     {
+       child = lower_subexp (&status, preg, child);
+       node->right = child;
+       if (child != NULL)
+         child->parent = node;
+     }
+
+   return status;
+ }
+
+ /* Lower the single SUBEXP node NODE.  Return the tree
+    CONCAT (OP_OPEN_SUBEXP, CONCAT (body, OP_CLOSE_SUBEXP)), or
+    CONCAT (OP_OPEN_SUBEXP, OP_CLOSE_SUBEXP) when the body is empty.
+    When PREG->no_sub is set, the body is not empty and the group is not a
+    back-reference target, the body itself is returned instead.  On
+    allocation failure *ERR is set to REG_ESPACE and NULL is returned.  */
+ static bin_tree_t *
+ lower_subexp (reg_errcode_t *err, regex_t *preg, bin_tree_t *node)
+ {
+   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+   bin_tree_t *inner = node->left;
+   bin_tree_t *open_node, *close_node, *tail, *result;
+
+   /* Empty subexpressions are never stripped, because that could leave
+      CONCAT nodes with NULL children.  An example that triggers this case
+      is the sed "script" /\(\)/x.  */
+   if (preg->no_sub && inner != NULL)
+     {
+       int group = node->token.opr.idx;
+       if (group >= BITSET_WORD_BITS
+           || (dfa->used_bkref_map & ((bitset_word_t) 1 << group)) == 0)
+         return inner;
+     }
+
+   /* Build OPEN . body . CLOSE; all results are checked together below.  */
+   open_node = create_tree (dfa, NULL, NULL, OP_OPEN_SUBEXP);
+   close_node = create_tree (dfa, NULL, NULL, OP_CLOSE_SUBEXP);
+   if (inner != NULL)
+     tail = create_tree (dfa, inner, close_node, CONCAT);
+   else
+     tail = close_node;
+   result = create_tree (dfa, open_node, tail, CONCAT);
+   if (BE (result == NULL || tail == NULL
+           || open_node == NULL || close_node == NULL, 0))
+     {
+       *err = REG_ESPACE;
+       return NULL;
+     }
+
+   /* Both markers carry the group's index and its opt_subexp flag.  */
+   close_node->token.opr.idx = node->token.opr.idx;
+   open_node->token.opr.idx = node->token.opr.idx;
+   close_node->token.opt_subexp = node->token.opt_subexp;
+   open_node->token.opt_subexp = node->token.opt_subexp;
+   return result;
+ }
+
+ /* Pass 1 in building the NFA (postorder visit).  A CONCAT node inherits
+    FIRST and the node index from its left child.  Every other node is its
+    own FIRST and gets a fresh automaton node; for an ANCHOR the new node's
+    constraint is set from the token's ctx_type.  Returns REG_ESPACE when
+    the automaton node cannot be added.  */
+ static reg_errcode_t
+ calc_first (void *extra, bin_tree_t *node)
+ {
+   re_dfa_t *dfa = (re_dfa_t *) extra;
+   int new_idx;
+
+   if (node->token.type == CONCAT)
+     {
+       node->first = node->left->first;
+       node->node_idx = node->left->node_idx;
+       return REG_NOERROR;
+     }
+
+   node->first = node;
+   new_idx = re_dfa_add_node (dfa, node->token);
+   node->node_idx = new_idx;
+   if (BE (new_idx == -1, 0))
+     return REG_ESPACE;
+   if (node->token.type == ANCHOR)
+     dfa->nodes[new_idx].constraint = node->token.opr.ctx_type;
+   return REG_NOERROR;
+ }
+
+ /* Pass 2 (preorder visit): set the NEXT pointer of NODE's children.
+    The child of a '*' loops back to the '*' itself.  In a CONCAT the left
+    child is followed by the FIRST of the right child, and the right child
+    by whatever follows the CONCAT.  For any other node, the children that
+    exist inherit NODE's own NEXT.  Always returns REG_NOERROR.  */
+ static reg_errcode_t
+ calc_next (void *extra, bin_tree_t *node)
+ {
+   bin_tree_t *lhs = node->left;
+   bin_tree_t *rhs = node->right;
+
+   if (node->token.type == OP_DUP_ASTERISK)
+     lhs->next = node;
+   else if (node->token.type == CONCAT)
+     {
+       lhs->next = rhs->first;
+       rhs->next = node->next;
+     }
+   else
+     {
+       if (lhs)
+         lhs->next = node->next;
+       if (rhs)
+         rhs->next = node->next;
+     }
+   return REG_NOERROR;
+ }
+
+ /* Pass 3: link all DFA nodes to their NEXT node (any order will do).
+    Epsilon nodes ('|', '*', anchors, subexpression markers) get their
+    destinations stored in dfa->edests; all other nodes get dfa->nexts.
+    A back reference gets both.  Returns REG_NOERROR, or the error reported
+    when a destination set cannot be initialized.  */
+ static reg_errcode_t
+ link_nfa_nodes (void *extra, bin_tree_t *node)
+ {
+   re_dfa_t *dfa = (re_dfa_t *) extra;
+   int idx = node->node_idx;
+   reg_errcode_t err = REG_NOERROR;
+
+   switch (node->token.type)
+     {
+     case CONCAT:
+       break;
+
+     case END_OF_RE:
+       assert (node->next == NULL);
+       break;
+
+     case OP_DUP_ASTERISK:
+     case OP_ALT:
+       {
+         int left, right;
+         dfa->has_plural_match = 1;
+         /* A missing branch goes straight to whatever follows NODE.  */
+         if (node->left != NULL)
+           left = node->left->first->node_idx;
+         else
+           left = node->next->node_idx;
+         if (node->right != NULL)
+           right = node->right->first->node_idx;
+         else
+           right = node->next->node_idx;
+         assert (left > -1);
+         assert (right > -1);
+         err = re_node_set_init_2 (dfa->edests + idx, left, right);
+       }
+       break;
+
+     case ANCHOR:
+     case OP_OPEN_SUBEXP:
+     case OP_CLOSE_SUBEXP:
+       err = re_node_set_init_1 (dfa->edests + idx, node->next->node_idx);
+       break;
+
+     case OP_BACK_REF:
+       dfa->nexts[idx] = node->next->node_idx;
+       /* Keep the result: a failed allocation here must reach the caller
+          instead of leaving an empty destination set behind.  */
+       err = re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]);
+       break;
+
+     default:
+       assert (!IS_EPSILON_NODE (node->token.type));
+       dfa->nexts[idx] = node->next->node_idx;
+       break;
+     }
+
+   return err;
+ }
+
+ /* Duplicate the epsilon closure of the node ROOT_NODE.
+ Note that duplicated nodes have constraint INIT_CONSTRAINT in addition
+ to their own constraint.
+
+ The walk starts with the pair TOP_ORG_NODE / TOP_CLONE_NODE and, at each
+ step, gives the clone destinations that mirror those of the original:
+ a back reference and a node with one epsilon destination get one
+ duplicated destination and the walk continues there; a node with no
+ epsilon destination ends the walk; a node with two destinations handles
+ the first one by recursion (unless a matching duplicate already exists)
+ and continues with the second.
+
+ Returns REG_NOERROR on success, REG_ESPACE when a node cannot be
+ duplicated or inserted, or the error of the recursive call. */
+
+ static reg_errcode_t
+ internal_function
+ duplicate_node_closure (re_dfa_t *dfa, int top_org_node, int top_clone_node,
+ int root_node, unsigned int init_constraint)
+ {
+ int org_node, clone_node, ret;
+ unsigned int constraint = init_constraint;
+ for (org_node = top_org_node, clone_node = top_clone_node;;)
+ {
+ int org_dest, clone_dest;
+ if (dfa->nodes[org_node].type == OP_BACK_REF)
+ {
+ /* If the back reference epsilon-transit, its destination must
+ also have the constraint. Then duplicate the epsilon closure
+ of the destination of the back reference, and store it in
+ edests of the back reference. */
+ org_dest = dfa->nexts[org_node];
+ re_node_set_empty (dfa->edests + clone_node);
+ clone_dest = duplicate_node (dfa, org_dest, constraint);
+ if (BE (clone_dest == -1, 0))
+ return REG_ESPACE;
+ dfa->nexts[clone_node] = dfa->nexts[org_node];
+ ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+ if (BE (ret < 0, 0))
+ return REG_ESPACE;
+ }
+ else if (dfa->edests[org_node].nelem == 0)
+ {
+ /* In case of the node can't epsilon-transit, don't duplicate the
+ destination and store the original destination as the
+ destination of the node. */
+ dfa->nexts[clone_node] = dfa->nexts[org_node];
+ break;
+ }
+ else if (dfa->edests[org_node].nelem == 1)
+ {
+ /* In case of the node can epsilon-transit, and it has only one
+ destination. */
+ org_dest = dfa->edests[org_node].elems[0];
+ re_node_set_empty (dfa->edests + clone_node);
+ /* If the node is root_node itself, it means the epsilon closure
+ has a loop. Then tie it to the destination of the root_node. */
+ if (org_node == root_node && clone_node != org_node)
+ {
+ ret = re_node_set_insert (dfa->edests + clone_node, org_dest);
+ if (BE (ret < 0, 0))
+ return REG_ESPACE;
+ break;
+ }
+ /* In case of the node has another constraint, add it. */
+ constraint |= dfa->nodes[org_node].constraint;
+ clone_dest = duplicate_node (dfa, org_dest, constraint);
+ if (BE (clone_dest == -1, 0))
+ return REG_ESPACE;
+ ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+ if (BE (ret < 0, 0))
+ return REG_ESPACE;
+ }
+ else /* dfa->edests[org_node].nelem == 2 */
+ {
+ /* In case of the node can epsilon-transit, and it has two
+ destinations. In the bin_tree_t and DFA, that's '|' and '*'. */
+ org_dest = dfa->edests[org_node].elems[0];
+ re_node_set_empty (dfa->edests + clone_node);
+ /* Search for a duplicated node which satisfies the constraint. */
+ clone_dest = search_duplicated_node (dfa, org_dest, constraint);
+ if (clone_dest == -1)
+ {
+ /* There is no such duplicated node, create a new one. */
+ reg_errcode_t err;
+ clone_dest = duplicate_node (dfa, org_dest, constraint);
+ if (BE (clone_dest == -1, 0))
+ return REG_ESPACE;
+ ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+ if (BE (ret < 0, 0))
+ return REG_ESPACE;
+ err = duplicate_node_closure (dfa, org_dest, clone_dest,
+ root_node, constraint);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ }
+ else
+ {
+ /* There is a duplicated node which satisfies the constraint,
+ use it to avoid infinite loop. */
+ ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+ if (BE (ret < 0, 0))
+ return REG_ESPACE;
+ }
+
+ org_dest = dfa->edests[org_node].elems[1]; /* Now the second branch. */
+ clone_dest = duplicate_node (dfa, org_dest, constraint);
+ if (BE (clone_dest == -1, 0))
+ return REG_ESPACE;
+ ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+ if (BE (ret < 0, 0))
+ return REG_ESPACE;
+ }
+ org_node = org_dest; /* Continue the walk at the destination pair. */
+ clone_node = clone_dest;
+ }
+ return REG_NOERROR;
+ }
+
+ /* Search for a node which is duplicated from the node ORG_NODE, and
+    satisfies the constraint CONSTRAINT.  The scan runs backwards from the
+    last node and stops at the first node that is not a duplicate; index 0
+    is never examined.  Return the index of the duplicate, or -1 if there
+    is none.  */
+
+ static int
+ search_duplicated_node (const re_dfa_t *dfa, int org_node,
+                         unsigned int constraint)
+ {
+   int idx;
+   /* Test the index before using it, so that an empty node array is never
+      read.  */
+   for (idx = dfa->nodes_len - 1; idx > 0 && dfa->nodes[idx].duplicated; --idx)
+     {
+       if (org_node == dfa->org_indices[idx]
+           && constraint == dfa->nodes[idx].constraint)
+         return idx; /* Found.  */
+     }
+   return -1; /* Not found.  */
+ }
+
+ /* Append a copy of node ORG_IDX to DFA.  The copy is flagged as
+    duplicated, carries CONSTRAINT combined with the original's own
+    constraint, and has its origin recorded in dfa->org_indices.
+    Return the index of the copy, or -1 if insufficient storage is
+    available.  */
+
+ static int
+ duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint)
+ {
+   int new_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]);
+   if (BE (new_idx == -1, 0))
+     return new_idx;
+
+   dfa->nodes[new_idx].constraint
+     = constraint | dfa->nodes[org_idx].constraint;
+   dfa->nodes[new_idx].duplicated = 1;
+
+   /* Remember which node this one was copied from.  */
+   dfa->org_indices[new_idx] = org_idx;
+   return new_idx;
+ }
+
+ /* Fill dfa->inveclosures, the inverse of dfa->eclosures: node S is added
+    to the set of every node that appears in the epsilon closure of S.
+    Returns REG_ESPACE if an insertion fails, REG_NOERROR otherwise.  */
+ static reg_errcode_t
+ calc_inveclosure (re_dfa_t *dfa)
+ {
+   int node, k;
+
+   for (node = 0; node < dfa->nodes_len; ++node)
+     re_node_set_init_empty (dfa->inveclosures + node);
+
+   for (node = 0; node < dfa->nodes_len; ++node)
+     {
+       const re_node_set *closure = dfa->eclosures + node;
+       for (k = 0; k < closure->nelem; ++k)
+         {
+           int member = closure->elems[k];
+           int rc = re_node_set_insert_last (dfa->inveclosures + member, node);
+           if (BE (rc == -1, 0))
+             return REG_ESPACE;
+         }
+     }
+
+   return REG_NOERROR;
+ }
+
+ /* Calculate "eclosure" for all the node in DFA.
+
+ The nodes are swept in index order. An nelem of 0 in dfa->eclosures
+ means "not calculated yet"; such nodes are handed to calc_eclosure_iter
+ with ROOT set to 1. Whenever a sweep leaves a node with nelem still 0,
+ the whole sweep is repeated from node 0. Returns REG_NOERROR, or the
+ error reported by calc_eclosure_iter. */
+
+ static reg_errcode_t
+ calc_eclosure (re_dfa_t *dfa)
+ {
+ int node_idx, incomplete;
+ #ifdef DEBUG
+ assert (dfa->nodes_len > 0);
+ #endif
+ incomplete = 0;
+ /* For each nodes, calculate epsilon closure. */
+ for (node_idx = 0; ; ++node_idx)
+ {
+ reg_errcode_t err;
+ re_node_set eclosure_elem;
+ if (node_idx == dfa->nodes_len) /* End of one sweep. */
+ {
+ if (!incomplete)
+ break;
+ incomplete = 0;
+ node_idx = 0; /* Start another sweep. */
+ }
+
+ #ifdef DEBUG
+ assert (dfa->eclosures[node_idx].nelem != -1);
+ #endif
+
+ /* If we have already calculated, skip it. */
+ if (dfa->eclosures[node_idx].nelem != 0)
+ continue;
+ /* Calculate epsilon closure of `node_idx'. */
+ err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, 1);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+
+ if (dfa->eclosures[node_idx].nelem == 0)
+ {
+ incomplete = 1;
+ re_node_set_free (&eclosure_elem);
+ }
+ }
+ return REG_NOERROR;
+ }
+
+ /* Calculate epsilon closure of NODE.
+    The closure is returned in *NEW_SET.  It is also stored in
+    dfa->eclosures[NODE], except when it is incomplete (it depends on a
+    closure that is still being calculated) and ROOT is zero; in that case
+    dfa->eclosures[NODE].nelem is reset to 0 so the node is calculated
+    again later.  While the calculation runs, the nelem field is -1.
+    Returns REG_NOERROR on success, or an error code when the set cannot be
+    allocated, merged or extended, or when duplicating nodes fails.  */
+
+ static reg_errcode_t
+ calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, int node, int root)
+ {
+   reg_errcode_t err;
+   int i, incomplete;
+   re_node_set eclosure;
+   incomplete = 0;
+   err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1);
+   if (BE (err != REG_NOERROR, 0))
+     return err;
+
+   /* This indicates that we are calculating this node now.
+      We reference this value to avoid infinite loop.  */
+   dfa->eclosures[node].nelem = -1;
+
+   /* If the current node has constraints, duplicate all nodes
+      since they must inherit the constraints.  */
+   if (dfa->nodes[node].constraint
+       && dfa->edests[node].nelem
+       && !dfa->nodes[dfa->edests[node].elems[0]].duplicated)
+     {
+       err = duplicate_node_closure (dfa, node, node, node,
+                                     dfa->nodes[node].constraint);
+       if (BE (err != REG_NOERROR, 0))
+         return err;
+     }
+
+   /* Expand each epsilon destination nodes.  */
+   if (IS_EPSILON_NODE(dfa->nodes[node].type))
+     for (i = 0; i < dfa->edests[node].nelem; ++i)
+       {
+         re_node_set eclosure_elem;
+         int edest = dfa->edests[node].elems[i];
+         /* If calculating the epsilon closure of `edest' is in progress,
+            return intermediate result.  */
+         if (dfa->eclosures[edest].nelem == -1)
+           {
+             incomplete = 1;
+             continue;
+           }
+         /* If we haven't calculated the epsilon closure of `edest' yet,
+            calculate now.  Otherwise use calculated epsilon closure.  */
+         if (dfa->eclosures[edest].nelem == 0)
+           {
+             err = calc_eclosure_iter (&eclosure_elem, dfa, edest, 0);
+             if (BE (err != REG_NOERROR, 0))
+               return err;
+           }
+         else
+           eclosure_elem = dfa->eclosures[edest];
+         /* Merge the epsilon closure of `edest'.  A failed merge must be
+            reported; otherwise the closure would silently lack nodes.  */
+         err = re_node_set_merge (&eclosure, &eclosure_elem);
+         if (BE (err != REG_NOERROR, 0))
+           return err;
+         /* If the epsilon closure of `edest' is incomplete,
+            the epsilon closure of this node is also incomplete.  */
+         if (dfa->eclosures[edest].nelem == 0)
+           {
+             incomplete = 1;
+             re_node_set_free (&eclosure_elem);
+           }
+       }
+
+   /* Epsilon closures include itself.  The insertion can fail too.  */
+   if (BE (re_node_set_insert (&eclosure, node) < 0, 0))
+     return REG_ESPACE;
+   if (incomplete && !root)
+     dfa->eclosures[node].nelem = 0;
+   else
+     dfa->eclosures[node] = eclosure;
+   *new_set = eclosure;
+   return REG_NOERROR;
+ }
+
+ /* Token functions used by the parser. */
+
+ /* Read the next token from INPUT into RESULT and advance INPUT past it.
+    The token is recognized by peek_token, so this function must not be
+    used inside bracket expressions.  */
+
+ static void
+ internal_function
+ fetch_token (re_token_t *result, re_string_t *input, reg_syntax_t syntax)
+ {
+   int consumed = peek_token (result, input, syntax);
+   re_string_skip_bytes (input, consumed);
+ }
+
+ /* Peek a token from INPUT, and return the length of the token.
+ We must not use this function inside bracket expressions.
+
+ TOKEN receives the type, the operand (opr.c, opr.idx or opr.ctx_type) and
+ the word_char flag; with RE_ENABLE_I18N also mb_partial. The bits in
+ SYNTAX decide which characters and backslash sequences are operators;
+ everything else is a CHARACTER.
+
+ The return value is 0 at the end of the input (END_OF_RE), 2 for a
+ backslash followed by another byte, and 1 otherwise, including a
+ backslash that is the last byte (BACK_SLASH). The position of INPUT is
+ the same on return as on entry; the '$' case moves it forward by one
+ byte to look at the following token and then moves it back. */
+
+ static int
+ internal_function
+ peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
+ {
+ unsigned char c;
+
+ if (re_string_eoi (input))
+ {
+ token->type = END_OF_RE;
+ return 0;
+ }
+
+ c = re_string_peek_byte (input, 0);
+ token->opr.c = c;
+
+ token->word_char = 0;
+ #ifdef RE_ENABLE_I18N
+ token->mb_partial = 0;
+ if (input->mb_cur_max > 1 &&
+ !re_string_first_byte (input, re_string_cur_idx (input)))
+ {
+ token->type = CHARACTER; /* Not the first byte of a character. */
+ token->mb_partial = 1;
+ return 1;
+ }
+ #endif
+ if (c == '\\')
+ {
+ unsigned char c2;
+ if (re_string_cur_idx (input) + 1 >= re_string_length (input))
+ {
+ token->type = BACK_SLASH; /* Nothing follows the backslash. */
+ return 1;
+ }
+
+ c2 = re_string_peek_byte_case (input, 1);
+ token->opr.c = c2;
+ token->type = CHARACTER; /* Default unless a case below overrides it. */
+ #ifdef RE_ENABLE_I18N
+ if (input->mb_cur_max > 1)
+ {
+ wint_t wc = re_string_wchar_at (input,
+ re_string_cur_idx (input) + 1);
+ token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
+ }
+ else
+ #endif
+ token->word_char = IS_WORD_CHAR (c2) != 0;
+
+ switch (c2)
+ {
+ case '|':
+ if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_NO_BK_VBAR))
+ token->type = OP_ALT;
+ break;
+ case '1': case '2': case '3': case '4': case '5':
+ case '6': case '7': case '8': case '9':
+ if (!(syntax & RE_NO_BK_REFS))
+ {
+ token->type = OP_BACK_REF;
+ token->opr.idx = c2 - '1'; /* Zero-based group index. */
+ }
+ break;
+ case '<':
+ if (!(syntax & RE_NO_GNU_OPS))
+ {
+ token->type = ANCHOR;
+ token->opr.ctx_type = WORD_FIRST;
+ }
+ break;
+ case '>':
+ if (!(syntax & RE_NO_GNU_OPS))
+ {
+ token->type = ANCHOR;
+ token->opr.ctx_type = WORD_LAST;
+ }
+ break;
+ case 'b':
+ if (!(syntax & RE_NO_GNU_OPS))
+ {
+ token->type = ANCHOR;
+ token->opr.ctx_type = WORD_DELIM;
+ }
+ break;
+ case 'B':
+ if (!(syntax & RE_NO_GNU_OPS))
+ {
+ token->type = ANCHOR;
+ token->opr.ctx_type = NOT_WORD_DELIM;
+ }
+ break;
+ case 'w':
+ if (!(syntax & RE_NO_GNU_OPS))
+ token->type = OP_WORD;
+ break;
+ case 'W':
+ if (!(syntax & RE_NO_GNU_OPS))
+ token->type = OP_NOTWORD;
+ break;
+ case 's':
+ if (!(syntax & RE_NO_GNU_OPS))
+ token->type = OP_SPACE;
+ break;
+ case 'S':
+ if (!(syntax & RE_NO_GNU_OPS))
+ token->type = OP_NOTSPACE;
+ break;
+ case '`':
+ if (!(syntax & RE_NO_GNU_OPS))
+ {
+ token->type = ANCHOR;
+ token->opr.ctx_type = BUF_FIRST;
+ }
+ break;
+ case '\'':
+ if (!(syntax & RE_NO_GNU_OPS))
+ {
+ token->type = ANCHOR;
+ token->opr.ctx_type = BUF_LAST;
+ }
+ break;
+ case '(':
+ if (!(syntax & RE_NO_BK_PARENS))
+ token->type = OP_OPEN_SUBEXP;
+ break;
+ case ')':
+ if (!(syntax & RE_NO_BK_PARENS))
+ token->type = OP_CLOSE_SUBEXP;
+ break;
+ case '+':
+ if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
+ token->type = OP_DUP_PLUS;
+ break;
+ case '?':
+ if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
+ token->type = OP_DUP_QUESTION;
+ break;
+ case '{':
+ if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
+ token->type = OP_OPEN_DUP_NUM;
+ break;
+ case '}':
+ if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
+ token->type = OP_CLOSE_DUP_NUM;
+ break;
+ default:
+ break;
+ }
+ return 2; /* The backslash plus the byte after it. */
+ }
+
+ token->type = CHARACTER; /* Default unless a case below overrides it. */
+ #ifdef RE_ENABLE_I18N
+ if (input->mb_cur_max > 1)
+ {
+ wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input));
+ token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
+ }
+ else
+ #endif
+ token->word_char = IS_WORD_CHAR (token->opr.c);
+
+ switch (c)
+ {
+ case '\n':
+ if (syntax & RE_NEWLINE_ALT)
+ token->type = OP_ALT;
+ break;
+ case '|':
+ if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_NO_BK_VBAR))
+ token->type = OP_ALT;
+ break;
+ case '*':
+ token->type = OP_DUP_ASTERISK;
+ break;
+ case '+':
+ if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
+ token->type = OP_DUP_PLUS;
+ break;
+ case '?':
+ if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
+ token->type = OP_DUP_QUESTION;
+ break;
+ case '{':
+ if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
+ token->type = OP_OPEN_DUP_NUM;
+ break;
+ case '}':
+ if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
+ token->type = OP_CLOSE_DUP_NUM;
+ break;
+ case '(':
+ if (syntax & RE_NO_BK_PARENS)
+ token->type = OP_OPEN_SUBEXP;
+ break;
+ case ')':
+ if (syntax & RE_NO_BK_PARENS)
+ token->type = OP_CLOSE_SUBEXP;
+ break;
+ case '[':
+ token->type = OP_OPEN_BRACKET;
+ break;
+ case '.':
+ token->type = OP_PERIOD;
+ break;
+ case '^':
+ if (!(syntax & (RE_CONTEXT_INDEP_ANCHORS | RE_CARET_ANCHORS_HERE)) &&
+ re_string_cur_idx (input) != 0)
+ {
+ char prev = re_string_peek_byte (input, -1);
+ if (!(syntax & RE_NEWLINE_ALT) || prev != '\n')
+ break; /* Stays an ordinary character. */
+ }
+ token->type = ANCHOR;
+ token->opr.ctx_type = LINE_FIRST;
+ break;
+ case '$':
+ if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) &&
+ re_string_cur_idx (input) + 1 != re_string_length (input))
+ {
+ re_token_t next;
+ re_string_skip_bytes (input, 1); /* Look at the token after '$'. */
+ peek_token (&next, input, syntax);
+ re_string_skip_bytes (input, -1); /* Restore the position. */
+ if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP)
+ break; /* Stays an ordinary character. */
+ }
+ token->type = ANCHOR;
+ token->opr.ctx_type = LINE_LAST;
+ break;
+ default:
+ break;
+ }
+ return 1;
+ }
+
+ /* Peek a token from INPUT, and return the length of the token.
+ We must not use this function out of bracket expressions.
+
+ The return value is 0 at the end of the input (END_OF_RE), 2 for the
+ openers "[." and "[=" and, when SYNTAX has RE_CHAR_CLASSES, "[:", and 1
+ in every other case.
+
+ Note that this is not a pure peek in one case: when a backslash escapes
+ the following byte (RE_BACKSLASH_ESCAPE_IN_LISTS), INPUT is advanced past
+ the backslash and the returned length covers only the escaped byte. */
+
+ static int
+ internal_function
+ peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
+ {
+ unsigned char c;
+ if (re_string_eoi (input))
+ {
+ token->type = END_OF_RE;
+ return 0;
+ }
+ c = re_string_peek_byte (input, 0);
+ token->opr.c = c;
+
+ #ifdef RE_ENABLE_I18N
+ if (input->mb_cur_max > 1 &&
+ !re_string_first_byte (input, re_string_cur_idx (input)))
+ {
+ token->type = CHARACTER; /* Not the first byte of a character. */
+ return 1;
+ }
+ #endif /* RE_ENABLE_I18N */
+
+ if (c == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS)
+ && re_string_cur_idx (input) + 1 < re_string_length (input))
+ {
+ /* In this case, '\' escape a character. */
+ unsigned char c2;
+ re_string_skip_bytes (input, 1); /* Consumes the backslash itself. */
+ c2 = re_string_peek_byte (input, 0);
+ token->opr.c = c2;
+ token->type = CHARACTER;
+ return 1;
+ }
+ if (c == '[') /* '[' is a special char in a bracket exps. */
+ {
+ unsigned char c2;
+ int token_len;
+ if (re_string_cur_idx (input) + 1 < re_string_length (input))
+ c2 = re_string_peek_byte (input, 1);
+ else
+ c2 = 0; /* '[' is the last byte; falls to the default case below. */
+ token->opr.c = c2;
+ token_len = 2;
+ switch (c2)
+ {
+ case '.':
+ token->type = OP_OPEN_COLL_ELEM;
+ break;
+ case '=':
+ token->type = OP_OPEN_EQUIV_CLASS;
+ break;
+ case ':':
+ if (syntax & RE_CHAR_CLASSES)
+ {
+ token->type = OP_OPEN_CHAR_CLASS;
+ break;
+ }
+ /* else fall through. */
+ default:
+ token->type = CHARACTER; /* A literal '['. */
+ token->opr.c = c;
+ token_len = 1;
+ break;
+ }
+ return token_len;
+ }
+ switch (c)
+ {
+ case '-':
+ token->type = OP_CHARSET_RANGE;
+ break;
+ case ']':
+ token->type = OP_CLOSE_BRACKET;
+ break;
+ case '^':
+ token->type = OP_NON_MATCH_LIST;
+ break;
+ default:
+ token->type = CHARACTER;
+ }
+ return 1;
+ }
+
+/* Functions for parser. */
+
+ /* Entry point of the parser.
+    Parse the regular expression REGEXP and return the structure tree
+
+        CONCAT (<reg_exp>, END_OF_RE)
+
+    where END_OF_RE marks the end of the regular expression.  When
+    <reg_exp> is empty the tree is the END_OF_RE node alone.  If an error
+    occurs, *ERR is set to the error code and NULL is returned.  SYNTAX is
+    also recorded in the DFA.  */
+
+ static bin_tree_t *
+ parse (re_string_t *regexp, regex_t *preg, reg_syntax_t syntax,
+        reg_errcode_t *err)
+ {
+   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+   re_token_t lookahead;
+   bin_tree_t *body, *end_node, *top;
+
+   dfa->syntax = syntax;
+   /* The first token is read with RE_CARET_ANCHORS_HERE added.  */
+   fetch_token (&lookahead, regexp, syntax | RE_CARET_ANCHORS_HERE);
+   body = parse_reg_exp (regexp, preg, &lookahead, syntax, 0, err);
+   if (BE (body == NULL && *err != REG_NOERROR, 0))
+     return NULL;
+
+   end_node = create_tree (dfa, NULL, NULL, END_OF_RE);
+   top = end_node;
+   if (body != NULL)
+     top = create_tree (dfa, body, end_node, CONCAT);
+
+   if (BE (end_node == NULL || top == NULL, 0))
+     {
+       *err = REG_ESPACE;
+       return NULL;
+     }
+   return top;
+ }
+
+ /* Parse a sequence of branches separated by `|' and build, for
+    <branch1>|<branch2>, the tree
+
+        OP_ALT (<branch1>, <branch2>)
+
+    Further alternatives nest to the left.  A branch that is empty (the
+    `|' is followed by another `|', by the end of the input or, inside a
+    group, by the closing parenthesis) is represented by NULL.  On error
+    *ERR is set and NULL is returned.  */
+
+ static bin_tree_t *
+ parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
+                reg_syntax_t syntax, int nest, reg_errcode_t *err)
+ {
+   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+   bin_tree_t *alt_tree, *rhs;
+
+   alt_tree = parse_branch (regexp, preg, token, syntax, nest, err);
+   if (BE (alt_tree == NULL && *err != REG_NOERROR, 0))
+     return NULL;
+
+   while (token->type == OP_ALT)
+     {
+       int rhs_is_empty;
+
+       fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE);
+       rhs_is_empty = (token->type == OP_ALT
+                       || token->type == END_OF_RE
+                       || (nest != 0 && token->type == OP_CLOSE_SUBEXP));
+       rhs = NULL;
+       if (!rhs_is_empty)
+         {
+           rhs = parse_branch (regexp, preg, token, syntax, nest, err);
+           if (BE (rhs == NULL && *err != REG_NOERROR, 0))
+             return NULL;
+         }
+
+       alt_tree = create_tree (dfa, alt_tree, rhs, OP_ALT);
+       if (BE (alt_tree == NULL, 0))
+         {
+           *err = REG_ESPACE;
+           return NULL;
+         }
+     }
+   return alt_tree;
+ }
+
+ /* Parse one branch, i.e. a sequence of expressions, and build for
+    <exp1><exp2> the tree
+
+        CONCAT (<exp1>, <exp2>)
+
+    Further expressions nest to the left.  The branch ends at `|', at the
+    end of the input or, inside a group, at the closing parenthesis.
+    Expressions that yield NULL without an error are skipped.  On error
+    *ERR is set and NULL is returned.  */
+
+ static bin_tree_t *
+ parse_branch (re_string_t *regexp, regex_t *preg, re_token_t *token,
+               reg_syntax_t syntax, int nest, reg_errcode_t *err)
+ {
+   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+   bin_tree_t *seq, *next;
+
+   seq = parse_expression (regexp, preg, token, syntax, nest, err);
+   if (BE (seq == NULL && *err != REG_NOERROR, 0))
+     return NULL;
+
+   for (;;)
+     {
+       if (token->type == OP_ALT || token->type == END_OF_RE)
+         break;
+       if (nest != 0 && token->type == OP_CLOSE_SUBEXP)
+         break;
+
+       next = parse_expression (regexp, preg, token, syntax, nest, err);
+       if (BE (next == NULL && *err != REG_NOERROR, 0))
+         return NULL;
+
+       /* Nothing collected so far: the new expression becomes the tree.  */
+       if (seq == NULL)
+         {
+           seq = next;
+           continue;
+         }
+       /* An empty expression adds nothing.  */
+       if (next == NULL)
+         continue;
+
+       seq = create_tree (dfa, seq, next, CONCAT);
+       if (seq == NULL)
+         {
+           *err = REG_ESPACE;
+           return NULL;
+         }
+     }
+   return seq;
+ }
+
+ /* This function build the following tree, from regular expression a*:
+ *
+ |
+ a
+
+ It parses one expression: the atom selected by the type of TOKEN (a
+ character, a group, a bracket expression, a back reference, an anchor,
+ a period or a \w \W \s \S class) followed by any number of repetition
+ operators, which are handled by parse_dup_op.
+
+ On success the tree is returned and TOKEN holds the first token after the
+ expression. NULL is returned with *ERR set on an error (REG_ESPACE,
+ REG_ESUBREG, REG_BADRPT, REG_ERPAREN, REG_EESCAPE, or an error from a
+ sub-parser). NULL is also returned, with *ERR left alone, when TOKEN is
+ OP_ALT or END_OF_RE.
+ */
+
+ static bin_tree_t *
+ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
+ reg_syntax_t syntax, int nest, reg_errcode_t *err)
+ {
+ re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+ bin_tree_t *tree;
+ switch (token->type)
+ {
+ case CHARACTER:
+ tree = create_token_tree (dfa, NULL, NULL, token);
+ if (BE (tree == NULL, 0))
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
+ #ifdef RE_ENABLE_I18N
+ if (dfa->mb_cur_max > 1)
+ {
+ while (!re_string_eoi (regexp)
+ && !re_string_first_byte (regexp, re_string_cur_idx (regexp)))
+ {
+ bin_tree_t *mbc_remain;
+ fetch_token (token, regexp, syntax); /* Next byte of the character. */
+ mbc_remain = create_token_tree (dfa, NULL, NULL, token);
+ tree = create_tree (dfa, tree, mbc_remain, CONCAT);
+ if (BE (mbc_remain == NULL || tree == NULL, 0))
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
+ }
+ }
+ #endif
+ break;
+ case OP_OPEN_SUBEXP:
+ tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err);
+ if (BE (*err != REG_NOERROR && tree == NULL, 0))
+ return NULL;
+ break;
+ case OP_OPEN_BRACKET:
+ tree = parse_bracket_exp (regexp, dfa, token, syntax, err);
+ if (BE (*err != REG_NOERROR && tree == NULL, 0))
+ return NULL;
+ break;
+ case OP_BACK_REF:
+ if (!BE (dfa->completed_bkref_map & (1 << token->opr.idx), 1))
+ {
+ *err = REG_ESUBREG; /* The group is not marked as completed. */
+ return NULL;
+ }
+ dfa->used_bkref_map |= 1 << token->opr.idx;
+ tree = create_token_tree (dfa, NULL, NULL, token);
+ if (BE (tree == NULL, 0))
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
+ ++dfa->nbackref;
+ dfa->has_mb_node = 1;
+ break;
+ case OP_OPEN_DUP_NUM:
+ if (syntax & RE_CONTEXT_INVALID_DUP)
+ {
+ *err = REG_BADRPT;
+ return NULL;
+ }
+ /* FALLTHROUGH */
+ case OP_DUP_ASTERISK:
+ case OP_DUP_PLUS:
+ case OP_DUP_QUESTION:
+ if (syntax & RE_CONTEXT_INVALID_OPS)
+ {
+ *err = REG_BADRPT;
+ return NULL;
+ }
+ else if (syntax & RE_CONTEXT_INDEP_OPS)
+ {
+ fetch_token (token, regexp, syntax); /* Skip the operator. */
+ return parse_expression (regexp, preg, token, syntax, nest, err);
+ }
+ /* else fall through */
+ case OP_CLOSE_SUBEXP:
+ if ((token->type == OP_CLOSE_SUBEXP) &&
+ !(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD))
+ {
+ *err = REG_ERPAREN;
+ return NULL;
+ }
+ /* else fall through */
+ case OP_CLOSE_DUP_NUM:
+ /* We treat it as a normal character. */
+
+ /* Then we can treat these characters as normal characters. */
+ token->type = CHARACTER;
+ /* mb_partial and word_char bits should be initialized already
+ by peek_token. */
+ tree = create_token_tree (dfa, NULL, NULL, token);
+ if (BE (tree == NULL, 0))
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
+ break;
+ case ANCHOR:
+ if ((token->opr.ctx_type
+ & (WORD_DELIM | NOT_WORD_DELIM | WORD_FIRST | WORD_LAST))
+ && dfa->word_ops_used == 0)
+ init_word_char (dfa);
+ if (token->opr.ctx_type == WORD_DELIM
+ || token->opr.ctx_type == NOT_WORD_DELIM)
+ {
+ bin_tree_t *tree_first, *tree_last; /* The two alternatives. */
+ if (token->opr.ctx_type == WORD_DELIM)
+ {
+ token->opr.ctx_type = WORD_FIRST;
+ tree_first = create_token_tree (dfa, NULL, NULL, token);
+ token->opr.ctx_type = WORD_LAST;
+ }
+ else
+ {
+ token->opr.ctx_type = INSIDE_WORD;
+ tree_first = create_token_tree (dfa, NULL, NULL, token);
+ token->opr.ctx_type = INSIDE_NOTWORD;
+ }
+ tree_last = create_token_tree (dfa, NULL, NULL, token);
+ tree = create_tree (dfa, tree_first, tree_last, OP_ALT);
+ if (BE (tree_first == NULL || tree_last == NULL || tree == NULL, 0))
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
+ }
+ else
+ {
+ tree = create_token_tree (dfa, NULL, NULL, token);
+ if (BE (tree == NULL, 0))
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
+ }
+ /* We must return here, since ANCHORs can't be followed
+ by repetition operators.
+ eg. RE"^*" is invalid or "<ANCHOR(^)><CHAR(*)>",
+ it must not be "<ANCHOR(^)><REPEAT(*)>". */
+ fetch_token (token, regexp, syntax);
+ return tree;
+ case OP_PERIOD:
+ tree = create_token_tree (dfa, NULL, NULL, token);
+ if (BE (tree == NULL, 0))
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
+ if (dfa->mb_cur_max > 1)
+ dfa->has_mb_node = 1;
+ break;
+ case OP_WORD:
+ case OP_NOTWORD:
+ tree = build_charclass_op (dfa, regexp->trans,
+ (const unsigned char *) "alnum",
+ (const unsigned char *) "_",
+ token->type == OP_NOTWORD, err);
+ if (BE (*err != REG_NOERROR && tree == NULL, 0))
+ return NULL;
+ break;
+ case OP_SPACE:
+ case OP_NOTSPACE:
+ tree = build_charclass_op (dfa, regexp->trans,
+ (const unsigned char *) "space",
+ (const unsigned char *) "",
+ token->type == OP_NOTSPACE, err);
+ if (BE (*err != REG_NOERROR && tree == NULL, 0))
+ return NULL;
+ break;
+ case OP_ALT:
+ case END_OF_RE:
+ return NULL; /* Empty expression; *err is not changed. */
+ case BACK_SLASH:
+ *err = REG_EESCAPE;
+ return NULL;
+ default:
+ /* Must not happen? */
+ #ifdef DEBUG
+ assert (0);
+ #endif
+ return NULL;
+ }
+ fetch_token (token, regexp, syntax); /* Move past the atom. */
+
+ while (token->type == OP_DUP_ASTERISK || token->type == OP_DUP_PLUS
+ || token->type == OP_DUP_QUESTION || token->type == OP_OPEN_DUP_NUM)
+ {
+ tree = parse_dup_op (tree, regexp, dfa, token, syntax, err);
+ if (BE (*err != REG_NOERROR && tree == NULL, 0))
+ return NULL;
+ /* In BRE consecutive duplications are not allowed. */
+ if ((syntax & RE_CONTEXT_INVALID_DUP)
+ && (token->type == OP_DUP_ASTERISK
+ || token->type == OP_OPEN_DUP_NUM))
+ {
+ *err = REG_BADRPT;
+ return NULL;
+ }
+ }
+
+ return tree;
+ }
+
+/* Parse the inside of a parenthesized subexpression and build
+     SUBEXP
+       |
+   <reg_exp>
+
+   The subexpression is numbered with the value preg->re_nsub had on
+   entry, and preg->re_nsub is incremented.  TOKEN is advanced to the
+   first token inside the group before parsing (presumably the caller
+   invokes this with TOKEN at the opening parenthesis -- the call site
+   is not part of this function).  On success TOKEN is left at the
+   closing OP_CLOSE_SUBEXP token, which is not consumed here.
+
+   Returns the SUBEXP node, or NULL with *ERR set:
+     REG_EPAREN  the group is not terminated by OP_CLOSE_SUBEXP;
+     REG_ESPACE  the SUBEXP node could not be allocated;
+     otherwise   whatever parse_reg_exp reported.  */
+
+static bin_tree_t *
+parse_sub_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
+               reg_syntax_t syntax, int nest, reg_errcode_t *err)
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  bin_tree_t *tree;
+  size_t cur_nsub;
+  cur_nsub = preg->re_nsub++;
+
+  fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE);
+
+  /* The subexpression may be a null string.  */
+  if (token->type == OP_CLOSE_SUBEXP)
+    tree = NULL;
+  else
+    {
+      tree = parse_reg_exp (regexp, preg, token, syntax, nest, err);
+      if (BE (*err == REG_NOERROR && token->type != OP_CLOSE_SUBEXP, 0))
+        {
+          /* The body parsed fine but the group is never closed.  The
+             tree is about to be abandoned, so release what its tokens
+             hold, the same way parse_dup_op discards a tree.  */
+          if (tree != NULL)
+            postorder (tree, free_tree, NULL);
+          *err = REG_EPAREN;
+        }
+      if (BE (*err != REG_NOERROR, 0))
+        return NULL;
+    }
+
+  /* Only the first nine groups get a bit in completed_bkref_map
+     ('9' - '1' == 8, i.e. indices 0..8).  */
+  if (cur_nsub <= '9' - '1')
+    dfa->completed_bkref_map |= 1 << cur_nsub;
+
+  tree = create_tree (dfa, tree, NULL, SUBEXP);
+  if (BE (tree == NULL, 0))
+    {
+      *err = REG_ESPACE;
+      return NULL;
+    }
+  tree->token.opr.idx = cur_nsub;
+  return tree;
+}
+
+/* Parse one repetition operator ("*", "+", "?" or an interval such as
+   "{1,3}") that follows ELEM and return the tree for the repeated
+   expression.
+
+   TOKEN must be the operator token on entry; on return it has been
+   advanced to the token after the operator.  An interval "<re>{n,m}"
+   is expanded by duplicating ELEM: n mandatory copies followed by
+   nested optional copies (or a star when there is no upper bound).
+
+   Within this function START/END use -1 for "no number given"
+   (END == -1 means unbounded) and -2 for "invalid sequence".
+
+   Returns:
+     - the new tree on success;
+     - ELEM itself, with TOKEN rewound and retyped as CHARACTER, when
+       the interval is malformed and RE_INVALID_INTERVAL_ORD is set;
+     - NULL with *ERR untouched when ELEM is NULL or the count is
+       exactly zero ("{0}", "{0,0}"); ELEM is freed in the latter case;
+     - NULL with *ERR set to REG_BADBR, REG_EBRACE or REG_ESPACE on
+       error.  */
+
+static bin_tree_t *
+parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
+              re_token_t *token, reg_syntax_t syntax, reg_errcode_t *err)
+{
+  bin_tree_t *tree = NULL, *old_tree = NULL;
+  int i, start, end, start_idx = re_string_cur_idx (regexp);
+  re_token_t start_token = *token;
+
+  if (token->type == OP_OPEN_DUP_NUM)
+    {
+      end = 0;
+      start = fetch_number (regexp, token, syntax);
+      if (start == -1)
+        {
+          if (token->type == CHARACTER && token->opr.c == ',')
+            start = 0; /* We treat "{,m}" as "{0,m}".  */
+          else
+            {
+              *err = REG_BADBR; /* <re>{} is invalid.  */
+              return NULL;
+            }
+        }
+      if (BE (start != -2, 1))
+        {
+          /* We treat "{n}" as "{n,n}".  */
+          end = ((token->type == OP_CLOSE_DUP_NUM) ? start
+                 : ((token->type == CHARACTER && token->opr.c == ',')
+                    ? fetch_number (regexp, token, syntax) : -2));
+        }
+      if (BE (start == -2 || end == -2, 0))
+        {
+          /* Invalid sequence.  */
+          if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0))
+            {
+              if (token->type == END_OF_RE)
+                *err = REG_EBRACE;
+              else
+                *err = REG_BADBR;
+
+              return NULL;
+            }
+
+          /* If the syntax bit is set, rollback.  */
+          re_string_set_index (regexp, start_idx);
+          *token = start_token;
+          token->type = CHARACTER;
+          /* mb_partial and word_char bits should be already initialized by
+             peek_token.  */
+          return elem;
+        }
+
+      /* Reject "{n,m}" with n > m, and also an interval that did not
+         end on the closing brace (e.g. "{1,2,3}", where the second
+         number stops at another ',').  Every well-formed interval
+         leaves TOKEN at OP_CLOSE_DUP_NUM at this point.  */
+      if (BE ((end != -1 && start > end)
+              || token->type != OP_CLOSE_DUP_NUM, 0))
+        {
+          *err = REG_BADBR;
+          return NULL;
+        }
+    }
+  else
+    {
+      start = (token->type == OP_DUP_PLUS) ? 1 : 0;
+      end = (token->type == OP_DUP_QUESTION) ? 1 : -1;
+    }
+
+  /* Step past the operator (or the closing brace of the interval).  */
+  fetch_token (token, regexp, syntax);
+
+  if (BE (elem == NULL, 0))
+    return NULL;
+  if (BE (start == 0 && end == 0, 0))
+    {
+      /* "<re>{0}" matches the empty string: drop the operand.  */
+      postorder (elem, free_tree, NULL);
+      return NULL;
+    }
+
+  /* Extract "<re>{n,m}" to "<re><re>...<re><re>{0,<m-n>}".  */
+  if (BE (start > 0, 0))
+    {
+      tree = elem;
+      for (i = 2; i <= start; ++i)
+        {
+          elem = duplicate_tree (elem, dfa);
+          tree = create_tree (dfa, tree, elem, CONCAT);
+          if (BE (elem == NULL || tree == NULL, 0))
+            goto parse_dup_op_espace;
+        }
+
+      if (start == end)
+        return tree;
+
+      /* Duplicate ELEM before it is marked optional.  The copy is
+         dereferenced just below, so a failed duplication must be
+         caught here.  */
+      elem = duplicate_tree (elem, dfa);
+      if (BE (elem == NULL, 0))
+        goto parse_dup_op_espace;
+      old_tree = tree;
+    }
+  else
+    old_tree = NULL;
+
+  if (elem->token.type == SUBEXP)
+    postorder (elem, mark_opt_subexp, (void *) (long) elem->token.opr.idx);
+
+  tree = create_tree (dfa, elem, NULL, (end == -1 ? OP_DUP_ASTERISK : OP_ALT));
+  if (BE (tree == NULL, 0))
+    goto parse_dup_op_espace;
+
+  /* This loop is actually executed only when end != -1,
+     to rewrite <re>{0,n} as (<re>(<re>...<re>?)?)?...  We have
+     already created the start+1-th copy.  */
+  for (i = start + 2; i <= end; ++i)
+    {
+      elem = duplicate_tree (elem, dfa);
+      tree = create_tree (dfa, tree, elem, CONCAT);
+      if (BE (elem == NULL || tree == NULL, 0))
+        goto parse_dup_op_espace;
+
+      tree = create_tree (dfa, tree, NULL, OP_ALT);
+      if (BE (tree == NULL, 0))
+        goto parse_dup_op_espace;
+    }
+
+  if (old_tree)
+    {
+      /* Prepend the mandatory copies.  A NULL result must be reported
+         as an error; returning it with *ERR still REG_NOERROR would
+         look like a valid empty result to the caller.  */
+      tree = create_tree (dfa, old_tree, tree, CONCAT);
+      if (BE (tree == NULL, 0))
+        goto parse_dup_op_espace;
+    }
+
+  return tree;
+
+ parse_dup_op_espace:
+  *err = REG_ESPACE;
+  return NULL;
+}
+
+/* Capacity, in bytes, of the buffers that receive the name of a
+ collating symbol, equivalence class or character class, including
+ the terminating NUL. parse_bracket_symbol returns REG_EBRACK for a
+ name that does not fit. The value is a guess by the original
+ author ("maybe enough"), not a limit taken from a standard. */
+#define BRACKET_NAME_BUF_SIZE 32
+
+#ifndef _LIBC
+/* Local function for parse_bracket_exp, compiled only when not _LIBC.
+   Build the range expression that starts at START_ELEM and ends at
+   END_ELEM.
+
+   SBCSET receives one bit for every single-byte character inside the
+   range.  Under RE_ENABLE_I18N, if MBCSET is non-NULL the pair
+   (start, end) is additionally appended to mbcset->range_starts /
+   mbcset->range_ends; RANGE_ALLOC is the allocated size of those two
+   arrays and is passed by pointer because it is updated when they
+   grow.
+
+   Returns REG_NOERROR on success, or
+     REG_ERANGE    an end point is an equivalence or character class,
+                   or the start sorts after the end;
+     REG_ECOLLATE  an end point is a multi-character collating symbol,
+                   or (I18N) cannot be converted to a wide character;
+     REG_ESPACE    the range arrays could not be enlarged.  */
+
+static reg_errcode_t
+internal_function
+# ifdef RE_ENABLE_I18N
+build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc,
+                 bracket_elem_t *start_elem, bracket_elem_t *end_elem)
+# else /* not RE_ENABLE_I18N */
+build_range_exp (bitset_t sbcset, bracket_elem_t *start_elem,
+                 bracket_elem_t *end_elem)
+# endif /* not RE_ENABLE_I18N */
+{
+  unsigned int start_ch, end_ch;
+  /* Equivalence Classes and Character Classes can't be a range start/end.  */
+  if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
+          || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,
+          0))
+    return REG_ERANGE;
+
+  /* We can handle no multi character collating elements without libc
+     support.  */
+  if (BE ((start_elem->type == COLL_SYM
+           && strlen ((char *) start_elem->opr.name) > 1)
+          || (end_elem->type == COLL_SYM
+              && strlen ((char *) end_elem->opr.name) > 1), 0))
+    return REG_ECOLLATE;
+
+# ifdef RE_ENABLE_I18N
+  {
+    wchar_t wc;
+    wint_t start_wc;
+    wint_t end_wc;
+    /* Three one-character, NUL-terminated wide strings packed into one
+       array: [0] = range start, [2] = candidate, [4] = range end.  */
+    wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
+
+    start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch
+                : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
+                   : 0));
+    end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch
+              : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
+                 : 0));
+    start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM)
+                ? __btowc (start_ch) : start_elem->opr.wch);
+    end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM)
+              ? __btowc (end_ch) : end_elem->opr.wch);
+    if (start_wc == WEOF || end_wc == WEOF)
+      return REG_ECOLLATE;
+    cmp_buf[0] = start_wc;
+    cmp_buf[4] = end_wc;
+    if (wcscoll (cmp_buf, cmp_buf + 4) > 0)
+      return REG_ERANGE;
+
+    /* Got valid collation sequence values, add them as a new entry.
+       However, for !_LIBC we have no collation elements: if the
+       character set is single byte, the single byte character set
+       that we build below suffices.  parse_bracket_exp passes
+       no MBCSET if dfa->mb_cur_max == 1.  */
+    if (mbcset)
+      {
+        /* Check the space of the arrays.  */
+        if (BE (*range_alloc == mbcset->nranges, 0))
+          {
+            /* There is not enough space, need realloc.  */
+            wchar_t *new_array_start, *new_array_end;
+            int new_nranges;
+
+            /* +1 in case of mbcset->nranges is 0.  */
+            new_nranges = 2 * mbcset->nranges + 1;
+
+            /* Use realloc since mbcset->range_starts and mbcset->range_ends
+               are NULL if *range_alloc == 0.
+
+               Grow the arrays one at a time and store each new pointer
+               as soon as its call succeeds.  re_realloc is presumed to
+               have realloc semantics: after a successful call the old
+               pointer must not be used again.  Storing both pointers
+               only after both calls succeeded would leave
+               mbcset->range_starts pointing at released memory when
+               the second call fails.  */
+            new_array_start = re_realloc (mbcset->range_starts, wchar_t,
+                                          new_nranges);
+            if (BE (new_array_start == NULL, 0))
+              return REG_ESPACE;
+            mbcset->range_starts = new_array_start;
+
+            new_array_end = re_realloc (mbcset->range_ends, wchar_t,
+                                        new_nranges);
+            if (BE (new_array_end == NULL, 0))
+              return REG_ESPACE;
+            mbcset->range_ends = new_array_end;
+
+            /* Only now do both arrays have NEW_NRANGES slots.  */
+            *range_alloc = new_nranges;
+          }
+
+        mbcset->range_starts[mbcset->nranges] = start_wc;
+        mbcset->range_ends[mbcset->nranges++] = end_wc;
+      }
+
+    /* Build the table for single byte characters.  */
+    for (wc = 0; wc < SBC_MAX; ++wc)
+      {
+        cmp_buf[2] = wc;
+        if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
+            && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
+          bitset_set (sbcset, wc);
+      }
+  }
+# else /* not RE_ENABLE_I18N */
+  {
+    unsigned int ch;
+    start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch
+                : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
+                   : 0));
+    end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch
+              : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
+                 : 0));
+    if (start_ch > end_ch)
+      return REG_ERANGE;
+    /* Build the table for single byte characters.  */
+    for (ch = 0; ch < SBC_MAX; ++ch)
+      if (start_ch <= ch && ch <= end_ch)
+        bitset_set (sbcset, ch);
+  }
+# endif /* not RE_ENABLE_I18N */
+  return REG_NOERROR;
+}
+#endif /* not _LIBC */
+
+#ifndef _LIBC
+/* Helper for parse_bracket_exp, compiled only when not _LIBC.
+   Add the collating element called NAME to SBCSET.  In this
+   configuration a collating element can only be a single byte: any
+   other name length yields REG_ECOLLATE.  MBCSET and COLL_SYM_ALLOC
+   exist only to keep the RE_ENABLE_I18N signature parallel to the
+   _LIBC variant; this body does not use them.  */
+
+static reg_errcode_t
+internal_function
+# ifdef RE_ENABLE_I18N
+build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
+                        int *coll_sym_alloc, const unsigned char *name)
+# else /* not RE_ENABLE_I18N */
+build_collating_symbol (bitset_t sbcset, const unsigned char *name)
+# endif /* not RE_ENABLE_I18N */
+{
+  /* Guard: reject empty and multi-byte names up front.  */
+  if (BE (strlen ((const char *) name) != 1, 0))
+    return REG_ECOLLATE;
+
+  bitset_set (sbcset, name[0]);
+  return REG_NOERROR;
+}
+#endif /* not _LIBC */
+
+/* Parse a bracket expression such as "[abc]", "[a-c]" or "[[.a-a.]]"
+   and return the tree that matches it.
+
+   Parsing starts by peeking at the current position of REGEXP, so the
+   opening '[' has presumably been consumed by the caller already (the
+   call site is not part of this function).  On success the closing
+   ']' has been skipped.
+
+   Two sets are built: SBCSET, a bitset of single-byte characters, and
+   (RE_ENABLE_I18N only) MBCSET for everything that cannot be expressed
+   per byte.  The result is a SIMPLE_BRACKET node, a COMPLEX_BRACKET
+   node, or an OP_ALT node joining both; the sets are referenced from
+   the token(s) of the returned tree.
+
+   On failure NULL is returned, both sets are released and *ERR is
+     REG_ESPACE  out of memory;
+     REG_BADPAT  the pattern ends right after "[" or "[^";
+     REG_EBRACK  the pattern ends before the closing ']';
+     or the error reported by one of the element/range/class helpers.  */
+
+static bin_tree_t *
+parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
+                   reg_syntax_t syntax, reg_errcode_t *err)
+{
+#ifdef _LIBC
+  const unsigned char *collseqmb;
+  const char *collseqwc;
+  uint32_t nrules;
+  int32_t table_size;
+  const int32_t *symb_table;
+  const unsigned char *extra;
+
+  /* Local function for parse_bracket_exp used in _LIBC environment.
+     Seek the collating symbol entry corresponding to NAME.
+     Return the index of the symbol in the SYMB_TABLE.  */
+
+  auto inline int32_t
+  __attribute ((always_inline))
+  seek_collating_symbol_entry (name, name_len)
+         const unsigned char *name;
+         size_t name_len;
+    {
+      int32_t hash = elem_hash ((const char *) name, name_len);
+      int32_t elem = hash % table_size;
+      if (symb_table[2 * elem] != 0)
+        {
+          int32_t second = hash % (table_size - 2) + 1;
+
+          do
+            {
+              /* First compare the hashing value.  */
+              if (symb_table[2 * elem] == hash
+                  /* Compare the length of the name.  */
+                  && name_len == extra[symb_table[2 * elem + 1]]
+                  /* Compare the name.  */
+                  && memcmp (name, &extra[symb_table[2 * elem + 1] + 1],
+                             name_len) == 0)
+                {
+                  /* Yep, this is the entry.  */
+                  break;
+                }
+
+              /* Next entry.  */
+              elem += second;
+            }
+          while (symb_table[2 * elem] != 0);
+        }
+      return elem;
+    }
+
+  /* Local function for parse_bracket_exp used in _LIBC environment.
+     Look up the collation sequence value of BR_ELEM.
+     Return the value if succeeded, UINT_MAX otherwise.  */
+
+  auto inline unsigned int
+  __attribute ((always_inline))
+  lookup_collation_sequence_value (br_elem)
+         bracket_elem_t *br_elem;
+    {
+      if (br_elem->type == SB_CHAR)
+        {
+          /*
+          if (MB_CUR_MAX == 1)
+          */
+          if (nrules == 0)
+            return collseqmb[br_elem->opr.ch];
+          else
+            {
+              wint_t wc = __btowc (br_elem->opr.ch);
+              return __collseq_table_lookup (collseqwc, wc);
+            }
+        }
+      else if (br_elem->type == MB_CHAR)
+        {
+          if (nrules != 0)
+            return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
+        }
+      else if (br_elem->type == COLL_SYM)
+        {
+          size_t sym_name_len = strlen ((char *) br_elem->opr.name);
+          if (nrules != 0)
+            {
+              int32_t elem, idx;
+              elem = seek_collating_symbol_entry (br_elem->opr.name,
+                                                  sym_name_len);
+              if (symb_table[2 * elem] != 0)
+                {
+                  /* We found the entry.  */
+                  idx = symb_table[2 * elem + 1];
+                  /* Skip the name of collating element name.  */
+                  idx += 1 + extra[idx];
+                  /* Skip the byte sequence of the collating element.  */
+                  idx += 1 + extra[idx];
+                  /* Adjust for the alignment.  */
+                  idx = (idx + 3) & ~3;
+                  /* Skip the multibyte collation sequence value.  */
+                  idx += sizeof (unsigned int);
+                  /* Skip the wide char sequence of the collating element.  */
+                  idx += sizeof (unsigned int) *
+                    (1 + *(unsigned int *) (extra + idx));
+                  /* Return the collation sequence value.  */
+                  return *(unsigned int *) (extra + idx);
+                }
+              else if (symb_table[2 * elem] == 0 && sym_name_len == 1)
+                {
+                  /* No valid character.  Match it as a single byte
+                     character.  */
+                  return collseqmb[br_elem->opr.name[0]];
+                }
+            }
+          else if (sym_name_len == 1)
+            return collseqmb[br_elem->opr.name[0]];
+        }
+      return UINT_MAX;
+    }
+
+  /* Local function for parse_bracket_exp used in _LIBC environment.
+     Build the range expression which starts from START_ELEM, and ends
+     at END_ELEM.  The result is written to MBCSET and SBCSET.
+     RANGE_ALLOC is the allocated size of mbcset->range_starts and
+     mbcset->range_ends; it is a pointer argument since we may
+     update it.  */
+
+  auto inline reg_errcode_t
+  __attribute ((always_inline))
+  build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem)
+         re_charset_t *mbcset;
+         int *range_alloc;
+         bitset_t sbcset;
+         bracket_elem_t *start_elem, *end_elem;
+    {
+      unsigned int ch;
+      uint32_t start_collseq;
+      uint32_t end_collseq;
+
+      /* Equivalence Classes and Character Classes can't be a range
+         start/end.  */
+      if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
+              || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,
+              0))
+        return REG_ERANGE;
+
+      start_collseq = lookup_collation_sequence_value (start_elem);
+      end_collseq = lookup_collation_sequence_value (end_elem);
+      /* Check start/end collation sequence values.  */
+      if (BE (start_collseq == UINT_MAX || end_collseq == UINT_MAX, 0))
+        return REG_ECOLLATE;
+      if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0))
+        return REG_ERANGE;
+
+      /* Got valid collation sequence values, add them as a new entry.
+         However, if we have no collation elements, and the character set
+         is single byte, the single byte character set that we
+         build below suffices. */
+      if (nrules > 0 || dfa->mb_cur_max > 1)
+        {
+          /* Check the space of the arrays.  */
+          if (BE (*range_alloc == mbcset->nranges, 0))
+            {
+              /* There is not enough space, need realloc.  */
+              uint32_t *new_array_start;
+              uint32_t *new_array_end;
+              int new_nranges;
+
+              /* +1 in case of mbcset->nranges is 0.  */
+              new_nranges = 2 * mbcset->nranges + 1;
+
+              /* Grow the arrays one at a time and store each new
+                 pointer as soon as its call succeeds.  re_realloc is
+                 presumed to have realloc semantics, so once the first
+                 call succeeds the old range_starts pointer is stale;
+                 it must be replaced even if the second call fails.  */
+              new_array_start = re_realloc (mbcset->range_starts, uint32_t,
+                                            new_nranges);
+              if (BE (new_array_start == NULL, 0))
+                return REG_ESPACE;
+              mbcset->range_starts = new_array_start;
+
+              new_array_end = re_realloc (mbcset->range_ends, uint32_t,
+                                          new_nranges);
+              if (BE (new_array_end == NULL, 0))
+                return REG_ESPACE;
+              mbcset->range_ends = new_array_end;
+
+              *range_alloc = new_nranges;
+            }
+
+          mbcset->range_starts[mbcset->nranges] = start_collseq;
+          mbcset->range_ends[mbcset->nranges++] = end_collseq;
+        }
+
+      /* Build the table for single byte characters.  */
+      for (ch = 0; ch < SBC_MAX; ch++)
+        {
+          uint32_t ch_collseq;
+          /*
+          if (MB_CUR_MAX == 1)
+          */
+          if (nrules == 0)
+            ch_collseq = collseqmb[ch];
+          else
+            ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
+          if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
+            bitset_set (sbcset, ch);
+        }
+      return REG_NOERROR;
+    }
+
+  /* Local function for parse_bracket_exp used in _LIBC environment.
+     Build the collating element which is represented by NAME.
+     The result is written to MBCSET and SBCSET.
+     COLL_SYM_ALLOC is the allocated size of mbcset->coll_syms; it is a
+     pointer argument since we may update it.  */
+
+  auto inline reg_errcode_t
+  __attribute ((always_inline))
+  build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name)
+         re_charset_t *mbcset;
+         int *coll_sym_alloc;
+         bitset_t sbcset;
+         const unsigned char *name;
+    {
+      int32_t elem, idx;
+      size_t name_len = strlen ((const char *) name);
+      if (nrules != 0)
+        {
+          elem = seek_collating_symbol_entry (name, name_len);
+          if (symb_table[2 * elem] != 0)
+            {
+              /* We found the entry.  */
+              idx = symb_table[2 * elem + 1];
+              /* Skip the name of collating element name.  */
+              idx += 1 + extra[idx];
+            }
+          else if (symb_table[2 * elem] == 0 && name_len == 1)
+            {
+              /* No valid character, treat it as a normal
+                 character.  */
+              bitset_set (sbcset, name[0]);
+              return REG_NOERROR;
+            }
+          else
+            return REG_ECOLLATE;
+
+          /* Got valid collation sequence, add it as a new entry.  */
+          /* Check the space of the arrays.  */
+          if (BE (*coll_sym_alloc == mbcset->ncoll_syms, 0))
+            {
+              /* Not enough, realloc it.  */
+              /* +1 in case of mbcset->ncoll_syms is 0.  */
+              int new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
+              /* Use realloc since mbcset->coll_syms is NULL
+                 if *alloc == 0.  */
+              int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
+                                                   new_coll_sym_alloc);
+              if (BE (new_coll_syms == NULL, 0))
+                return REG_ESPACE;
+              mbcset->coll_syms = new_coll_syms;
+              *coll_sym_alloc = new_coll_sym_alloc;
+            }
+          mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
+          return REG_NOERROR;
+        }
+      else
+        {
+          if (BE (name_len != 1, 0))
+            return REG_ECOLLATE;
+          else
+            {
+              bitset_set (sbcset, name[0]);
+              return REG_NOERROR;
+            }
+        }
+    }
+#endif
+
+  re_token_t br_token;
+  re_bitset_ptr_t sbcset;
+#ifdef RE_ENABLE_I18N
+  re_charset_t *mbcset;
+  int coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0;
+  int equiv_class_alloc = 0, char_class_alloc = 0;
+#endif /* RE_ENABLE_I18N */
+  int non_match = 0;
+  bin_tree_t *work_tree;
+  int token_len;
+  int first_round = 1;
+#ifdef _LIBC
+  collseqmb = (const unsigned char *)
+    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
+  nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+  if (nrules)
+    {
+      /*
+      if (MB_CUR_MAX > 1)
+      */
+      collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
+      table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB);
+      symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE,
+                                                  _NL_COLLATE_SYMB_TABLEMB);
+      extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
+                                                   _NL_COLLATE_SYMB_EXTRAMB);
+    }
+#endif
+  sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
+#ifdef RE_ENABLE_I18N
+  mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
+#endif /* RE_ENABLE_I18N */
+#ifdef RE_ENABLE_I18N
+  if (BE (sbcset == NULL || mbcset == NULL, 0))
+#else
+  if (BE (sbcset == NULL, 0))
+#endif /* RE_ENABLE_I18N */
+    {
+      /* One of the two allocations may have succeeded; release it so
+         it is not leaked.  MBCSET is freshly zeroed here and owns no
+         sub-arrays yet, so a plain re_free is enough (re_free is
+         already applied to a possibly-NULL SBCSET at the cleanup
+         label below).  */
+      re_free (sbcset);
+#ifdef RE_ENABLE_I18N
+      re_free (mbcset);
+#endif /* RE_ENABLE_I18N */
+      *err = REG_ESPACE;
+      return NULL;
+    }
+
+  token_len = peek_token_bracket (token, regexp, syntax);
+  if (BE (token->type == END_OF_RE, 0))
+    {
+      *err = REG_BADPAT;
+      goto parse_bracket_exp_free_return;
+    }
+  if (token->type == OP_NON_MATCH_LIST)
+    {
+#ifdef RE_ENABLE_I18N
+      mbcset->non_match = 1;
+#endif /* RE_ENABLE_I18N */
+      non_match = 1;
+      /* The bit is set here and inverted together with the rest of
+         the set further down, so '\n' ends up excluded.  */
+      if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
+        bitset_set (sbcset, '\n');
+      re_string_skip_bytes (regexp, token_len); /* Skip a token.  */
+      token_len = peek_token_bracket (token, regexp, syntax);
+      if (BE (token->type == END_OF_RE, 0))
+        {
+          *err = REG_BADPAT;
+          goto parse_bracket_exp_free_return;
+        }
+    }
+
+  /* We treat the first ']' as a normal character.  */
+  if (token->type == OP_CLOSE_BRACKET)
+    token->type = CHARACTER;
+
+  /* One iteration per list element: a single element or a range.  */
+  while (1)
+    {
+      bracket_elem_t start_elem, end_elem;
+      unsigned char start_name_buf[BRACKET_NAME_BUF_SIZE];
+      unsigned char end_name_buf[BRACKET_NAME_BUF_SIZE];
+      reg_errcode_t ret;
+      int token_len2 = 0, is_range_exp = 0;
+      re_token_t token2;
+
+      start_elem.opr.name = start_name_buf;
+      ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa,
+                                   syntax, first_round);
+      if (BE (ret != REG_NOERROR, 0))
+        {
+          *err = ret;
+          goto parse_bracket_exp_free_return;
+        }
+      first_round = 0;
+
+      /* Get information about the next token.  We need it in any case.  */
+      token_len = peek_token_bracket (token, regexp, syntax);
+
+      /* Do not check for ranges if we know they are not allowed.  */
+      if (start_elem.type != CHAR_CLASS && start_elem.type != EQUIV_CLASS)
+        {
+          if (BE (token->type == END_OF_RE, 0))
+            {
+              *err = REG_EBRACK;
+              goto parse_bracket_exp_free_return;
+            }
+          if (token->type == OP_CHARSET_RANGE)
+            {
+              re_string_skip_bytes (regexp, token_len); /* Skip '-'.  */
+              token_len2 = peek_token_bracket (&token2, regexp, syntax);
+              if (BE (token2.type == END_OF_RE, 0))
+                {
+                  *err = REG_EBRACK;
+                  goto parse_bracket_exp_free_return;
+                }
+              if (token2.type == OP_CLOSE_BRACKET)
+                {
+                  /* We treat the last '-' as a normal character.  */
+                  re_string_skip_bytes (regexp, -token_len);
+                  token->type = CHARACTER;
+                }
+              else
+                is_range_exp = 1;
+            }
+        }
+
+      if (is_range_exp == 1)
+        {
+          end_elem.opr.name = end_name_buf;
+          ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2,
+                                       dfa, syntax, 1);
+          if (BE (ret != REG_NOERROR, 0))
+            {
+              *err = ret;
+              goto parse_bracket_exp_free_return;
+            }
+
+          token_len = peek_token_bracket (token, regexp, syntax);
+
+#ifdef _LIBC
+          *err = build_range_exp (sbcset, mbcset, &range_alloc,
+                                  &start_elem, &end_elem);
+#else
+# ifdef RE_ENABLE_I18N
+          *err = build_range_exp (sbcset,
+                                  dfa->mb_cur_max > 1 ? mbcset : NULL,
+                                  &range_alloc, &start_elem, &end_elem);
+# else
+          *err = build_range_exp (sbcset, &start_elem, &end_elem);
+# endif
+#endif /* not _LIBC */
+          if (BE (*err != REG_NOERROR, 0))
+            goto parse_bracket_exp_free_return;
+        }
+      else
+        {
+          switch (start_elem.type)
+            {
+            case SB_CHAR:
+              bitset_set (sbcset, start_elem.opr.ch);
+              break;
+#ifdef RE_ENABLE_I18N
+            case MB_CHAR:
+              /* Check whether the array has enough space.  */
+              if (BE (mbchar_alloc == mbcset->nmbchars, 0))
+                {
+                  wchar_t *new_mbchars;
+                  /* Not enough, realloc it.  */
+                  /* +1 in case of mbcset->nmbchars is 0.  */
+                  mbchar_alloc = 2 * mbcset->nmbchars + 1;
+                  /* Use realloc since array is NULL if *alloc == 0.  */
+                  new_mbchars = re_realloc (mbcset->mbchars, wchar_t,
+                                            mbchar_alloc);
+                  if (BE (new_mbchars == NULL, 0))
+                    goto parse_bracket_exp_espace;
+                  mbcset->mbchars = new_mbchars;
+                }
+              mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch;
+              break;
+#endif /* RE_ENABLE_I18N */
+            case EQUIV_CLASS:
+              *err = build_equiv_class (sbcset,
+#ifdef RE_ENABLE_I18N
+                                        mbcset, &equiv_class_alloc,
+#endif /* RE_ENABLE_I18N */
+                                        start_elem.opr.name);
+              if (BE (*err != REG_NOERROR, 0))
+                goto parse_bracket_exp_free_return;
+              break;
+            case COLL_SYM:
+              *err = build_collating_symbol (sbcset,
+#ifdef RE_ENABLE_I18N
+                                             mbcset, &coll_sym_alloc,
+#endif /* RE_ENABLE_I18N */
+                                             start_elem.opr.name);
+              if (BE (*err != REG_NOERROR, 0))
+                goto parse_bracket_exp_free_return;
+              break;
+            case CHAR_CLASS:
+              *err = build_charclass (regexp->trans, sbcset,
+#ifdef RE_ENABLE_I18N
+                                      mbcset, &char_class_alloc,
+#endif /* RE_ENABLE_I18N */
+                                      start_elem.opr.name, syntax);
+              if (BE (*err != REG_NOERROR, 0))
+                goto parse_bracket_exp_free_return;
+              break;
+            default:
+              assert (0);
+              break;
+            }
+        }
+      if (BE (token->type == END_OF_RE, 0))
+        {
+          *err = REG_EBRACK;
+          goto parse_bracket_exp_free_return;
+        }
+      if (token->type == OP_CLOSE_BRACKET)
+        break;
+    }
+
+  re_string_skip_bytes (regexp, token_len); /* Skip a token.  */
+
+  /* If it is non-matching list.  */
+  if (non_match)
+    bitset_not (sbcset);
+
+#ifdef RE_ENABLE_I18N
+  /* Ensure only single byte characters are set.  */
+  if (dfa->mb_cur_max > 1)
+    bitset_mask (sbcset, dfa->sb_char);
+
+  if (mbcset->nmbchars || mbcset->ncoll_syms || mbcset->nequiv_classes
+      || mbcset->nranges || (dfa->mb_cur_max > 1 && (mbcset->nchar_classes
+                                                     || mbcset->non_match)))
+    {
+      bin_tree_t *mbc_tree;
+      int sbc_idx;
+      /* Build a tree for complex bracket.  */
+      dfa->has_mb_node = 1;
+      br_token.type = COMPLEX_BRACKET;
+      br_token.opr.mbcset = mbcset;
+      mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
+      if (BE (mbc_tree == NULL, 0))
+        goto parse_bracket_exp_espace;
+      for (sbc_idx = 0; sbc_idx < BITSET_WORDS; ++sbc_idx)
+        if (sbcset[sbc_idx])
+          break;
+      /* If there are no bits set in sbcset, there is no point
+         of having both SIMPLE_BRACKET and COMPLEX_BRACKET.  */
+      if (sbc_idx < BITSET_WORDS)
+        {
+          /* Build a tree for simple bracket.  */
+          br_token.type = SIMPLE_BRACKET;
+          br_token.opr.sbcset = sbcset;
+          work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
+          if (BE (work_tree == NULL, 0))
+            goto parse_bracket_exp_espace;
+
+          /* Then join them by ALT node.  */
+          work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT);
+          if (BE (work_tree == NULL, 0))
+            goto parse_bracket_exp_espace;
+        }
+      else
+        {
+          re_free (sbcset);
+          work_tree = mbc_tree;
+        }
+    }
+  else
+#endif /* RE_ENABLE_I18N */
+    {
+#ifdef RE_ENABLE_I18N
+      free_charset (mbcset);
+#endif
+      /* Build a tree for simple bracket.  */
+      br_token.type = SIMPLE_BRACKET;
+      br_token.opr.sbcset = sbcset;
+      work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
+      if (BE (work_tree == NULL, 0))
+        goto parse_bracket_exp_espace;
+    }
+  return work_tree;
+
+ parse_bracket_exp_espace:
+  *err = REG_ESPACE;
+ parse_bracket_exp_free_return:
+  re_free (sbcset);
+#ifdef RE_ENABLE_I18N
+  free_charset (mbcset);
+#endif /* RE_ENABLE_I18N */
+  return NULL;
+}
+
+/* Parse one element of a bracket expression into ELEM.
+
+   TOKEN / TOKEN_LEN describe the token that was peeked at the current
+   position of REGEXP; this function consumes it.  ELEM becomes
+     MB_CHAR   (RE_ENABLE_I18N) when the character at the current
+               position is more than one byte long;
+     whatever parse_bracket_symbol produces for "[.", "[:" or "[=";
+     SB_CHAR   otherwise, holding the token's byte.
+
+   A '-' token is taken as a literal only when ACCEPT_HYPHEN is
+   nonzero or the next token closes the bracket; otherwise REG_ERANGE
+   is returned.  Returns REG_NOERROR on success.  */
+
+static reg_errcode_t
+parse_bracket_element (bracket_elem_t *elem, re_string_t *regexp,
+                       re_token_t *token, int token_len, re_dfa_t *dfa,
+                       reg_syntax_t syntax, int accept_hyphen)
+{
+#ifdef RE_ENABLE_I18N
+  int mb_len = re_string_char_size_at (regexp, re_string_cur_idx (regexp));
+  if (mb_len > 1)
+    {
+      /* A multibyte character is taken whole, whatever TOKEN says.  */
+      elem->type = MB_CHAR;
+      elem->opr.wch = re_string_wchar_at (regexp, re_string_cur_idx (regexp));
+      re_string_skip_bytes (regexp, mb_len);
+      return REG_NOERROR;
+    }
+#endif /* RE_ENABLE_I18N */
+
+  re_string_skip_bytes (regexp, token_len); /* Skip a token.  */
+
+  switch (token->type)
+    {
+    case OP_OPEN_COLL_ELEM:
+    case OP_OPEN_CHAR_CLASS:
+    case OP_OPEN_EQUIV_CLASS:
+      return parse_bracket_symbol (elem, regexp, token);
+
+    case OP_CHARSET_RANGE:
+      if (!accept_hyphen)
+        {
+          /* A '-' must only appear as anything but a range indicator
+             before the closing bracket.  Everything else is an error.  */
+          re_token_t next_token;
+          (void) peek_token_bracket (&next_token, regexp, syntax);
+          /* The actual error value is not standardized since this whole
+             case is undefined.  But ERANGE makes good sense.  */
+          if (next_token.type != OP_CLOSE_BRACKET)
+            return REG_ERANGE;
+        }
+      break;
+
+    default:
+      break;
+    }
+
+  /* Anything else is an ordinary single-byte character.  */
+  elem->type = SB_CHAR;
+  elem->opr.ch = token->opr.c;
+  return REG_NOERROR;
+}
+
+/* Parse a bracket symbol inside a bracket expression: one of
+   [:<character_class>:], [.<collating_element>.] or
+   [=<equivalent_class>=].
+
+   TOKEN is the opening token; its opr.c is the delimiter byte that,
+   followed by ']', ends the symbol.  The bytes in between are copied
+   into ELEM->opr.name (a caller-supplied buffer of
+   BRACKET_NAME_BUF_SIZE bytes) and NUL-terminated, and ELEM->type is
+   set according to the kind of opening token.  For a character class
+   the bytes are read with re_string_fetch_byte_case.
+
+   Returns REG_NOERROR on success, or REG_EBRACK when the input ends
+   first or the name does not fit in the buffer.  */
+
+static reg_errcode_t
+parse_bracket_symbol (bracket_elem_t *elem, re_string_t *regexp,
+                      re_token_t *token)
+{
+  const unsigned char closer = token->opr.c;
+  const int is_char_class = (token->type == OP_OPEN_CHAR_CLASS);
+  int len;
+
+  if (re_string_eoi(regexp))
+    return REG_EBRACK;
+
+  for (len = 0; len < BRACKET_NAME_BUF_SIZE; ++len)
+    {
+      unsigned char byte;
+
+      if (is_char_class)
+        byte = re_string_fetch_byte_case (regexp);
+      else
+        byte = re_string_fetch_byte (regexp);
+
+      if (re_string_eoi(regexp))
+        return REG_EBRACK;
+
+      if (byte == closer && re_string_peek_byte (regexp, 0) == ']')
+        {
+          /* End of the symbol: step over the ']' and seal the name.  */
+          re_string_skip_bytes (regexp, 1);
+          elem->opr.name[len] = '\0';
+
+          if (token->type == OP_OPEN_COLL_ELEM)
+            elem->type = COLL_SYM;
+          else if (token->type == OP_OPEN_EQUIV_CLASS)
+            elem->type = EQUIV_CLASS;
+          else if (token->type == OP_OPEN_CHAR_CLASS)
+            elem->type = CHAR_CLASS;
+          /* Any other opening token leaves ELEM->type untouched.  */
+
+          return REG_NOERROR;
+        }
+
+      elem->opr.name[len] = byte;
+    }
+
+  /* The buffer filled up before the closing delimiter was seen.  */
+  return REG_EBRACK;
+}
+
+ /* Helper function for parse_bracket_exp.
+ Add the equivalence class represented by NAME to the sets being
+ built for a bracket expression.
+
+ SBCSET receives the single-byte members. When compiled with _LIBC
+ and the LC_COLLATE rule count read below is nonzero, every byte
+ whose weight sequence equals that of NAME is set in SBCSET and the
+ index found for NAME is appended to mbcset->equiv_classes;
+ EQUIV_CLASS_ALLOC is the allocated size of that array and is a
+ pointer argument since we may update it. In every other case only
+ a one-byte NAME is accepted and just its own bit is set; MBCSET is
+ not touched on that path.
+
+ Returns REG_NOERROR on success, REG_ECOLLATE if NAME is not usable
+ as an equivalence class, or REG_ESPACE if the array cannot grow. */
+
+static reg_errcode_t
+#ifdef RE_ENABLE_I18N
+build_equiv_class (bitset_t sbcset, re_charset_t *mbcset,
+ int *equiv_class_alloc, const unsigned char *name)
+#else /* not RE_ENABLE_I18N */
+build_equiv_class (bitset_t sbcset, const unsigned char *name)
+#endif /* not RE_ENABLE_I18N */
+{
+#ifdef _LIBC
+ uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+ if (nrules != 0)
+ {
+ const int32_t *table, *indirect;
+ const unsigned char *weights, *extra, *cp;
+ unsigned char char_buf[2];
+ int32_t idx1, idx2;
+ unsigned int ch;
+ size_t len;
+ /* This #include defines a local function! (presumably findidx,
+ which is called below and which appears to read the table,
+ weights, extra and indirect variables declared above -- confirm
+ against locale/weight.h). */
+# include <locale/weight.h>
+ /* Calculate the index for equivalence class. */
+ cp = name;
+ table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
+ weights = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
+ _NL_COLLATE_WEIGHTMB);
+ extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
+ _NL_COLLATE_EXTRAMB);
+ indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
+ _NL_COLLATE_INDIRECTMB);
+ idx1 = findidx (&cp);
+ /* Reject NAME if no index was found, or if CP was left before the
+ end of NAME (presumably findidx advances CP past the bytes it
+ consumed, so this means NAME is more than one element). */
+ if (BE (idx1 == 0 || cp < name + strlen ((const char *) name), 0))
+ /* This isn't a valid character. */
+ return REG_ECOLLATE;
+
+ /* Build single byte matching table for this equivalence class. */
+ char_buf[1] = (unsigned char) '\0';
+ /* The low 24 bits of an index select an entry in WEIGHTS; the byte
+ stored there is used as the length of the weight sequence. */
+ len = weights[idx1 & 0xffffff];
+ for (ch = 0; ch < SBC_MAX; ++ch)
+ {
+ /* Look up the index of the one-byte string { ch, '\0' }. */
+ char_buf[0] = ch;
+ cp = char_buf;
+ idx2 = findidx (&cp);
+/*
+ idx2 = table[ch];
+*/
+ if (idx2 == 0)
+ /* This isn't a valid character. */
+ continue;
+ /* Compare only if the length matches and the collation rule
+ index is the same. */
+ if (len == weights[idx2 & 0xffffff] && (idx1 >> 24) == (idx2 >> 24))
+ {
+ int cnt = 0;
+
+ /* Count how many leading weight bytes agree; the loop can
+ run up to cnt == len + 1. */
+ while (cnt <= len &&
+ weights[(idx1 & 0xffffff) + 1 + cnt]
+ == weights[(idx2 & 0xffffff) + 1 + cnt])
+ ++cnt;
+
+ /* Every compared byte matched: CH belongs to the class. */
+ if (cnt > len)
+ bitset_set (sbcset, ch);
+ }
+ }
+ /* Check whether the array has enough space. */
+ if (BE (*equiv_class_alloc == mbcset->nequiv_classes, 0))
+ {
+ /* Not enough, realloc it. */
+ /* +1 in case of mbcset->nequiv_classes is 0. */
+ int new_equiv_class_alloc = 2 * mbcset->nequiv_classes + 1;
+ /* Use realloc since the array is NULL if *alloc == 0. */
+ int32_t *new_equiv_classes = re_realloc (mbcset->equiv_classes,
+ int32_t,
+ new_equiv_class_alloc);
+ if (BE (new_equiv_classes == NULL, 0))
+ return REG_ESPACE;
+ mbcset->equiv_classes = new_equiv_classes;
+ *equiv_class_alloc = new_equiv_class_alloc;
+ }
+ /* Record the class index for NAME in MBCSET as well. */
+ mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1;
+ }
+ else
+#endif /* _LIBC */
+ {
+ /* No collation rules available: an equivalence class is just the
+ single byte that names it. */
+ if (BE (strlen ((const char *) name) != 1, 0))
+ return REG_ECOLLATE;
+ bitset_set (sbcset, *name);
+ }
+ return REG_NOERROR;
+}
+
+/* Helper function for parse_bracket_exp.
+   Add to SBCSET every single-byte character that belongs to the
+   character class called CLASS_NAME ("alnum", "digit", ...).  When
+   TRANS is not NULL each member is mapped through TRANS before it is
+   added.  If SYNTAX has RE_ICASE set, "upper" and "lower" are both
+   treated as "alpha".
+   When RE_ENABLE_I18N is defined, the value of __wctype for the class
+   is also appended to MBCSET->char_classes; CHAR_CLASS_ALLOC is the
+   allocated size of that array and is a pointer argument since it is
+   updated when the array grows.
+   Return REG_NOERROR on success, REG_ESPACE if the array cannot be
+   grown, or REG_ECTYPE if CLASS_NAME is not one of the known names.  */
+
+static reg_errcode_t
+#ifdef RE_ENABLE_I18N
+build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
+                 re_charset_t *mbcset, int *char_class_alloc,
+                 const unsigned char *class_name, reg_syntax_t syntax)
+#else /* not RE_ENABLE_I18N */
+build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
+                 const unsigned char *class_name, reg_syntax_t syntax)
+#endif /* not RE_ENABLE_I18N */
+{
+  int i;
+  const char *name = (const char *) class_name;
+
+  /* With case folding requested, "upper" and "lower" must match letters
+     of both cases, so either one is replaced by "alpha".  */
+  if (syntax & RE_ICASE)
+    {
+      if (strcmp (name, "lower") == 0 || strcmp (name, "upper") == 0)
+        name = "alpha";
+    }
+
+#ifdef RE_ENABLE_I18N
+  /* Make room for one more entry in mbcset->char_classes.  */
+  if (BE (mbcset->nchar_classes == *char_class_alloc, 0))
+    {
+      /* Double the capacity; the +1 covers a current size of 0.  */
+      int grown_alloc = 2 * mbcset->nchar_classes + 1;
+      /* realloc is used because the array is NULL while *alloc == 0.  */
+      wctype_t *grown = re_realloc (mbcset->char_classes, wctype_t,
+                                    grown_alloc);
+      if (BE (grown == NULL, 0))
+        return REG_ESPACE;
+      *char_class_alloc = grown_alloc;
+      mbcset->char_classes = grown;
+    }
+  mbcset->char_classes[mbcset->nchar_classes] = __wctype (name);
+  mbcset->nchar_classes++;
+#endif /* RE_ENABLE_I18N */
+
+/* Set in SBCSET the bit of every value in [0, SBC_MAX) accepted by
+   CTYPE_FUNC, going through TRANS when a translation table is given.  */
+#define BUILD_CHARCLASS_LOOP(ctype_func) \
+  do { \
+    if (BE (trans == NULL, 1)) \
+      { \
+        for (i = 0; i < SBC_MAX; ++i) \
+          if (ctype_func (i)) \
+            bitset_set (sbcset, i); \
+      } \
+    else \
+      { \
+        for (i = 0; i < SBC_MAX; ++i) \
+          if (ctype_func (i)) \
+            bitset_set (sbcset, trans[i]); \
+      } \
+  } while (0)
+
+  /* The names are mutually exclusive, so the order of the tests does
+     not matter; they are listed alphabetically.  */
+  if (strcmp (name, "alnum") == 0)
+    BUILD_CHARCLASS_LOOP (isalnum);
+  else if (strcmp (name, "alpha") == 0)
+    BUILD_CHARCLASS_LOOP (isalpha);
+  else if (strcmp (name, "blank") == 0)
+    BUILD_CHARCLASS_LOOP (isblank);
+  else if (strcmp (name, "cntrl") == 0)
+    BUILD_CHARCLASS_LOOP (iscntrl);
+  else if (strcmp (name, "digit") == 0)
+    BUILD_CHARCLASS_LOOP (isdigit);
+  else if (strcmp (name, "graph") == 0)
+    BUILD_CHARCLASS_LOOP (isgraph);
+  else if (strcmp (name, "lower") == 0)
+    BUILD_CHARCLASS_LOOP (islower);
+  else if (strcmp (name, "print") == 0)
+    BUILD_CHARCLASS_LOOP (isprint);
+  else if (strcmp (name, "punct") == 0)
+    BUILD_CHARCLASS_LOOP (ispunct);
+  else if (strcmp (name, "space") == 0)
+    BUILD_CHARCLASS_LOOP (isspace);
+  else if (strcmp (name, "upper") == 0)
+    BUILD_CHARCLASS_LOOP (isupper);
+  else if (strcmp (name, "xdigit") == 0)
+    BUILD_CHARCLASS_LOOP (isxdigit);
+  else
+    return REG_ECTYPE;
+
+  return REG_NOERROR;
+}
+
+/* Build the parse tree for an operator that matches the character class
+   CLASS_NAME plus every byte of the NUL-terminated string EXTRA (for
+   instance "_" so that \w also matches '_').  A nonzero NON_MATCH
+   complements the resulting set.  TRANS is handed to build_charclass
+   unchanged.
+   Return the new tree.  On failure return NULL and store the reason in
+   *ERR: REG_ESPACE when an allocation or the creation of a tree node
+   fails, otherwise the error reported by build_charclass.  */
+
+static bin_tree_t *
+build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
+                    const unsigned char *class_name,
+                    const unsigned char *extra, int non_match,
+                    reg_errcode_t *err)
+{
+  re_bitset_ptr_t sbcset;
+#ifdef RE_ENABLE_I18N
+  re_charset_t *mbcset;
+  int alloc = 0;
+#endif /* RE_ENABLE_I18N */
+  reg_errcode_t ret;
+  re_token_t br_token;
+  bin_tree_t *tree;
+
+  /* The two sets are allocated and checked one after the other so that
+     a failure of the second allocation cannot leak the first one.  */
+  sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
+  if (BE (sbcset == NULL, 0))
+    {
+      *err = REG_ESPACE;
+      return NULL;
+    }
+#ifdef RE_ENABLE_I18N
+  mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
+  if (BE (mbcset == NULL, 0))
+    {
+      re_free (sbcset);
+      *err = REG_ESPACE;
+      return NULL;
+    }
+
+  if (non_match)
+    mbcset->non_match = 1;
+#endif /* RE_ENABLE_I18N */
+
+  /* We don't care the syntax in this case.  */
+  ret = build_charclass (trans, sbcset,
+#ifdef RE_ENABLE_I18N
+                         mbcset, &alloc,
+#endif /* RE_ENABLE_I18N */
+                         class_name, 0);
+
+  if (BE (ret != REG_NOERROR, 0))
+    {
+      re_free (sbcset);
+#ifdef RE_ENABLE_I18N
+      free_charset (mbcset);
+#endif /* RE_ENABLE_I18N */
+      *err = ret;
+      return NULL;
+    }
+  /* \w match '_' also.  */
+  for (; *extra; extra++)
+    bitset_set (sbcset, *extra);
+
+  /* If it is non-matching list.  */
+  if (non_match)
+    bitset_not (sbcset);
+
+#ifdef RE_ENABLE_I18N
+  /* Ensure only single byte characters are set.  */
+  if (dfa->mb_cur_max > 1)
+    bitset_mask (sbcset, dfa->sb_char);
+#endif
+
+  /* Build a tree for simple bracket.  */
+  br_token.type = SIMPLE_BRACKET;
+  br_token.opr.sbcset = sbcset;
+  tree = create_token_tree (dfa, NULL, NULL, &br_token);
+  if (BE (tree == NULL, 0))
+    goto build_word_op_espace;
+
+#ifdef RE_ENABLE_I18N
+  if (dfa->mb_cur_max > 1)
+    {
+      bin_tree_t *mbc_tree;
+      /* Build a tree for complex bracket.  */
+      br_token.type = COMPLEX_BRACKET;
+      br_token.opr.mbcset = mbcset;
+      dfa->has_mb_node = 1;
+      mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
+      if (BE (mbc_tree == NULL, 0))
+        goto build_word_op_espace;
+      /* Then join them by ALT node.  */
+      tree = create_tree (dfa, tree, mbc_tree, OP_ALT);
+      /* Test the joined tree, not mbc_tree (already known to be
+         non-NULL here): a failed create_tree must take the error path
+         below so that *ERR is set and the sets are released.  */
+      if (BE (tree != NULL, 1))
+        return tree;
+    }
+  else
+    {
+      /* No multibyte characters in this locale: the multibyte set is
+         not needed.  */
+      free_charset (mbcset);
+      return tree;
+    }
+#else /* not RE_ENABLE_I18N */
+  return tree;
+#endif /* not RE_ENABLE_I18N */
+
+ build_word_op_espace:
+  re_free (sbcset);
+#ifdef RE_ENABLE_I18N
+  free_charset (mbcset);
+#endif /* RE_ENABLE_I18N */
+  *err = REG_ESPACE;
+  return NULL;
+}
+
+/* This is intended for the expressions like "a{1,3}".
+   Read tokens from INPUT (each one is stored in *TOKEN) until a ','
+   or an OP_CLOSE_DUP_NUM token is seen, and return the decimal number
+   formed by the tokens read before it.
+   Return -1 if the number field is empty, like in "{,1}".
+   Return -2 if an error occurred: the end of the pattern was reached,
+   a token was not a decimal digit, or the value exceeded RE_DUP_MAX.  */
+
+static int
+fetch_number (re_string_t *input, re_token_t *token, reg_syntax_t syntax)
+{
+  int value = -1;   /* -1: no digit seen yet; -2: invalid.  */
+
+  for (;;)
+    {
+      unsigned char ch;
+
+      fetch_token (token, input, syntax);
+      ch = token->opr.c;
+      if (BE (token->type == END_OF_RE, 0))
+        return -2;
+      if (token->type == OP_CLOSE_DUP_NUM || ch == ',')
+        break;
+
+      /* Once invalid the value stays invalid, but tokens keep being
+         consumed up to the terminator.  */
+      if (value == -2 || token->type != CHARACTER || ch < '0' || '9' < ch)
+        value = -2;
+      else if (value == -1)
+        value = ch - '0';
+      else
+        value = value * 10 + ch - '0';
+
+      if (value > RE_DUP_MAX)
+        value = -2;
+    }
+  return value;
+}
+
+#ifdef RE_ENABLE_I18N
+/* Release CSET and the arrays its members point to.  This function is
+   only compiled when RE_ENABLE_I18N is defined.  */
+static void
+free_charset (re_charset_t *cset)
+{
+  re_free (cset->mbchars);
+  re_free (cset->char_classes);
+# ifdef _LIBC
+  /* These members are released only in _LIBC builds.  */
+  re_free (cset->range_ends);
+  re_free (cset->range_starts);
+  re_free (cset->equiv_classes);
+  re_free (cset->coll_syms);
+# endif
+  /* The structure itself goes last, once its members have been read.  */
+  re_free (cset);
+}
+#endif /* RE_ENABLE_I18N */
+
+/* Functions for binary tree operation. */
+
+/* Create a tree node of type TYPE whose children are LEFT and RIGHT.
+   Only the type member of the node's token is filled in here; the node
+   itself is produced by create_token_tree, whose result is returned.  */
+
+static bin_tree_t *
+create_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
+             re_token_type_t type)
+{
+  re_token_t tok;
+
+  tok.type = type;
+  return create_token_tree (dfa, left, right, &tok);
+}
+
+/* Create a tree node holding a copy of *TOKEN, with children LEFT and
+   RIGHT (either may be NULL).  The node is taken from the storage
+   chunk at the head of DFA->str_tree_storage; a new chunk is allocated
+   and linked in front when the index has reached BIN_TREE_STORAGE_SIZE.
+   The copy of the token has its duplicated and opt_subexp flags
+   cleared, and non-NULL children get the new node as their parent.
+   Return the node, or NULL if a new chunk could not be allocated.  */
+
+static bin_tree_t *
+create_token_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
+                   const re_token_t *token)
+{
+  bin_tree_t *fresh;
+
+  if (BE (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE, 0))
+    {
+      /* The current chunk is used up: start a new one.  */
+      bin_tree_storage_t *chunk = re_malloc (bin_tree_storage_t, 1);
+
+      if (chunk == NULL)
+        return NULL;
+      chunk->next = dfa->str_tree_storage;
+      dfa->str_tree_storage = chunk;
+      dfa->str_tree_storage_idx = 0;
+    }
+  fresh = &dfa->str_tree_storage->data[dfa->str_tree_storage_idx];
+  dfa->str_tree_storage_idx++;
+
+  /* Copy the token first, then reset the per-node flags.  */
+  fresh->token = *token;
+  fresh->token.duplicated = 0;
+  fresh->token.opt_subexp = 0;
+
+  fresh->parent = NULL;
+  fresh->first = NULL;
+  fresh->next = NULL;
+  fresh->node_idx = -1;
+
+  /* Attach the children and point them back at the new node.  */
+  fresh->left = left;
+  if (left != NULL)
+    left->parent = fresh;
+  fresh->right = right;
+  if (right != NULL)
+    right->parent = fresh;
+
+  return fresh;
+}
+
+/* If NODE is the SUBEXP token whose index equals the integer carried
+   in EXTRA, mark it as an optional subexpression.  Always returns
+   REG_NOERROR.  To be called from preorder or postorder.  */
+
+static reg_errcode_t
+mark_opt_subexp (void *extra, bin_tree_t *node)
+{
+  /* EXTRA carries the subexpression index cast to a pointer.  */
+  int wanted = (int) (long) extra;
+  re_token_t *tok = &node->token;
+
+  if (tok->type != SUBEXP || tok->opr.idx != wanted)
+    return REG_NOERROR;
+
+  tok->opt_subexp = 1;
+  return REG_NOERROR;
+}
+
+/* Free the allocated memory inside NODE: the multibyte charset of a
+   COMPLEX_BRACKET token (only when RE_ENABLE_I18N is defined) or the
+   bitset of a SIMPLE_BRACKET token.  Nothing is freed for any other
+   token type, nor for a token marked as duplicated.  */
+
+static void
+free_token (re_token_t *node)
+{
+  if (node->duplicated != 0)
+    return;
+
+#ifdef RE_ENABLE_I18N
+  if (node->type == COMPLEX_BRACKET)
+    {
+      free_charset (node->opr.mbcset);
+      return;
+    }
+#endif /* RE_ENABLE_I18N */
+
+  if (node->type == SIMPLE_BRACKET)
+    re_free (node->opr.sbcset);
+}
+
+/* Worker function for tree walking.  Free the allocated memory held by
+   NODE's own token; EXTRA is not used.  Always returns REG_NOERROR.  */
+
+static reg_errcode_t
+free_tree (void *extra, bin_tree_t *node)
+{
+  re_token_t *tok = &node->token;
+
+  (void) extra;
+  free_token (tok);
+  return REG_NOERROR;
+}
+
+
+/* Duplicate the tree rooted at ROOT and return the root of the copy,
+ or NULL if a node could not be created. Every node of the copy has
+ token.duplicated set, and the root of the copy gets ROOT->parent as
+ its parent. This is a preorder visit similar to the one implemented
+ by the generic visitor, but we need more infrastructure to maintain
+ two parallel trees --- so, it's easier to duplicate.
+ NOTE(review): the upward walk stops only when it runs past a node
+ whose parent is NULL, not when it passes ROOT; this presumably relies
+ on ROOT never being a left child with an unvisited right sibling --
+ confirm against the callers. */
+
+static bin_tree_t *
+duplicate_tree (const bin_tree_t *root, re_dfa_t *dfa)
+{
+ const bin_tree_t *node;
+ bin_tree_t *dup_root;
+ /* P_NEW is the slot that receives the next copied node; DUP_NODE is
+ the parent to give it. They start at DUP_ROOT and ROOT->parent. */
+ bin_tree_t **p_new = &dup_root, *dup_node = root->parent;
+
+ for (node = root; ; )
+ {
+ /* Create a new tree and link it back to the current parent. */
+ *p_new = create_token_tree (dfa, NULL, NULL, &node->token);
+ if (*p_new == NULL)
+ return NULL;
+ (*p_new)->parent = dup_node;
+ /* free_token skips tokens marked as duplicated, so the data the
+ copied token shares with the original is not freed through it. */
+ (*p_new)->token.duplicated = 1;
+ dup_node = *p_new;
+
+ /* Go to the left node, or up and to the right. */
+ if (node->left)
+ {
+ node = node->left;
+ p_new = &dup_node->left;
+ }
+ else
+ {
+ const bin_tree_t *prev = NULL;
+ /* Climb both trees in step while there is no right child left
+ to visit: either none exists or we just came back from it. */
+ while (node->right == prev || node->right == NULL)
+ {
+ prev = node;
+ node = node->parent;
+ dup_node = dup_node->parent;
+ if (!node)
+ return dup_root;
+ }
+ node = node->right;
+ p_new = &dup_node->right;
+ }
+ }
+}
diff --git a/gnu_regex/regex.c b/gnu_regex/regex.c
new file mode 100644
index 0000000..bec9f9d
--- /dev/null
+++ b/gnu_regex/regex.c
@@ -0,0 +1,74 @@
+/* Extended regular expression matching and search library.
+ Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+/* Make sure no one compiles this code with a C++ compiler. */
+#ifdef __cplusplus
+# error "This is C code, use a C compiler"
+#endif
+
+#ifdef _LIBC
+/* We have to keep the namespace clean. */
+# define regfree(preg) __regfree (preg)
+# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
+# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
+# define regerror(errcode, preg, errbuf, errbuf_size) \
+ __regerror(errcode, preg, errbuf, errbuf_size)
+# define re_set_registers(bu, re, nu, st, en) \
+ __re_set_registers (bu, re, nu, st, en)
+# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
+ __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
+# define re_match(bufp, string, size, pos, regs) \
+ __re_match (bufp, string, size, pos, regs)
+# define re_search(bufp, string, size, startpos, range, regs) \
+ __re_search (bufp, string, size, startpos, range, regs)
+# define re_compile_pattern(pattern, length, bufp) \
+ __re_compile_pattern (pattern, length, bufp)
+# define re_set_syntax(syntax) __re_set_syntax (syntax)
+# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
+ __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
+# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
+
+# include "../locale/localeinfo.h"
+#endif
+
+/* On some systems, limits.h sets RE_DUP_MAX to a lower value than
+ GNU regex allows. Include it before <regex.h>, which correctly
+ #undefs RE_DUP_MAX and sets it to the right value. */
+#include <limits.h>
+
+#include "regex.h"
+#include "regex_internal.h"
+
+#include "regex_internal.c"
+#include "regcomp.c"
+#include "regexec.c"
+
+/* Binary backward compatibility. */
+#if _LIBC
+# include <shlib-compat.h>
+# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3)
+link_warning (re_max_failures, "the 're_max_failures' variable is obsolete and will go away.")
+int re_max_failures = 2000;
+# endif
+#endif
diff --git a/gnu_regex/regex.h b/gnu_regex/regex.h
new file mode 100644
index 0000000..2132772
--- /dev/null
+++ b/gnu_regex/regex.h
@@ -0,0 +1,575 @@
+/* Definitions for data structures and routines for the regular
+ expression library.
+ Copyright (C) 1985,1989-93,1995-98,2000,2001,2002,2003,2005,2006,2008
+ Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#ifndef _REGEX_H
+#define _REGEX_H 1
+
+#include <sys/types.h>
+
+/* Allow the use in C++ code. */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* The following two types have to be signed and unsigned integer type
+ wide enough to hold a value of a pointer. For most ANSI compilers
+ ptrdiff_t and size_t should be likely OK. Still size of these two
+ types is 2 for Microsoft C. Ugh... */
+typedef long int s_reg_t;
+typedef unsigned long int active_reg_t;
+
+/* The following bits are used to determine the regexp syntax we
+ recognize. The set/not-set meanings are chosen so that Emacs syntax
+ remains the value 0. The bits are given in alphabetical order, and
+ the definitions shifted by one from the previous bit; thus, when we
+ add or remove a bit, only one other definition need change. */
+typedef unsigned long int reg_syntax_t;
+
+#ifdef __USE_GNU
+/* If this bit is not set, then \ inside a bracket expression is literal.
+ If set, then such a \ quotes the following character. */
+# define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1)
+
+/* If this bit is not set, then + and ? are operators, and \+ and \? are
+ literals.
+ If set, then \+ and \? are operators and + and ? are literals. */
+# define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
+
+/* If this bit is set, then character classes are supported. They are:
+ [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:],
+ [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
+ If not set, then character classes are not supported. */
+# define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
+
+/* If this bit is set, then ^ and $ are always anchors (outside bracket
+ expressions, of course).
+ If this bit is not set, then it depends:
+ ^ is an anchor if it is at the beginning of a regular
+ expression or after an open-group or an alternation operator;
+ $ is an anchor if it is at the end of a regular expression, or
+ before a close-group or an alternation operator.
+
+ This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
+ POSIX draft 11.2 says that * etc. in leading positions is undefined.
+ We already implemented a previous draft which made those constructs
+ invalid, though, so we haven't changed the code back. */
+# define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
+
+/* If this bit is set, then special characters are always special
+ regardless of where they are in the pattern.
+ If this bit is not set, then special characters are special only in
+ some contexts; otherwise they are ordinary. Specifically,
+ * + ? and intervals are only special when not after the beginning,
+ open-group, or alternation operator. */
+# define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
+
+/* If this bit is set, then *, +, ?, and { cannot be first in an re or
+ immediately after an alternation or begin-group operator. */
+# define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
+
+/* If this bit is set, then . matches newline.
+ If not set, then it doesn't. */
+# define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
+
+/* If this bit is set, then . doesn't match NUL.
+ If not set, then it does. */
+# define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
+
+/* If this bit is set, nonmatching lists [^...] do not match newline.
+ If not set, they do. */
+# define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
+
+/* If this bit is set, either \{...\} or {...} defines an
+ interval, depending on RE_NO_BK_BRACES.
+ If not set, \{, \}, {, and } are literals. */
+# define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
+
+/* If this bit is set, +, ? and | aren't recognized as operators.
+ If not set, they are. */
+# define RE_LIMITED_OPS (RE_INTERVALS << 1)
+
+/* If this bit is set, newline is an alternation operator.
+ If not set, newline is literal. */
+# define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
+
+/* If this bit is set, then `{...}' defines an interval, and \{ and \}
+ are literals.
+ If not set, then `\{...\}' defines an interval. */
+# define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
+
+/* If this bit is set, (...) defines a group, and \( and \) are literals.
+ If not set, \(...\) defines a group, and ( and ) are literals. */
+# define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
+
+/* If this bit is set, then \<digit> matches <digit>.
+ If not set, then \<digit> is a back-reference. */
+# define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
+
+/* If this bit is set, then | is an alternation operator, and \| is literal.
+ If not set, then \| is an alternation operator, and | is literal. */
+# define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
+
+/* If this bit is set, then a starting range point collating higher
+ than the ending range point, as in [z-a], is invalid.
+ If not set, then when the starting range point collates higher than
+ the ending range point, the range is ignored. */
+# define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
+
+/* If this bit is set, then an unmatched ) is ordinary.
+ If not set, then an unmatched ) is invalid. */
+# define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
+
+/* If this bit is set, succeed as soon as we match the whole pattern,
+ without further backtracking. */
+# define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)
+
+/* If this bit is set, do not process the GNU regex operators.
+ If not set, then the GNU regex operators are recognized. */
+# define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1)
+
+/* If this bit is set, turn on internal regex debugging.
+ If not set, and debugging was on, turn it off.
+ This only works if regex.c is compiled -DDEBUG.
+ We define this bit always, so that all that's needed to turn on
+ debugging is to recompile regex.c; the calling code can always have
+ this bit set, and it won't affect anything in the normal case. */
+# define RE_DEBUG (RE_NO_GNU_OPS << 1)
+
+/* If this bit is set, a syntactically invalid interval is treated as
+ a string of ordinary characters. For example, the ERE 'a{1' is
+ treated as 'a\{1'. */
+# define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1)
+
+/* If this bit is set, then ignore case when matching.
+ If not set, then case is significant. */
+# define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1)
+
+/* This bit is used internally like RE_CONTEXT_INDEP_ANCHORS but only
+ for ^, because it is difficult to scan the regex backwards to find
+ whether ^ should be special. */
+# define RE_CARET_ANCHORS_HERE (RE_ICASE << 1)
+
+/* If this bit is set, then \{ cannot be first in a BRE or
+ immediately after an alternation or begin-group operator. */
+# define RE_CONTEXT_INVALID_DUP (RE_CARET_ANCHORS_HERE << 1)
+
+/* If this bit is set, then no_sub will be set to 1 during
+ re_compile_pattern. */
+# define RE_NO_SUB (RE_CONTEXT_INVALID_DUP << 1)
+#endif
+
+/* This global variable defines the particular regexp syntax to use (for
+ some interfaces). When a regexp is compiled, the syntax used is
+ stored in the pattern buffer, so changing this does not affect
+ already-compiled regexps. */
+extern reg_syntax_t re_syntax_options;
+
+#ifdef __USE_GNU
+/* Define combinations of the above bits for the standard possibilities.
+ (The [[[ comments delimit what gets put into the Texinfo file, so
+ don't delete them!) */
+/* [[[begin syntaxes]]] */
+#define RE_SYNTAX_EMACS 0
+
+#define RE_SYNTAX_AWK \
+ (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \
+ | RE_NO_BK_PARENS | RE_NO_BK_REFS \
+ | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \
+ | RE_DOT_NEWLINE | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)
+
+#define RE_SYNTAX_GNU_AWK \
+ ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG) \
+ & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS \
+ | RE_CONTEXT_INVALID_OPS ))
+
+#define RE_SYNTAX_POSIX_AWK \
+ (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \
+ | RE_INTERVALS | RE_NO_GNU_OPS)
+
+#define RE_SYNTAX_GREP \
+ (RE_BK_PLUS_QM | RE_CHAR_CLASSES \
+ | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \
+ | RE_NEWLINE_ALT)
+
+#define RE_SYNTAX_EGREP \
+ (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \
+ | RE_NEWLINE_ALT | RE_NO_BK_PARENS \
+ | RE_NO_BK_VBAR)
+
+#define RE_SYNTAX_POSIX_EGREP \
+ (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES \
+ | RE_INVALID_INTERVAL_ORD)
+
+/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */
+#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
+
+#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
+
+/* Syntax bits common to both basic and extended POSIX regex syntax. */
+#define _RE_SYNTAX_POSIX_COMMON \
+ (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \
+ | RE_INTERVALS | RE_NO_EMPTY_RANGES)
+
+#define RE_SYNTAX_POSIX_BASIC \
+ (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM | RE_CONTEXT_INVALID_DUP)
+
+/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
+ RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this
+ isn't minimal, since other operators, such as \`, aren't disabled. */
+#define RE_SYNTAX_POSIX_MINIMAL_BASIC \
+ (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
+
+#define RE_SYNTAX_POSIX_EXTENDED \
+ (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \
+ | RE_NO_BK_PARENS | RE_NO_BK_VBAR \
+ | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD)
+
+/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is
+ removed and RE_NO_BK_REFS is added. */
+#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \
+ (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \
+ | RE_NO_BK_PARENS | RE_NO_BK_REFS \
+ | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD)
+/* [[[end syntaxes]]] */
+
+/* Maximum number of duplicates an interval can allow. Some systems
+ (erroneously) define this in other header files, but we want our
+ value, so remove any previous define. */
+# ifdef RE_DUP_MAX
+# undef RE_DUP_MAX
+# endif
+/* If sizeof(int) == 2, then ((1 << 15) - 1) overflows. */
+# define RE_DUP_MAX (0x7fff)
+#endif
+
+
+/* POSIX `cflags' bits (i.e., information for `regcomp'). */
+
+/* If this bit is set, then use extended regular expression syntax.
+ If not set, then use basic regular expression syntax. */
+#define REG_EXTENDED 1
+
+/* If this bit is set, then ignore case when matching.
+ If not set, then case is significant. */
+#define REG_ICASE (REG_EXTENDED << 1)
+
+/* If this bit is set, then anchors do not match at newline
+ characters in the string.
+ If not set, then anchors do match at newlines. */
+#define REG_NEWLINE (REG_ICASE << 1)
+
+/* If this bit is set, then report only success or fail in regexec.
+ If not set, then returns differ between not matching and errors. */
+#define REG_NOSUB (REG_NEWLINE << 1)
+
+
+/* POSIX `eflags' bits (i.e., information for regexec). */
+
+/* If this bit is set, then the beginning-of-line operator doesn't match
+ the beginning of the string (presumably because it's not the
+ beginning of a line).
+ If not set, then the beginning-of-line operator does match the
+ beginning of the string. */
+#define REG_NOTBOL 1
+
+/* Like REG_NOTBOL, except for the end-of-line. */
+#define REG_NOTEOL (1 << 1)
+
+/* Use PMATCH[0] to delimit the start and end of the search in the
+ buffer. */
+#define REG_STARTEND (1 << 2)
+
+
+/* Status codes used by the regex routines. REG_NOERROR is zero and
+ each enumerator after it takes the next consecutive value, so the
+ order below is significant.
+ If any error codes are removed, changed, or added, update the
+ `re_error_msg' table in regex.c. */
+typedef enum
+{
+#if defined _XOPEN_SOURCE || defined __USE_XOPEN2K
+ REG_ENOSYS = -1, /* This will never happen for this implementation. */
+#endif
+
+ REG_NOERROR = 0, /* Success. */
+ REG_NOMATCH, /* Didn't find a match (for regexec). */
+
+ /* POSIX regcomp return error codes. (In the order listed in the
+ standard.) */
+ REG_BADPAT, /* Invalid pattern. */
+ REG_ECOLLATE, /* Invalid collating element. */
+ REG_ECTYPE, /* Invalid character class name. */
+ REG_EESCAPE, /* Trailing backslash. */
+ REG_ESUBREG, /* Invalid back reference. */
+ REG_EBRACK, /* Unmatched left bracket. */
+ REG_EPAREN, /* Parenthesis imbalance. */
+ REG_EBRACE, /* Unmatched \{. */
+ REG_BADBR, /* Invalid contents of \{\}. */
+ REG_ERANGE, /* Invalid range end. */
+ REG_ESPACE, /* Ran out of memory. */
+ REG_BADRPT, /* No preceding re for repetition op. */
+
+ /* Error codes we've added. */
+ REG_EEND, /* Premature end. */
+ REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */
+ REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */
+} reg_errcode_t;
+
+/* This data structure represents a compiled pattern. Before calling
+ the pattern compiler, the fields `buffer', `allocated', `fastmap',
+ `translate', and `no_sub' can be set. After the pattern has been
+ compiled, the `re_nsub' field is available. All other fields are
+ private to the regex routines. */
+
+#ifndef RE_TRANSLATE_TYPE
+# define __RE_TRANSLATE_TYPE unsigned char *
+# ifdef __USE_GNU
+# define RE_TRANSLATE_TYPE __RE_TRANSLATE_TYPE
+# endif
+#endif
+
+#ifdef __USE_GNU
+# define __REPB_PREFIX(name) name
+#else
+# define __REPB_PREFIX(name) __##name
+#endif
+
+/* Members declared through __REPB_PREFIX(name) are called `name' when
+ __USE_GNU is defined and `__name' otherwise; the comments below use
+ the unprefixed names. `re_nsub' is always spelled without a prefix. */
+struct re_pattern_buffer
+{
+ /* Space that holds the compiled pattern. It is declared as
+ `unsigned char *' because its elements are sometimes used as
+ array indexes. */
+ unsigned char *__REPB_PREFIX(buffer);
+
+ /* Number of bytes to which `buffer' points. */
+ unsigned long int __REPB_PREFIX(allocated);
+
+ /* Number of bytes actually used in `buffer'. */
+ unsigned long int __REPB_PREFIX(used);
+
+ /* Syntax setting with which the pattern was compiled. */
+ reg_syntax_t __REPB_PREFIX(syntax);
+
+ /* Pointer to a fastmap, if any, otherwise zero. re_search uses the
+ fastmap, if there is one, to skip over impossible starting points
+ for matches. */
+ char *__REPB_PREFIX(fastmap);
+
+ /* Either a translate table to apply to all characters before
+ comparing them, or zero for no translation. The translation is
+ applied to a pattern when it is compiled and to a string when it
+ is matched. */
+ __RE_TRANSLATE_TYPE __REPB_PREFIX(translate);
+
+ /* Number of subexpressions found by the compiler. */
+ size_t re_nsub;
+
+ /* Zero if this pattern cannot match the empty string, one else.
+ Well, in truth it's used only in `re_search_2', to see whether or
+ not we should use the fastmap, so we don't set this absolutely
+ perfectly; see `re_compile_fastmap' (the `duplicate' case). */
+ unsigned __REPB_PREFIX(can_be_null) : 1;
+
+ /* If REGS_UNALLOCATED, allocate space in the `regs' structure
+ for `max (RE_NREGS, re_nsub + 1)' groups.
+ If REGS_REALLOCATE, reallocate space if necessary.
+ If REGS_FIXED, use what's there.
+ The field is two bits wide so that it can hold the three REGS_*
+ values 0 to 2; their names are only defined under __USE_GNU. */
+#ifdef __USE_GNU
+# define REGS_UNALLOCATED 0
+# define REGS_REALLOCATE 1
+# define REGS_FIXED 2
+#endif
+ unsigned __REPB_PREFIX(regs_allocated) : 2;
+
+ /* Set to zero when `regex_compile' compiles a pattern; set to one
+ by `re_compile_fastmap' if it updates the fastmap. */
+ unsigned __REPB_PREFIX(fastmap_accurate) : 1;
+
+ /* If set, `re_match_2' does not return information about
+ subexpressions. */
+ unsigned __REPB_PREFIX(no_sub) : 1;
+
+ /* If set, a beginning-of-line anchor doesn't match at the beginning
+ of the string. */
+ unsigned __REPB_PREFIX(not_bol) : 1;
+
+ /* Similarly for an end-of-line anchor. */
+ unsigned __REPB_PREFIX(not_eol) : 1;
+
+ /* If true, an anchor at a newline matches. */
+ unsigned __REPB_PREFIX(newline_anchor) : 1;
+};
+
+typedef struct re_pattern_buffer regex_t;
+
+/* Type for byte offsets within the string. POSIX mandates this. */
+typedef int regoff_t;
+
+
+#ifdef __USE_GNU
+/* This is the structure we store register match data in. See
+ regex.texinfo for a full description of what registers match. */
+struct re_registers
+{
+ unsigned num_regs;
+ regoff_t *start;
+ regoff_t *end;
+};
+
+
+/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
+ `re_match_2' returns information about at least this many registers
+ the first time a `regs' structure is passed. */
+# ifndef RE_NREGS
+# define RE_NREGS 30
+# endif
+#endif
+
+
+/* POSIX specification for registers. Aside from the different names than
+ `re_registers', POSIX uses an array of structures, instead of a
+ structure of arrays. */
+typedef struct
+{
+ regoff_t rm_so; /* Byte offset from string's start to substring's start. */
+ regoff_t rm_eo; /* Byte offset from string's start to substring's end. */
+} regmatch_t;
+
+/* Declarations for routines. */
+
+#ifdef __USE_GNU
+/* Sets the current default syntax to SYNTAX, and return the old syntax.
+ You can also simply assign to the `re_syntax_options' variable. */
+extern reg_syntax_t re_set_syntax (reg_syntax_t __syntax);
+
+/* Compile the regular expression PATTERN, with length LENGTH
+ and syntax given by the global `re_syntax_options', into the buffer
+ BUFFER. Return NULL if successful, and an error string if not. */
+extern const char *re_compile_pattern (const char *__pattern, size_t __length,
+ struct re_pattern_buffer *__buffer);
+
+
+/* Compile a fastmap for the compiled pattern in BUFFER; used to
+ accelerate searches. Return 0 if successful and -2 if there was an
+ internal error. */
+extern int re_compile_fastmap (struct re_pattern_buffer *__buffer);
+
+
+/* Search in the string STRING (with length LENGTH) for the pattern
+ compiled into BUFFER. Start searching at position START, for RANGE
+ characters. Return the starting position of the match, -1 for no
+ match, or -2 for an internal error. Also return register
+ information in REGS (if REGS and BUFFER->no_sub are nonzero). */
+extern int re_search (struct re_pattern_buffer *__buffer, const char *__string,
+ int __length, int __start, int __range,
+ struct re_registers *__regs);
+
+
+/* Like `re_search', but search in the concatenation of STRING1 and
+ STRING2. Also, stop searching at index START + STOP. */
+extern int re_search_2 (struct re_pattern_buffer *__buffer,
+ const char *__string1, int __length1,
+ const char *__string2, int __length2, int __start,
+ int __range, struct re_registers *__regs, int __stop);
+
+
+/* Like `re_search', but return how many characters in STRING the regexp
+ in BUFFER matched, starting at position START. */
+extern int re_match (struct re_pattern_buffer *__buffer, const char *__string,
+ int __length, int __start, struct re_registers *__regs);
+
+
+/* Relates to `re_match' as `re_search_2' relates to `re_search'. */
+extern int re_match_2 (struct re_pattern_buffer *__buffer,
+ const char *__string1, int __length1,
+ const char *__string2, int __length2, int __start,
+ struct re_registers *__regs, int __stop);
+
+
+/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
+ ENDS. Subsequent matches using BUFFER and REGS will use this memory
+ for recording register information. STARTS and ENDS must be
+ allocated with malloc, and must each be at least `NUM_REGS * sizeof
+ (regoff_t)' bytes long.
+
+ If NUM_REGS == 0, then subsequent matches should allocate their own
+ register data.
+
+ Unless this function is called, the first search or match using
+ PATTERN_BUFFER will allocate its own register data, without
+ freeing the old data. */
+extern void re_set_registers (struct re_pattern_buffer *__buffer,
+ struct re_registers *__regs,
+ unsigned int __num_regs,
+ regoff_t *__starts, regoff_t *__ends);
+#endif /* Use GNU */
+
+#if defined _REGEX_RE_COMP || (defined _LIBC && defined __USE_BSD)
+# ifndef _CRAY
+/* 4.2 bsd compatibility. */
+extern char *re_comp (const char *);
+extern int re_exec (const char *);
+# endif
+#endif
+
+/* GCC 2.95 and later have "__restrict"; C99 compilers have
+ "restrict", and "configure" may have defined "restrict". */
+#ifndef __restrict
+# if ! (2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__))
+# if defined restrict || 199901L <= __STDC_VERSION__
+# define __restrict restrict
+# else
+# define __restrict
+# endif
+# endif
+#endif
+/* gcc 3.1 and up support the [restrict] syntax. */
+#ifndef __restrict_arr
+# if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) \
+ && !defined __GNUG__
+# define __restrict_arr __restrict
+# else
+# define __restrict_arr
+# endif
+#endif
+
+/* POSIX compatibility. */
+extern int regcomp (regex_t *__restrict __preg,
+ const char *__restrict __pattern,
+ int __cflags);
+
+extern int regexec (const regex_t *__restrict __preg,
+ const char *__restrict __string, size_t __nmatch,
+ regmatch_t __pmatch[__restrict_arr],
+ int __eflags);
+
+extern size_t regerror (int __errcode, const regex_t *__restrict __preg,
+ char *__restrict __errbuf, size_t __errbuf_size);
+
+extern void regfree (regex_t *__preg);
+
+
+#ifdef __cplusplus
+}
+#endif /* C++ */
+
+#endif /* regex.h */
diff --git a/gnu_regex/regex_internal.c b/gnu_regex/regex_internal.c
new file mode 100644
index 0000000..c9da2b9
--- /dev/null
+++ b/gnu_regex/regex_internal.c
@@ -0,0 +1,1713 @@
+/* Extended regular expression matching and search library.
+ Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+static void re_string_construct_common (const char *str, int len,
+ re_string_t *pstr,
+ RE_TRANSLATE_TYPE trans, int icase,
+ const re_dfa_t *dfa) internal_function;
+static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
+ const re_node_set *nodes,
+ unsigned int hash) internal_function;
+static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
+ const re_node_set *nodes,
+ unsigned int context,
+ unsigned int hash) internal_function;
+
+/* Functions for string operation. */
+
+/* Prepare PSTR to scan STR (LEN bytes) and allocate its working buffers.
+   INIT_LEN is first raised to DFA->mb_cur_max; the buffers then receive
+   min (LEN + 1, INIT_LEN) elements.  The buffers are left unfilled, so
+   re_string_reconstruct must be called before the object is used.
+   Return REG_NOERROR, or the error from re_string_realloc_buffers.  */
+
+static reg_errcode_t
+internal_function
+re_string_allocate (re_string_t *pstr, const char *str, int len, int init_len,
+                    RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa)
+{
+  reg_errcode_t err;
+  int buf_len;
+
+  /* Ask for room for at least one whole character.  */
+  if (init_len < dfa->mb_cur_max)
+    init_len = dfa->mb_cur_max;
+  buf_len = init_len;
+  if (len + 1 < init_len)
+    buf_len = len + 1;
+
+  re_string_construct_common (str, len, pstr, trans, icase, dfa);
+
+  err = re_string_realloc_buffers (pstr, buf_len);
+  if (BE (err != REG_NOERROR, 0))
+    return err;
+
+  pstr->word_char = dfa->word_char;
+  pstr->word_ops_used = dfa->word_ops_used;
+
+  /* Without a private copy, MBS simply aliases the caller's string.  */
+  if (!pstr->mbs_allocated)
+    pstr->mbs = (unsigned char *) str;
+
+  /* Only an aliased single-byte string is usable as is; everything else
+     still has to be converted.  */
+  if (pstr->mbs_allocated || dfa->mb_cur_max > 1)
+    pstr->valid_len = 0;
+  else
+    pstr->valid_len = len;
+  pstr->valid_raw_len = pstr->valid_len;
+  return REG_NOERROR;
+}
+
+/* Initialize PSTR over STR (LEN bytes), allocate its buffers and fill them
+   immediately.  Depending on ICASE, TRANS and DFA->mb_cur_max the contents
+   are upper-cased, translated and/or converted to wide characters.
+   Return REG_NOERROR on success, otherwise the error from a helper.  */
+
+static reg_errcode_t
+internal_function
+re_string_construct (re_string_t *pstr, const char *str, int len,
+                     RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa)
+{
+  reg_errcode_t err;
+
+  memset (pstr, '\0', sizeof (re_string_t));
+  re_string_construct_common (str, len, pstr, trans, icase, dfa);
+
+  /* Nothing is allocated when LEN is not positive.  */
+  if (len > 0)
+    {
+      err = re_string_realloc_buffers (pstr, len + 1);
+      if (BE (err != REG_NOERROR, 0))
+        return err;
+    }
+  if (!pstr->mbs_allocated)
+    pstr->mbs = (unsigned char *) str;
+
+  if (!icase)
+    {
+#ifdef RE_ENABLE_I18N
+      if (dfa->mb_cur_max > 1)
+        build_wcs_buffer (pstr);
+      else
+#endif /* RE_ENABLE_I18N */
+      if (trans != NULL)
+        re_string_translate_buffer (pstr);
+      else
+        {
+          /* The raw string is used directly.  */
+          pstr->valid_len = pstr->bufs_len;
+          pstr->valid_raw_len = pstr->bufs_len;
+        }
+      return REG_NOERROR;
+    }
+
+#ifdef RE_ENABLE_I18N
+  if (dfa->mb_cur_max > 1)
+    {
+      for (;;)
+        {
+          err = build_wcs_upper_buffer (pstr);
+          if (BE (err != REG_NOERROR, 0))
+            return err;
+          /* Stop once all raw input is consumed, or when the buffer
+             still has more than one character of slack.  */
+          if (pstr->valid_raw_len >= len
+              || pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
+            break;
+          err = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
+          if (BE (err != REG_NOERROR, 0))
+            return err;
+        }
+    }
+  else
+#endif /* RE_ENABLE_I18N */
+    build_upper_buffer (pstr);
+
+  return REG_NOERROR;
+}
+
+/* Helper for re_string_allocate and re_string_construct: resize every
+   buffer PSTR owns to NEW_BUF_LEN elements and record the new size in
+   PSTR->bufs_len.  Return REG_ESPACE as soon as one reallocation fails
+   (bufs_len is then left untouched), REG_NOERROR otherwise.  */
+
+static reg_errcode_t
+internal_function
+re_string_realloc_buffers (re_string_t *pstr, int new_buf_len)
+{
+#ifdef RE_ENABLE_I18N
+  if (pstr->mb_cur_max > 1)
+    {
+      wint_t *wcs;
+
+      wcs = re_realloc (pstr->wcs, wint_t, new_buf_len);
+      if (BE (wcs == NULL, 0))
+        return REG_ESPACE;
+      pstr->wcs = wcs;
+
+      /* The offsets table is resized only if it already exists.  */
+      if (pstr->offsets != NULL)
+        {
+          int *offsets;
+
+          offsets = re_realloc (pstr->offsets, int, new_buf_len);
+          if (BE (offsets == NULL, 0))
+            return REG_ESPACE;
+          pstr->offsets = offsets;
+        }
+    }
+#endif /* RE_ENABLE_I18N */
+
+  /* MBS belongs to PSTR only when mbs_allocated is set.  */
+  if (pstr->mbs_allocated)
+    {
+      unsigned char *mbs;
+
+      mbs = re_realloc (pstr->mbs, unsigned char, new_buf_len);
+      if (BE (mbs == NULL, 0))
+        return REG_ESPACE;
+      pstr->mbs = mbs;
+    }
+
+  pstr->bufs_len = new_buf_len;
+  return REG_NOERROR;
+}
+
+
+/* Fill in the fields of PSTR that re_string_allocate and
+   re_string_construct have in common: the raw input STR/LEN, the
+   translation table TRANS, the ICASE flag and the multibyte settings
+   copied from DFA.  No memory is allocated here.  */
+
+static void
+internal_function
+re_string_construct_common (const char *str, int len, re_string_t *pstr,
+                            RE_TRANSLATE_TYPE trans, int icase,
+                            const re_dfa_t *dfa)
+{
+  /* The raw input; initially the whole of it may be read.  */
+  pstr->raw_mbs = (const unsigned char *) str;
+  pstr->raw_len = len;
+  pstr->len = len;
+  pstr->raw_stop = len;
+  pstr->stop = len;
+
+  /* Case folding and translation both need a private MBS buffer.  */
+  pstr->trans = trans;
+  pstr->icase = (icase != 0);
+  pstr->mbs_allocated = (icase || trans != NULL);
+
+  /* Character-set properties taken over from the DFA.  */
+  pstr->mb_cur_max = dfa->mb_cur_max;
+  pstr->is_utf8 = dfa->is_utf8;
+  pstr->map_notascii = dfa->map_notascii;
+}
+
+#ifdef RE_ENABLE_I18N
+
+/* Build wide character buffer PSTR->WCS.
+ If the byte sequence of the string is:
+ <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
+ Then wide character buffer will be:
+ <wc1> , WEOF , <wc2> , WEOF , <wc3>
+ We use WEOF for padding; it indicates that the position isn't
+ the first byte of a multibyte character.
+
+ When PSTR->TRANS is set, the translated bytes are also stored in
+ PSTR->MBS. Conversion stops early when the remaining bytes form only
+ an incomplete multibyte character. On return PSTR->VALID_LEN and
+ PSTR->VALID_RAW_LEN hold the index that was reached.
+
+ Note that this function assumes PSTR->VALID_LEN elements are already
+ built and starts from PSTR->VALID_LEN. */
+
+static void
+internal_function
+build_wcs_buffer (re_string_t *pstr)
+{
+#ifdef _LIBC
+ unsigned char buf[MB_LEN_MAX];
+ assert (MB_LEN_MAX >= pstr->mb_cur_max);
+#else
+ unsigned char buf[64];
+#endif
+ mbstate_t prev_st;
+ int byte_idx, end_idx, remain_len;
+ size_t mbclen;
+
+ /* Build the buffers from pstr->valid_len to either pstr->len or
+ pstr->bufs_len. */
+ end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
+ for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
+ {
+ wchar_t wc;
+ const char *p;
+
+ remain_len = end_idx - byte_idx;
+ prev_st = pstr->cur_state; /* Saved so a failed conversion can be undone. */
+ /* Apply the translation if we need. */
+ if (BE (pstr->trans != NULL, 0))
+ {
+ int i, ch;
+
+ for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
+ {
+ ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
+ buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];
+ }
+ p = (const char *) buf;
+ }
+ else
+ p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
+ mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
+ if (BE (mbclen == (size_t) -2, 0))
+ {
+ /* The buffer doesn't have enough space, finish to build. */
+ pstr->cur_state = prev_st;
+ break;
+ }
+ else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
+ {
+ /* We treat these cases as a singlebyte character. */
+ mbclen = 1;
+ wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
+ if (BE (pstr->trans != NULL, 0))
+ wc = pstr->trans[wc];
+ pstr->cur_state = prev_st;
+ }
+
+ /* Write wide character and padding. */
+ pstr->wcs[byte_idx++] = wc;
+ /* Write paddings. */
+ for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) /* remain_len is reused as the end index of this character. */
+ pstr->wcs[byte_idx++] = WEOF;
+ }
+ pstr->valid_len = byte_idx;
+ pstr->valid_raw_len = byte_idx;
+}
+
+/* Build wide character buffer PSTR->WCS like build_wcs_buffer,
+ but for REG_ICASE: lower-case characters are stored in their
+ upper-case form in both PSTR->MBS and PSTR->WCS.
+
+ Two loops are used. The first one is taken when no translation
+ table is set, PSTR->map_notascii is clear and no offsets are in use;
+ it upper-cases ASCII bytes directly. As soon as an upper-case
+ character has a different byte length than the original, control
+ jumps to the second, general loop (label offsets_needed), which
+ records in PSTR->OFFSETS the raw index belonging to each buffer
+ index and adjusts PSTR->len and PSTR->stop by the length difference.
+
+ Return REG_ESPACE if PSTR->OFFSETS cannot be allocated, REG_NOERROR
+ otherwise. On return PSTR->VALID_LEN is the buffer index reached and
+ PSTR->VALID_RAW_LEN the corresponding raw index. */
+
+static reg_errcode_t
+internal_function
+build_wcs_upper_buffer (re_string_t *pstr)
+{
+ mbstate_t prev_st;
+ int src_idx, byte_idx, end_idx, remain_len;
+ size_t mbclen;
+#ifdef _LIBC
+ char buf[MB_LEN_MAX];
+ assert (MB_LEN_MAX >= pstr->mb_cur_max);
+#else
+ char buf[64];
+#endif
+
+ byte_idx = pstr->valid_len;
+ end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
+
+ /* The following optimization assumes that ASCII characters can be
+ mapped to wide characters with a simple cast. */
+ if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
+ {
+ while (byte_idx < end_idx)
+ {
+ wchar_t wc;
+
+ if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
+ && mbsinit (&pstr->cur_state))
+ {
+ /* In case of a singlebyte character. */
+ pstr->mbs[byte_idx]
+ = toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
+ /* The next step uses the assumption that wchar_t is encoded
+ ASCII-safe: all ASCII values can be converted like this. */
+ pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
+ ++byte_idx;
+ continue;
+ }
+
+ remain_len = end_idx - byte_idx;
+ prev_st = pstr->cur_state;
+ mbclen = __mbrtowc (&wc,
+ ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
+ + byte_idx), remain_len, &pstr->cur_state);
+ if (BE (mbclen + 2 > 2, 1)) /* True unless mbclen is 0, (size_t) -1 or (size_t) -2. */
+ {
+ wchar_t wcu = wc;
+ if (iswlower (wc))
+ {
+ size_t mbcdlen;
+
+ wcu = towupper (wc);
+ mbcdlen = wcrtomb (buf, wcu, &prev_st);
+ if (BE (mbclen == mbcdlen, 1))
+ memcpy (pstr->mbs + byte_idx, buf, mbclen);
+ else
+ {
+ src_idx = byte_idx; /* Raw and buffer indices still coincide in this loop. */
+ goto offsets_needed;
+ }
+ }
+ else
+ memcpy (pstr->mbs + byte_idx,
+ pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
+ pstr->wcs[byte_idx++] = wcu;
+ /* Write paddings. */
+ for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
+ pstr->wcs[byte_idx++] = WEOF;
+ }
+ else if (mbclen == (size_t) -1 || mbclen == 0)
+ {
+ /* It is an invalid character or '\0'. Just use the byte. */
+ int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
+ pstr->mbs[byte_idx] = ch;
+ /* And also cast it to wide char. */
+ pstr->wcs[byte_idx++] = (wchar_t) ch;
+ if (BE (mbclen == (size_t) -1, 0))
+ pstr->cur_state = prev_st;
+ }
+ else
+ {
+ /* The buffer doesn't have enough space, finish to build. */
+ pstr->cur_state = prev_st;
+ break;
+ }
+ }
+ pstr->valid_len = byte_idx;
+ pstr->valid_raw_len = byte_idx;
+ return REG_NOERROR;
+ }
+ else
+ for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
+ {
+ wchar_t wc;
+ const char *p;
+ offsets_needed:
+ remain_len = end_idx - byte_idx;
+ prev_st = pstr->cur_state;
+ if (BE (pstr->trans != NULL, 0))
+ {
+ int i, ch;
+
+ for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
+ {
+ ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
+ buf[i] = pstr->trans[ch];
+ }
+ p = (const char *) buf;
+ }
+ else
+ p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
+ mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
+ if (BE (mbclen + 2 > 2, 1))
+ {
+ wchar_t wcu = wc;
+ if (iswlower (wc))
+ {
+ size_t mbcdlen;
+
+ wcu = towupper (wc);
+ mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
+ if (BE (mbclen == mbcdlen, 1))
+ memcpy (pstr->mbs + byte_idx, buf, mbclen);
+ else if (mbcdlen != (size_t) -1)
+ {
+ size_t i;
+
+ if (byte_idx + mbcdlen > pstr->bufs_len)
+ {
+ pstr->cur_state = prev_st;
+ break;
+ }
+
+ if (pstr->offsets == NULL)
+ {
+ pstr->offsets = re_malloc (int, pstr->bufs_len);
+
+ if (pstr->offsets == NULL)
+ return REG_ESPACE;
+ }
+ if (!pstr->offsets_needed)
+ {
+ for (i = 0; i < (size_t) byte_idx; ++i) /* Everything built so far maps one to one. */
+ pstr->offsets[i] = i;
+ pstr->offsets_needed = 1;
+ }
+
+ memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
+ pstr->wcs[byte_idx] = wcu;
+ pstr->offsets[byte_idx] = src_idx;
+ for (i = 1; i < mbcdlen; ++i)
+ {
+ pstr->offsets[byte_idx + i]
+ = src_idx + (i < mbclen ? i : mbclen - 1);
+ pstr->wcs[byte_idx + i] = WEOF;
+ }
+ pstr->len += mbcdlen - mbclen;
+ if (pstr->raw_stop > src_idx)
+ pstr->stop += mbcdlen - mbclen;
+ end_idx = (pstr->bufs_len > pstr->len)
+ ? pstr->len : pstr->bufs_len;
+ byte_idx += mbcdlen;
+ src_idx += mbclen;
+ continue;
+ }
+ else
+ memcpy (pstr->mbs + byte_idx, p, mbclen);
+ }
+ else
+ memcpy (pstr->mbs + byte_idx, p, mbclen);
+
+ if (BE (pstr->offsets_needed != 0, 0))
+ {
+ size_t i;
+ for (i = 0; i < mbclen; ++i)
+ pstr->offsets[byte_idx + i] = src_idx + i;
+ }
+ src_idx += mbclen;
+
+ pstr->wcs[byte_idx++] = wcu;
+ /* Write paddings. */
+ for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
+ pstr->wcs[byte_idx++] = WEOF;
+ }
+ else if (mbclen == (size_t) -1 || mbclen == 0)
+ {
+ /* It is an invalid character or '\0'. Just use the byte. */
+ int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
+
+ if (BE (pstr->trans != NULL, 0))
+ ch = pstr->trans [ch];
+ pstr->mbs[byte_idx] = ch;
+
+ if (BE (pstr->offsets_needed != 0, 0))
+ pstr->offsets[byte_idx] = src_idx;
+ ++src_idx;
+
+ /* And also cast it to wide char. */
+ pstr->wcs[byte_idx++] = (wchar_t) ch;
+ if (BE (mbclen == (size_t) -1, 0))
+ pstr->cur_state = prev_st;
+ }
+ else
+ {
+ /* The buffer doesn't have enough space, finish to build. */
+ pstr->cur_state = prev_st;
+ break;
+ }
+ }
+ pstr->valid_len = byte_idx;
+ pstr->valid_raw_len = src_idx;
+ return REG_NOERROR;
+}
+
+/* Advance over whole characters of the raw input, starting just past the
+   part of PSTR that is already valid, until the raw index is no longer
+   below NEW_RAW_IDX.  Store the last character skipped in *LAST_WC (WEOF
+   if nothing was skipped) and return the raw index that was reached.
+   Bytes that do not form a valid character count as single-byte
+   characters.  */
+
+static int
+internal_function
+re_string_skip_chars (re_string_t *pstr, int new_raw_idx, wint_t *last_wc)
+{
+  mbstate_t prev_st;
+  int rawbuf_idx;
+  size_t mbclen;
+  /* The running result is a wint_t: wchar_t is not required to be able
+     to represent WEOF.  */
+  wint_t wc = WEOF;
+
+  /* Skip the characters which are not necessary to check.  */
+  for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
+       rawbuf_idx < new_raw_idx;)
+    {
+      wchar_t wc2;
+      /* RAWBUF_IDX indexes RAW_MBS itself, so the bytes left are measured
+         against RAW_LEN.  PSTR->len is relative to RAW_MBS_IDX and would
+         under-count them (possibly going negative).  */
+      int remain_len = pstr->raw_len - rawbuf_idx;
+
+      prev_st = pstr->cur_state;
+      mbclen = __mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx,
+                          remain_len, &pstr->cur_state);
+      if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
+        {
+          /* We treat these cases as a single byte character.  */
+          if (mbclen == 0 || remain_len == 0)
+            wc = L'\0';
+          else
+            wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx);
+          mbclen = 1;
+          pstr->cur_state = prev_st;
+        }
+      else
+        wc = wc2;
+      /* Then proceed to the next character.  */
+      rawbuf_idx += mbclen;
+    }
+  *last_wc = wc;
+  return rawbuf_idx;
+}
+#endif /* RE_ENABLE_I18N */
+
+/* Fill PSTR->MBS from the raw input for the REG_ICASE case: every byte is
+   run through PSTR->TRANS (when set) and lower-case bytes are replaced
+   by their upper-case form.  Work resumes at PSTR->valid_len and stops
+   at the smaller of PSTR->len and PSTR->bufs_len.  */
+
+static void
+internal_function
+build_upper_buffer (re_string_t *pstr)
+{
+  const int limit = (pstr->len < pstr->bufs_len) ? pstr->len : pstr->bufs_len;
+  int i = pstr->valid_len;
+
+  while (i < limit)
+    {
+      int ch = pstr->raw_mbs[pstr->raw_mbs_idx + i];
+
+      if (BE (pstr->trans != NULL, 0))
+        ch = pstr->trans[ch];
+      pstr->mbs[i] = islower (ch) ? toupper (ch) : ch;
+      ++i;
+    }
+
+  /* Raw and converted positions advance in lock step here.  */
+  pstr->valid_len = i;
+  pstr->valid_raw_len = i;
+}
+
+/* Copy the raw input of PSTR into PSTR->MBS, mapping every byte through
+   PSTR->TRANS.  Work resumes at PSTR->valid_len and stops at the smaller
+   of PSTR->len and PSTR->bufs_len.  */
+
+static void
+internal_function
+re_string_translate_buffer (re_string_t *pstr)
+{
+  const int limit = (pstr->len < pstr->bufs_len) ? pstr->len : pstr->bufs_len;
+  int i = pstr->valid_len;
+
+  while (i < limit)
+    {
+      int raw = pstr->raw_mbs[pstr->raw_mbs_idx + i];
+
+      pstr->mbs[i] = pstr->trans[raw];
+      ++i;
+    }
+
+  pstr->valid_len = i;
+  pstr->valid_raw_len = i;
+}
+
+/* Re-construct the buffers of PSTR so that buffer index 0 corresponds to
+ raw index IDX.
+ Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
+ convert to upper case in case of REG_ICASE, apply translation.
+ Already converted characters at or after IDX are moved to the front of
+ the buffers and reused where possible; otherwise they are skipped and
+ PSTR->tip_context is set from the character preceding IDX.
+ If IDX lies before the current PSTR->raw_mbs_idx the buffers are reset
+ first; EFLAGS (REG_NOTBOL) then selects the context of the buffer start.
+ Return REG_NOERROR, or the error from build_wcs_upper_buffer. */
+
+static reg_errcode_t
+internal_function
+re_string_reconstruct (re_string_t *pstr, int idx, int eflags)
+{
+ int offset = idx - pstr->raw_mbs_idx;
+ if (BE (offset < 0, 0))
+ {
+ /* Reset buffer. */
+#ifdef RE_ENABLE_I18N
+ if (pstr->mb_cur_max > 1)
+ memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
+#endif /* RE_ENABLE_I18N */
+ pstr->len = pstr->raw_len;
+ pstr->stop = pstr->raw_stop;
+ pstr->valid_len = 0;
+ pstr->raw_mbs_idx = 0;
+ pstr->valid_raw_len = 0;
+ pstr->offsets_needed = 0;
+ pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
+ : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
+ if (!pstr->mbs_allocated)
+ pstr->mbs = (unsigned char *) pstr->raw_mbs;
+ offset = idx;
+ }
+
+ if (BE (offset != 0, 1))
+ {
+ /* Should the already checked characters be kept? */
+ if (BE (offset < pstr->valid_raw_len, 1))
+ {
+ /* Yes, move them to the front of the buffer. */
+#ifdef RE_ENABLE_I18N
+ if (BE (pstr->offsets_needed, 0))
+ {
+ /* Binary search for the buffer index whose raw offset is OFFSET. */
+ int low = 0, high = pstr->valid_len, mid;
+ do
+ {
+ mid = (high + low) / 2;
+ if (pstr->offsets[mid] > offset)
+ high = mid;
+ else if (pstr->offsets[mid] < offset)
+ low = mid + 1;
+ else
+ break;
+ }
+ while (low < high);
+ if (pstr->offsets[mid] < offset)
+ ++mid;
+ pstr->tip_context = re_string_context_at (pstr, mid - 1,
+ eflags);
+ /* This can be quite complicated, so handle specially
+ only the common and easy case where the character with
+ different length representation of lower and upper
+ case is present at or after offset. */
+ if (pstr->valid_len > offset
+ && mid == offset && pstr->offsets[mid] == offset)
+ {
+ memmove (pstr->wcs, pstr->wcs + offset,
+ (pstr->valid_len - offset) * sizeof (wint_t));
+ memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset);
+ pstr->valid_len -= offset;
+ pstr->valid_raw_len -= offset;
+ for (low = 0; low < pstr->valid_len; low++)
+ pstr->offsets[low] = pstr->offsets[low + offset] - offset;
+ }
+ else
+ {
+ /* Otherwise, just find out how long the partial multibyte
+ character at offset is and fill it with WEOF/255. */
+ pstr->len = pstr->raw_len - idx + offset;
+ pstr->stop = pstr->raw_stop - idx + offset;
+ pstr->offsets_needed = 0;
+ while (mid > 0 && pstr->offsets[mid - 1] == offset)
+ --mid;
+ while (mid < pstr->valid_len)
+ if (pstr->wcs[mid] != WEOF)
+ break;
+ else
+ ++mid;
+ if (mid == pstr->valid_len)
+ pstr->valid_len = 0;
+ else
+ {
+ pstr->valid_len = pstr->offsets[mid] - offset;
+ if (pstr->valid_len)
+ {
+ for (low = 0; low < pstr->valid_len; ++low)
+ pstr->wcs[low] = WEOF;
+ memset (pstr->mbs, 255, pstr->valid_len);
+ }
+ }
+ pstr->valid_raw_len = pstr->valid_len;
+ }
+ }
+ else
+#endif
+ {
+ pstr->tip_context = re_string_context_at (pstr, offset - 1,
+ eflags);
+#ifdef RE_ENABLE_I18N
+ if (pstr->mb_cur_max > 1)
+ memmove (pstr->wcs, pstr->wcs + offset,
+ (pstr->valid_len - offset) * sizeof (wint_t));
+#endif /* RE_ENABLE_I18N */
+ if (BE (pstr->mbs_allocated, 0))
+ memmove (pstr->mbs, pstr->mbs + offset,
+ pstr->valid_len - offset);
+ pstr->valid_len -= offset;
+ pstr->valid_raw_len -= offset;
+#if DEBUG
+ assert (pstr->valid_len > 0);
+#endif
+ }
+ }
+ else
+ {
+ /* No, skip all characters until IDX. */
+ int prev_valid_len = pstr->valid_len;
+
+#ifdef RE_ENABLE_I18N
+ if (BE (pstr->offsets_needed, 0))
+ {
+ pstr->len = pstr->raw_len - idx + offset;
+ pstr->stop = pstr->raw_stop - idx + offset;
+ pstr->offsets_needed = 0;
+ }
+#endif
+ pstr->valid_len = 0;
+#ifdef RE_ENABLE_I18N
+ if (pstr->mb_cur_max > 1)
+ {
+ int wcs_idx;
+ wint_t wc = WEOF;
+
+ if (pstr->is_utf8)
+ {
+ const unsigned char *raw, *p, *q, *end;
+
+ /* Special case UTF-8. Multi-byte chars start with any
+ byte other than 0x80 - 0xbf. */
+ raw = pstr->raw_mbs + pstr->raw_mbs_idx;
+ end = raw + (offset - pstr->mb_cur_max);
+ if (end < pstr->raw_mbs)
+ end = pstr->raw_mbs;
+ p = raw + offset - 1;
+#ifdef _LIBC
+ /* We know the wchar_t encoding is UCS4, so for the simple
+ case, ASCII characters, skip the conversion step. */
+ if (isascii (*p) && BE (pstr->trans == NULL, 1))
+ {
+ memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
+ /* pstr->valid_len = 0; */
+ wc = (wchar_t) *p;
+ }
+ else
+#endif
+ for (; p >= end; --p)
+ if ((*p & 0xc0) != 0x80)
+ {
+ mbstate_t cur_state;
+ wchar_t wc2;
+ int mlen = raw + pstr->len - p;
+ unsigned char buf[6];
+ size_t mbclen;
+
+ q = p;
+ if (BE (pstr->trans != NULL, 0))
+ {
+ int i = mlen < 6 ? mlen : 6;
+ while (--i >= 0)
+ buf[i] = pstr->trans[p[i]];
+ q = buf;
+ }
+ /* XXX Don't use mbrtowc, we know which conversion
+ to use (UTF-8 -> UCS4). */
+ memset (&cur_state, 0, sizeof (cur_state));
+ /* Convert from Q, not P: Q points at the translated
+ copy in BUF when a translation table is in use and
+ at the raw bytes otherwise. */
+ mbclen = __mbrtowc (&wc2, (const char *) q, mlen,
+ &cur_state);
+ if (raw + offset - p <= mbclen
+ && mbclen < (size_t) -2)
+ {
+ memset (&pstr->cur_state, '\0',
+ sizeof (mbstate_t));
+ pstr->valid_len = mbclen - (raw + offset - p);
+ wc = wc2;
+ }
+ break;
+ }
+ }
+
+ if (wc == WEOF)
+ pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
+ if (wc == WEOF)
+ pstr->tip_context
+ = re_string_context_at (pstr, prev_valid_len - 1, eflags);
+ else
+ pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
+ && IS_WIDE_WORD_CHAR (wc))
+ ? CONTEXT_WORD
+ : ((IS_WIDE_NEWLINE (wc)
+ && pstr->newline_anchor)
+ ? CONTEXT_NEWLINE : 0));
+ if (BE (pstr->valid_len, 0))
+ {
+ /* IDX falls inside a multibyte character: mark its
+ remaining bytes as padding. */
+ for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
+ pstr->wcs[wcs_idx] = WEOF;
+ if (pstr->mbs_allocated)
+ memset (pstr->mbs, 255, pstr->valid_len);
+ }
+ pstr->valid_raw_len = pstr->valid_len;
+ }
+ else
+#endif /* RE_ENABLE_I18N */
+ {
+ int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
+ pstr->valid_raw_len = 0;
+ if (pstr->trans)
+ c = pstr->trans[c];
+ pstr->tip_context = (bitset_contain (pstr->word_char, c)
+ ? CONTEXT_WORD
+ : ((IS_NEWLINE (c) && pstr->newline_anchor)
+ ? CONTEXT_NEWLINE : 0));
+ }
+ }
+ if (!BE (pstr->mbs_allocated, 0))
+ pstr->mbs += offset;
+ }
+ pstr->raw_mbs_idx = idx;
+ pstr->len -= offset;
+ pstr->stop -= offset;
+
+ /* Then build the buffers. */
+#ifdef RE_ENABLE_I18N
+ if (pstr->mb_cur_max > 1)
+ {
+ if (pstr->icase)
+ {
+ reg_errcode_t ret = build_wcs_upper_buffer (pstr);
+ if (BE (ret != REG_NOERROR, 0))
+ return ret;
+ }
+ else
+ build_wcs_buffer (pstr);
+ }
+ else
+#endif /* RE_ENABLE_I18N */
+ if (BE (pstr->mbs_allocated, 0))
+ {
+ if (pstr->icase)
+ build_upper_buffer (pstr);
+ else if (pstr->trans != NULL)
+ re_string_translate_buffer (pstr);
+ }
+ else
+ pstr->valid_len = pstr->len;
+
+ pstr->cur_idx = 0;
+ return REG_NOERROR;
+}
+
+/* Return, without consuming it, the byte IDX positions past the cursor of
+   PSTR as it appears in the raw input (PSTR->raw_mbs).  The converted
+   byte from re_string_peek_byte is returned instead when PSTR has no
+   private MBS buffer, when the position is not a single-byte character,
+   or when offsets are in use and the raw byte is not ASCII.  */
+
+static unsigned char
+internal_function __attribute ((pure))
+re_string_peek_byte_case (const re_string_t *pstr, int idx)
+{
+  int pos;
+
+  /* Handle the common (easiest) cases first.  */
+  if (BE (!pstr->mbs_allocated, 1))
+    return re_string_peek_byte (pstr, idx);
+
+#ifdef RE_ENABLE_I18N
+  if (pstr->mb_cur_max > 1
+      && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
+    return re_string_peek_byte (pstr, idx);
+#endif
+
+  pos = pstr->cur_idx + idx;
+
+#ifdef RE_ENABLE_I18N
+  if (pstr->offsets_needed)
+    {
+      int raw_byte = pstr->raw_mbs[pstr->raw_mbs_idx + pstr->offsets[pos]];
+
+      /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
+         this function returns CAPITAL LETTER I instead of first byte of
+         DOTLESS SMALL LETTER I.  The latter would confuse the parser,
+         since peek_byte_case doesn't advance cur_idx in any way.  */
+      if (!isascii (raw_byte))
+        return re_string_peek_byte (pstr, idx);
+      return raw_byte;
+    }
+#endif
+
+  return pstr->raw_mbs[pstr->raw_mbs_idx + pos];
+}
+
+/* Consume and return the next byte of PSTR as it appears in the raw input
+   (PSTR->raw_mbs).  When PSTR has no private MBS buffer the converted
+   byte from re_string_fetch_byte is returned instead.
+
+   This function advances PSTR->cur_idx, so it is deliberately not marked
+   `pure': the compiler may merge or drop calls to pure functions.
+   NOTE(review): a separate prototype of this function, if one exists,
+   should not carry the attribute either -- confirm.  */
+
+static unsigned char
+internal_function
+re_string_fetch_byte_case (re_string_t *pstr)
+{
+  if (BE (!pstr->mbs_allocated, 1))
+    return re_string_fetch_byte (pstr);
+
+#ifdef RE_ENABLE_I18N
+  if (pstr->offsets_needed)
+    {
+      int off, ch;
+
+      /* For tr_TR.UTF-8 [[:islower:]] there is
+         [[: CAPITAL LETTER I WITH DOT lower:]] in mbs.  Skip
+         in that case the whole multi-byte character and return
+         the original letter.  On the other side, with
+         [[: DOTLESS SMALL LETTER I return [[:I, as doing
+         anything else would complicate things too much.  */
+
+      if (!re_string_first_byte (pstr, pstr->cur_idx))
+        return re_string_fetch_byte (pstr);
+
+      off = pstr->offsets[pstr->cur_idx];
+      ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
+
+      if (! isascii (ch))
+        return re_string_fetch_byte (pstr);
+
+      /* Step over the whole converted character, not just one byte.  */
+      re_string_skip_bytes (pstr,
+                            re_string_char_size_at (pstr, pstr->cur_idx));
+      return ch;
+    }
+#endif
+
+  return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
+}
+
+/* Release the buffers owned by PSTR.  MBS is freed only when it is a
+   private copy (mbs_allocated) rather than an alias of the input.  */
+
+static void
+internal_function
+re_string_destruct (re_string_t *pstr)
+{
+  if (pstr->mbs_allocated)
+    re_free (pstr->mbs);
+#ifdef RE_ENABLE_I18N
+  re_free (pstr->offsets);
+  re_free (pstr->wcs);
+#endif /* RE_ENABLE_I18N */
+}
+
+/* Return the context bits (CONTEXT_WORD, CONTEXT_NEWLINE, CONTEXT_ENDBUF
+   or 0) that describe position IDX of INPUT.  EFLAGS is consulted for
+   REG_NOTEOL when IDX is the end of the input.  */
+
+static unsigned int
+internal_function
+re_string_context_at (const re_string_t *input, int idx, int eflags)
+{
+  /* The character in input->mbs[-1] cannot be inspected here, so the
+     context remembered in input->tip_context is used for it.  */
+  if (BE (idx < 0, 0))
+    return input->tip_context;
+
+  if (BE (idx == input->len, 0))
+    {
+      if (eflags & REG_NOTEOL)
+        return CONTEXT_ENDBUF;
+      return CONTEXT_NEWLINE | CONTEXT_ENDBUF;
+    }
+
+#ifdef RE_ENABLE_I18N
+  if (input->mb_cur_max > 1)
+    {
+      wint_t wc;
+      int pos;
+
+      /* Walk back over WEOF padding to the slot holding the character.  */
+      for (pos = idx; input->wcs[pos] == WEOF;)
+        {
+#ifdef DEBUG
+          /* It must not happen.  */
+          assert (pos >= 0);
+#endif
+          if (--pos < 0)
+            return input->tip_context;
+        }
+
+      wc = input->wcs[pos];
+      if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc))
+        return CONTEXT_WORD;
+      if (IS_WIDE_NEWLINE (wc) && input->newline_anchor)
+        return CONTEXT_NEWLINE;
+      return 0;
+    }
+#endif
+
+  {
+    int c = re_string_byte_at (input, idx);
+
+    if (bitset_contain (input->word_char, c))
+      return CONTEXT_WORD;
+    if (IS_NEWLINE (c) && input->newline_anchor)
+      return CONTEXT_NEWLINE;
+    return 0;
+  }
+}
+
+/* Functions for set operation. */
+
+/* Make SET an empty set with room for SIZE elements.
+   Return REG_ESPACE if the element array cannot be allocated (SET->alloc
+   is still set to SIZE in that case), REG_NOERROR otherwise.  */
+
+static reg_errcode_t
+internal_function
+re_node_set_alloc (re_node_set *set, int size)
+{
+  int *elems = re_malloc (int, size);
+
+  set->elems = elems;
+  set->nelem = 0;
+  set->alloc = size;
+  return BE (elems == NULL, 0) ? REG_ESPACE : REG_NOERROR;
+}
+
+/* Initialize SET as the one-element set { ELEM }.
+   On allocation failure SET is left with zero capacity and zero
+   elements, and REG_ESPACE is returned.  */
+
+static reg_errcode_t
+internal_function
+re_node_set_init_1 (re_node_set *set, int elem)
+{
+  int *slot = re_malloc (int, 1);
+
+  set->elems = slot;
+  if (BE (slot == NULL, 0))
+    {
+      set->alloc = set->nelem = 0;
+      return REG_ESPACE;
+    }
+
+  slot[0] = elem;
+  set->alloc = 1;
+  set->nelem = 1;
+  return REG_NOERROR;
+}
+
+/* Initialize SET from ELEM1 and ELEM2, stored in ascending order; equal
+   values yield a one-element set.  Room for two elements is always
+   allocated.  Return REG_ESPACE if the allocation fails.  */
+
+static reg_errcode_t
+internal_function
+re_node_set_init_2 (re_node_set *set, int elem1, int elem2)
+{
+  int lo, hi;
+
+  set->alloc = 2;
+  set->elems = re_malloc (int, 2);
+  if (BE (set->elems == NULL, 0))
+    return REG_ESPACE;
+
+  /* Order the pair first.  */
+  if (elem1 < elem2)
+    {
+      lo = elem1;
+      hi = elem2;
+    }
+  else
+    {
+      lo = elem2;
+      hi = elem1;
+    }
+
+  set->elems[0] = lo;
+  if (lo == hi)
+    set->nelem = 1;
+  else
+    {
+      set->elems[1] = hi;
+      set->nelem = 2;
+    }
+  return REG_NOERROR;
+}
+
+/* Initialize DEST as a copy of SRC, allocating exactly SRC->nelem slots.
+   A SRC without elements makes DEST the empty set.  On allocation
+   failure DEST gets zero capacity and zero elements, and REG_ESPACE is
+   returned.  */
+
+static reg_errcode_t
+internal_function
+re_node_set_init_copy (re_node_set *dest, const re_node_set *src)
+{
+  dest->nelem = src->nelem;
+  if (src->nelem <= 0)
+    {
+      re_node_set_init_empty (dest);
+      return REG_NOERROR;
+    }
+
+  dest->alloc = dest->nelem;
+  dest->elems = re_malloc (int, dest->alloc);
+  if (BE (dest->elems == NULL, 0))
+    {
+      dest->alloc = dest->nelem = 0;
+      return REG_ESPACE;
+    }
+  memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
+  return REG_NOERROR;
+}
+
+/* Calculate the intersection of the sets SRC1 and SRC2, and merge it into
+ DEST. The return value is the error code, or REG_NOERROR on success
+ (REG_ESPACE is returned when DEST cannot be enlarged).
+ The work is done in place: the new items are first collected at the top
+ of DEST's array, walking all three sets from their highest element
+ downwards, and are then merged with the existing items.
+ Note: We assume dest->elems is NULL, when dest->alloc is 0. */
+
+static reg_errcode_t
+internal_function
+re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1,
+ const re_node_set *src2)
+{
+ int i1, i2, is, id, delta, sbase;
+ if (src1->nelem == 0 || src2->nelem == 0)
+ return REG_NOERROR;
+
+ /* We need dest->nelem + 2 * elems_in_intersection; this is a
+ conservative estimate. */
+ if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
+ {
+ int new_alloc = src1->nelem + src2->nelem + dest->alloc;
+ int *new_elems = re_realloc (dest->elems, int, new_alloc);
+ if (BE (new_elems == NULL, 0))
+ return REG_ESPACE;
+ dest->elems = new_elems;
+ dest->alloc = new_alloc;
+ }
+
+ /* Find the items in the intersection of SRC1 and SRC2, and copy
+ into the top of DEST those that are not already in DEST itself. */
+ sbase = dest->nelem + src1->nelem + src2->nelem; /* One past the staging area; it grows downwards. */
+ i1 = src1->nelem - 1;
+ i2 = src2->nelem - 1;
+ id = dest->nelem - 1;
+ for (;;)
+ {
+ if (src1->elems[i1] == src2->elems[i2])
+ {
+ /* Try to find the item in DEST. Maybe we could binary search? */
+ while (id >= 0 && dest->elems[id] > src1->elems[i1])
+ --id;
+
+ if (id < 0 || dest->elems[id] != src1->elems[i1])
+ dest->elems[--sbase] = src1->elems[i1];
+
+ if (--i1 < 0 || --i2 < 0)
+ break;
+ }
+
+ /* Lower the highest of the two items. */
+ else if (src1->elems[i1] < src2->elems[i2])
+ {
+ if (--i2 < 0)
+ break;
+ }
+ else
+ {
+ if (--i1 < 0)
+ break;
+ }
+ }
+
+ id = dest->nelem - 1;
+ is = dest->nelem + src1->nelem + src2->nelem - 1;
+ delta = is - sbase + 1; /* Number of staged items still to be placed. */
+
+ /* Now copy. When DELTA becomes zero, the remaining
+ DEST elements are already in place; this is more or
+ less the same loop that is in re_node_set_merge. */
+ dest->nelem += delta;
+ if (delta > 0 && id >= 0)
+ for (;;)
+ {
+ if (dest->elems[is] > dest->elems[id])
+ {
+ /* Copy from the top. */
+ dest->elems[id + delta--] = dest->elems[is--];
+ if (delta == 0)
+ break;
+ }
+ else
+ {
+ /* Slide from the bottom. */
+ dest->elems[id + delta] = dest->elems[id];
+ if (--id < 0)
+ break;
+ }
+ }
+
+ /* Copy remaining SRC elements. */
+ memcpy (dest->elems, dest->elems + sbase, delta * sizeof (int));
+
+ return REG_NOERROR;
+}
+
+/* Initialize DEST as the union of the sets SRC1 and SRC2; either may be
+   NULL or empty.  The merge walks both element arrays in ascending
+   order and stores an element present in both only once.
+   Return REG_NOERROR on success or the error code otherwise.  */
+
+static reg_errcode_t
+internal_function
+re_node_set_init_union (re_node_set *dest, const re_node_set *src1,
+                        const re_node_set *src2)
+{
+  int i1, i2, id;
+
+  /* Unless both inputs contribute elements there is nothing to merge.  */
+  if (src1 == NULL || src1->nelem <= 0 || src2 == NULL || src2->nelem <= 0)
+    {
+      if (src1 != NULL && src1->nelem > 0)
+        return re_node_set_init_copy (dest, src1);
+      if (src2 != NULL && src2->nelem > 0)
+        return re_node_set_init_copy (dest, src2);
+      re_node_set_init_empty (dest);
+      return REG_NOERROR;
+    }
+
+  dest->alloc = src1->nelem + src2->nelem;
+  dest->elems = re_malloc (int, dest->alloc);
+  if (BE (dest->elems == NULL, 0))
+    return REG_ESPACE;
+
+  i1 = i2 = id = 0;
+  while (i1 < src1->nelem && i2 < src2->nelem)
+    {
+      if (src1->elems[i1] > src2->elems[i2])
+        dest->elems[id++] = src2->elems[i2++];
+      else
+        {
+          /* Equal items are taken from SRC1 and skipped in SRC2.  */
+          if (src1->elems[i1] == src2->elems[i2])
+            ++i2;
+          dest->elems[id++] = src1->elems[i1++];
+        }
+    }
+
+  /* At most one input still has items left; append them unchanged.  */
+  if (i1 < src1->nelem)
+    {
+      memcpy (dest->elems + id, src1->elems + i1,
+              (src1->nelem - i1) * sizeof (int));
+      id += src1->nelem - i1;
+    }
+  else if (i2 < src2->nelem)
+    {
+      memcpy (dest->elems + id, src2->elems + i2,
+              (src2->nelem - i2) * sizeof (int));
+      id += src2->nelem - i2;
+    }
+  dest->nelem = id;
+  return REG_NOERROR;
+}
+
+/* Calculate the union set of the sets DEST and SRC, and store it in
+ DEST. The return value is the error code, or REG_NOERROR on success
+ (REG_ESPACE is returned when DEST cannot be enlarged).
+ A NULL or empty SRC leaves DEST unchanged. The work is done in place:
+ the items of SRC missing from DEST are first collected at the top of
+ DEST's array and are then merged with the existing items, walking from
+ the highest element downwards. */
+
+static reg_errcode_t
+internal_function
+re_node_set_merge (re_node_set *dest, const re_node_set *src)
+{
+ int is, id, sbase, delta;
+ if (src == NULL || src->nelem == 0)
+ return REG_NOERROR;
+ if (dest->alloc < 2 * src->nelem + dest->nelem) /* Room for DEST, the staging area and the final growth. */
+ {
+ int new_alloc = 2 * (src->nelem + dest->alloc);
+ int *new_buffer = re_realloc (dest->elems, int, new_alloc);
+ if (BE (new_buffer == NULL, 0))
+ return REG_ESPACE;
+ dest->elems = new_buffer;
+ dest->alloc = new_alloc;
+ }
+
+ if (BE (dest->nelem == 0, 0))
+ {
+ dest->nelem = src->nelem;
+ memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
+ return REG_NOERROR;
+ }
+
+ /* Copy into the top of DEST the items of SRC that are not
+ found in DEST. Maybe we could binary search in DEST? */
+ for (sbase = dest->nelem + 2 * src->nelem,
+ is = src->nelem - 1, id = dest->nelem - 1; is >= 0 && id >= 0; )
+ {
+ if (dest->elems[id] == src->elems[is])
+ is--, id--;
+ else if (dest->elems[id] < src->elems[is])
+ dest->elems[--sbase] = src->elems[is--];
+ else /* if (dest->elems[id] > src->elems[is]) */
+ --id;
+ }
+
+ if (is >= 0)
+ {
+ /* If DEST is exhausted, the remaining items of SRC must be unique. */
+ sbase -= is + 1;
+ memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (int));
+ }
+
+ id = dest->nelem - 1;
+ is = dest->nelem + 2 * src->nelem - 1;
+ delta = is - sbase + 1; /* Number of staged items still to be placed. */
+ if (delta == 0)
+ return REG_NOERROR;
+
+ /* Now copy. When DELTA becomes zero, the remaining
+ DEST elements are already in place. */
+ dest->nelem += delta;
+ for (;;)
+ {
+ if (dest->elems[is] > dest->elems[id])
+ {
+ /* Copy from the top. */
+ dest->elems[id + delta--] = dest->elems[is--];
+ if (delta == 0)
+ break;
+ }
+ else
+ {
+ /* Slide from the bottom. */
+ dest->elems[id + delta] = dest->elems[id];
+ if (--id < 0)
+ {
+ /* Copy remaining SRC elements. */
+ memcpy (dest->elems, dest->elems + sbase,
+ delta * sizeof (int));
+ break;
+ }
+ }
+ }
+
+ return REG_NOERROR;
+}
+
+/* Insert the new element ELEM into the sorted node set SET, keeping
+   the elements in ascending order.
+   SET should not already contain ELEM.
+   Return 1 on success and -1 if memory could not be allocated.  When
+   growing the buffer fails, SET is left exactly as it was: the recorded
+   capacity is only updated once the reallocation has succeeded.  */
+
+static int
+internal_function
+re_node_set_insert (re_node_set *set, int elem)
+{
+  int idx;
+
+  /* In case the set has no storage yet.  */
+  if (set->alloc == 0)
+    {
+      if (BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1))
+        return 1;
+      else
+        return -1;
+    }
+
+  if (BE (set->nelem == 0, 0))
+    {
+      /* We already guaranteed above that set->alloc != 0.  */
+      set->elems[0] = elem;
+      ++set->nelem;
+      return 1;
+    }
+
+  /* Grow the buffer if it is full.  */
+  if (set->alloc == set->nelem)
+    {
+      int new_alloc = set->alloc * 2;
+      int *new_elems = re_realloc (set->elems, int, new_alloc);
+      if (BE (new_elems == NULL, 0))
+        return -1;
+      set->elems = new_elems;
+      set->alloc = new_alloc;
+    }
+
+  /* Move the elements which follow the new element.  Test the
+     first element separately to skip a check in the inner loop.  */
+  if (elem < set->elems[0])
+    {
+      /* ELEM becomes the first item: shift everything up by one.  */
+      for (idx = set->nelem; idx > 0; idx--)
+        set->elems[idx] = set->elems[idx - 1];
+    }
+  else
+    {
+      /* elems[0] <= ELEM, so the scan stops at index 1 at the latest.  */
+      for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
+        set->elems[idx] = set->elems[idx - 1];
+    }
+
+  /* Insert the new element.  */
+  set->elems[idx] = elem;
+  ++set->nelem;
+  return 1;
+}
+
+/* Append the new element ELEM to the node set SET.
+   SET should not already have any element greater than or equal to ELEM,
+   so that appending keeps the set sorted.
+   Return 1 on success and -1 if memory could not be allocated.  When
+   growing the buffer fails, SET is left exactly as it was: the recorded
+   capacity is only updated once the reallocation has succeeded.  */
+
+static int
+internal_function
+re_node_set_insert_last (re_node_set *set, int elem)
+{
+  /* Grow the buffer if it is full.  The "+ 1" makes this work for a set
+     whose capacity is still zero.  */
+  if (set->alloc == set->nelem)
+    {
+      int new_alloc = (set->alloc + 1) * 2;
+      int *new_elems = re_realloc (set->elems, int, new_alloc);
+      if (BE (new_elems == NULL, 0))
+        return -1;
+      set->elems = new_elems;
+      set->alloc = new_alloc;
+    }
+
+  /* Insert the new element.  */
+  set->elems[set->nelem++] = elem;
+  return 1;
+}
+
+/* Tell whether the node sets SET1 and SET2 hold the same elements at the
+   same positions.  Return 1 if they do; return 0 if they differ or if
+   either pointer is NULL.  */
+
+static int
+internal_function __attribute ((pure))
+re_node_set_compare (const re_node_set *set1, const re_node_set *set2)
+{
+  int pos;
+
+  if (set1 == NULL || set2 == NULL)
+    return 0;
+  if (set1->nelem != set2->nelem)
+    return 0;
+
+  /* Walk from the last element down to the first.  */
+  pos = set1->nelem;
+  while (--pos >= 0)
+    {
+      if (set1->elems[pos] != set2->elems[pos])
+        return 0;
+    }
+  return 1;
+}
+
+/* Look ELEM up in the sorted node set SET.  Return its index plus one
+   when it is present and 0 when it is not (or when SET is empty).  */
+
+static int
+internal_function __attribute ((pure))
+re_node_set_contains (const re_node_set *set, int elem)
+{
+  unsigned int low, high;
+
+  if (set->nelem <= 0)
+    return 0;
+
+  /* Binary search for the first position whose element is >= ELEM.  */
+  low = 0;
+  high = set->nelem - 1;
+  while (low < high)
+    {
+      unsigned int middle = (low + high) / 2;
+      if (set->elems[middle] < elem)
+        low = middle + 1;
+      else
+        high = middle;
+    }
+
+  if (set->elems[low] != elem)
+    return 0;
+  return low + 1;
+}
+
+/* Remove the element at position IDX from SET, closing the gap by moving
+   the following elements down one slot.  An IDX outside the set is
+   silently ignored.  */
+static void
+internal_function
+re_node_set_remove_at (re_node_set *set, int idx)
+{
+  int pos = idx;
+
+  if (!(idx >= 0 && idx < set->nelem))
+    return;
+
+  --set->nelem;
+  while (pos < set->nelem)
+    {
+      set->elems[pos] = set->elems[pos + 1];
+      pos++;
+    }
+}
+
+
+/* Add the token TOKEN to dfa->nodes, and return the index of the token.
+   Return -1 if an error occurs (size overflow or out of memory).
+
+   When the node arrays are full they are all doubled.  Each array is
+   stored back into DFA as soon as its reallocation succeeds, so that on
+   a later failure DFA never keeps a pointer to a buffer that realloc
+   has already released; dfa->nodes_alloc is only raised once every
+   array has been grown.  */
+
+static int
+internal_function
+re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
+{
+  int type = token.type;
+  if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
+    {
+      size_t new_nodes_alloc = dfa->nodes_alloc * 2;
+      /* Largest element among the arrays grown below (re_node_set is at
+         least as large as int).  */
+      size_t max_elem_size = (sizeof (re_token_t) > sizeof (re_node_set)
+                              ? sizeof (re_token_t) : sizeof (re_node_set));
+      int *new_nexts, *new_indices;
+      re_node_set *new_edests, *new_eclosures;
+      re_token_t *new_nodes;
+
+      /* Avoid overflows, both in the doubling itself and in the byte
+         counts computed by re_realloc.  */
+      if (BE (new_nodes_alloc < dfa->nodes_alloc, 0)
+          || BE (new_nodes_alloc > SIZE_MAX / max_elem_size, 0))
+        return -1;
+
+      new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
+      if (BE (new_nodes == NULL, 0))
+        return -1;
+      dfa->nodes = new_nodes;
+
+      new_nexts = re_realloc (dfa->nexts, int, new_nodes_alloc);
+      if (BE (new_nexts == NULL, 0))
+        return -1;
+      dfa->nexts = new_nexts;
+
+      new_indices = re_realloc (dfa->org_indices, int, new_nodes_alloc);
+      if (BE (new_indices == NULL, 0))
+        return -1;
+      dfa->org_indices = new_indices;
+
+      new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
+      if (BE (new_edests == NULL, 0))
+        return -1;
+      dfa->edests = new_edests;
+
+      new_eclosures = re_realloc (dfa->eclosures, re_node_set,
+                                  new_nodes_alloc);
+      if (BE (new_eclosures == NULL, 0))
+        return -1;
+      dfa->eclosures = new_eclosures;
+
+      dfa->nodes_alloc = new_nodes_alloc;
+    }
+  dfa->nodes[dfa->nodes_len] = token;
+  dfa->nodes[dfa->nodes_len].constraint = 0;
+#ifdef RE_ENABLE_I18N
+  dfa->nodes[dfa->nodes_len].accept_mb =
+    (type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET;
+#endif
+  /* The new node starts without a successor and with empty edge sets.  */
+  dfa->nexts[dfa->nodes_len] = -1;
+  re_node_set_init_empty (dfa->edests + dfa->nodes_len);
+  re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
+  return dfa->nodes_len++;
+}
+
+/* Compute the hash of a state: the number of nodes in NODES plus CONTEXT
+   plus the sum of all node indices, in unsigned arithmetic.  */
+static inline unsigned int
+internal_function
+calc_state_hash (const re_node_set *nodes, unsigned int context)
+{
+  unsigned int sum = nodes->nelem + context;
+  int remaining = nodes->nelem;
+
+  /* Unsigned addition wraps, so the summation order is irrelevant.  */
+  while (remaining-- > 0)
+    sum += nodes->elems[remaining];
+  return sum;
+}
+
+/* Look up the context-independent state whose node set equals NODES.
+   If DFA already holds such a state, return it; otherwise create a new
+   one and return that.  When creation fails, NULL is returned and
+   REG_ESPACE is stored in *ERR.
+   Note: - NULL also denotes the invalid state: for an empty NODES the
+           result is NULL with *ERR set to REG_NOERROR.
+         - A non-NULL value is never returned when an error occurred.  */
+
+static re_dfastate_t *
+internal_function
+re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa,
+                  const re_node_set *nodes)
+{
+  unsigned int hash;
+  struct re_state_table_entry *bucket;
+  re_dfastate_t *created;
+  int slot;
+
+  if (BE (nodes->nelem == 0, 0))
+    {
+      *err = REG_NOERROR;
+      return NULL;
+    }
+
+  hash = calc_state_hash (nodes, 0);
+  bucket = &dfa->state_table[hash & dfa->state_hash_mask];
+
+  /* Scan the bucket; the hash comparison filters out most candidates
+     before the node sets are compared.  */
+  for (slot = 0; slot < bucket->num; ++slot)
+    {
+      re_dfastate_t *candidate = bucket->array[slot];
+      if (candidate->hash == hash
+          && re_node_set_compare (&candidate->nodes, nodes))
+        return candidate;
+    }
+
+  /* No matching state exists yet, so create one.  */
+  created = create_ci_newstate (dfa, nodes, hash);
+  if (BE (created == NULL, 0))
+    *err = REG_ESPACE;
+
+  return created;
+}
+
+/* Look up the state whose entrance node set equals NODES and whose
+   context equals CONTEXT.  If DFA already holds such a state, return it;
+   otherwise create a new one and return that.  When creation fails, NULL
+   is returned and REG_ESPACE is stored in *ERR.
+   Note: - NULL also denotes the invalid state: for an empty NODES the
+           result is NULL with *ERR set to REG_NOERROR.
+         - A non-NULL value is never returned when an error occurred.  */
+
+static re_dfastate_t *
+internal_function
+re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa,
+                          const re_node_set *nodes, unsigned int context)
+{
+  unsigned int hash;
+  struct re_state_table_entry *bucket;
+  re_dfastate_t *created;
+  int slot;
+
+  if (nodes->nelem == 0)
+    {
+      *err = REG_NOERROR;
+      return NULL;
+    }
+
+  hash = calc_state_hash (nodes, context);
+  bucket = &dfa->state_table[hash & dfa->state_hash_mask];
+
+  for (slot = 0; slot < bucket->num; ++slot)
+    {
+      re_dfastate_t *candidate = bucket->array[slot];
+
+      if (candidate->hash != hash)
+        continue;
+      if (candidate->context != context)
+        continue;
+      if (re_node_set_compare (candidate->entrance_nodes, nodes))
+        return candidate;
+    }
+
+  /* No matching state exists in `dfa' yet, so create one.  */
+  created = create_cd_newstate (dfa, nodes, context, hash);
+  if (BE (created == NULL, 0))
+    *err = REG_ESPACE;
+
+  return created;
+}
+
+/* Finish initialization of the new state NEWSTATE and, using its hash
+   value HASH, append it to the matching bucket of DFA's state table.
+   This records HASH in the state and builds its set of non-epsilon nodes.
+   Return REG_NOERROR on success and REG_ESPACE if any allocation fails;
+   on failure NEWSTATE is not registered and the caller still owns it.  */
+
+static reg_errcode_t
+register_state (const re_dfa_t *dfa, re_dfastate_t *newstate,
+                unsigned int hash)
+{
+  struct re_state_table_entry *spot;
+  reg_errcode_t err;
+  int i;
+
+  newstate->hash = hash;
+  err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
+  if (BE (err != REG_NOERROR, 0))
+    return REG_ESPACE;
+  for (i = 0; i < newstate->nodes.nelem; i++)
+    {
+      int elem = newstate->nodes.elems[i];
+      if (IS_EPSILON_NODE (dfa->nodes[elem].type))
+        continue;
+      /* The insertion may need to grow the set; do not ignore a failure.  */
+      if (BE (re_node_set_insert_last (&newstate->non_eps_nodes, elem) < 0,
+              0))
+        return REG_ESPACE;
+    }
+
+  spot = dfa->state_table + (hash & dfa->state_hash_mask);
+  if (BE (spot->alloc <= spot->num, 0))
+    {
+      int new_alloc = 2 * spot->num + 2;
+      re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *,
+                                              new_alloc);
+      if (BE (new_array == NULL, 0))
+        return REG_ESPACE;
+      spot->array = new_array;
+      spot->alloc = new_alloc;
+    }
+  spot->array[spot->num++] = newstate;
+  return REG_NOERROR;
+}
+
+/* Release STATE together with everything it owns: its node sets, its
+   transition tables and the structure itself.  */
+static void
+free_state (re_dfastate_t *state)
+{
+  re_node_set *entrance = state->entrance_nodes;
+
+  re_node_set_free (&state->non_eps_nodes);
+  re_node_set_free (&state->inveclosure);
+
+  /* The entrance set is a separate allocation only when it is not
+     simply an alias of the state's own node set.  */
+  if (entrance != &state->nodes)
+    {
+      re_node_set_free (entrance);
+      re_free (entrance);
+    }
+
+  re_node_set_free (&state->nodes);
+  re_free (state->trtable);
+  re_free (state->word_trtable);
+  re_free (state);
+}
+
+/* Create a new state that is independent of contexts, built from NODES,
+   and register it in DFA under HASH.
+   Return the new state on success, NULL if memory runs out.  */
+
+static re_dfastate_t *
+internal_function
+create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
+                    unsigned int hash)
+{
+  int i;
+  re_dfastate_t *newstate;
+
+  newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
+  if (BE (newstate == NULL, 0))
+    return NULL;
+
+  if (BE (re_node_set_init_copy (&newstate->nodes, nodes) != REG_NOERROR, 0))
+    {
+      re_free (newstate);
+      return NULL;
+    }
+
+  newstate->entrance_nodes = &newstate->nodes;
+
+  /* Derive the state flags from the nodes it contains.  */
+  for (i = 0; i < nodes->nelem; i++)
+    {
+      re_token_t *node = &dfa->nodes[nodes->elems[i]];
+      re_token_type_t type = node->type;
+
+      /* Plain characters without a constraint contribute nothing.  */
+      if (type == CHARACTER && !node->constraint)
+        continue;
+#ifdef RE_ENABLE_I18N
+      newstate->accept_mb |= node->accept_mb;
+#endif /* RE_ENABLE_I18N */
+
+      switch (type)
+        {
+        case END_OF_RE:
+          /* A state holding the halt node is a halt state.  */
+          newstate->halt = 1;
+          break;
+        case OP_BACK_REF:
+          newstate->has_backref = 1;
+          break;
+        case ANCHOR:
+          newstate->has_constraint = 1;
+          break;
+        default:
+          if (node->constraint)
+            newstate->has_constraint = 1;
+          break;
+        }
+    }
+
+  if (BE (register_state (dfa, newstate, hash) != REG_NOERROR, 0))
+    {
+      free_state (newstate);
+      return NULL;
+    }
+  return newstate;
+}
+
+/* Create a new state that depends on the context CONTEXT, built from
+   NODES, and register it in DFA under HASH.
+   Nodes whose constraint is not satisfied by CONTEXT are removed from the
+   state's own node set; in that case the unfiltered set is kept in a
+   separately allocated entrance_nodes copy.
+   Return the new state on success, NULL if memory runs out.  */
+
+static re_dfastate_t *
+internal_function
+create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
+                    unsigned int context, unsigned int hash)
+{
+  int i, nctx_nodes = 0;
+  reg_errcode_t err;
+  re_dfastate_t *newstate;
+
+  newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
+  if (BE (newstate == NULL, 0))
+    return NULL;
+  err = re_node_set_init_copy (&newstate->nodes, nodes);
+  if (BE (err != REG_NOERROR, 0))
+    {
+      re_free (newstate);
+      return NULL;
+    }
+
+  newstate->context = context;
+  newstate->entrance_nodes = &newstate->nodes;
+
+  for (i = 0 ; i < nodes->nelem ; i++)
+    {
+      re_token_t *node = dfa->nodes + nodes->elems[i];
+      re_token_type_t type = node->type;
+      unsigned int constraint = node->constraint;
+
+      if (type == CHARACTER && !constraint)
+        continue;
+#ifdef RE_ENABLE_I18N
+      newstate->accept_mb |= node->accept_mb;
+#endif /* RE_ENABLE_I18N */
+
+      /* If the state has the halt node, the state is a halt state.  */
+      if (type == END_OF_RE)
+        newstate->halt = 1;
+      else if (type == OP_BACK_REF)
+        newstate->has_backref = 1;
+
+      if (constraint)
+        {
+          if (newstate->entrance_nodes == &newstate->nodes)
+            {
+              /* First constrained node: keep an unfiltered copy of NODES.
+                 entrance_nodes is only switched to the copy once it is
+                 complete, so that free_state never sees a NULL or
+                 half-built entrance set on the error paths.  */
+              re_node_set *entrance = re_malloc (re_node_set, 1);
+              if (BE (entrance == NULL, 0))
+                {
+                  free_state (newstate);
+                  return NULL;
+                }
+              /* Assumes a failed copy leaves nothing allocated in
+                 ENTRANCE, so releasing the container is sufficient.  */
+              if (BE (re_node_set_init_copy (entrance, nodes)
+                      != REG_NOERROR, 0))
+                {
+                  re_free (entrance);
+                  free_state (newstate);
+                  return NULL;
+                }
+              newstate->entrance_nodes = entrance;
+              nctx_nodes = 0;
+              newstate->has_constraint = 1;
+            }
+
+          if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
+            {
+              /* NCTX_NODES items were already removed, so the node that
+                 was at index I is now at I - NCTX_NODES.  */
+              re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
+              ++nctx_nodes;
+            }
+        }
+    }
+  err = register_state (dfa, newstate, hash);
+  if (BE (err != REG_NOERROR, 0))
+    {
+      free_state (newstate);
+      newstate = NULL;
+    }
+  return newstate;
+}
diff --git a/gnu_regex/regex_internal.h b/gnu_regex/regex_internal.h
new file mode 100644
index 0000000..71c4a38
--- /dev/null
+++ b/gnu_regex/regex_internal.h
@@ -0,0 +1,773 @@
+/* Extended regular expression matching and search library.
+ Copyright (C) 2002-2005, 2007, 2008 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#ifndef _REGEX_INTERNAL_H
+#define _REGEX_INTERNAL_H 1
+
+#include <assert.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if defined HAVE_LANGINFO_H || defined HAVE_LANGINFO_CODESET || defined _LIBC
+# include <langinfo.h>
+#endif
+#if defined HAVE_LOCALE_H || defined _LIBC
+# include <locale.h>
+#endif
+#if defined HAVE_WCHAR_H || defined _LIBC
+# include <wchar.h>
+#endif /* HAVE_WCHAR_H || _LIBC */
+#if defined HAVE_WCTYPE_H || defined _LIBC
+# include <wctype.h>
+#endif /* HAVE_WCTYPE_H || _LIBC */
+#if defined HAVE_STDBOOL_H || defined _LIBC
+# include <stdbool.h>
+#endif /* HAVE_STDBOOL_H || _LIBC */
+#if defined HAVE_STDINT_H || defined _LIBC
+# include <stdint.h>
+#endif /* HAVE_STDINT_H || _LIBC */
+#if defined _LIBC
+# include <bits/libc-lock.h>
+#else
+# define __libc_lock_define(CLASS,NAME)
+# define __libc_lock_init(NAME) do { } while (0)
+# define __libc_lock_lock(NAME) do { } while (0)
+# define __libc_lock_unlock(NAME) do { } while (0)
+#endif
+
+/* In case that the system doesn't have isblank(). */
+#if !defined _LIBC && !defined HAVE_ISBLANK && !defined isblank
+# define isblank(ch) ((ch) == ' ' || (ch) == '\t')
+#endif
+
+#ifdef _LIBC
+# ifndef _RE_DEFINE_LOCALE_FUNCTIONS
+# define _RE_DEFINE_LOCALE_FUNCTIONS 1
+# include <locale/localeinfo.h>
+# include <locale/elem-hash.h>
+# include <locale/coll-lookup.h>
+# endif
+#endif
+
+/* This is for other GNU distributions with internationalized messages. */
+#if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC
+# include <libintl.h>
+# ifdef _LIBC
+# undef gettext
+# define gettext(msgid) \
+ INTUSE(__dcgettext) (_libc_intl_domainname, msgid, LC_MESSAGES)
+# endif
+#else
+# define gettext(msgid) (msgid)
+#endif
+
+#ifndef gettext_noop
+/* This define is so xgettext can find the internationalizable
+ strings. */
+# define gettext_noop(String) String
+#endif
+
+/* Fallback for systems whose headers do not define SIZE_MAX. */
+#ifndef SIZE_MAX
+# define SIZE_MAX ((size_t) -1)
+#endif
+
+#if (defined MB_CUR_MAX && HAVE_LOCALE_H && HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_WCRTOMB && HAVE_MBRTOWC && HAVE_WCSCOLL) || _LIBC
+# define RE_ENABLE_I18N
+#endif
+
+#if __GNUC__ >= 3
+# define BE(expr, val) __builtin_expect (expr, val)
+#else
+# define BE(expr, val) (expr)
+# define inline
+#endif
+
+/* Number of distinct single-byte character values. */
+#define SBC_MAX 256
+
+#define COLL_ELEM_LEN_MAX 8
+
+/* The character which represents newline. */
+#define NEWLINE_CHAR '\n'
+#define WIDE_NEWLINE_CHAR L'\n'
+
+/* Rename to standard API for using out of glibc. */
+#ifndef _LIBC
+# define __wctype wctype
+# define __iswctype iswctype
+# define __btowc btowc
+# define __mbrtowc mbrtowc
+# define __mempcpy mempcpy
+# define __wcrtomb wcrtomb
+# define __regfree regfree
+# define attribute_hidden
+#endif /* not _LIBC */
+
+#ifdef __GNUC__
+# define __attribute(arg) __attribute__ (arg)
+#else
+# define __attribute(arg)
+#endif
+
+extern const char __re_error_msgid[] attribute_hidden;
+extern const size_t __re_error_msgid_idx[] attribute_hidden;
+
+/* An integer used to represent a set of bits.  It must be unsigned,
+   and must be at least as wide as unsigned int.  */
+typedef unsigned long int bitset_word_t;
+/* All bits set in a bitset_word_t.  */
+#define BITSET_WORD_MAX ULONG_MAX
+/* Number of bits in a bitset_word_t.  */
+#define BITSET_WORD_BITS (sizeof (bitset_word_t) * CHAR_BIT)
+/* Number of bitset_word_t in a bit_set.  */
+#define BITSET_WORDS (SBC_MAX / BITSET_WORD_BITS)
+/* A set with one bit per single-byte character value.  */
+typedef bitset_word_t bitset_t[BITSET_WORDS];
+typedef bitset_word_t *re_bitset_ptr_t;
+typedef const bitset_word_t *re_const_bitset_ptr_t;
+
+/* Single-bit operations on a bit set.  SET and I are fully parenthesized
+   so that expressions such as `c + 1' can be passed safely; note that I
+   is still evaluated twice, so it must not have side effects.  */
+#define bitset_set(set,i) \
+  ((set)[(i) / BITSET_WORD_BITS] \
+   |= (bitset_word_t) 1 << ((i) % BITSET_WORD_BITS))
+#define bitset_clear(set,i) \
+  ((set)[(i) / BITSET_WORD_BITS] \
+   &= ~((bitset_word_t) 1 << ((i) % BITSET_WORD_BITS)))
+#define bitset_contain(set,i) \
+  ((set)[(i) / BITSET_WORD_BITS] \
+   & ((bitset_word_t) 1 << ((i) % BITSET_WORD_BITS)))
+/* Whole-set operations; each works on sizeof (bitset_t) bytes.  */
+#define bitset_empty(set) memset ((set), '\0', sizeof (bitset_t))
+#define bitset_set_all(set) memset ((set), '\xff', sizeof (bitset_t))
+#define bitset_copy(dest,src) memcpy ((dest), (src), sizeof (bitset_t))
+
+/* Context constraint bits that can be attached to a node (see the
+ `constraint' bit-field of re_token_t). The PREV_* bits are tested by
+ NOT_SATISFY_PREV_CONSTRAINT and the NEXT_* bits by
+ NOT_SATISFY_NEXT_CONSTRAINT; the two *_DELIM bits are examined by
+ neither of those macros. */
+#define PREV_WORD_CONSTRAINT 0x0001
+#define PREV_NOTWORD_CONSTRAINT 0x0002
+#define NEXT_WORD_CONSTRAINT 0x0004
+#define NEXT_NOTWORD_CONSTRAINT 0x0008
+#define PREV_NEWLINE_CONSTRAINT 0x0010
+#define NEXT_NEWLINE_CONSTRAINT 0x0020
+#define PREV_BEGBUF_CONSTRAINT 0x0040
+#define NEXT_ENDBUF_CONSTRAINT 0x0080
+#define WORD_DELIM_CONSTRAINT 0x0100
+#define NOT_WORD_DELIM_CONSTRAINT 0x0200
+
+/* Named combinations of the constraint bits above. This is the type of
+ the `ctx_type' operand that re_token_t carries for ANCHOR tokens. */
+typedef enum
+{
+ INSIDE_WORD = PREV_WORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
+ WORD_FIRST = PREV_NOTWORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
+ WORD_LAST = PREV_WORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
+ INSIDE_NOTWORD = PREV_NOTWORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
+ LINE_FIRST = PREV_NEWLINE_CONSTRAINT,
+ LINE_LAST = NEXT_NEWLINE_CONSTRAINT,
+ BUF_FIRST = PREV_BEGBUF_CONSTRAINT,
+ BUF_LAST = NEXT_ENDBUF_CONSTRAINT,
+ WORD_DELIM = WORD_DELIM_CONSTRAINT,
+ NOT_WORD_DELIM = NOT_WORD_DELIM_CONSTRAINT
+} re_context_type;
+
+/* A set of node indices stored as a growable array of int. The lookup
+ and insertion helpers (re_node_set_contains, re_node_set_insert)
+ treat ELEMS as sorted in ascending order. */
+typedef struct
+{
+ int alloc; /* Number of ints ELEMS has room for. */
+ int nelem; /* Number of ints currently in use. */
+ int *elems; /* The elements themselves. */
+} re_node_set;
+
+typedef enum
+{
+ NON_TYPE = 0,
+
+ /* Node type, These are used by token, node, tree. */
+ CHARACTER = 1,
+ END_OF_RE = 2,
+ SIMPLE_BRACKET = 3,
+ OP_BACK_REF = 4,
+ OP_PERIOD = 5,
+#ifdef RE_ENABLE_I18N
+ COMPLEX_BRACKET = 6,
+ OP_UTF8_PERIOD = 7,
+#endif /* RE_ENABLE_I18N */
+
+ /* We define EPSILON_BIT as a macro so that OP_OPEN_SUBEXP is used
+ when the debugger shows values of this enum type. */
+#define EPSILON_BIT 8
+ OP_OPEN_SUBEXP = EPSILON_BIT | 0,
+ OP_CLOSE_SUBEXP = EPSILON_BIT | 1,
+ OP_ALT = EPSILON_BIT | 2,
+ OP_DUP_ASTERISK = EPSILON_BIT | 3,
+ ANCHOR = EPSILON_BIT | 4,
+
+ /* Tree type, these are used only by tree. */
+ CONCAT = 16,
+ SUBEXP = 17,
+
+ /* Token type, these are used only by token. */
+ OP_DUP_PLUS = 18,
+ OP_DUP_QUESTION,
+ OP_OPEN_BRACKET,
+ OP_CLOSE_BRACKET,
+ OP_CHARSET_RANGE,
+ OP_OPEN_DUP_NUM,
+ OP_CLOSE_DUP_NUM,
+ OP_NON_MATCH_LIST,
+ OP_OPEN_COLL_ELEM,
+ OP_CLOSE_COLL_ELEM,
+ OP_OPEN_EQUIV_CLASS,
+ OP_CLOSE_EQUIV_CLASS,
+ OP_OPEN_CHAR_CLASS,
+ OP_CLOSE_CHAR_CLASS,
+ OP_WORD,
+ OP_NOTWORD,
+ OP_SPACE,
+ OP_NOTSPACE,
+ BACK_SLASH
+
+} re_token_type_t;
+
+#ifdef RE_ENABLE_I18N
+typedef struct
+{
+ /* Multibyte characters. */
+ wchar_t *mbchars;
+
+ /* Collating symbols. */
+# ifdef _LIBC
+ int32_t *coll_syms;
+# endif
+
+ /* Equivalence classes. */
+# ifdef _LIBC
+ int32_t *equiv_classes;
+# endif
+
+ /* Range expressions. */
+# ifdef _LIBC
+ uint32_t *range_starts;
+ uint32_t *range_ends;
+# else /* not _LIBC */
+ wchar_t *range_starts;
+ wchar_t *range_ends;
+# endif /* not _LIBC */
+
+ /* Character classes. */
+ wctype_t *char_classes;
+
+ /* If this character set is the non-matching list. */
+ unsigned int non_match : 1;
+
+ /* # of multibyte characters. */
+ int nmbchars;
+
+ /* # of collating symbols. */
+ int ncoll_syms;
+
+ /* # of equivalence classes. */
+ int nequiv_classes;
+
+ /* # of range expressions. */
+ int nranges;
+
+ /* # of character classes. */
+ int nchar_classes;
+} re_charset_t;
+#endif /* RE_ENABLE_I18N */
+
+/* A token of the pattern. This is also the element type of the `nodes'
+ array of re_dfa_t. OPR holds the operand; which member is meaningful
+ depends on TYPE, as noted on each member. */
+typedef struct
+{
+ union
+ {
+ unsigned char c; /* for CHARACTER */
+ re_bitset_ptr_t sbcset; /* for SIMPLE_BRACKET */
+#ifdef RE_ENABLE_I18N
+ re_charset_t *mbcset; /* for COMPLEX_BRACKET */
+#endif /* RE_ENABLE_I18N */
+ int idx; /* for BACK_REF */
+ re_context_type ctx_type; /* for ANCHOR */
+ } opr;
+ /* With GCC the type is packed into 8 bits; every re_token_type_t value
+ fits, the largest explicitly numbered group starting at 18. */
+#if __GNUC__ >= 2
+ re_token_type_t type : 8;
+#else
+ re_token_type_t type;
+#endif
+ /* Ten bits: one for each *_CONSTRAINT value (0x0001 through 0x0200). */
+ unsigned int constraint : 10; /* context constraint */
+ unsigned int duplicated : 1;
+ unsigned int opt_subexp : 1;
+#ifdef RE_ENABLE_I18N
+ /* Set by re_dfa_add_node for OP_PERIOD in a multibyte locale and for
+ COMPLEX_BRACKET. */
+ unsigned int accept_mb : 1;
+ /* These 2 bits can be moved into the union if needed (e.g. if running out
+ of bits; move opr.c to opr.c.c and move the flags to opr.c.flags). */
+ unsigned int mb_partial : 1;
+#endif
+ unsigned int word_char : 1;
+} re_token_t;
+
+/* Nonzero if TYPE has EPSILON_BIT set, which is the case for the node
+ types OP_OPEN_SUBEXP, OP_CLOSE_SUBEXP, OP_ALT, OP_DUP_ASTERISK and ANCHOR.
+ Only meaningful for node types: some token-only values (24 and above,
+ e.g. OP_CLOSE_DUP_NUM) happen to have the same bit set. */
+#define IS_EPSILON_NODE(type) ((type) & EPSILON_BIT)
+
+struct re_string_t
+{
+ /* Indicate the raw buffer which is the original string passed as an
+ argument of regexec(), re_search(), etc.. */
+ const unsigned char *raw_mbs;
+ /* Store the multibyte string. In case of "case insensitive mode" like
+ REG_ICASE, upper cases of the string are stored, otherwise MBS points
+ the same address that RAW_MBS points. */
+ unsigned char *mbs;
+#ifdef RE_ENABLE_I18N
+ /* Store the wide character string which is corresponding to MBS. */
+ wint_t *wcs;
+ int *offsets;
+ mbstate_t cur_state;
+#endif
+ /* Index in RAW_MBS. Each character mbs[i] corresponds to
+ raw_mbs[raw_mbs_idx + i]. */
+ int raw_mbs_idx;
+ /* The length of the valid characters in the buffers. */
+ int valid_len;
+ /* The corresponding number of bytes in raw_mbs array. */
+ int valid_raw_len;
+ /* The length of the buffers MBS and WCS. */
+ int bufs_len;
+ /* The index in MBS, which is updated by re_string_fetch_byte. */
+ int cur_idx;
+ /* length of RAW_MBS array. */
+ int raw_len;
+ /* This is RAW_LEN - RAW_MBS_IDX + VALID_LEN - VALID_RAW_LEN. */
+ int len;
+ /* End of the buffer may be shorter than its length in the cases such
+ as re_match_2, re_search_2. Then, we use STOP for end of the buffer
+ instead of LEN. */
+ int raw_stop;
+ /* This is RAW_STOP - RAW_MBS_IDX adjusted through OFFSETS. */
+ int stop;
+
+ /* The context of mbs[0]. We store the context independently, since
+ the context of mbs[0] may be different from raw_mbs[0], which is
+ the beginning of the input string. */
+ unsigned int tip_context;
+ /* The translation passed as a part of an argument of re_compile_pattern. */
+ RE_TRANSLATE_TYPE trans;
+ /* Copy of re_dfa_t's word_char. */
+ re_const_bitset_ptr_t word_char;
+ /* 1 if REG_ICASE. */
+ unsigned char icase;
+ unsigned char is_utf8;
+ unsigned char map_notascii;
+ unsigned char mbs_allocated;
+ unsigned char offsets_needed;
+ unsigned char newline_anchor;
+ unsigned char word_ops_used;
+ int mb_cur_max;
+};
+typedef struct re_string_t re_string_t;
+
+
+struct re_dfa_t;
+typedef struct re_dfa_t re_dfa_t;
+
+#ifndef _LIBC
+# ifdef __i386__
+# define internal_function __attribute ((regparm (3), stdcall))
+# else
+# define internal_function
+# endif
+#endif
+
+#ifndef NOT_IN_libc
+static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr,
+ int new_buf_len)
+ internal_function;
+# ifdef RE_ENABLE_I18N
+static void build_wcs_buffer (re_string_t *pstr) internal_function;
+static reg_errcode_t build_wcs_upper_buffer (re_string_t *pstr)
+ internal_function;
+# endif /* RE_ENABLE_I18N */
+static void build_upper_buffer (re_string_t *pstr) internal_function;
+static void re_string_translate_buffer (re_string_t *pstr) internal_function;
+static unsigned int re_string_context_at (const re_string_t *input, int idx,
+ int eflags)
+ internal_function __attribute ((pure));
+#endif
+/* Accessors for re_string_t.  PSTR is a pointer to the string object;
+   every macro argument is parenthesized so that arbitrary expressions
+   can be passed.  */
+
+/* The byte OFFSET positions after the current index, without advancing.  */
+#define re_string_peek_byte(pstr, offset) \
+  ((pstr)->mbs[(pstr)->cur_idx + (offset)])
+/* The byte at the current index; advances the index by one.  */
+#define re_string_fetch_byte(pstr) \
+  ((pstr)->mbs[(pstr)->cur_idx++])
+/* These two consult the WCS buffer, where WEOF marks a position that
+   does not start a character.  */
+#define re_string_first_byte(pstr, idx) \
+  ((idx) == (pstr)->valid_len || (pstr)->wcs[idx] != WEOF)
+#define re_string_is_single_byte_char(pstr, idx) \
+  ((pstr)->wcs[idx] != WEOF && ((pstr)->valid_len == (idx) + 1 \
+                                || (pstr)->wcs[(idx) + 1] != WEOF))
+/* Nonzero once the current index has reached STOP.  */
+#define re_string_eoi(pstr) ((pstr)->stop <= (pstr)->cur_idx)
+#define re_string_cur_idx(pstr) ((pstr)->cur_idx)
+#define re_string_get_buffer(pstr) ((pstr)->mbs)
+#define re_string_length(pstr) ((pstr)->len)
+#define re_string_byte_at(pstr,idx) ((pstr)->mbs[idx])
+#define re_string_skip_bytes(pstr,idx) ((pstr)->cur_idx += (idx))
+#define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx))
+
+#ifdef WIN32
+# include <malloc.h>
+#else
+# include <alloca.h>
+#endif
+
+#ifndef _LIBC
+# if HAVE_ALLOCA
+/* The OS usually guarantees only one guard page at the bottom of the stack,
+ and a page size can be as small as 4096 bytes. So we cannot safely
+ allocate anything larger than 4096 bytes. Also care for the possibility
+ of a few compiler-allocated temporary stack slots. */
+# define __libc_use_alloca(n) ((n) < 4032)
+# else
+/* alloca is implemented with malloc, so just use malloc. */
+# define __libc_use_alloca(n) 0
+# endif
+#endif
+
+/* Typed wrappers around malloc, realloc and free: T is the element type
+ and N the number of elements. The product (n) * sizeof (t) is not
+ checked for overflow here; callers must make sure N is small enough. */
+#define re_malloc(t,n) ((t *) malloc ((n) * sizeof (t)))
+#define re_realloc(p,t,n) ((t *) realloc (p, (n) * sizeof (t)))
+#define re_free(p) free (p)
+
+struct bin_tree_t
+{
+ struct bin_tree_t *parent;
+ struct bin_tree_t *left;
+ struct bin_tree_t *right;
+ struct bin_tree_t *first;
+ struct bin_tree_t *next;
+
+ re_token_t token;
+
+ /* `node_idx' is the index in dfa->nodes, if `type' == 0.
+ Otherwise `type' indicate the type of this node. */
+ int node_idx;
+};
+typedef struct bin_tree_t bin_tree_t;
+
+#define BIN_TREE_STORAGE_SIZE \
+ ((1024 - sizeof (void *)) / sizeof (bin_tree_t))
+
+struct bin_tree_storage_t
+{
+ struct bin_tree_storage_t *next;
+ bin_tree_t data[BIN_TREE_STORAGE_SIZE];
+};
+typedef struct bin_tree_storage_t bin_tree_storage_t;
+
+/* Context flags. They are four distinct bits (1, 2, 4 and 8) that may be
+ combined in one unsigned value. */
+#define CONTEXT_WORD 1
+#define CONTEXT_NEWLINE (CONTEXT_WORD << 1)
+#define CONTEXT_BEGBUF (CONTEXT_NEWLINE << 1)
+#define CONTEXT_ENDBUF (CONTEXT_BEGBUF << 1)
+
+/* Tests on a context value C. Each yields nonzero if the flag is set;
+ IS_ORDINARY_CONTEXT is true only when no flag is set at all. */
+#define IS_WORD_CONTEXT(c) ((c) & CONTEXT_WORD)
+#define IS_NEWLINE_CONTEXT(c) ((c) & CONTEXT_NEWLINE)
+#define IS_BEGBUF_CONTEXT(c) ((c) & CONTEXT_BEGBUF)
+#define IS_ENDBUF_CONTEXT(c) ((c) & CONTEXT_ENDBUF)
+#define IS_ORDINARY_CONTEXT(c) ((c) == 0)
+
+/* Character classification; a word character is an alphanumeric or '_'.
+ NOTE(review): CH is passed straight to isalnum/iswalnum, so it is
+ presumably expected to be in the range those functions accept. */
+#define IS_WORD_CHAR(ch) (isalnum (ch) || (ch) == '_')
+#define IS_NEWLINE(ch) ((ch) == NEWLINE_CHAR)
+#define IS_WIDE_WORD_CHAR(ch) (iswalnum (ch) || (ch) == L'_')
+#define IS_WIDE_NEWLINE(ch) ((ch) == WIDE_NEWLINE_CHAR)
+
+/* Nonzero if CONTEXT, describing what precedes a node, violates one of
+   the PREV_* bits set in CONSTRAINT.  Both arguments are parenthesized
+   at every use, matching NOT_SATISFY_NEXT_CONSTRAINT.  */
+#define NOT_SATISFY_PREV_CONSTRAINT(constraint,context) \
+ ((((constraint) & PREV_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
+  || (((constraint) & PREV_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
+  || (((constraint) & PREV_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context))\
+  || (((constraint) & PREV_BEGBUF_CONSTRAINT) && !IS_BEGBUF_CONTEXT (context)))
+
+#define NOT_SATISFY_NEXT_CONSTRAINT(constraint,context) \
+ ((((constraint) & NEXT_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
+ || (((constraint) & NEXT_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
+ || (((constraint) & NEXT_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context)) \
+ || (((constraint) & NEXT_ENDBUF_CONSTRAINT) && !IS_ENDBUF_CONTEXT (context)))
+
+/* One state of the DFA. States are created by create_ci_newstate and
+ create_cd_newstate, entered into the DFA's state table by
+ register_state, and released by free_state. */
+struct re_dfastate_t
+{
+ /* Hash value recorded by register_state; lookups compare it before
+ comparing node sets. */
+ unsigned int hash;
+ /* The nodes of this state. create_cd_newstate removes from it the
+ nodes whose constraint the context does not satisfy. */
+ re_node_set nodes;
+ /* The members of NODES that are not epsilon nodes. */
+ re_node_set non_eps_nodes;
+ re_node_set inveclosure;
+ /* Either points to NODES, or to a separately allocated copy of the
+ node set the state was created from (see create_cd_newstate). */
+ re_node_set *entrance_nodes;
+ /* Transition tables; freed by free_state. */
+ struct re_dfastate_t **trtable, **word_trtable;
+ /* The context the state was created for; four bits, matching the four
+ CONTEXT_* flags. */
+ unsigned int context : 4;
+ /* Set when the state contains an END_OF_RE node. */
+ unsigned int halt : 1;
+ /* If this state can accept `multi byte'.
+ Note that we refer to multibyte characters, and multi character
+ collating elements as `multi byte'. */
+ unsigned int accept_mb : 1;
+ /* If this state has backreference node(s). */
+ unsigned int has_backref : 1;
+ /* Set when the state contains an ANCHOR node or a node with a nonzero
+ constraint. */
+ unsigned int has_constraint : 1;
+};
+typedef struct re_dfastate_t re_dfastate_t;
+
+/* One bucket of the DFA's state hash table; register_state appends to it
+ and enlarges ARRAY when it is full. */
+struct re_state_table_entry
+{
+ int num; /* Number of states stored in ARRAY. */
+ int alloc; /* Number of slots allocated for ARRAY. */
+ re_dfastate_t **array; /* The states hashing to this bucket. */
+};
+
+/* Array type used in re_sub_match_last_t and re_sub_match_top_t. */
+
+typedef struct
+{
+ int next_idx;
+ int alloc;
+ re_dfastate_t **array;
+} state_array_t;
+
+/* Store information about the node NODE whose type is OP_CLOSE_SUBEXP. */
+
+typedef struct
+{
+ int node;
+ int str_idx; /* The position NODE match at. */
+ state_array_t path;
+} re_sub_match_last_t;
+
+/* Store information about the node NODE whose type is OP_OPEN_SUBEXP.
+ And information about the node, whose type is OP_CLOSE_SUBEXP,
+ corresponding to NODE is stored in LASTS. */
+
+typedef struct
+{
+ int str_idx;
+ int node;
+ state_array_t *path;
+ int alasts; /* Allocation size of LASTS. */
+ int nlasts; /* The number of LASTS. */
+ re_sub_match_last_t **lasts;
+} re_sub_match_top_t;
+
+struct re_backref_cache_entry
+{
+ int node;
+ int str_idx;
+ int subexp_from;
+ int subexp_to;
+ char more;
+ char unused;
+ unsigned short int eps_reachable_subexps_map;
+};
+
+typedef struct
+{
+ /* The string object corresponding to the input string. */
+ re_string_t input;
+#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
+ const re_dfa_t *const dfa;
+#else
+ const re_dfa_t *dfa;
+#endif
+ /* EFLAGS of the argument of regexec. */
+ int eflags;
+ /* Where the matching ends. */
+ int match_last;
+ int last_node;
+ /* The state log used by the matcher. */
+ re_dfastate_t **state_log;
+ int state_log_top;
+ /* Back reference cache. */
+ int nbkref_ents;
+ int abkref_ents;
+ struct re_backref_cache_entry *bkref_ents;
+ int max_mb_elem_len;
+ int nsub_tops;
+ int asub_tops;
+ re_sub_match_top_t **sub_tops;
+} re_match_context_t;
+
+typedef struct
+{
+ re_dfastate_t **sifted_states;
+ re_dfastate_t **limited_states;
+ int last_node;
+ int last_str_idx;
+ re_node_set limits;
+} re_sift_context_t;
+
+struct re_fail_stack_ent_t
+{
+ int idx;
+ int node;
+ regmatch_t *regs;
+ re_node_set eps_via_nodes;
+};
+
+struct re_fail_stack_t
+{
+ int num;
+ int alloc;
+ struct re_fail_stack_ent_t *stack;
+};
+
+/* The compiled form of a pattern: its nodes, the per-node tables and the
+ table of DFA states built so far. */
+struct re_dfa_t
+{
+ /* The nodes. re_dfa_add_node stores each new token at index NODES_LEN
+ and doubles NODES_ALLOC when the array is full. */
+ re_token_t *nodes;
+ size_t nodes_alloc;
+ size_t nodes_len;
+ /* Per-node arrays, indexed like NODES and enlarged together with it by
+ re_dfa_add_node. A new node starts with nexts[] == -1 and with empty
+ edests[] and eclosures[] sets. */
+ int *nexts;
+ int *org_indices;
+ re_node_set *edests;
+ re_node_set *eclosures;
+ re_node_set *inveclosures;
+ /* Hash table of states; the bucket for a state is
+ state_table[hash & state_hash_mask]. */
+ struct re_state_table_entry *state_table;
+ re_dfastate_t *init_state;
+ re_dfastate_t *init_state_word;
+ re_dfastate_t *init_state_nl;
+ re_dfastate_t *init_state_begbuf;
+ bin_tree_t *str_tree;
+ bin_tree_storage_t *str_tree_storage;
+ re_bitset_ptr_t sb_char;
+ int str_tree_storage_idx;
+
+ /* number of subexpressions `re_nsub' is in regex_t. */
+ unsigned int state_hash_mask;
+ int init_node;
+ int nbackref; /* The number of backreference in this dfa. */
+
+ /* Bitmap expressing which backreference is used. */
+ bitset_word_t used_bkref_map;
+ bitset_word_t completed_bkref_map;
+
+ unsigned int has_plural_match : 1;
+ /* If this dfa has "multibyte node", which is a backreference or
+ a node which can accept multibyte character or multi character
+ collating element. */
+ unsigned int has_mb_node : 1;
+ unsigned int is_utf8 : 1;
+ unsigned int map_notascii : 1;
+ unsigned int word_ops_used : 1;
+ /* Compared against 1 in re_dfa_add_node to decide whether OP_PERIOD can
+ accept a multibyte character. */
+ int mb_cur_max;
+ bitset_t word_char;
+ reg_syntax_t syntax;
+ int *subexp_map;
+#ifdef DEBUG
+ char* re_str;
+#endif
+ /* Expands to nothing outside glibc (see the fallback definition of
+ __libc_lock_define near the top of this header). */
+ __libc_lock_define (, lock)
+};
+
+/* Zero the whole re_node_set structure: no storage, no elements. */
+#define re_node_set_init_empty(set) memset (set, '\0', sizeof (re_node_set))
+/* Remove the element ID from SET if it is present. When it is absent,
+ re_node_set_contains returns 0, the index passed on is -1, and
+ re_node_set_remove_at ignores it. */
+#define re_node_set_remove(set,id) \
+ (re_node_set_remove_at (set, re_node_set_contains (set, id) - 1))
+/* Forget all elements but keep the allocated storage. */
+#define re_node_set_empty(p) ((p)->nelem = 0)
+/* Release the element storage only; the structure itself is not freed
+ and its fields are not reset. */
+#define re_node_set_free(set) re_free ((set)->elems)
+
+
+typedef enum
+{
+ SB_CHAR,
+ MB_CHAR,
+ EQUIV_CLASS,
+ COLL_SYM,
+ CHAR_CLASS
+} bracket_elem_type;
+
+typedef struct
+{
+ bracket_elem_type type;
+ union
+ {
+ unsigned char ch;
+ unsigned char *name;
+ wchar_t wch;
+ } opr;
+} bracket_elem_t;
+
+
+/* Inline functions for bitset operation. */
+/* Complement every bit of SET in place.  */
+static inline void
+bitset_not (bitset_t set)
+{
+  int word = 0;
+
+  while (word < BITSET_WORDS)
+    {
+      set[word] = ~set[word];
+      ++word;
+    }
+}
+
+/* Add to DEST every bit that is set in SRC (DEST |= SRC).  */
+static inline void
+bitset_merge (bitset_t dest, const bitset_t src)
+{
+  int word = 0;
+
+  while (word < BITSET_WORDS)
+    {
+      dest[word] = dest[word] | src[word];
+      ++word;
+    }
+}
+
+/* Keep in DEST only the bits that are also set in SRC (DEST &= SRC).  */
+static inline void
+bitset_mask (bitset_t dest, const bitset_t src)
+{
+  int word = 0;
+
+  while (word < BITSET_WORDS)
+    {
+      dest[word] = dest[word] & src[word];
+      ++word;
+    }
+}
+
+#ifdef RE_ENABLE_I18N
+/* Inline functions for re_string. */
+/* Return the number of bytes occupied by the character that starts at
+   index IDX of PSTR.  Always 1 when mb_cur_max is 1; otherwise the
+   positions following IDX are counted for as long as the wide-character
+   buffer holds WEOF there and the valid length is not reached.  */
+static inline int
+internal_function __attribute ((pure))
+re_string_char_size_at (const re_string_t *pstr, int idx)
+{
+  int size = 1;
+
+  if (pstr->mb_cur_max == 1)
+    return 1;
+
+  while (idx + size < pstr->valid_len && pstr->wcs[idx + size] == WEOF)
+    ++size;
+  return size;
+}
+
+/* Return the character at index IDX of PSTR as a wint_t: taken from the
+   byte buffer when mb_cur_max is 1, from the wide-character buffer
+   otherwise.  */
+static inline wint_t
+internal_function __attribute ((pure))
+re_string_wchar_at (const re_string_t *pstr, int idx)
+{
+  return (pstr->mb_cur_max == 1
+          ? (wint_t) pstr->mbs[idx]
+          : (wint_t) pstr->wcs[idx]);
+}
+
+# ifndef NOT_IN_libc
+/* Return the length in bytes of the element starting at index IDX of
+ PSTR. Outside glibc (_LIBC undefined), or when the collation rule
+ count is zero, this is always 1. Otherwise findidx advances a pointer
+ over the element and the distance it moved is returned. */
+static int
+internal_function __attribute ((pure))
+re_string_elem_size_at (const re_string_t *pstr, int idx)
+{
+# ifdef _LIBC
+ const unsigned char *p, *extra;
+ const int32_t *table, *indirect;
+ int32_t tmp;
+ /* NOTE(review): findidx presumably comes from this header and reads the
+ locals TABLE, EXTRA and INDIRECT set up below - confirm against glibc. */
+# include <locale/weight.h>
+ uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+
+ if (nrules != 0)
+ {
+ table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
+ extra = (const unsigned char *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
+ indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
+ _NL_COLLATE_INDIRECTMB);
+ p = pstr->mbs + idx;
+ /* Only the side effect on P is used; TMP itself is not read. */
+ tmp = findidx (&p);
+ return p - pstr->mbs - idx;
+ }
+ else
+# endif /* _LIBC */
+ return 1;
+}
+# endif
+#endif /* RE_ENABLE_I18N */
+
+#endif /* _REGEX_INTERNAL_H */
diff --git a/gnu_regex/regexec.c b/gnu_regex/regexec.c
new file mode 100644
index 0000000..560921d
--- /dev/null
+++ b/gnu_regex/regexec.c
@@ -0,0 +1,4338 @@
+/* Extended regular expression matching and search library.
+ Copyright (C) 2002, 2003, 2004, 2005, 2007, 2009 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+/* Prototypes for this file's static helper functions.  Every one of them
+   is tagged `internal_function'.  Declarations inside RE_ENABLE_I18N or
+   _LIBC conditionals exist only in those configurations.  */
+
+static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
+ int n) internal_function;
+static void match_ctx_clean (re_match_context_t *mctx) internal_function;
+static void match_ctx_free (re_match_context_t *cache) internal_function;
+static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
+ int str_idx, int from, int to)
+ internal_function;
+static int search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx)
+ internal_function;
+static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node,
+ int str_idx) internal_function;
+static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
+ int node, int str_idx)
+ internal_function;
+static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
+ re_dfastate_t **limited_sts, int last_node,
+ int last_str_idx)
+ internal_function;
+static reg_errcode_t re_search_internal (const regex_t *preg,
+ const char *string, int length,
+ int start, int range, int stop,
+ size_t nmatch, regmatch_t pmatch[],
+ int eflags) internal_function;
+static int re_search_2_stub (struct re_pattern_buffer *bufp,
+ const char *string1, int length1,
+ const char *string2, int length2,
+ int start, int range, struct re_registers *regs,
+ int stop, int ret_len) internal_function;
+static int re_search_stub (struct re_pattern_buffer *bufp,
+ const char *string, int length, int start,
+ int range, int stop, struct re_registers *regs,
+ int ret_len) internal_function;
+static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
+ int nregs, int regs_allocated) internal_function;
+static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
+ internal_function;
+static int check_matching (re_match_context_t *mctx, int fl_longest_match,
+ int *p_match_first) internal_function;
+static int check_halt_state_context (const re_match_context_t *mctx,
+ const re_dfastate_t *state, int idx)
+ internal_function;
+static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
+ regmatch_t *prev_idx_match, int cur_node,
+ int cur_idx, int nmatch) internal_function;
+static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
+ int str_idx, int dest_node, int nregs,
+ regmatch_t *regs,
+ re_node_set *eps_via_nodes)
+ internal_function;
+static reg_errcode_t set_regs (const regex_t *preg,
+ const re_match_context_t *mctx,
+ size_t nmatch, regmatch_t *pmatch,
+ int fl_backtrack) internal_function;
+static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs)
+ internal_function;
+
+/* Declared only when RE_ENABLE_I18N is defined. */
+#ifdef RE_ENABLE_I18N
+static int sift_states_iter_mb (const re_match_context_t *mctx,
+ re_sift_context_t *sctx,
+ int node_idx, int str_idx, int max_str_idx)
+ internal_function;
+#endif /* RE_ENABLE_I18N */
+static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
+ re_sift_context_t *sctx)
+ internal_function;
+static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
+ re_sift_context_t *sctx, int str_idx,
+ re_node_set *cur_dest)
+ internal_function;
+static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
+ re_sift_context_t *sctx,
+ int str_idx,
+ re_node_set *dest_nodes)
+ internal_function;
+static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
+ re_node_set *dest_nodes,
+ const re_node_set *candidates)
+ internal_function;
+static int check_dst_limits (const re_match_context_t *mctx,
+ re_node_set *limits,
+ int dst_node, int dst_idx, int src_node,
+ int src_idx) internal_function;
+static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
+ int boundaries, int subexp_idx,
+ int from_node, int bkref_idx)
+ internal_function;
+static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
+ int limit, int subexp_idx,
+ int node, int str_idx,
+ int bkref_idx) internal_function;
+static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
+ re_node_set *dest_nodes,
+ const re_node_set *candidates,
+ re_node_set *limits,
+ struct re_backref_cache_entry *bkref_ents,
+ int str_idx) internal_function;
+static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
+ re_sift_context_t *sctx,
+ int str_idx, const re_node_set *candidates)
+ internal_function;
+static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
+ re_dfastate_t **dst,
+ re_dfastate_t **src, int num)
+ internal_function;
+static re_dfastate_t *find_recover_state (reg_errcode_t *err,
+ re_match_context_t *mctx) internal_function;
+static re_dfastate_t *transit_state (reg_errcode_t *err,
+ re_match_context_t *mctx,
+ re_dfastate_t *state) internal_function;
+static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
+ re_match_context_t *mctx,
+ re_dfastate_t *next_state)
+ internal_function;
+static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
+ re_node_set *cur_nodes,
+ int str_idx) internal_function;
+/* This prototype is compiled out by the `#if 0' below. */
+#if 0
+static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
+ re_match_context_t *mctx,
+ re_dfastate_t *pstate)
+ internal_function;
+#endif
+/* Declared only when RE_ENABLE_I18N is defined. */
+#ifdef RE_ENABLE_I18N
+static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
+ re_dfastate_t *pstate)
+ internal_function;
+#endif /* RE_ENABLE_I18N */
+static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
+ const re_node_set *nodes)
+ internal_function;
+static reg_errcode_t get_subexp (re_match_context_t *mctx,
+ int bkref_node, int bkref_str_idx)
+ internal_function;
+static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
+ const re_sub_match_top_t *sub_top,
+ re_sub_match_last_t *sub_last,
+ int bkref_node, int bkref_str)
+ internal_function;
+static int find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
+ int subexp_idx, int type) internal_function;
+static reg_errcode_t check_arrival (re_match_context_t *mctx,
+ state_array_t *path, int top_node,
+ int top_str, int last_node, int last_str,
+ int type) internal_function;
+static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
+ int str_idx,
+ re_node_set *cur_nodes,
+ re_node_set *next_nodes)
+ internal_function;
+static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
+ re_node_set *cur_nodes,
+ int ex_subexp, int type)
+ internal_function;
+static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
+ re_node_set *dst_nodes,
+ int target, int ex_subexp,
+ int type) internal_function;
+static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
+ re_node_set *cur_nodes, int cur_str,
+ int subexp_num, int type)
+ internal_function;
+static int build_trtable (const re_dfa_t *dfa,
+ re_dfastate_t *state) internal_function;
+/* Declared only when RE_ENABLE_I18N is defined; the inner prototype
+   additionally requires _LIBC. */
+#ifdef RE_ENABLE_I18N
+static int check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
+ const re_string_t *input, int idx)
+ internal_function;
+# ifdef _LIBC
+static unsigned int find_collation_sequence_value (const unsigned char *mbs,
+ size_t name_len)
+ internal_function;
+# endif /* _LIBC */
+#endif /* RE_ENABLE_I18N */
+static int group_nodes_into_DFAstates (const re_dfa_t *dfa,
+ const re_dfastate_t *state,
+ re_node_set *states_node,
+ bitset_t *states_ch) internal_function;
+static int check_node_accept (const re_match_context_t *mctx,
+ const re_token_t *node, int idx)
+ internal_function;
+static reg_errcode_t extend_buffers (re_match_context_t *mctx)
+ internal_function;
+
+/* Entry point for POSIX code. */
+
+/* regexec searches for a given pattern, specified by PREG, in the
+ string STRING.
+
+ If NMATCH is zero or REG_NOSUB was set in the cflags argument to
+ `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
+ least NMATCH elements, and we set them to the offsets of the
+ corresponding matched substrings.
+
+ EFLAGS specifies `execution flags' which affect matching: if
+ REG_NOTBOL is set, then ^ does not match at the beginning of the
+ string; if REG_NOTEOL is set, then $ does not match at the end.
+
+ We return 0 if we find a match and REG_NOMATCH if not. */
+
+/* POSIX entry point.  Rejects any EFLAGS bit other than REG_NOTBOL,
+   REG_NOTEOL and REG_STARTEND with REG_BADPAT.  With REG_STARTEND the
+   search window is taken from PMATCH[0] (rm_so .. rm_eo); otherwise it is
+   the whole NUL-terminated STRING.  When PREG->no_sub is set, no match
+   offsets are requested from the matcher.  Returns 0 when
+   re_search_internal reports REG_NOERROR and 1 for any other result.  */
+int
+regexec (preg, string, nmatch, pmatch, eflags)
+     const regex_t *__restrict preg;
+     const char *__restrict string;
+     size_t nmatch;
+     regmatch_t pmatch[];
+     int eflags;
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  reg_errcode_t err;
+  int start = 0;
+  int length;
+
+  if ((eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND)) != 0)
+    return REG_BADPAT;
+
+  if ((eflags & REG_STARTEND) == 0)
+    length = strlen (string);
+  else
+    {
+      start = pmatch[0].rm_so;
+      length = pmatch[0].rm_eo;
+    }
+
+  __libc_lock_lock (dfa->lock);
+  if (preg->no_sub)
+    {
+      /* Sub-match reporting is disabled: ask for no registers at all.  */
+      nmatch = 0;
+      pmatch = NULL;
+    }
+  err = re_search_internal (preg, string, length, start, length - start,
+                            length, nmatch, pmatch, eflags);
+  __libc_lock_unlock (dfa->lock);
+
+  return err != REG_NOERROR;
+}
+
+#ifdef _LIBC
+# include <shlib-compat.h>
+versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
+
+# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
+__typeof__ (__regexec) __compat_regexec;
+
+/* Compatibility wrapper around regexec: only the REG_NOTBOL and REG_NOTEOL
+   bits of EFLAGS are passed through; every other bit is dropped.  */
+int
+attribute_compat_text_section
+__compat_regexec (const regex_t *__restrict preg,
+                  const char *__restrict string, size_t nmatch,
+                  regmatch_t pmatch[], int eflags)
+{
+  const int passed_flags = eflags & (REG_NOTBOL | REG_NOTEOL);
+
+  return regexec (preg, string, nmatch, pmatch, passed_flags);
+}
+compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
+# endif
+#endif
+
+/* Entry points for GNU code. */
+
+/* re_match, re_search, re_match_2, re_search_2
+
+ The former two functions operate on STRING with length LENGTH,
+ while the latter two operate on the concatenation of STRING1 and STRING2
+ with lengths LENGTH1 and LENGTH2, respectively.
+
+ re_match() matches the compiled pattern in BUFP against the string,
+ starting at index START.
+
+ re_search() first tries matching at index START, then it tries to match
+ starting from index START + 1, and so on. The last start position tried
+ is START + RANGE. (Thus RANGE = 0 forces re_search to operate the same
+ way as re_match().)
+
+ The parameter STOP of re_{match,search}_2 specifies that no match extending
+ beyond the first STOP characters of the concatenation of the strings should
+ be considered.
+
+ If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
+ and all groups are stored in REGS. (For the "_2" variants, the offsets are
+ computed relative to the concatenation, not relative to the individual
+ strings.)
+
+ On success, re_match* functions return the length of the match, re_search*
+ return the position of the start of the match. Return value -1 means no
+ match was found and -2 indicates an internal error. */
+
+/* Match BUFP against STRING at index START only.  Forwards to
+   re_search_stub with RANGE 0, STOP equal to LENGTH and a nonzero RET_LEN,
+   so the stub reports the length of the match.  */
+int
+re_match (bufp, string, length, start, regs)
+     struct re_pattern_buffer *bufp;
+     const char *string;
+     int length, start;
+     struct re_registers *regs;
+{
+  const int range = 0;
+  const int ret_len = 1;
+
+  return re_search_stub (bufp, string, length, start, range, length, regs,
+                         ret_len);
+}
+#ifdef _LIBC
+weak_alias (__re_match, re_match)
+#endif
+
+/* Search STRING for BUFP over start positions START .. START + RANGE.
+   Forwards to re_search_stub with STOP equal to LENGTH and RET_LEN 0, so
+   the stub reports the position where the match starts.  */
+int
+re_search (bufp, string, length, start, range, regs)
+     struct re_pattern_buffer *bufp;
+     const char *string;
+     int length, start, range;
+     struct re_registers *regs;
+{
+  const int ret_len = 0;
+
+  return re_search_stub (bufp, string, length, start, range, length, regs,
+                         ret_len);
+}
+#ifdef _LIBC
+weak_alias (__re_search, re_search)
+#endif
+
+/* Two-string variant of re_match.  Forwards to re_search_2_stub with
+   RANGE 0 and a nonzero RET_LEN, so the length of the match is reported.  */
+int
+re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
+     struct re_pattern_buffer *bufp;
+     const char *string1, *string2;
+     int length1, length2, start, stop;
+     struct re_registers *regs;
+{
+  const int range = 0;
+  const int ret_len = 1;
+
+  return re_search_2_stub (bufp, string1, length1, string2, length2,
+                           start, range, regs, stop, ret_len);
+}
+#ifdef _LIBC
+weak_alias (__re_match_2, re_match_2)
+#endif
+
+/* Two-string variant of re_search.  Forwards to re_search_2_stub with
+   RET_LEN 0, so the position where the match starts is reported.  */
+int
+re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
+     struct re_pattern_buffer *bufp;
+     const char *string1, *string2;
+     int length1, length2, start, range, stop;
+     struct re_registers *regs;
+{
+  const int ret_len = 0;
+
+  return re_search_2_stub (bufp, string1, length1, string2, length2,
+                           start, range, regs, stop, ret_len);
+}
+#ifdef _LIBC
+weak_alias (__re_search_2, re_search_2)
+#endif
+
+/* Common back end of re_match_2 and re_search_2.
+
+   Validates LENGTH1, LENGTH2 and STOP (all must be non-negative), makes
+   the two inputs available as one contiguous string, and hands the result
+   to re_search_stub.  A temporary buffer is allocated only when both parts
+   are non-empty; otherwise the non-empty part (or STRING1) is used
+   directly.
+
+   Returns whatever re_search_stub returns, or -2 when an argument is
+   negative, when LENGTH1 + LENGTH2 does not fit in an int, or when the
+   temporary buffer cannot be allocated.  */
+static int
+re_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs,
+                  stop, ret_len)
+     struct re_pattern_buffer *bufp;
+     const char *string1, *string2;
+     int length1, length2, start, range, stop, ret_len;
+     struct re_registers *regs;
+{
+  const char *str;
+  char *joined = NULL;
+  unsigned int total;
+  int len;
+  int rval;
+
+  if (BE (length1 < 0 || length2 < 0 || stop < 0, 0))
+    return -2;
+
+  /* Add the (now known non-negative) lengths in unsigned arithmetic so the
+     sum cannot trigger signed overflow, then refuse totals an int cannot
+     hold.  ((unsigned int) -1) / 2 equals INT_MAX on the usual
+     two's-complement targets.  */
+  total = (unsigned int) length1 + (unsigned int) length2;
+  if (BE (total > ((unsigned int) -1) / 2, 0))
+    return -2;
+  len = (int) total;
+
+  /* Concatenate the strings, but only when both contribute bytes.  */
+  if (length1 > 0 && length2 > 0)
+    {
+      joined = re_malloc (char, len);
+      if (BE (joined == NULL, 0))
+        return -2;
+#ifdef _LIBC
+      memcpy (__mempcpy (joined, string1, length1), string2, length2);
+#else
+      memcpy (joined, string1, length1);
+      memcpy (joined + length1, string2, length2);
+#endif
+      str = joined;
+    }
+  else if (length2 > 0)
+    str = string2;
+  else
+    str = string1;
+
+  rval = re_search_stub (bufp, str, len, start, range, stop, regs,
+                         ret_len);
+  /* JOINED is still NULL when no temporary buffer was needed.  */
+  if (joined != NULL)
+    re_free (joined);
+  return rval;
+}
+
+/* The parameters have the same meaning as those of re_search.
+ Additional parameters:
+ If RET_LEN is nonzero the length of the match is returned (re_match style);
+ otherwise the position of the match is returned. */
+
+/* Common back end of re_match and re_search (and, through
+   re_search_2_stub, of the "_2" variants).
+
+   START must lie in [0, LENGTH]; otherwise -1 is returned.  RANGE is
+   clamped so that START + RANGE stays inside [0, LENGTH].  The search
+   itself is done by re_search_internal while DFA->lock is held.
+
+   Returns -1 when re_search_internal reports anything but REG_NOERROR,
+   -2 when the temporary register array or the caller's REGS arrays cannot
+   be allocated, and otherwise the match length (RET_LEN nonzero) or the
+   match start position (RET_LEN zero).  */
+static int
+re_search_stub (bufp, string, length, start, range, stop, regs, ret_len)
+     struct re_pattern_buffer *bufp;
+     const char *string;
+     int length, start, range, stop, ret_len;
+     struct re_registers *regs;
+{
+  reg_errcode_t result;
+  regmatch_t *pmatch;
+  int nregs, rval;
+  int eflags = 0;
+  re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
+
+  /* Check for out-of-range.  */
+  if (BE (start < 0 || start > length, 0))
+    return -1;
+  /* Clamp RANGE without ever forming START + RANGE, which could overflow
+     a signed int for large |RANGE|.  Since 0 <= START <= LENGTH here,
+     both LENGTH - START and -START are representable.  */
+  if (BE (range > length - start, 0))
+    range = length - start;
+  else if (BE (range < -start, 0))
+    range = -start;
+
+  __libc_lock_lock (dfa->lock);
+
+  eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
+  eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
+
+  /* Compile fastmap if we haven't yet.  */
+  if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate)
+    re_compile_fastmap (bufp);
+
+  if (BE (bufp->no_sub, 0))
+    regs = NULL;
+
+  /* We need at least 1 register.  */
+  if (regs == NULL)
+    nregs = 1;
+  else if (BE (bufp->regs_allocated == REGS_FIXED &&
+               regs->num_regs < bufp->re_nsub + 1, 0))
+    {
+      /* Fixed-size caller arrays that are too small: report only as many
+         registers as they can hold.  */
+      nregs = regs->num_regs;
+      if (BE (nregs < 1, 0))
+        {
+          /* Nothing can be copied to regs.  */
+          regs = NULL;
+          nregs = 1;
+        }
+    }
+  else
+    nregs = bufp->re_nsub + 1;
+  pmatch = re_malloc (regmatch_t, nregs);
+  if (BE (pmatch == NULL, 0))
+    {
+      rval = -2;
+      goto out;
+    }
+
+  result = re_search_internal (bufp, string, length, start, range, stop,
+                               nregs, pmatch, eflags);
+
+  rval = 0;
+
+  /* I hope we needn't fill the regs with -1's when no match was found.  */
+  if (result != REG_NOERROR)
+    rval = -1;
+  else if (regs != NULL)
+    {
+      /* If caller wants register contents data back, copy them.  */
+      bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
+                                           bufp->regs_allocated);
+      if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
+        rval = -2;
+    }
+
+  if (BE (rval == 0, 1))
+    {
+      if (ret_len)
+        {
+          /* re_match style: the match must begin exactly at START.  */
+          assert (pmatch[0].rm_so == start);
+          rval = pmatch[0].rm_eo - start;
+        }
+      else
+        rval = pmatch[0].rm_so;
+    }
+  re_free (pmatch);
+ out:
+  __libc_lock_unlock (dfa->lock);
+  return rval;
+}
+
+/* Copy the NREGS match offsets in PMATCH into the caller-visible REGS,
+   (re)allocating REGS->start and REGS->end as REGS_ALLOCATED dictates:
+
+     REGS_UNALLOCATED  allocate fresh arrays of NREGS + 1 elements;
+     REGS_REALLOCATE   grow the existing arrays if they hold fewer than
+                       NREGS + 1 elements, otherwise reuse them;
+     REGS_FIXED        use the arrays as they are (the caller guarantees
+                       REGS->num_regs >= NREGS).
+
+   Elements from NREGS up to REGS->num_regs are set to -1.
+
+   Returns the new allocation state (REGS_REALLOCATE or REGS_FIXED), or
+   REGS_UNALLOCATED when memory cannot be obtained.  On that failure path
+   REGS never refers to released memory and no fresh allocation is
+   leaked.  */
+static unsigned
+re_copy_regs (regs, pmatch, nregs, regs_allocated)
+     struct re_registers *regs;
+     regmatch_t *pmatch;
+     int nregs, regs_allocated;
+{
+  int rval = REGS_REALLOCATE;
+  int i;
+  /* We need one extra element beyond `num_regs' for the `-1' marker GNU
+     code uses.  */
+  int need_regs = nregs + 1;
+
+  /* Have the register data arrays been allocated?  */
+  if (regs_allocated == REGS_UNALLOCATED)
+    { /* No.  So allocate them with malloc.  */
+      regoff_t *new_start = re_malloc (regoff_t, need_regs);
+      regoff_t *new_end;
+
+      if (BE (new_start == NULL, 0))
+        return REGS_UNALLOCATED;
+      new_end = re_malloc (regoff_t, need_regs);
+      if (BE (new_end == NULL, 0))
+        {
+          /* Do not leak the first array when the second one fails.  */
+          re_free (new_start);
+          return REGS_UNALLOCATED;
+        }
+      regs->start = new_start;
+      regs->end = new_end;
+      regs->num_regs = need_regs;
+    }
+  else if (regs_allocated == REGS_REALLOCATE)
+    { /* Yes.  If we need more elements than were already
+         allocated, reallocate them.  If we need fewer, just
+         leave it alone.  */
+      if (BE (need_regs > regs->num_regs, 0))
+        {
+          regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
+          regoff_t *new_end;
+
+          if (BE (new_start == NULL, 0))
+            return REGS_UNALLOCATED;
+          /* The old block may already have been released or moved, so
+             record the new pointer before anything else can fail.  */
+          regs->start = new_start;
+          new_end = re_realloc (regs->end, regoff_t, need_regs);
+          if (BE (new_end == NULL, 0))
+            return REGS_UNALLOCATED;
+          regs->end = new_end;
+          regs->num_regs = need_regs;
+        }
+    }
+  else
+    {
+      assert (regs_allocated == REGS_FIXED);
+      /* This function may not be called with REGS_FIXED and nregs too big.  */
+      assert (regs->num_regs >= nregs);
+      rval = REGS_FIXED;
+    }
+
+  /* Copy the regs.  */
+  for (i = 0; i < nregs; ++i)
+    {
+      regs->start[i] = pmatch[i].rm_so;
+      regs->end[i] = pmatch[i].rm_eo;
+    }
+  /* Mark every remaining slot as unused.  */
+  for ( ; i < regs->num_regs; ++i)
+    regs->start[i] = regs->end[i] = -1;
+
+  return rval;
+}
+
+/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
+ ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
+ this memory for recording register information. STARTS and ENDS
+ must be allocated using the malloc library routine, and must each
+ be at least NUM_REGS * sizeof (regoff_t) bytes long.
+
+ If NUM_REGS == 0, then subsequent matches should allocate their own
+ register data.
+
+ Unless this function is called, the first search or match using
+ PATTERN_BUFFER will allocate its own register data, without
+ freeing the old data. */
+
+/* Install caller-provided register arrays STARTS/ENDS of NUM_REGS elements
+   in REGS and mark BUFP as REGS_REALLOCATE.  With NUM_REGS == 0, clear
+   REGS instead and mark BUFP as REGS_UNALLOCATED.  */
+void
+re_set_registers (bufp, regs, num_regs, starts, ends)
+     struct re_pattern_buffer *bufp;
+     struct re_registers *regs;
+     unsigned num_regs;
+     regoff_t *starts, *ends;
+{
+  if (num_regs == 0)
+    {
+      bufp->regs_allocated = REGS_UNALLOCATED;
+      regs->num_regs = 0;
+      regs->end = (regoff_t *) 0;
+      regs->start = regs->end;
+      return;
+    }
+
+  bufp->regs_allocated = REGS_REALLOCATE;
+  regs->num_regs = num_regs;
+  regs->start = starts;
+  regs->end = ends;
+}
+#ifdef _LIBC
+weak_alias (__re_set_registers, re_set_registers)
+#endif
+
+/* Entry points compatible with 4.2 BSD regex library. We don't define
+ them unless specifically requested. */
+
+#if defined _REGEX_RE_COMP || defined _LIBC
+/* BSD-style entry point: run regexec on S with the pattern held in
+   re_comp_buf, requesting no sub-matches and no flags.  Returns 1 when
+   regexec returns 0, and 0 otherwise.  */
+int
+# ifdef _LIBC
+weak_function
+# endif
+re_exec (s)
+     const char *s;
+{
+  const int status = regexec (&re_comp_buf, s, 0, NULL, 0);
+
+  return status == 0;
+}
+#endif /* _REGEX_RE_COMP */
+
+/* Internal entry point. */
+
+/* Searches for a compiled pattern PREG in the string STRING, whose
+ length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same
+ meanings as in regexec. START and RANGE have the same meanings
+ as in re_search.
+ Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
+ otherwise return the error code.
+ Note: We assume front end functions already check ranges.
+ (START + RANGE >= 0 && START + RANGE <= LENGTH) */
+
+/* Outline of the body:
+   1. Clamp NMATCH to PREG->re_nsub + 1; the surplus PMATCH slots
+      (EXTRA_NMATCH of them) are set to -1 once a match has been found.
+   2. Return REG_NOMATCH at once when the pattern buffer is unused, when an
+      initial DFA state is missing, or when the pattern is anchored and
+      neither end of the search range is position 0.  Nothing has been
+      allocated at that point.
+   3. Set up the input string and the match context, plus a state log when
+      NMATCH > 1 or the DFA has a multibyte node.
+   4. Step MATCH_FIRST through [LEFT_LIM, RIGHT_LIM] (forward when
+      RANGE >= 0, backward otherwise), skipping ahead with the fastmap when
+      one is usable, and run check_matching at each candidate start.
+   5. On success fill PMATCH and shift the offsets back by MATCH_FIRST.
+   From step 3 on every exit goes through free_return, which releases the
+   state log, the match context (only when the DFA has back references)
+   and the input string.  */
+static reg_errcode_t
+re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
+ eflags)
+ const regex_t *preg;
+ const char *string;
+ int length, start, range, stop, eflags;
+ size_t nmatch;
+ regmatch_t pmatch[];
+{
+ reg_errcode_t err;
+ const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
+ int left_lim, right_lim, incr;
+ int fl_longest_match, match_first, match_kind, match_last = -1;
+ int extra_nmatch;
+ int sb, ch;
+ /* MCTX must start out zeroed with only DFA set: either through the
+ designated initializer here or through the memset further down. */
+#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
+ re_match_context_t mctx = { .dfa = dfa };
+#else
+ re_match_context_t mctx;
+#endif
+ /* The fastmap is consulted only when it exists, is accurate, RANGE is
+ nonzero and the pattern cannot match the empty string. */
+ char *fastmap = (preg->fastmap != NULL && preg->fastmap_accurate
+ && range && !preg->can_be_null) ? preg->fastmap : NULL;
+ RE_TRANSLATE_TYPE t = preg->translate;
+
+#if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
+ memset (&mctx, '\0', sizeof (re_match_context_t));
+ mctx.dfa = dfa;
+#endif
+
+ /* Registers beyond re_nsub + 1 cannot receive a sub-match; remember how
+ many there are and exclude them from NMATCH. */
+ extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
+ nmatch -= extra_nmatch;
+
+ /* Bail out if the DFA has not been compiled. */
+ if (BE (preg->used == 0 || dfa->init_state == NULL
+ || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
+ || dfa->init_state_begbuf == NULL, 0))
+ return REG_NOMATCH;
+
+#ifdef DEBUG
+ /* We assume front-end functions already check them. */
+ assert (start + range >= 0 && start + range <= length);
+#endif
+
+ /* If initial states with non-begbuf contexts have no elements,
+ the regex must be anchored. If preg->newline_anchor is not set,
+ we'll never use init_state_nl, so do not check it. */
+ if (dfa->init_state->nodes.nelem == 0
+ && dfa->init_state_word->nodes.nelem == 0
+ && (dfa->init_state_nl->nodes.nelem == 0
+ || !preg->newline_anchor))
+ {
+ /* An anchored pattern can only match at position 0, so give up unless
+ one end of the search range is position 0. */
+ if (start != 0 && start + range != 0)
+ return REG_NOMATCH;
+ start = range = 0;
+ }
+
+ /* We must check the longest matching, if nmatch > 0. */
+ fl_longest_match = (nmatch != 0 || dfa->nbackref);
+
+ err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
+ preg->translate, preg->syntax & RE_ICASE, dfa);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+ mctx.input.stop = stop;
+ mctx.input.raw_stop = stop;
+ mctx.input.newline_anchor = preg->newline_anchor;
+
+ err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+
+ /* We will log all the DFA states through which the dfa pass,
+ if nmatch > 1, or this dfa has "multibyte node", which is a
+ back-reference or a node which can accept multibyte character or
+ multi character collating element. */
+ if (nmatch > 1 || dfa->has_mb_node)
+ {
+ mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
+ if (BE (mctx.state_log == NULL, 0))
+ {
+ err = REG_ESPACE;
+ goto free_return;
+ }
+ }
+ else
+ mctx.state_log = NULL;
+
+ match_first = start;
+ mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
+ : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
+
+ /* Check incrementally whether or not the input string matches. */
+ incr = (range < 0) ? -1 : 1;
+ left_lim = (range < 0) ? start + range : start;
+ right_lim = (range < 0) ? start : start + range;
+ sb = dfa->mb_cur_max == 1;
+ /* MATCH_KIND selects the skip loop used below. 8 means no fastmap.
+ Otherwise it is a bit mask: 4 = single-byte locale, or neither
+ RE_ICASE nor a translate table; 2 = forward search; 1 = translate
+ table present. */
+ match_kind =
+ (fastmap
+ ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
+ | (range >= 0 ? 2 : 0)
+ | (t != NULL ? 1 : 0))
+ : 8);
+
+ for (;; match_first += incr)
+ {
+ /* Leaving the search window means no match was found. */
+ err = REG_NOMATCH;
+ if (match_first < left_lim || right_lim < match_first)
+ goto free_return;
+
+ /* Advance as rapidly as possible through the string, until we
+ find a plausible place to start matching. This may be done
+ with varying efficiency, so there are various possibilities:
+ only the most common of them are specialized, in order to
+ save on code size. We use a switch statement for speed. */
+ switch (match_kind)
+ {
+ case 8:
+ /* No fastmap. */
+ break;
+
+ case 7:
+ /* Fastmap with single-byte translation, match forward. */
+ while (BE (match_first < right_lim, 1)
+ && !fastmap[t[(unsigned char) string[match_first]]])
+ ++match_first;
+ goto forward_match_found_start_or_reached_end;
+
+ case 6:
+ /* Fastmap without translation, match forward. */
+ while (BE (match_first < right_lim, 1)
+ && !fastmap[(unsigned char) string[match_first]])
+ ++match_first;
+
+ forward_match_found_start_or_reached_end:
+ /* The loops above stop at RIGHT_LIM without testing it; test it
+ here, treating a position at or past LENGTH as byte 0. */
+ if (BE (match_first == right_lim, 0))
+ {
+ ch = match_first >= length
+ ? 0 : (unsigned char) string[match_first];
+ if (!fastmap[t ? t[ch] : ch])
+ goto free_return;
+ }
+ break;
+
+ case 4:
+ case 5:
+ /* Fastmap without multi-byte translation, match backwards. */
+ while (match_first >= left_lim)
+ {
+ ch = match_first >= length
+ ? 0 : (unsigned char) string[match_first];
+ if (fastmap[t ? t[ch] : ch])
+ break;
+ --match_first;
+ }
+ if (match_first < left_lim)
+ goto free_return;
+ break;
+
+ default:
+ /* In this case, we can't determine easily the current byte,
+ since it might be a component byte of a multibyte
+ character. Then we use the constructed buffer instead. */
+ for (;;)
+ {
+ /* If MATCH_FIRST is out of the valid range, reconstruct the
+ buffers. */
+ unsigned int offset = match_first - mctx.input.raw_mbs_idx;
+ if (BE (offset >= (unsigned int) mctx.input.valid_raw_len, 0))
+ {
+ err = re_string_reconstruct (&mctx.input, match_first,
+ eflags);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+
+ offset = match_first - mctx.input.raw_mbs_idx;
+ }
+ /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
+ Note that MATCH_FIRST must not be smaller than 0. */
+ ch = (match_first >= length
+ ? 0 : re_string_byte_at (&mctx.input, offset));
+ if (fastmap[ch])
+ break;
+ match_first += incr;
+ if (match_first < left_lim || match_first > right_lim)
+ {
+ err = REG_NOMATCH;
+ goto free_return;
+ }
+ }
+ break;
+ }
+
+ /* Reconstruct the buffers so that the matcher can assume that
+ the matching starts from the beginning of the buffer. */
+ err = re_string_reconstruct (&mctx.input, match_first, eflags);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+
+#ifdef RE_ENABLE_I18N
+ /* Don't consider this char as a possible match start if it is part,
+ yet isn't the head, of a multibyte character. */
+ if (!sb && !re_string_first_byte (&mctx.input, 0))
+ continue;
+#endif
+
+ /* It seems to be appropriate one, then use the matcher. */
+ /* We assume that the matching starts from 0. */
+ mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
+ match_last = check_matching (&mctx, fl_longest_match,
+ range >= 0 ? &match_first : NULL);
+ if (match_last != -1)
+ {
+ /* -2 is check_matching's error indication. */
+ if (BE (match_last == -2, 0))
+ {
+ err = REG_ESPACE;
+ goto free_return;
+ }
+ else
+ {
+ mctx.match_last = match_last;
+ if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
+ {
+ re_dfastate_t *pstate = mctx.state_log[match_last];
+ mctx.last_node = check_halt_state_context (&mctx, pstate,
+ match_last);
+ }
+ if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
+ || dfa->nbackref)
+ {
+ err = prune_impossible_nodes (&mctx);
+ if (err == REG_NOERROR)
+ break;
+ if (BE (err != REG_NOMATCH, 0))
+ goto free_return;
+ /* Pruning rejected this candidate; try the next start. */
+ match_last = -1;
+ }
+ else
+ break; /* We found a match. */
+ }
+ }
+
+ match_ctx_clean (&mctx);
+ }
+
+#ifdef DEBUG
+ assert (match_last != -1);
+ assert (err == REG_NOERROR);
+#endif
+
+ /* Set pmatch[] if we need. */
+ if (nmatch > 0)
+ {
+ int reg_idx;
+
+ /* Initialize registers. */
+ for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
+ pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
+
+ /* Set the points where matching start/end. */
+ pmatch[0].rm_so = 0;
+ pmatch[0].rm_eo = mctx.match_last;
+
+ if (!preg->no_sub && nmatch > 1)
+ {
+ err = set_regs (preg, &mctx, nmatch, pmatch,
+ dfa->has_plural_match && dfa->nbackref > 0);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+ }
+
+ /* At last, add the offset to each register, since we slid
+ the buffers so that we could assume that the matching starts
+ from 0. */
+ for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
+ if (pmatch[reg_idx].rm_so != -1)
+ {
+#ifdef RE_ENABLE_I18N
+ /* When the input keeps an offsets table, map buffer positions back
+ through it; a position equal to valid_len maps to valid_raw_len. */
+ if (BE (mctx.input.offsets_needed != 0, 0))
+ {
+ pmatch[reg_idx].rm_so =
+ (pmatch[reg_idx].rm_so == mctx.input.valid_len
+ ? mctx.input.valid_raw_len
+ : mctx.input.offsets[pmatch[reg_idx].rm_so]);
+ pmatch[reg_idx].rm_eo =
+ (pmatch[reg_idx].rm_eo == mctx.input.valid_len
+ ? mctx.input.valid_raw_len
+ : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
+ }
+#else
+ assert (mctx.input.offsets_needed == 0);
+#endif
+ pmatch[reg_idx].rm_so += match_first;
+ pmatch[reg_idx].rm_eo += match_first;
+ }
+ /* The surplus registers removed from NMATCH at the top get -1. */
+ for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
+ {
+ pmatch[nmatch + reg_idx].rm_so = -1;
+ pmatch[nmatch + reg_idx].rm_eo = -1;
+ }
+
+ /* Where subexp_map redirects a sub-expression to another one, copy
+ the offsets of the register it is mapped to. */
+ if (dfa->subexp_map)
+ for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
+ if (dfa->subexp_map[reg_idx] != reg_idx)
+ {
+ pmatch[reg_idx + 1].rm_so
+ = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
+ pmatch[reg_idx + 1].rm_eo
+ = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
+ }
+ }
+
+ /* Common exit: ERR holds the result to return. */
+ free_return:
+ re_free (mctx.state_log);
+ if (dfa->nbackref)
+ match_ctx_free (&mctx);
+ re_string_destruct (&mctx.input);
+ return err;
+}
+
+/* Re-examine the state log of MCTX backwards from MCTX->match_last using
+   sift_states_backward.  On success (REG_NOERROR) MCTX->state_log is
+   replaced by the sifted array and MCTX->last_node / MCTX->match_last are
+   updated.  Returns REG_NOMATCH when no sifted state survives at index 0,
+   REG_ESPACE when an array cannot be allocated, or the error reported by
+   a helper.  On every failure MCTX->state_log is left as it was.  */
+static reg_errcode_t
+prune_impossible_nodes (mctx)
+ re_match_context_t *mctx;
+{
+ const re_dfa_t *const dfa = mctx->dfa;
+ int halt_node, match_last;
+ reg_errcode_t ret;
+ re_dfastate_t **sifted_states;
+ re_dfastate_t **lim_states = NULL;
+ re_sift_context_t sctx;
+#ifdef DEBUG
+ assert (mctx->state_log != NULL);
+#endif
+ match_last = mctx->match_last;
+ halt_node = mctx->last_node;
+ /* One slot per string index 0 .. match_last. */
+ sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
+ if (BE (sifted_states == NULL, 0))
+ {
+ ret = REG_ESPACE;
+ goto free_return;
+ }
+ if (dfa->nbackref)
+ {
+ /* With back references a second array, LIM_STATES, is sifted too. */
+ lim_states = re_malloc (re_dfastate_t *, match_last + 1);
+ if (BE (lim_states == NULL, 0))
+ {
+ ret = REG_ESPACE;
+ goto free_return;
+ }
+ /* Retry with ever shorter matches until something survives at
+ index 0 or no earlier halt state is left. */
+ while (1)
+ {
+ memset (lim_states, '\0',
+ sizeof (re_dfastate_t *) * (match_last + 1));
+ sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
+ match_last);
+ ret = sift_states_backward (mctx, &sctx);
+ re_node_set_free (&sctx.limits);
+ if (BE (ret != REG_NOERROR, 0))
+ goto free_return;
+ if (sifted_states[0] != NULL || lim_states[0] != NULL)
+ break;
+ /* Step back to the previous index whose logged state exists and
+ has its halt flag set. */
+ do
+ {
+ --match_last;
+ if (match_last < 0)
+ {
+ ret = REG_NOMATCH;
+ goto free_return;
+ }
+ } while (mctx->state_log[match_last] == NULL
+ || !mctx->state_log[match_last]->halt);
+ halt_node = check_halt_state_context (mctx,
+ mctx->state_log[match_last],
+ match_last);
+ }
+ ret = merge_state_array (dfa, sifted_states, lim_states,
+ match_last + 1);
+ /* Freed and cleared here so the cleanup below does not free it again. */
+ re_free (lim_states);
+ lim_states = NULL;
+ if (BE (ret != REG_NOERROR, 0))
+ goto free_return;
+ }
+ else
+ {
+ /* No back references: a single backward pass decides; LIM_STATES is
+ still NULL here. */
+ sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
+ ret = sift_states_backward (mctx, &sctx);
+ re_node_set_free (&sctx.limits);
+ if (BE (ret != REG_NOERROR, 0))
+ goto free_return;
+ if (sifted_states[0] == NULL)
+ {
+ ret = REG_NOMATCH;
+ goto free_return;
+ }
+ }
+ /* Success: hand the sifted array over to MCTX.  SIFTED_STATES is cleared
+ so the cleanup below does not free the array MCTX now owns. */
+ re_free (mctx->state_log);
+ mctx->state_log = sifted_states;
+ sifted_states = NULL;
+ mctx->last_node = halt_node;
+ mctx->match_last = match_last;
+ ret = REG_NOERROR;
+ free_return:
+ re_free (sifted_states);
+ re_free (lim_states);
+ return ret;
+}
+
+/* Acquire an initial state and return it.
+ We must select appropriate initial state depending on the context,
+ since initial states may have constraints like "\<", "^", etc.. */
+
+/* Pick the initial DFA state for a match starting at IDX.
+
+   When the plain initial state carries no constraint it is returned
+   unconditionally.  Otherwise the context of the position just before IDX
+   selects among the precomputed word / ordinary / begbuf+newline / newline
+   initial states.  A begbuf-only context has no precomputed state, so one
+   is acquired on demand through re_acquire_state_context, which may
+   report a failure through ERR.  */
+static inline re_dfastate_t *
+__attribute ((always_inline)) internal_function
+acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
+                            int idx)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  unsigned int context;
+
+  if (!dfa->init_state->has_constraint)
+    return dfa->init_state;
+
+  context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
+
+  if (IS_WORD_CONTEXT (context))
+    return dfa->init_state_word;
+  if (IS_ORDINARY_CONTEXT (context))
+    return dfa->init_state;
+  if (IS_NEWLINE_CONTEXT (context))
+    return (IS_BEGBUF_CONTEXT (context)
+            ? dfa->init_state_begbuf
+            : dfa->init_state_nl);
+  if (IS_BEGBUF_CONTEXT (context))
+    /* It is relatively rare case, then calculate on demand.  */
+    return re_acquire_state_context (err, dfa,
+                                     dfa->init_state->entrance_nodes,
+                                     context);
+
+  /* Must not happen?  */
+  return dfa->init_state;
+}
+
+ /* Check whether the regular expression matches the input string held in
+    MCTX, and return the index where the match ends.  Return -1 if there
+    is no match, or -2 in case of an error (every error path returns -2,
+    never a raw reg_errcode_t, so a caller cannot mistake an error code
+    for a match index).
+    FL_LONGEST_MATCH non-zero means we want the POSIX longest match;
+    otherwise the first halt state reached ends the search.
+    If P_MATCH_FIRST is not NULL and no match is found, NEXT_START_IDX
+    (the next place where we may want to try matching) is added to
+    *P_MATCH_FIRST.
+    Note that the matcher assumes that matching starts from the current
+    index of the input buffer.  */
+
+ static int
+ internal_function
+ check_matching (re_match_context_t *mctx, int fl_longest_match,
+                 int *p_match_first)
+ {
+   const re_dfa_t *const dfa = mctx->dfa;
+   reg_errcode_t err;
+   int match = 0;
+   int match_last = -1;
+   int cur_str_idx = re_string_cur_idx (&mctx->input);
+   re_dfastate_t *cur_state;
+   int at_init_state = p_match_first != NULL;
+   int next_start_idx = cur_str_idx;
+
+   err = REG_NOERROR;
+   cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
+   /* An initial state must not be NULL (invalid).  */
+   if (BE (cur_state == NULL, 0))
+     {
+       assert (err == REG_ESPACE);
+       return -2;
+     }
+
+   if (mctx->state_log != NULL)
+     {
+       mctx->state_log[cur_str_idx] = cur_state;
+
+       /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
+          later.  E.g. Processing back references.  */
+       if (BE (dfa->nbackref, 0))
+         {
+           at_init_state = 0;
+           err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
+           /* Report failure as -2: ERR itself is a positive value and
+              would be indistinguishable from a match end index.  */
+           if (BE (err != REG_NOERROR, 0))
+             return -2;
+
+           if (cur_state->has_backref)
+             {
+               err = transit_state_bkref (mctx, &cur_state->nodes);
+               if (BE (err != REG_NOERROR, 0))
+                 return -2;
+             }
+         }
+     }
+
+   /* If the RE accepts the empty string.  */
+   if (BE (cur_state->halt, 0))
+     {
+       if (!cur_state->has_constraint
+           || check_halt_state_context (mctx, cur_state, cur_str_idx))
+         {
+           if (!fl_longest_match)
+             return cur_str_idx;
+           else
+             {
+               match_last = cur_str_idx;
+               match = 1;
+             }
+         }
+     }
+
+   while (!re_string_eoi (&mctx->input))
+     {
+       re_dfastate_t *old_state = cur_state;
+       int next_char_idx = re_string_cur_idx (&mctx->input) + 1;
+
+       /* Make sure the buffers cover the next character before we
+          consume it.  */
+       if (BE (next_char_idx >= mctx->input.bufs_len, 0)
+           || (BE (next_char_idx >= mctx->input.valid_len, 0)
+               && mctx->input.valid_len < mctx->input.len))
+         {
+           err = extend_buffers (mctx);
+           if (BE (err != REG_NOERROR, 0))
+             {
+               assert (err == REG_ESPACE);
+               return -2;
+             }
+         }
+
+       cur_state = transit_state (&err, mctx, cur_state);
+       if (mctx->state_log != NULL)
+         cur_state = merge_state_with_log (&err, mctx, cur_state);
+
+       if (cur_state == NULL)
+         {
+           /* Reached the invalid state or an error.  Try to recover a valid
+              state using the state log, if available and if we have not
+              already found a valid (even if not the longest) match.  */
+           if (BE (err != REG_NOERROR, 0))
+             return -2;
+
+           if (mctx->state_log == NULL
+               || (match && !fl_longest_match)
+               || (cur_state = find_recover_state (&err, mctx)) == NULL)
+             break;
+         }
+
+       /* While we remain in the initial state no match can begin before
+          the next character, so remember it as the next start.  */
+       if (BE (at_init_state, 0))
+         {
+           if (old_state == cur_state)
+             next_start_idx = next_char_idx;
+           else
+             at_init_state = 0;
+         }
+
+       if (cur_state->halt)
+         {
+           /* Reached a halt state.
+              Check the halt state can satisfy the current context.  */
+           if (!cur_state->has_constraint
+               || check_halt_state_context (mctx, cur_state,
+                                            re_string_cur_idx (&mctx->input)))
+             {
+               /* We found an appropriate halt state.  */
+               match_last = re_string_cur_idx (&mctx->input);
+               match = 1;
+
+               /* We found a match, do not modify match_first below.  */
+               p_match_first = NULL;
+               if (!fl_longest_match)
+                 break;
+             }
+         }
+     }
+
+   if (p_match_first)
+     *p_match_first += next_start_idx;
+
+   return match_last;
+ }
+
+ /* Return 1 if NODE is an END_OF_RE node whose constraint (if any) is
+    satisfied by CONTEXT, and 0 otherwise.  */
+
+ static int
+ internal_function
+ check_halt_node_context (const re_dfa_t *dfa, int node, unsigned int context)
+ {
+   const unsigned int constraint = dfa->nodes[node].constraint;
+
+   /* Only END_OF_RE nodes can halt.  */
+   if (dfa->nodes[node].type != END_OF_RE)
+     return 0;
+
+   /* An unconstrained halt node always matches; a constrained one
+      matches only when the context does not violate the constraint.  */
+   return !(constraint && NOT_SATISFY_NEXT_CONSTRAINT (constraint, context));
+ }
+
+ /* Check whether the halt state STATE matches the context at index IDX.
+    Return the first node of STATE that is a halt node satisfying that
+    context, or 0 if no node of STATE does.  */
+
+ static int
+ internal_function
+ check_halt_state_context (const re_match_context_t *mctx,
+                           const re_dfastate_t *state, int idx)
+ {
+   const re_node_set *members = &state->nodes;
+   unsigned int context;
+   int pos;
+ #ifdef DEBUG
+   assert (state->halt);
+ #endif
+   context = re_string_context_at (&mctx->input, idx, mctx->eflags);
+   for (pos = 0; pos < members->nelem; ++pos)
+     {
+       int candidate = members->elems[pos];
+       if (check_halt_node_context (mctx->dfa, candidate, context))
+         return candidate;
+     }
+   return 0;
+ }
+
+ /* Compute the next node to which the NFA (the NFA corresponding to the
+ DFA) transits from NODE when the input position is *PIDX.
+ Return the destination node, and update EPS_VIA_NODES and *PIDX.
+ Return -1 when no destination is acceptable at this position, or -2
+ when inserting into EPS_VIA_NODES or pushing onto the fail stack FS
+ fails. FS may be NULL, in which case no alternative is recorded and
+ back references are not verified against the input. */
+
+ static int
+ internal_function
+ proceed_next_node (const re_match_context_t *mctx, int nregs, regmatch_t *regs,
+ int *pidx, int node, re_node_set *eps_via_nodes,
+ struct re_fail_stack_t *fs)
+ {
+ const re_dfa_t *const dfa = mctx->dfa;
+ int i, err;
+ if (IS_EPSILON_NODE (dfa->nodes[node].type))
+ {
+ re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
+ re_node_set *edests = &dfa->edests[node];
+ int dest_node;
+ err = re_node_set_insert (eps_via_nodes, node);
+ if (BE (err < 0, 0))
+ return -2;
+ /* Pick up a valid destination, or return -1 if none is found.
+ Only destinations present in the logged state at *PIDX qualify. */
+ for (dest_node = -1, i = 0; i < edests->nelem; ++i)
+ {
+ int candidate = edests->elems[i];
+ if (!re_node_set_contains (cur_nodes, candidate))
+ continue;
+ if (dest_node == -1)
+ dest_node = candidate;
+
+ else
+ {
+ /* In order to avoid infinite loop like "(a*)*", return the second
+ epsilon-transition if the first was already considered. */
+ if (re_node_set_contains (eps_via_nodes, dest_node))
+ return candidate;
+
+ /* Otherwise, push the second epsilon-transition on the fail stack. */
+ else if (fs != NULL
+ && push_fail_stack (fs, *pidx, candidate, nregs, regs,
+ eps_via_nodes))
+ return -2;
+
+ /* We know we are going to exit. */
+ break;
+ }
+ }
+ return dest_node;
+ }
+ else
+ {
+ int naccepted = 0; /* Number of input bytes NODE consumes; 0 if not yet known. */
+ re_token_type_t type = dfa->nodes[node].type;
+
+ #ifdef RE_ENABLE_I18N
+ if (dfa->nodes[node].accept_mb)
+ naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
+ else
+ #endif /* RE_ENABLE_I18N */
+ if (type == OP_BACK_REF)
+ {
+ int subexp_idx = dfa->nodes[node].opr.idx + 1;
+ naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so; /* Length of the referenced submatch. */
+ if (fs != NULL)
+ {
+ if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
+ return -1;
+ else if (naccepted)
+ {
+ char *buf = (char *) re_string_get_buffer (&mctx->input);
+ if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
+ naccepted) != 0)
+ return -1;
+ }
+ }
+
+ if (naccepted == 0)
+ {
+ /* An empty back reference is treated like an epsilon transition. */
+ int dest_node;
+ err = re_node_set_insert (eps_via_nodes, node);
+ if (BE (err < 0, 0))
+ return -2;
+ dest_node = dfa->edests[node].elems[0];
+ if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
+ dest_node))
+ return dest_node;
+ }
+ }
+
+ if (naccepted != 0
+ || check_node_accept (mctx, dfa->nodes + node, *pidx))
+ {
+ int dest_node = dfa->nexts[node];
+ *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted; /* A plain node consumes exactly one byte. */
+ if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
+ || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
+ dest_node)))
+ return -1;
+ re_node_set_empty (eps_via_nodes); /* Input was consumed, so the epsilon path starts afresh. */
+ return dest_node;
+ }
+ }
+ return -1;
+ }
+
+ /* Push a backtracking alternative onto the fail stack FS: the string
+    index STR_IDX, the node DEST_NODE to resume from, a private copy of the
+    NREGS registers in REGS, and a copy of EPS_VIA_NODES.
+    Return REG_NOERROR on success.  On allocation failure return
+    REG_ESPACE (or the error from re_node_set_init_copy) and leave FS->num
+    unchanged, so that free_fail_stack_return never visits a partially
+    initialised entry.  */
+
+ static reg_errcode_t
+ internal_function
+ push_fail_stack (struct re_fail_stack_t *fs, int str_idx, int dest_node,
+                  int nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
+ {
+   reg_errcode_t err;
+   int num = fs->num;
+   regmatch_t *saved_regs;
+
+   /* Grow once the entry being pushed would fill the array; this is the
+      same point at which the array has always been doubled.  */
+   if (num + 1 == fs->alloc)
+     {
+       struct re_fail_stack_ent_t *new_array;
+       new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
+                                        * fs->alloc * 2));
+       if (new_array == NULL)
+         return REG_ESPACE;
+       fs->alloc *= 2;
+       fs->stack = new_array;
+     }
+
+   saved_regs = re_malloc (regmatch_t, nregs);
+   if (saved_regs == NULL)
+     return REG_ESPACE;
+   memcpy (saved_regs, regs, sizeof (regmatch_t) * nregs);
+
+   err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
+   if (BE (err != REG_NOERROR, 0))
+     {
+       re_free (saved_regs);
+       return err;
+     }
+
+   /* Everything is allocated: publish the entry.  */
+   fs->stack[num].idx = str_idx;
+   fs->stack[num].node = dest_node;
+   fs->stack[num].regs = saved_regs;
+   fs->num = num + 1;
+   return REG_NOERROR;
+ }
+
+ /* Pop the most recent alternative from the fail stack FS.  Restore the
+    string index into *PIDX and the NREGS saved registers into REGS, and
+    replace *EPS_VIA_NODES (whose old contents are freed) with the saved
+    set.  Return the node to resume from.  The stack must not be empty.  */
+
+ static int
+ internal_function
+ pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs,
+                 regmatch_t *regs, re_node_set *eps_via_nodes)
+ {
+   struct re_fail_stack_ent_t *top;
+   int top_idx;
+
+   fs->num -= 1;
+   top_idx = fs->num;
+   assert (top_idx >= 0);
+   top = &fs->stack[top_idx];
+
+   *pidx = top->idx;
+   memcpy (regs, top->regs, sizeof (regmatch_t) * nregs);
+   re_node_set_free (eps_via_nodes);
+   re_free (top->regs);
+   /* The saved node set is handed over to the caller, not copied.  */
+   *eps_via_nodes = top->eps_via_nodes;
+   return top->node;
+ }
+
+ /* Set the positions where the subexpressions start and end into the
+ registers PMATCH, by walking the NFA from DFA->init_node over the range
+ pmatch[0].rm_so .. pmatch[0].rm_eo with the help of MCTX->state_log.
+ If FL_BACKTRACK is non-zero a fail stack is used so that alternative
+ epsilon transitions can be retried.
+ Return REG_NOERROR on success, REG_ESPACE when an allocation fails, or
+ REG_NOMATCH when no path is found and backtracking is disabled.
+ Note: We assume that pmatch[0] is already set, and
+ pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. */
+
+ static reg_errcode_t
+ internal_function
+ set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
+ regmatch_t *pmatch, int fl_backtrack)
+ {
+ const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
+ int idx, cur_node;
+ re_node_set eps_via_nodes;
+ struct re_fail_stack_t *fs;
+ struct re_fail_stack_t fs_body = { 0, 2, NULL }; /* Presumably { num, alloc, stack } -- confirm against the struct declaration. */
+ regmatch_t *prev_idx_match;
+ int prev_idx_match_malloced = 0;
+
+ #ifdef DEBUG
+ assert (nmatch > 1);
+ assert (mctx->state_log != NULL);
+ #endif
+ if (fl_backtrack)
+ {
+ fs = &fs_body;
+ fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
+ if (fs->stack == NULL)
+ return REG_ESPACE;
+ }
+ else
+ fs = NULL;
+
+ cur_node = dfa->init_node;
+ re_node_set_init_empty (&eps_via_nodes);
+
+ if (__libc_use_alloca (nmatch * sizeof (regmatch_t))) /* Stack copy when allowed, heap otherwise; only the heap copy is freed below. */
+ prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
+ else
+ {
+ prev_idx_match = re_malloc (regmatch_t, nmatch);
+ if (prev_idx_match == NULL)
+ {
+ free_fail_stack_return (fs);
+ return REG_ESPACE;
+ }
+ prev_idx_match_malloced = 1;
+ }
+ memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
+
+ for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
+ {
+ update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
+
+ if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node) /* Reached the final node at the end of the match. */
+ {
+ int reg_idx;
+ if (fs)
+ {
+ for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
+ if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1) /* Opened but never closed: this path is unusable. */
+ break;
+ if (reg_idx == nmatch)
+ {
+ re_node_set_free (&eps_via_nodes);
+ if (prev_idx_match_malloced)
+ re_free (prev_idx_match);
+ return free_fail_stack_return (fs);
+ }
+ cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
+ &eps_via_nodes); /* NOTE(review): pop_fail_stack asserts a non-empty stack and
+ nothing here checks fs->num first -- confirm it cannot be empty. */
+ }
+ else
+ {
+ re_node_set_free (&eps_via_nodes);
+ if (prev_idx_match_malloced)
+ re_free (prev_idx_match);
+ return REG_NOERROR;
+ }
+ }
+
+ /* Proceed to next node. */
+ cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
+ &eps_via_nodes, fs);
+
+ if (BE (cur_node < 0, 0))
+ {
+ if (BE (cur_node == -2, 0)) /* -2 signals an allocation failure in proceed_next_node. */
+ {
+ re_node_set_free (&eps_via_nodes);
+ if (prev_idx_match_malloced)
+ re_free (prev_idx_match);
+ free_fail_stack_return (fs);
+ return REG_ESPACE;
+ }
+ if (fs)
+ cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
+ &eps_via_nodes); /* Dead end: resume from the most recently saved alternative. */
+ else
+ {
+ re_node_set_free (&eps_via_nodes);
+ if (prev_idx_match_malloced)
+ re_free (prev_idx_match);
+ return REG_NOMATCH;
+ }
+ }
+ }
+ re_node_set_free (&eps_via_nodes);
+ if (prev_idx_match_malloced)
+ re_free (prev_idx_match);
+ return free_fail_stack_return (fs);
+ }
+
+ /* Release every entry still on the fail stack FS together with the stack
+    array itself.  FS may be NULL, in which case nothing is freed.
+    Always return REG_NOERROR, so callers can write
+    "return free_fail_stack_return (fs);".  */
+
+ static reg_errcode_t
+ internal_function
+ free_fail_stack_return (struct re_fail_stack_t *fs)
+ {
+   int slot = 0;
+
+   if (fs == NULL)
+     return REG_NOERROR;
+
+   while (slot < fs->num)
+     {
+       re_node_set_free (&fs->stack[slot].eps_via_nodes);
+       re_free (fs->stack[slot].regs);
+       ++slot;
+     }
+   re_free (fs->stack);
+   return REG_NOERROR;
+ }
+
+ /* Update the registers PMATCH for the node CUR_NODE reached at string
+    index CUR_IDX.  Only OP_OPEN_SUBEXP and OP_CLOSE_SUBEXP nodes whose
+    register number is below NMATCH have any effect.  PREV_IDX_MATCH holds
+    the last accepted snapshot of PMATCH and is used to undo empty matches
+    of optional subexpressions.  */
+
+ static void
+ internal_function
+ update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
+              regmatch_t *prev_idx_match, int cur_node, int cur_idx, int nmatch)
+ {
+   const int type = dfa->nodes[cur_node].type;
+   const size_t all_regs = sizeof (regmatch_t) * nmatch;
+   int reg;
+
+   if (type != OP_OPEN_SUBEXP && type != OP_CLOSE_SUBEXP)
+     return;
+
+   reg = dfa->nodes[cur_node].opr.idx + 1;
+   if (reg >= nmatch)
+     return;
+
+   if (type == OP_OPEN_SUBEXP)
+     {
+       /* We are at the first node of this sub expression.  */
+       pmatch[reg].rm_so = cur_idx;
+       pmatch[reg].rm_eo = -1;
+       return;
+     }
+
+   /* We are at the last node of this sub expression.  */
+   if (pmatch[reg].rm_so < cur_idx)
+     {
+       /* A non-empty match: accept it right away and take a snapshot.  */
+       pmatch[reg].rm_eo = cur_idx;
+       memcpy (prev_idx_match, pmatch, all_regs);
+       return;
+     }
+
+   if (dfa->nodes[cur_node].opt_subexp && prev_idx_match[reg].rm_so != -1)
+     /* An empty match of an optional subexpression, like (a?)*, that is
+        not the subexpression's first match.  Restore the snapshot so that
+        matches of inner subexpressions are undone as well, like in
+        ((a?))*.  */
+     memcpy (pmatch, prev_idx_match, all_regs);
+   else
+     /* The subexpression completed, but it may be part of an optional
+        one, so the snapshot is left untouched.  */
+     pmatch[reg].rm_eo = cur_idx;
+ }
+
+/* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
+ and sift the nodes in each states according to the following rules.
+ The updated state_log will be written to STATE_LOG.
+
+ Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
+ 1. When STR_IDX == MATCH_LAST(the last index in the state_log):
+ If `a' isn't the LAST_NODE and `a' can't epsilon transit to
+ the LAST_NODE, we throw away the node `a'.
+ 2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
+ string `s' and transit to `b':
+ i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
+ away the node `a'.
+ ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
+ thrown away, we throw away the node `a'.
+ 3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
+ i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
+ node `a'.
+ ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
+ we throw away the node `a'. */
+
+ #define STATE_NODE_CONTAINS(state,node) \
+ ((state) != NULL && re_node_set_contains (&(state)->nodes, node)) /* Non-zero iff STATE is non-NULL and its node set holds NODE; STATE is evaluated twice. */
+
+ /* Sift the state log backward from SCTX->last_str_idx down to index 0,
+    storing the result in SCTX->sifted_states.  Return REG_NOERROR on
+    success, or the error reported by a helper.  */
+
+ static reg_errcode_t
+ internal_function
+ sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
+ {
+   reg_errcode_t status;
+   re_node_set dest;
+   int consecutive_nulls = 0;
+   int pos = sctx->last_str_idx;
+
+ #ifdef DEBUG
+   assert (mctx->state_log != NULL && mctx->state_log[pos] != NULL);
+ #endif
+
+   /* Build sifted state_log[pos].  It has the nodes which can epsilon
+      transit to the last_node and the last_node itself.  */
+   status = re_node_set_init_1 (&dest, sctx->last_node);
+   if (BE (status != REG_NOERROR, 0))
+     return status;
+   status = update_cur_sifted_state (mctx, sctx, pos, &dest);
+
+   /* Then check each state in the state_log, walking toward index 0.  */
+   while (status == REG_NOERROR && pos > 0)
+     {
+       /* Count how many sifted states in a row have been empty.  */
+       if (sctx->sifted_states[pos] == NULL)
+         ++consecutive_nulls;
+       else
+         consecutive_nulls = 0;
+
+       if (consecutive_nulls > mctx->max_mb_elem_len)
+         {
+           /* More empty states in a row than max_mb_elem_len: clear
+              every earlier entry and stop.  */
+           memset (sctx->sifted_states, '\0',
+                   sizeof (re_dfastate_t *) * pos);
+           break;
+         }
+
+       re_node_set_empty (&dest);
+       --pos;
+
+       if (mctx->state_log[pos] != NULL)
+         {
+           status = build_sifted_states (mctx, sctx, pos, &dest);
+           if (BE (status != REG_NOERROR, 0))
+             break;
+         }
+
+       /* Add the nodes of the logged state that can epsilon transit to
+          a node in DEST, and store the sifted state.  */
+       status = update_cur_sifted_state (mctx, sctx, pos, &dest);
+     }
+
+   re_node_set_free (&dest);
+   return status;
+ }
+
+ /* Collect into CUR_DEST the non-epsilon nodes of state_log[STR_IDX] that
+    accept the input at STR_IDX and lead to a node that survived sifting.
+    Back references are not checked here; see update_cur_sifted_state().
+    Return REG_NOERROR, or REG_ESPACE if CUR_DEST cannot grow.  */
+
+ static reg_errcode_t
+ internal_function
+ build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
+                      int str_idx, re_node_set *cur_dest)
+ {
+   const re_dfa_t *const dfa = mctx->dfa;
+   /* Nodes of the logged state, with the epsilon nodes pre-filtered out.  */
+   const re_node_set *sources = &mctx->state_log[str_idx]->non_eps_nodes;
+   int k;
+
+   for (k = 0; k < sources->nelem; ++k)
+     {
+       const int src_node = sources->elems[k];
+       const int next_node = dfa->nexts[src_node];
+       int consumed = 0;
+
+ #ifdef DEBUG
+       assert (!IS_EPSILON_NODE (dfa->nodes[src_node].type));
+ #endif
+ #ifdef RE_ENABLE_I18N
+       /* If the node may accept `multi byte'.  */
+       if (dfa->nodes[src_node].accept_mb)
+         consumed = sift_states_iter_mb (mctx, sctx, src_node,
+                                         str_idx, sctx->last_str_idx);
+ #endif /* RE_ENABLE_I18N */
+
+       if (consumed == 0)
+         {
+           /* Single byte case: the node must accept the byte and its
+              successor must be in the next sifted state.  */
+           if (!check_node_accept (mctx, dfa->nodes + src_node, str_idx)
+               || !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
+                                        next_node))
+             continue;
+           consumed = 1;
+         }
+
+       if (sctx->limits.nelem != 0
+           && check_dst_limits (mctx, &sctx->limits, next_node,
+                                str_idx + consumed, src_node, str_idx))
+         continue;
+
+       if (BE (re_node_set_insert (cur_dest, src_node) == -1, 0))
+         return REG_ESPACE;
+     }
+
+   return REG_NOERROR;
+ }
+
+/* Helper functions. */
+
+ /* Make sure the buffers of MCTX reach NEXT_STATE_LOG_IDX, extending them
+    if needed, and clear the state log entries between the old top and
+    NEXT_STATE_LOG_IDX, which then becomes the new top.
+    Return REG_NOERROR, or the error from extend_buffers.  */
+
+ static reg_errcode_t
+ internal_function
+ clean_state_log_if_needed (re_match_context_t *mctx, int next_state_log_idx)
+ {
+   const int old_top = mctx->state_log_top;
+   int must_extend = next_state_log_idx >= mctx->input.bufs_len;
+
+   if (!must_extend)
+     must_extend = (next_state_log_idx >= mctx->input.valid_len
+                    && mctx->input.valid_len < mctx->input.len);
+
+   if (must_extend)
+     {
+       reg_errcode_t err = extend_buffers (mctx);
+       if (BE (err != REG_NOERROR, 0))
+         return err;
+     }
+
+   if (old_top >= next_state_log_idx)
+     return REG_NOERROR;
+
+   memset (mctx->state_log + old_top + 1, '\0',
+           sizeof (re_dfastate_t *) * (next_state_log_idx - old_top));
+   mctx->state_log_top = next_state_log_idx;
+   return REG_NOERROR;
+ }
+
+ /* Merge the first NUM states of SRC into DST, slot by slot.  An empty DST
+    slot takes the SRC state; where both are set, DST receives the state
+    for the union of the two node sets.  Return REG_NOERROR, or the error
+    from building that union state.  */
+
+ static reg_errcode_t
+ internal_function
+ merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
+                    re_dfastate_t **src, int num)
+ {
+   int slot;
+
+   for (slot = 0; slot < num; ++slot)
+     {
+       reg_errcode_t err;
+       re_node_set both;
+
+       /* Nothing to merge in from this slot.  */
+       if (src[slot] == NULL)
+         continue;
+
+       if (dst[slot] == NULL)
+         {
+           dst[slot] = src[slot];
+           continue;
+         }
+
+       err = re_node_set_init_union (&both, &dst[slot]->nodes,
+                                     &src[slot]->nodes);
+       if (BE (err != REG_NOERROR, 0))
+         return err;
+       dst[slot] = re_acquire_state (&err, dfa, &both);
+       re_node_set_free (&both);
+       if (BE (err != REG_NOERROR, 0))
+         return err;
+     }
+   return REG_NOERROR;
+ }
+
+ /* Complete DEST_NODES for string index STR_IDX and store the matching
+    state in SCTX->sifted_states[STR_IDX] (NULL when DEST_NODES is empty).
+    If the logged state at STR_IDX has back references they are sifted
+    afterwards.  Return REG_NOERROR or the first error met.  */
+
+ static reg_errcode_t
+ internal_function
+ update_cur_sifted_state (const re_match_context_t *mctx,
+                          re_sift_context_t *sctx, int str_idx,
+                          re_node_set *dest_nodes)
+ {
+   const re_dfa_t *const dfa = mctx->dfa;
+   re_dfastate_t *logged = mctx->state_log[str_idx];
+   const re_node_set *candidates = NULL;
+   reg_errcode_t err = REG_NOERROR;
+
+   if (logged != NULL)
+     candidates = &logged->nodes;
+
+   if (dest_nodes->nelem == 0)
+     sctx->sifted_states[str_idx] = NULL;
+   else
+     {
+       if (candidates != NULL)
+         {
+           /* At first, add the nodes which can epsilon transit to a node
+              in DEST_NODES; then apply the limitations of the current
+              sift context, if there are any.  */
+           err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
+           if (err == REG_NOERROR && sctx->limits.nelem)
+             err = check_subexp_limits (dfa, dest_nodes, candidates,
+                                        &sctx->limits, mctx->bkref_ents,
+                                        str_idx);
+           if (BE (err != REG_NOERROR, 0))
+             return err;
+         }
+
+       sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
+       if (BE (err != REG_NOERROR, 0))
+         return err;
+     }
+
+   if (candidates == NULL || !mctx->state_log[str_idx]->has_backref)
+     return REG_NOERROR;
+
+   return sift_states_bkref (mctx, sctx, str_idx, candidates);
+ }
+
+ /* Add to DEST_NODES the nodes of CANDIDATES which can epsilon transit to
+    a node already in DEST_NODES.  The merged inverse epsilon closure of
+    DEST_NODES is built once and cached in the DFA state acquired for
+    DEST_NODES (its `inveclosure' member).
+    Return REG_NOERROR on success, or an error code (REG_ESPACE on
+    allocation failure).  DEST_NODES is expected to be non-empty; the only
+    caller in view checks this before calling.  */
+
+ static reg_errcode_t
+ internal_function
+ add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
+                        const re_node_set *candidates)
+ {
+   reg_errcode_t err = REG_NOERROR;
+   int i;
+
+   re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
+   if (BE (err != REG_NOERROR, 0))
+     return err;
+
+   /* A zero `alloc' means the closure was never computed for this state.  */
+   if (!state->inveclosure.alloc)
+     {
+       err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
+       if (BE (err != REG_NOERROR, 0))
+         return REG_ESPACE;
+       for (i = 0; i < dest_nodes->nelem; i++)
+         {
+           /* The merge may need to grow the set; do not carry on with a
+              truncated closure if it fails.  */
+           err = re_node_set_merge (&state->inveclosure,
+                                    dfa->inveclosures + dest_nodes->elems[i]);
+           if (BE (err != REG_NOERROR, 0))
+             return REG_ESPACE;
+         }
+     }
+   return re_node_set_add_intersect (dest_nodes, candidates,
+                                     &state->inveclosure);
+ }
+
+ /* Remove from DEST_NODES the nodes in the inverse epsilon closure of NODE
+    (NODE included), except those that are also reachable, within
+    CANDIDATES, through an epsilon node having another destination that is
+    in DEST_NODES but outside that closure.
+    Return REG_NOERROR, or the error from building the exception set.  */
+
+ static reg_errcode_t
+ internal_function
+ sub_epsilon_src_nodes (const re_dfa_t *dfa, int node, re_node_set *dest_nodes,
+                        const re_node_set *candidates)
+ {
+   re_node_set *closure = dfa->inveclosures + node;
+   re_node_set keep;
+   reg_errcode_t err;
+   int pos;
+
+   re_node_set_init_empty (&keep);
+
+   /* First pass: work out which nodes must be kept.  */
+   for (pos = 0; pos < closure->nelem; ++pos)
+     {
+       const int member = closure->elems[pos];
+       int first, second, escapes;
+
+       if (member == node || !IS_EPSILON_NODE (dfa->nodes[member].type))
+         continue;
+
+       first = dfa->edests[member].elems[0];
+       second = ((dfa->edests[member].nelem > 1)
+                 ? dfa->edests[member].elems[1] : -1);
+
+       escapes = (!re_node_set_contains (closure, first)
+                  && re_node_set_contains (dest_nodes, first));
+       if (!escapes)
+         escapes = (second > 0
+                    && !re_node_set_contains (closure, second)
+                    && re_node_set_contains (dest_nodes, second));
+       if (!escapes)
+         continue;
+
+       err = re_node_set_add_intersect (&keep, candidates,
+                                        dfa->inveclosures + member);
+       if (BE (err != REG_NOERROR, 0))
+         {
+           re_node_set_free (&keep);
+           return err;
+         }
+     }
+
+   /* Second pass: drop every closure member that is not kept.  */
+   for (pos = 0; pos < closure->nelem; ++pos)
+     {
+       const int member = closure->elems[pos];
+       int where;
+
+       if (re_node_set_contains (&keep, member))
+         continue;
+       where = re_node_set_contains (dest_nodes, member) - 1;
+       re_node_set_remove_at (dest_nodes, where);
+     }
+
+   re_node_set_free (&keep);
+   return REG_NOERROR;
+ }
+
+ /* Check the transition from SRC_NODE at SRC_IDX to DST_NODE at DST_IDX
+    against every limitation in LIMITS.  Return 1 if, for some limitation,
+    the two ends lie on different sides of the subexpression; return 0 if
+    every limitation is unrelated to this transition.  */
+
+ static int
+ internal_function
+ check_dst_limits (const re_match_context_t *mctx, re_node_set *limits,
+                   int dst_node, int dst_idx, int src_node, int src_idx)
+ {
+   const re_dfa_t *const dfa = mctx->dfa;
+   const int dst_bkref = search_cur_bkref_entry (mctx, dst_idx);
+   const int src_bkref = search_cur_bkref_entry (mctx, src_idx);
+   int k;
+
+   for (k = 0; k < limits->nelem; ++k)
+     {
+       const int limit = limits->elems[k];
+       const int subexp = dfa->nodes[mctx->bkref_ents[limit].node].opr.idx;
+       /* The destination is classified first, then the source.  */
+       const int dst_pos = check_dst_limits_calc_pos (mctx, limit, subexp,
+                                                      dst_node, dst_idx,
+                                                      dst_bkref);
+       const int src_pos = check_dst_limits_calc_pos (mctx, limit, subexp,
+                                                      src_node, src_idx,
+                                                      src_bkref);
+
+       /* Equal positions cover the cases
+            <src> <dst> ( <subexp> )
+            ( <subexp> ) <src> <dst>
+            ( <subexp1> <src> <subexp2> <dst> <subexp3> )
+          in which the limitation is unrelated.  */
+       if (src_pos != dst_pos)
+         return 1;
+     }
+   return 0;
+ }
+
+ static int
+ internal_function
+ check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
+ int subexp_idx, int from_node, int bkref_idx)
+ { /* Classify FROM_NODE relative to subexpression SUBEXP_IDX when the
+ string index lies on a boundary of that subexpression. BOUNDARIES is
+ a bit mask: bit 0 is tested for the OP_OPEN_SUBEXP case and bit 1 for
+ the OP_CLOSE_SUBEXP case. Return -1 when an OP_OPEN_SUBEXP of
+ SUBEXP_IDX is found with bit 0 set, 0 when an OP_CLOSE_SUBEXP of
+ SUBEXP_IDX is found with bit 1 set; when neither is found return 1 if
+ bit 1 is set and 0 otherwise. BKREF_IDX is the first back reference
+ cache entry to look at, or -1 to skip back references. */
+ const re_dfa_t *const dfa = mctx->dfa;
+ const re_node_set *eclosures = dfa->eclosures + from_node;
+ int node_idx;
+
+ /* Else, we are on the boundary: examine the nodes on the epsilon
+ closure. */
+ for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
+ {
+ int node = eclosures->elems[node_idx];
+ switch (dfa->nodes[node].type)
+ {
+ case OP_BACK_REF:
+ if (bkref_idx != -1)
+ {
+ struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
+ do
+ {
+ int dst, cpos;
+
+ if (ent->node != node)
+ continue; /* Jumps to the loop condition, which also advances ENT. */
+
+ if (subexp_idx < BITSET_WORD_BITS
+ && !(ent->eps_reachable_subexps_map
+ & ((bitset_word_t) 1 << subexp_idx)))
+ continue;
+
+ /* Recurse trying to reach the OP_OPEN_SUBEXP and
+ OP_CLOSE_SUBEXP cases below. But, if the
+ destination node is the same node as the source
+ node, don't recurse because it would cause an
+ infinite loop: a regex that exhibits this behavior
+ is ()\1*\1* */
+ dst = dfa->edests[node].elems[0];
+ if (dst == from_node)
+ {
+ if (boundaries & 1)
+ return -1;
+ else /* if (boundaries & 2) */
+ return 0;
+ }
+
+ cpos =
+ check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
+ dst, bkref_idx);
+ if (cpos == -1 /* && (boundaries & 1) */)
+ return -1;
+ if (cpos == 0 && (boundaries & 2))
+ return 0;
+
+ if (subexp_idx < BITSET_WORD_BITS) /* Clear the bit so that this entry is skipped for SUBEXP_IDX next time. */
+ ent->eps_reachable_subexps_map
+ &= ~((bitset_word_t) 1 << subexp_idx);
+ }
+ while (ent++->more);
+ }
+ break;
+
+ case OP_OPEN_SUBEXP:
+ if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
+ return -1;
+ break;
+
+ case OP_CLOSE_SUBEXP:
+ if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
+ return 0;
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ return (boundaries & 2) ? 1 : 0;
+ }
+
+ /* Classify the position (FROM_NODE, STR_IDX) relative to the
+    subexpression range recorded in back reference cache entry LIMIT.
+    Return -1 when STR_IDX is before the range, 1 when it is after it and
+    0 when it is strictly inside.  On a boundary of the range the answer
+    comes from check_dst_limits_calc_pos_1.  */
+
+ static int
+ internal_function
+ check_dst_limits_calc_pos (const re_match_context_t *mctx, int limit,
+                            int subexp_idx, int from_node, int str_idx,
+                            int bkref_idx)
+ {
+   const struct re_backref_cache_entry *range = &mctx->bkref_ents[limit];
+   int edges = 0;
+
+   /* Outside the range of the subexpression.  */
+   if (str_idx < range->subexp_from)
+     return -1;
+   if (str_idx > range->subexp_to)
+     return 1;
+
+   /* Bit 0: on the opening boundary; bit 1: on the closing boundary.  */
+   if (str_idx == range->subexp_from)
+     edges |= 1;
+   if (str_idx == range->subexp_to)
+     edges |= 2;
+
+   /* Strictly inside needs no further look; otherwise examine the
+      epsilon closure.  */
+   return edges
+          ? check_dst_limits_calc_pos_1 (mctx, edges, subexp_idx,
+                                         from_node, bkref_idx)
+          : 0;
+ }
+
+ /* Check the limitations of sub expressions LIMITS, and remove the nodes
+ which are against limitations from DEST_NODES.
+ LIMITS holds indices into BKREF_ENTS; STR_IDX is the string index that
+ DEST_NODES belongs to, and CANDIDATES is passed on to
+ sub_epsilon_src_nodes, which does the actual removal.
+ Return REG_NOERROR, or the error reported by sub_epsilon_src_nodes. */
+
+ static reg_errcode_t
+ internal_function
+ check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
+ const re_node_set *candidates, re_node_set *limits,
+ struct re_backref_cache_entry *bkref_ents, int str_idx)
+ {
+ reg_errcode_t err;
+ int node_idx, lim_idx;
+
+ for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
+ {
+ int subexp_idx;
+ struct re_backref_cache_entry *ent;
+ ent = bkref_ents + limits->elems[lim_idx];
+
+ if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
+ continue; /* This is unrelated limitation. */
+
+ subexp_idx = dfa->nodes[ent->node].opr.idx;
+ if (ent->subexp_to == str_idx)
+ {
+ int ops_node = -1;
+ int cls_node = -1;
+ for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) /* Find the open/close nodes of this subexpression, if present. */
+ {
+ int node = dest_nodes->elems[node_idx];
+ re_token_type_t type = dfa->nodes[node].type;
+ if (type == OP_OPEN_SUBEXP
+ && subexp_idx == dfa->nodes[node].opr.idx)
+ ops_node = node;
+ else if (type == OP_CLOSE_SUBEXP
+ && subexp_idx == dfa->nodes[node].opr.idx)
+ cls_node = node;
+ }
+
+ /* Check the limitation of the open subexpression. */
+ /* Note that (ent->subexp_to = str_idx != ent->subexp_from). */
+ if (ops_node >= 0)
+ {
+ err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
+ candidates);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ }
+
+ /* Check the limitation of the close subexpression. */
+ if (cls_node >= 0)
+ for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
+ {
+ int node = dest_nodes->elems[node_idx];
+ if (!re_node_set_contains (dfa->inveclosures + node,
+ cls_node)
+ && !re_node_set_contains (dfa->eclosures + node,
+ cls_node))
+ {
+ /* It is against this limitation.
+ Remove it from the current sifted state. */
+ err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
+ candidates);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ --node_idx; /* DEST_NODES changed: look at this index again. */
+ }
+ }
+ }
+ else /* (ent->subexp_to != str_idx) */
+ {
+ for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
+ {
+ int node = dest_nodes->elems[node_idx];
+ re_token_type_t type = dfa->nodes[node].type;
+ if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
+ {
+ if (subexp_idx != dfa->nodes[node].opr.idx)
+ continue;
+ /* It is against this limitation.
+ Remove it from the current sifted state. */
+ err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
+ candidates);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ }
+ }
+ }
+ }
+ return REG_NOERROR;
+ }
+
+ static reg_errcode_t
+ internal_function
+ sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
+ int str_idx, const re_node_set *candidates)
+ { /* Sift the states again for each usable back reference among
+ CANDIDATES at string index STR_IDX. For every cache entry whose
+ destination survives in SCTX->sifted_states, a copy of SCTX is made
+ with the entry added to its limits, sift_states_backward is run on
+ it from (node, STR_IDX), and, when SCTX->limited_states is set, the
+ result is merged into it.
+ Return REG_NOERROR, or the first error met. */
+ const re_dfa_t *const dfa = mctx->dfa;
+ reg_errcode_t err;
+ int node_idx, node;
+ re_sift_context_t local_sctx;
+ int first_idx = search_cur_bkref_entry (mctx, str_idx);
+
+ if (first_idx == -1)
+ return REG_NOERROR;
+
+ local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized. */
+
+ for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
+ {
+ int enabled_idx;
+ re_token_type_t type;
+ struct re_backref_cache_entry *entry;
+ node = candidates->elems[node_idx];
+ type = dfa->nodes[node].type;
+ /* Avoid infinite loop for the REs like "()\1+". */
+ if (node == sctx->last_node && str_idx == sctx->last_str_idx)
+ continue;
+ if (type != OP_BACK_REF)
+ continue;
+
+ entry = mctx->bkref_ents + first_idx;
+ enabled_idx = first_idx;
+ do
+ {
+ int subexp_len;
+ int to_idx;
+ int dst_node;
+ int ret;
+ re_dfastate_t *cur_state;
+
+ if (entry->node != node)
+ continue;
+ subexp_len = entry->subexp_to - entry->subexp_from;
+ to_idx = str_idx + subexp_len;
+ dst_node = (subexp_len ? dfa->nexts[node]
+ : dfa->edests[node].elems[0]); /* An empty submatch follows the epsilon destination instead. */
+
+ if (to_idx > sctx->last_str_idx
+ || sctx->sifted_states[to_idx] == NULL
+ || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
+ || check_dst_limits (mctx, &sctx->limits, node,
+ str_idx, dst_node, to_idx))
+ continue;
+
+ if (local_sctx.sifted_states == NULL) /* First use: copy SCTX, but give the copy its own limits set. */
+ {
+ local_sctx = *sctx;
+ err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+ }
+ local_sctx.last_node = node;
+ local_sctx.last_str_idx = str_idx;
+ ret = re_node_set_insert (&local_sctx.limits, enabled_idx);
+ if (BE (ret < 0, 0))
+ {
+ err = REG_ESPACE;
+ goto free_return;
+ }
+ cur_state = local_sctx.sifted_states[str_idx]; /* Saved here and restored after the nested sift below. */
+ err = sift_states_backward (mctx, &local_sctx);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+ if (sctx->limited_states != NULL)
+ {
+ err = merge_state_array (dfa, sctx->limited_states,
+ local_sctx.sifted_states,
+ str_idx + 1);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+ }
+ local_sctx.sifted_states[str_idx] = cur_state;
+ re_node_set_remove (&local_sctx.limits, enabled_idx);
+
+ /* mctx->bkref_ents may have changed, reload the pointer. */
+ entry = mctx->bkref_ents + enabled_idx;
+ }
+ while (enabled_idx++, entry++->more);
+ }
+ err = REG_NOERROR;
+ free_return:
+ if (local_sctx.sifted_states != NULL) /* Only then was local_sctx.limits set up by the copy above. */
+ {
+ re_node_set_free (&local_sctx.limits);
+ }
+
+ return err;
+ }
+
+
+ #ifdef RE_ENABLE_I18N
+ /* Return the number of bytes the node NODE_IDX accepts at STR_IDX as a
+    `multi byte' element, or 0 when the accepted element ends at or before
+    MAX_STR_IDX but its destination node is not in the sifted state at
+    that end position.  The result of check_node_accept_bytes is returned
+    unchanged when it is not positive, and also when the element would
+    end beyond MAX_STR_IDX.  */
+
+ static int
+ internal_function
+ sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
+                      int node_idx, int str_idx, int max_str_idx)
+ {
+   const re_dfa_t *const dfa = mctx->dfa;
+   /* Check whether the node can accept `multi byte'.  */
+   const int len = check_node_accept_bytes (dfa, node_idx, &mctx->input,
+                                            str_idx);
+
+   if (len <= 0)
+     return len;
+   if (str_idx + len > max_str_idx)
+     return len;
+   if (STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + len],
+                            dfa->nexts[node_idx]))
+     /* It is sure that the node could accept `len' bytes of input.  */
+     return len;
+
+   /* The destination was already thrown away, so the node couldn't
+      accept the current input `multi byte'.  */
+   return 0;
+ }
+ #endif /* RE_ENABLE_I18N */
+
+
+/* Functions for state transition. */
+
+/* Return the next state to which the current state STATE will transit by
+ accepting the current input byte, and update STATE_LOG if necessary.
+ If STATE can accept a multibyte char/collating element/back reference
+ update the destination of STATE_LOG.
+ The byte is fetched with re_string_fetch_byte and looked up in
+ STATE->trtable or, failing that, in STATE->word_trtable. If neither
+ table exists yet, build_trtable is called and the lookup is retried.
+ On failure NULL is returned with *ERR set: to the error returned by
+ transit_state_mb, or to REG_ESPACE when build_trtable fails. */
+
+static re_dfastate_t *
+internal_function
+transit_state (reg_errcode_t *err, re_match_context_t *mctx,
+ re_dfastate_t *state)
+{
+ re_dfastate_t **trtable;
+ unsigned char ch; /* the fetched input byte, used as table index */
+
+#ifdef RE_ENABLE_I18N
+ /* If the current state can accept multibyte. */
+ if (BE (state->accept_mb, 0))
+ {
+ *err = transit_state_mb (mctx, state);
+ if (BE (*err != REG_NOERROR, 0))
+ return NULL;
+ }
+#endif /* RE_ENABLE_I18N */
+
+ /* Then decide the next state with the single byte. */
+#if 0
+ if (0)
+ /* don't use transition table */
+ return transit_state_sb (err, mctx, state);
+#endif
+
+ /* Use transition table */
+ ch = re_string_fetch_byte (&mctx->input);
+ for (;;)
+ {
+ trtable = state->trtable;
+ if (BE (trtable != NULL, 1))
+ return trtable[ch];
+ /* No plain table: fall back to the word table, whose entries at
+ offset SBC_MAX are used when the context is a word context. */
+ trtable = state->word_trtable;
+ if (BE (trtable != NULL, 1))
+ {
+ unsigned int context;
+ context
+ = re_string_context_at (&mctx->input,
+ re_string_cur_idx (&mctx->input) - 1,
+ mctx->eflags);
+ if (IS_WORD_CONTEXT (context))
+ return trtable[ch + SBC_MAX];
+ else
+ return trtable[ch];
+ }
+ /* Neither table exists yet: build one for STATE. */
+ if (!build_trtable (mctx->dfa, state))
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
+
+ /* Retry, we now have a transition table. */
+ }
+}
+
+/* Update the state_log if we need.
+ Store NEXT_STATE into MCTX->state_log at the current input index. If a
+ state is already logged there, the logged entry becomes the state acquired
+ from the union of both states' entrance nodes (or from the logged nodes
+ alone when NEXT_STATE is NULL). When the DFA has back references and the
+ resulting state is not NULL, check_subexp_matching_top and, if the state
+ has back references, transit_state_bkref are run on it as well.
+ Returns the resulting state, which may be NULL; NULL is also returned,
+ with *ERR set, when one of those helpers reports an error. */
+re_dfastate_t *
+internal_function
+merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
+ re_dfastate_t *next_state)
+{
+ const re_dfa_t *const dfa = mctx->dfa;
+ int cur_idx = re_string_cur_idx (&mctx->input);
+ /* Beyond the current top of the log: record NEXT_STATE and raise the top. */
+ if (cur_idx > mctx->state_log_top)
+ {
+ mctx->state_log[cur_idx] = next_state;
+ mctx->state_log_top = cur_idx;
+ }
+ else if (mctx->state_log[cur_idx] == 0)
+ {
+ mctx->state_log[cur_idx] = next_state;
+ }
+ else
+ {
+ re_dfastate_t *pstate;
+ unsigned int context;
+ re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
+ /* If (state_log[cur_idx] != 0), it implies that cur_idx is
+ the destination of a multibyte char/collating element/
+ back reference. Then the next state is the union set of
+ these destinations and the results of the transition table. */
+ pstate = mctx->state_log[cur_idx];
+ log_nodes = pstate->entrance_nodes;
+ if (next_state != NULL)
+ {
+ table_nodes = next_state->entrance_nodes;
+ *err = re_node_set_init_union (&next_nodes, table_nodes,
+ log_nodes);
+ if (BE (*err != REG_NOERROR, 0))
+ return NULL;
+ }
+ else
+ next_nodes = *log_nodes; /* shallow copy; not freed below */
+ /* Note: We already add the nodes of the initial state,
+ then we don't need to add them here. */
+
+ context = re_string_context_at (&mctx->input,
+ re_string_cur_idx (&mctx->input) - 1,
+ mctx->eflags);
+ next_state = mctx->state_log[cur_idx]
+ = re_acquire_state_context (err, dfa, &next_nodes, context);
+ /* We don't need to check errors here, since the return value of
+ this function is next_state and ERR is already set. */
+ /* NEXT_NODES was built by the union above only when TABLE_NODES is set. */
+ if (table_nodes != NULL)
+ re_node_set_free (&next_nodes);
+ }
+
+ if (BE (dfa->nbackref, 0) && next_state != NULL)
+ {
+ /* Check OP_OPEN_SUBEXP in the current state in case that we use them
+ later. We must check them here, since the back references in the
+ next state might use them. */
+ *err = check_subexp_matching_top (mctx, &next_state->nodes,
+ cur_idx);
+ if (BE (*err != REG_NOERROR, 0))
+ return NULL;
+
+ /* If the next state has back references. */
+ if (next_state->has_backref)
+ {
+ *err = transit_state_bkref (mctx, &next_state->nodes);
+ if (BE (*err != REG_NOERROR, 0))
+ return NULL;
+ next_state = mctx->state_log[cur_idx]; /* reload: transit_state_bkref writes to the state log */
+ }
+ }
+
+ return next_state;
+}
+
+/* Step over input bytes, one at a time, until an index is reached for
+   which MCTX->state_log holds a state, then merge that entry through
+   merge_state_with_log.  The search repeats while the merge yields NULL
+   without setting *ERR.  NULL is returned when the index would pass
+   MCTX->state_log_top, or when the merge fails with *ERR set.  */
+re_dfastate_t *
+internal_function
+find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
+{
+  for (;;)
+    {
+      const int log_top = mctx->state_log_top;
+      int idx = re_string_cur_idx (&mctx->input);
+      re_dfastate_t *recovered;
+
+      /* Advance to the next logged state, giving up at the log top.  */
+      for (;;)
+        {
+          ++idx;
+          if (idx > log_top)
+            return NULL;
+          re_string_skip_bytes (&mctx->input, 1);
+          if (mctx->state_log[idx] != NULL)
+            break;
+        }
+
+      recovered = merge_state_with_log (err, mctx, NULL);
+      if (*err != REG_NOERROR || recovered != NULL)
+        return recovered;
+    }
+}
+
+/* Helper functions for transit_state. */
+
+/* From the node set CUR_NODES, pick up the nodes whose types are
+   OP_OPEN_SUBEXP and which have corresponding back references in the regular
+   expression (their index is below BITSET_WORD_BITS and its bit is set in
+   DFA->used_bkref_map).  Register each of them at STR_IDX with
+   match_ctx_add_subtop so the corresponding back references can be
+   evaluated later.  Returns REG_NOERROR, or the first error reported by
+   match_ctx_add_subtop.  */
+
+static reg_errcode_t
+internal_function
+check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
+                           int str_idx)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  int i;
+
+  /* TODO: This isn't efficient.
+           Because there might be more than one nodes whose types are
+           OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
+           nodes.
+           E.g. RE: (a){2}  */
+  for (i = 0; i < cur_nodes->nelem; ++i)
+    {
+      const int node = cur_nodes->elems[i];
+      const re_token_t *tok = dfa->nodes + node;
+      reg_errcode_t err;
+
+      if (tok->type != OP_OPEN_SUBEXP)
+        continue;
+      /* Only indices that fit in the bitmap word can be looked up.  */
+      if (!(tok->opr.idx < BITSET_WORD_BITS))
+        continue;
+      if (!(dfa->used_bkref_map & ((bitset_word_t) 1 << tok->opr.idx)))
+        continue;
+
+      err = match_ctx_add_subtop (mctx, node, str_idx);
+      if (BE (err != REG_NOERROR, 0))
+        return err;
+    }
+  return REG_NOERROR;
+}
+
+#if 0
+/* Return the next state to which the current state STATE will transit by
+ accepting the current input byte.
+ This table-free variant is compiled out by the enclosing #if 0. It merges
+ the epsilon closures of the successors of every node of STATE for which
+ check_node_accept succeeds at the current index, acquires the state for
+ the resulting node set and context, and then skips one input byte.
+ Returns NULL with *ERR set if allocating or merging the node set fails. */
+
+static re_dfastate_t *
+transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
+ re_dfastate_t *state)
+{
+ const re_dfa_t *const dfa = mctx->dfa;
+ re_node_set next_nodes;
+ re_dfastate_t *next_state;
+ int node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
+ unsigned int context;
+
+ *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
+ if (BE (*err != REG_NOERROR, 0))
+ return NULL;
+ for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
+ {
+ int cur_node = state->nodes.elems[node_cnt];
+ if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
+ {
+ *err = re_node_set_merge (&next_nodes,
+ dfa->eclosures + dfa->nexts[cur_node]);
+ if (BE (*err != REG_NOERROR, 0))
+ {
+ re_node_set_free (&next_nodes);
+ return NULL;
+ }
+ }
+ }
+ context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
+ next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
+ /* We don't need to check errors here, since the return value of
+ this function is next_state and ERR is already set. */
+ /* The temporary node set is released whether or not a state was found. */
+ re_node_set_free (&next_nodes);
+ re_string_skip_bytes (&mctx->input, 1);
+ return next_state;
+}
+#endif
+/* For each node of PSTATE that can accept a multi-byte element at the
+ current input position, register the epsilon closure of the node's
+ successor in MCTX->state_log at the index just past the accepted bytes,
+ merging it with the entrance nodes of any state already logged there.
+ MCTX->max_mb_elem_len is raised to the longest length accepted.
+ Returns REG_NOERROR, or the first error reported by a helper. */
+#ifdef RE_ENABLE_I18N
+static reg_errcode_t
+internal_function
+transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
+{
+ const re_dfa_t *const dfa = mctx->dfa;
+ reg_errcode_t err;
+ int i;
+
+ for (i = 0; i < pstate->nodes.nelem; ++i)
+ {
+ re_node_set dest_nodes, *new_nodes;
+ int cur_node_idx = pstate->nodes.elems[i];
+ int naccepted, dest_idx;
+ unsigned int context;
+ re_dfastate_t *dest_state;
+
+ if (!dfa->nodes[cur_node_idx].accept_mb)
+ continue;
+ /* Skip the node when its constraint is not satisfied by the context at
+ the current index. */
+ if (dfa->nodes[cur_node_idx].constraint)
+ {
+ context = re_string_context_at (&mctx->input,
+ re_string_cur_idx (&mctx->input),
+ mctx->eflags);
+ if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
+ context))
+ continue;
+ }
+
+ /* How many bytes the node can accept? */
+ naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
+ re_string_cur_idx (&mctx->input));
+ if (naccepted == 0)
+ continue;
+
+ /* The node can accepts `naccepted' bytes. */
+ dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
+ mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
+ : mctx->max_mb_elem_len);
+ err = clean_state_log_if_needed (mctx, dest_idx);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+#ifdef DEBUG
+ assert (dfa->nexts[cur_node_idx] != -1);
+#endif
+ new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
+ /* Combine NEW_NODES with whatever is already logged at DEST_IDX. */
+ dest_state = mctx->state_log[dest_idx];
+ if (dest_state == NULL)
+ dest_nodes = *new_nodes; /* shallow copy; not freed below */
+ else
+ {
+ err = re_node_set_init_union (&dest_nodes,
+ dest_state->entrance_nodes, new_nodes);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ }
+ context = re_string_context_at (&mctx->input, dest_idx - 1,
+ mctx->eflags);
+ mctx->state_log[dest_idx]
+ = re_acquire_state_context (&err, dfa, &dest_nodes, context);
+ if (dest_state != NULL)
+ re_node_set_free (&dest_nodes);
+ if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
+ return err;
+ }
+ return REG_NOERROR;
+}
+#endif /* RE_ENABLE_I18N */
+/* For every OP_BACK_REF node in NODES whose constraint (if any) is
+ satisfied at the current input index, collect the candidate sub-matches
+ with get_subexp and register the node's destination in MCTX->state_log
+ at the index where each candidate would end. A zero-length candidate
+ uses the node's first epsilon destination and, if the state logged at
+ the current index grew, triggers a recursive pass over the new nodes.
+ Returns REG_NOERROR, or the first error reported by a helper. */
+static reg_errcode_t
+internal_function
+transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
+{
+ const re_dfa_t *const dfa = mctx->dfa;
+ reg_errcode_t err;
+ int i;
+ int cur_str_idx = re_string_cur_idx (&mctx->input);
+
+ for (i = 0; i < nodes->nelem; ++i)
+ {
+ int dest_str_idx, prev_nelem, bkc_idx;
+ int node_idx = nodes->elems[i];
+ unsigned int context;
+ const re_token_t *node = dfa->nodes + node_idx;
+ re_node_set *new_dest_nodes;
+
+ /* Check whether `node' is a backreference or not. */
+ if (node->type != OP_BACK_REF)
+ continue;
+
+ if (node->constraint)
+ {
+ context = re_string_context_at (&mctx->input, cur_str_idx,
+ mctx->eflags);
+ if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
+ continue;
+ }
+
+ /* `node' is a backreference.
+ Check the substring which the substring matched. */
+ bkc_idx = mctx->nbkref_ents; /* cache entries from this index on are scanned below */
+ err = get_subexp (mctx, node_idx, cur_str_idx);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+
+ /* And add the epsilon closures (which is `new_dest_nodes') of
+ the backreference to appropriate state_log. */
+#ifdef DEBUG
+ assert (dfa->nexts[node_idx] != -1);
+#endif
+ for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
+ {
+ int subexp_len;
+ re_dfastate_t *dest_state;
+ struct re_backref_cache_entry *bkref_ent;
+ bkref_ent = mctx->bkref_ents + bkc_idx;
+ if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
+ continue;
+ subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
+ new_dest_nodes = (subexp_len == 0
+ ? dfa->eclosures + dfa->edests[node_idx].elems[0]
+ : dfa->eclosures + dfa->nexts[node_idx]);
+ dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
+ - bkref_ent->subexp_from);
+ context = re_string_context_at (&mctx->input, dest_str_idx - 1,
+ mctx->eflags);
+ dest_state = mctx->state_log[dest_str_idx];
+ prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
+ : mctx->state_log[cur_str_idx]->nodes.nelem);
+ /* Add `new_dest_node' to state_log. */
+ if (dest_state == NULL)
+ {
+ mctx->state_log[dest_str_idx]
+ = re_acquire_state_context (&err, dfa, new_dest_nodes,
+ context);
+ if (BE (mctx->state_log[dest_str_idx] == NULL
+ && err != REG_NOERROR, 0))
+ goto free_return;
+ }
+ else
+ {
+ re_node_set dest_nodes;
+ err = re_node_set_init_union (&dest_nodes,
+ dest_state->entrance_nodes,
+ new_dest_nodes);
+ if (BE (err != REG_NOERROR, 0))
+ {
+ re_node_set_free (&dest_nodes);
+ goto free_return;
+ }
+ mctx->state_log[dest_str_idx]
+ = re_acquire_state_context (&err, dfa, &dest_nodes, context);
+ re_node_set_free (&dest_nodes);
+ if (BE (mctx->state_log[dest_str_idx] == NULL
+ && err != REG_NOERROR, 0))
+ goto free_return;
+ }
+ /* We need to check recursively if the backreference can epsilon
+ transit. */
+ if (subexp_len == 0
+ && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
+ {
+ err = check_subexp_matching_top (mctx, new_dest_nodes,
+ cur_str_idx);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+ err = transit_state_bkref (mctx, new_dest_nodes);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+ }
+ }
+ }
+ err = REG_NOERROR;
+ free_return:
+ return err;
+}
+
+/* Enumerate all the candidates which the backreference BKREF_NODE can match
+   at BKREF_STR_IDX, and register them by match_ctx_add_entry().
+   Note that we might collect inappropriate candidates here.
+   However, the cost of checking them strictly here is too high, then we
+   delay these checking for prune_impossible_nodes().
+
+   Returns REG_NOERROR when the enumeration completes (including the case
+   where BKREF_NODE was already cached for BKREF_STR_IDX), REG_ESPACE on
+   allocation failure, or the first error other than REG_NOMATCH reported
+   by a helper.  */
+
+static reg_errcode_t
+internal_function
+get_subexp (re_match_context_t *mctx, int bkref_node, int bkref_str_idx)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  int subexp_num, sub_top_idx;
+  const char *buf = (const char *) re_string_get_buffer (&mctx->input);
+  /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX.  */
+  int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
+  if (cache_idx != -1)
+    {
+      const struct re_backref_cache_entry *entry
+        = mctx->bkref_ents + cache_idx;
+      do
+        if (entry->node == bkref_node)
+          return REG_NOERROR; /* We already checked it.  */
+      while (entry++->more);
+    }
+
+  subexp_num = dfa->nodes[bkref_node].opr.idx;
+
+  /* For each sub expression  */
+  for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
+    {
+      reg_errcode_t err;
+      re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
+      re_sub_match_last_t *sub_last;
+      int sub_last_idx, sl_str, bkref_str_off;
+
+      if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
+        continue; /* It isn't related.  */
+
+      sl_str = sub_top->str_idx;
+      bkref_str_off = bkref_str_idx;
+      /* At first, check the last node of sub expressions we already
+         evaluated.  */
+      for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
+        {
+          int sl_str_diff;
+          sub_last = sub_top->lasts[sub_last_idx];
+          sl_str_diff = sub_last->str_idx - sl_str;
+          /* The matched string by the sub expression match with the substring
+             at the back reference?  */
+          if (sl_str_diff > 0)
+            {
+              if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))
+                {
+                  /* Not enough chars for a successful match.  */
+                  if (bkref_str_off + sl_str_diff > mctx->input.len)
+                    break;
+
+                  err = clean_state_log_if_needed (mctx,
+                                                   bkref_str_off
+                                                   + sl_str_diff);
+                  if (BE (err != REG_NOERROR, 0))
+                    return err;
+                  buf = (const char *) re_string_get_buffer (&mctx->input);
+                }
+              if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
+                /* We don't need to search this sub expression any more.  */
+                break;
+            }
+          bkref_str_off += sl_str_diff;
+          sl_str += sl_str_diff;
+          err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
+                                bkref_str_idx);
+
+          /* Reload buf, since the preceding call might have reallocated
+             the buffer.  */
+          buf = (const char *) re_string_get_buffer (&mctx->input);
+
+          if (err == REG_NOMATCH)
+            continue;
+          if (BE (err != REG_NOERROR, 0))
+            return err;
+        }
+
+      if (sub_last_idx < sub_top->nlasts)
+        continue;
+      if (sub_last_idx > 0)
+        ++sl_str;
+      /* Then, search for the other last nodes of the sub expression.  */
+      for (; sl_str <= bkref_str_idx; ++sl_str)
+        {
+          int cls_node, sl_str_off;
+          const re_node_set *nodes;
+          sl_str_off = sl_str - sub_top->str_idx;
+          /* The matched string by the sub expression match with the substring
+             at the back reference?  */
+          if (sl_str_off > 0)
+            {
+              if (BE (bkref_str_off >= mctx->input.valid_len, 0))
+                {
+                  /* If we are at the end of the input, we cannot match.  */
+                  if (bkref_str_off >= mctx->input.len)
+                    break;
+
+                  err = extend_buffers (mctx);
+                  if (BE (err != REG_NOERROR, 0))
+                    return err;
+
+                  buf = (const char *) re_string_get_buffer (&mctx->input);
+                }
+              if (buf [bkref_str_off++] != buf[sl_str - 1])
+                break; /* We don't need to search this sub expression
+                          any more.  */
+            }
+          if (mctx->state_log[sl_str] == NULL)
+            continue;
+          /* Does this state have a ')' of the sub expression?  */
+          nodes = &mctx->state_log[sl_str]->nodes;
+          cls_node = find_subexp_node (dfa, nodes, subexp_num,
+                                       OP_CLOSE_SUBEXP);
+          if (cls_node == -1)
+            continue; /* No.  */
+          if (sub_top->path == NULL)
+            {
+              sub_top->path = calloc (sizeof (state_array_t),
+                                      sl_str - sub_top->str_idx + 1);
+              if (sub_top->path == NULL)
+                return REG_ESPACE;
+            }
+          /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
+             in the current context?  */
+          err = check_arrival (mctx, sub_top->path, sub_top->node,
+                               sub_top->str_idx, cls_node, sl_str,
+                               OP_CLOSE_SUBEXP);
+          if (err == REG_NOMATCH)
+            continue;
+          if (BE (err != REG_NOERROR, 0))
+            return err;
+          sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
+          if (BE (sub_last == NULL, 0))
+            return REG_ESPACE;
+          err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
+                                bkref_str_idx);
+
+          /* Reload buf here too: as in the loop above, the call might have
+             reallocated the buffer, and buf is read again on the next
+             iteration.  */
+          buf = (const char *) re_string_get_buffer (&mctx->input);
+
+          if (err == REG_NOMATCH)
+            continue;
+          /* Report real failures (e.g. REG_ESPACE) instead of silently
+             dropping them, exactly as the loop above does.  */
+          if (BE (err != REG_NOERROR, 0))
+            return err;
+        }
+    }
+  return REG_NOERROR;
+}
+
+/* Helper functions for get_subexp(). */
+
+/* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
+   If it can arrive, register the sub expression expressed with SUB_TOP
+   and SUB_LAST through match_ctx_add_entry, then make sure the state log
+   reaches the index at which the back reference would end.
+   Returns the first status other than REG_NOERROR produced by these three
+   steps (REG_NOMATCH from check_arrival included), or REG_NOERROR.  */
+
+static reg_errcode_t
+internal_function
+get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
+                re_sub_match_last_t *sub_last, int bkref_node, int bkref_str)
+{
+  /* Can the subexpression arrive the back reference?  */
+  reg_errcode_t status
+    = check_arrival (mctx, &sub_last->path, sub_last->node,
+                     sub_last->str_idx, bkref_node, bkref_str,
+                     OP_OPEN_SUBEXP);
+
+  if (status == REG_NOERROR)
+    status = match_ctx_add_entry (mctx, bkref_node, bkref_str,
+                                  sub_top->str_idx, sub_last->str_idx);
+
+  if (status == REG_NOERROR)
+    {
+      /* End index of the back reference: its start plus the length of
+         the sub expression match.  */
+      const int end_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
+      status = clean_state_log_if_needed (mctx, end_idx);
+    }
+
+  return status;
+}
+
+/* Find the first node in NODES whose type equals TYPE and whose
+   sub expression index is SUBEXP_IDX.  Callers pass OP_OPEN_SUBEXP to look
+   for '(' or OP_CLOSE_SUBEXP to look for ')'.
+   Returns the node index, or -1 if NODES holds no such node.
+   TODO: This function isn't efficient...
+         Because there might be more than one nodes whose types are
+         OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
+         nodes.
+         E.g. RE: (a){2}  */
+
+static int
+internal_function
+find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
+                  int subexp_idx, int type)
+{
+  int pos = 0;
+
+  while (pos < nodes->nelem)
+    {
+      const int candidate = nodes->elems[pos];
+      const re_token_t *tok = dfa->nodes + candidate;
+
+      if (tok->type == type && tok->opr.idx == subexp_idx)
+        return candidate;
+      ++pos;
+    }
+  return -1;
+}
+
+/* Check whether the node TOP_NODE at TOP_STR can arrive to the node
+   LAST_NODE at LAST_STR.  We record the path onto PATH since it will be
+   heavily reused: PATH->array serves as the state log for the walk and
+   PATH->next_idx remembers the index the walk reached, so a later call
+   resumes from there instead of starting over.
+   TYPE is handed to check_arrival_expand_ecl and expand_bkref_cache.
+   MCTX->state_log and MCTX->input.cur_idx are replaced for the duration
+   of the walk and restored on every exit, error exits included.
+   Return REG_NOERROR if it can arrive, REG_NOMATCH if it cannot, or the
+   error reported by a helper (REG_ESPACE if PATH cannot be grown).  */
+
+static reg_errcode_t
+internal_function
+check_arrival (re_match_context_t *mctx, state_array_t *path, int top_node,
+               int top_str, int last_node, int last_str, int type)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  reg_errcode_t err = REG_NOERROR;
+  int subexp_num, backup_cur_idx, str_idx, null_cnt;
+  re_dfastate_t *cur_state = NULL;
+  re_node_set *cur_nodes, next_nodes;
+  re_dfastate_t **backup_state_log;
+  unsigned int context;
+
+  subexp_num = dfa->nodes[top_node].opr.idx;
+  /* Extend the buffer if we need.  */
+  if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
+    {
+      re_dfastate_t **new_array;
+      int old_alloc = path->alloc;
+      path->alloc += last_str + mctx->max_mb_elem_len + 1;
+      new_array = re_realloc (path->array, re_dfastate_t *, path->alloc);
+      if (BE (new_array == NULL, 0))
+        {
+          path->alloc = old_alloc;
+          return REG_ESPACE;
+        }
+      path->array = new_array;
+      memset (new_array + old_alloc, '\0',
+              sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
+    }
+
+  /* Resume where the previous walk over PATH stopped; start at TOP_STR
+     only when PATH has not been walked yet.  */
+  str_idx = path->next_idx ? path->next_idx : top_str;
+
+  /* Temporary modify MCTX.  */
+  backup_state_log = mctx->state_log;
+  backup_cur_idx = mctx->input.cur_idx;
+  mctx->state_log = path->array;
+  mctx->input.cur_idx = str_idx;
+
+  /* Setup initial node set.  */
+  context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
+  if (str_idx == top_str)
+    {
+      err = re_node_set_init_1 (&next_nodes, top_node);
+      if (BE (err != REG_NOERROR, 0))
+        goto restore_return;
+      err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
+      if (BE (err != REG_NOERROR, 0))
+        goto free_return;
+    }
+  else
+    {
+      cur_state = mctx->state_log[str_idx];
+      if (cur_state && cur_state->has_backref)
+        {
+          err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
+          if (BE (err != REG_NOERROR, 0))
+            goto restore_return;
+        }
+      else
+        re_node_set_init_empty (&next_nodes);
+    }
+  if (str_idx == top_str || (cur_state && cur_state->has_backref))
+    {
+      if (next_nodes.nelem)
+        {
+          err = expand_bkref_cache (mctx, &next_nodes, str_idx,
+                                    subexp_num, type);
+          if (BE (err != REG_NOERROR, 0))
+            goto free_return;
+        }
+      cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
+      if (BE (cur_state == NULL && err != REG_NOERROR, 0))
+        goto free_return;
+      mctx->state_log[str_idx] = cur_state;
+    }
+
+  /* Walk forward one index at a time; give up after more than
+     MAX_MB_ELEM_LEN consecutive indices without a state.  */
+  for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
+    {
+      re_node_set_empty (&next_nodes);
+      if (mctx->state_log[str_idx + 1])
+        {
+          err = re_node_set_merge (&next_nodes,
+                                   &mctx->state_log[str_idx + 1]->nodes);
+          if (BE (err != REG_NOERROR, 0))
+            goto free_return;
+        }
+      if (cur_state)
+        {
+          err = check_arrival_add_next_nodes (mctx, str_idx,
+                                              &cur_state->non_eps_nodes,
+                                              &next_nodes);
+          if (BE (err != REG_NOERROR, 0))
+            goto free_return;
+        }
+      ++str_idx;
+      if (next_nodes.nelem)
+        {
+          err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
+          if (BE (err != REG_NOERROR, 0))
+            goto free_return;
+          err = expand_bkref_cache (mctx, &next_nodes, str_idx,
+                                    subexp_num, type);
+          if (BE (err != REG_NOERROR, 0))
+            goto free_return;
+        }
+      context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
+      cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
+      if (BE (cur_state == NULL && err != REG_NOERROR, 0))
+        goto free_return;
+      mctx->state_log[str_idx] = cur_state;
+      null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
+    }
+  re_node_set_free (&next_nodes);
+  cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
+               : &mctx->state_log[last_str]->nodes);
+  path->next_idx = str_idx;
+
+  /* Fix MCTX.  */
+  mctx->state_log = backup_state_log;
+  mctx->input.cur_idx = backup_cur_idx;
+
+  /* Then check the current node set has the node LAST_NODE.  */
+  if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
+    return REG_NOERROR;
+
+  return REG_NOMATCH;
+
+  /* Error exits.  MCTX must not be left pointing at PATH->array, which
+     belongs to PATH and not to the match context.  */
+ free_return:
+  re_node_set_free (&next_nodes);
+ restore_return:
+  mctx->state_log = backup_state_log;
+  mctx->input.cur_idx = backup_cur_idx;
+  return err;
+}
+
+/* Helper functions for check_arrival. */
+
+/* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
+ to NEXT_NODES.
+ With RE_ENABLE_I18N, a node that accepts more than one byte also has its
+ successor merged into MCTX->state_log at STR_IDX plus the accepted length.
+ Returns REG_NOERROR, REG_ESPACE if an insertion fails, or the error
+ reported by a helper.
+ TODO: This function is similar to the functions transit_state*(),
+ however this function has many additional works.
+ Can't we unify them? */
+
+static reg_errcode_t
+internal_function
+check_arrival_add_next_nodes (re_match_context_t *mctx, int str_idx,
+ re_node_set *cur_nodes, re_node_set *next_nodes)
+{
+ const re_dfa_t *const dfa = mctx->dfa;
+ int result;
+ int cur_idx;
+ reg_errcode_t err = REG_NOERROR;
+ re_node_set union_set; /* scratch set, emptied and reused per multi-byte node */
+ re_node_set_init_empty (&union_set);
+ for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
+ {
+ int naccepted = 0;
+ int cur_node = cur_nodes->elems[cur_idx];
+#ifdef DEBUG
+ re_token_type_t type = dfa->nodes[cur_node].type;
+ assert (!IS_EPSILON_NODE (type));
+#endif
+#ifdef RE_ENABLE_I18N
+ /* If the node may accept `multi byte'. */
+ if (dfa->nodes[cur_node].accept_mb)
+ {
+ naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
+ str_idx);
+ if (naccepted > 1)
+ {
+ re_dfastate_t *dest_state;
+ int next_node = dfa->nexts[cur_node];
+ int next_idx = str_idx + naccepted;
+ dest_state = mctx->state_log[next_idx];
+ re_node_set_empty (&union_set);
+ if (dest_state)
+ {
+ err = re_node_set_merge (&union_set, &dest_state->nodes);
+ if (BE (err != REG_NOERROR, 0))
+ {
+ re_node_set_free (&union_set);
+ return err;
+ }
+ }
+ result = re_node_set_insert (&union_set, next_node);
+ if (BE (result < 0, 0))
+ {
+ re_node_set_free (&union_set);
+ return REG_ESPACE;
+ }
+ mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
+ &union_set);
+ if (BE (mctx->state_log[next_idx] == NULL
+ && err != REG_NOERROR, 0))
+ {
+ re_node_set_free (&union_set);
+ return err;
+ }
+ }
+ }
+#endif /* RE_ENABLE_I18N */
+ if (naccepted /* nonzero count from the multi-byte check above */
+ || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
+ {
+ result = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
+ if (BE (result < 0, 0))
+ {
+ re_node_set_free (&union_set);
+ return REG_ESPACE;
+ }
+ }
+ }
+ re_node_set_free (&union_set);
+ return REG_NOERROR;
+}
+
+/* For all the nodes in CUR_NODES, add the epsilon closures of them to
+ CUR_NODES, however exclude the nodes which are:
+ - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
+ - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
+ There is no FL_OPEN parameter here; the boundary is selected by TYPE.
+ A closure that contains a node of type TYPE with index EX_SUBEXP is
+ rebuilt through check_arrival_expand_ecl_sub instead of being merged
+ whole.
+ On success CUR_NODES is freed and replaced by the new set. On failure
+ the new set is freed, CUR_NODES is left as it was, and the error is
+ returned.
+*/
+
+static reg_errcode_t
+internal_function
+check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
+ int ex_subexp, int type)
+{
+ reg_errcode_t err;
+ int idx, outside_node;
+ re_node_set new_nodes;
+#ifdef DEBUG
+ assert (cur_nodes->nelem);
+#endif
+ err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ /* Create a new node set NEW_NODES with the nodes which are epsilon
+ closures of the node in CUR_NODES. */
+
+ for (idx = 0; idx < cur_nodes->nelem; ++idx)
+ {
+ int cur_node = cur_nodes->elems[idx];
+ const re_node_set *eclosure = dfa->eclosures + cur_node;
+ outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
+ if (outside_node == -1)
+ {
+ /* There are no problematic nodes, just merge them. */
+ err = re_node_set_merge (&new_nodes, eclosure);
+ if (BE (err != REG_NOERROR, 0))
+ {
+ re_node_set_free (&new_nodes);
+ return err;
+ }
+ }
+ else
+ {
+ /* There are problematic nodes, re-calculate incrementally. */
+ err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
+ ex_subexp, type);
+ if (BE (err != REG_NOERROR, 0))
+ {
+ re_node_set_free (&new_nodes);
+ return err;
+ }
+ }
+ }
+ re_node_set_free (cur_nodes);
+ *cur_nodes = new_nodes; /* the caller's set now owns the new storage */
+ return REG_NOERROR;
+}
+
+/* Helper function for check_arrival_expand_ecl.
+   Follow the epsilon transitions starting at TARGET and insert the nodes
+   met on the way into DST_NODES.  The walk along a branch ends at a node
+   already in DST_NODES, at a node without epsilon destinations, or at a
+   node of type TYPE with index EX_SUBEXP; the latter is itself inserted
+   only when TYPE is OP_CLOSE_SUBEXP.  When a node has two epsilon
+   destinations the second one is explored recursively.
+   Returns REG_NOERROR, REG_ESPACE if an insertion fails, or the error of
+   a recursive call.  */
+
+static reg_errcode_t
+internal_function
+check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
+                              int target, int ex_subexp, int type)
+{
+  int node = target;
+
+  while (!re_node_set_contains (dst_nodes, node))
+    {
+      const re_token_t *tok = dfa->nodes + node;
+      const int at_boundary = (tok->type == type
+                               && tok->opr.idx == ex_subexp);
+
+      /* Ordinary nodes are always kept; the boundary node is kept only
+         when it is a closing parenthesis.  */
+      if (!at_boundary || type == OP_CLOSE_SUBEXP)
+        {
+          int inserted = re_node_set_insert (dst_nodes, node);
+          if (BE (inserted == -1, 0))
+            return REG_ESPACE;
+        }
+      if (at_boundary)
+        return REG_NOERROR;
+
+      if (dfa->edests[node].nelem == 0)
+        return REG_NOERROR;
+
+      if (dfa->edests[node].nelem == 2)
+        {
+          int sub_err
+            = check_arrival_expand_ecl_sub (dfa, dst_nodes,
+                                            dfa->edests[node].elems[1],
+                                            ex_subexp, type);
+          if (BE (sub_err != REG_NOERROR, 0))
+            return sub_err;
+        }
+      node = dfa->edests[node].elems[0];
+    }
+  return REG_NOERROR;
+}
+
+
+/* For all the back references in the current state, calculate the
+ destination of the back references by the appropriate entry
+ in MCTX->BKREF_ENTS.
+ Only the cache entries found for CUR_STR whose node is in CUR_NODES are
+ used. A zero-length entry merges the expanded closure of the node's
+ first epsilon destination into CUR_NODES and rescans the entries from the
+ start; any other entry adds the node's successor to MCTX->state_log at
+ the index where the back reference ends.
+ Returns REG_NOERROR, or the first error found. */
+
+static reg_errcode_t
+internal_function
+expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
+ int cur_str, int subexp_num, int type)
+{
+ const re_dfa_t *const dfa = mctx->dfa;
+ reg_errcode_t err;
+ int cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
+ struct re_backref_cache_entry *ent;
+
+ if (cache_idx_start == -1)
+ return REG_NOERROR;
+ /* The scan restarts from here whenever CUR_NODES has grown. */
+ restart:
+ ent = mctx->bkref_ents + cache_idx_start;
+ do
+ {
+ int to_idx, next_node;
+
+ /* Is this entry ENT is appropriate? */
+ if (!re_node_set_contains (cur_nodes, ent->node))
+ continue; /* No. */
+
+ to_idx = cur_str + ent->subexp_to - ent->subexp_from;
+ /* Calculate the destination of the back reference, and append it
+ to MCTX->STATE_LOG. */
+ if (to_idx == cur_str)
+ {
+ /* The backreference did epsilon transit, we must re-check all the
+ node in the current state. */
+ re_node_set new_dests;
+ reg_errcode_t err2, err3;
+ next_node = dfa->edests[ent->node].elems[0];
+ if (re_node_set_contains (cur_nodes, next_node))
+ continue;
+ err = re_node_set_init_1 (&new_dests, next_node);
+ err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
+ err3 = re_node_set_merge (cur_nodes, &new_dests);
+ re_node_set_free (&new_dests);
+ if (BE (err != REG_NOERROR || err2 != REG_NOERROR
+ || err3 != REG_NOERROR, 0))
+ {
+ err = (err != REG_NOERROR ? err
+ : (err2 != REG_NOERROR ? err2 : err3)); /* report the earliest failure */
+ return err;
+ }
+ /* TODO: It is still inefficient... */
+ goto restart;
+ }
+ else
+ {
+ re_node_set union_set;
+ next_node = dfa->nexts[ent->node];
+ if (mctx->state_log[to_idx])
+ {
+ int ret;
+ if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
+ next_node))
+ continue;
+ err = re_node_set_init_copy (&union_set,
+ &mctx->state_log[to_idx]->nodes);
+ ret = re_node_set_insert (&union_set, next_node);
+ if (BE (err != REG_NOERROR || ret < 0, 0))
+ {
+ re_node_set_free (&union_set);
+ err = err != REG_NOERROR ? err : REG_ESPACE;
+ return err;
+ }
+ }
+ else
+ {
+ err = re_node_set_init_1 (&union_set, next_node);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ }
+ mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
+ re_node_set_free (&union_set);
+ if (BE (mctx->state_log[to_idx] == NULL
+ && err != REG_NOERROR, 0))
+ return err;
+ }
+ }
+ while (ent++->more); /* `continue' above also lands on this test */
+ return REG_NOERROR;
+}
+
+/* Build transition table for the state.
+ Return 1 if succeeded, otherwise return 0. */
+
+static int
+internal_function
+build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
+{
+ reg_errcode_t err;
+ int i, j, ch, need_word_trtable = 0;
+ bitset_word_t elem, mask;
+ bool dests_node_malloced = false;
+ bool dest_states_malloced = false;
+ int ndests; /* Number of the destination states from `state'. */
+ re_dfastate_t **trtable;
+ re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
+ re_node_set follows, *dests_node;
+ bitset_t *dests_ch;
+ bitset_t acceptable;
+
+ struct dests_alloc
+ {
+ re_node_set dests_node[SBC_MAX];
+ bitset_t dests_ch[SBC_MAX];
+ } *dests_alloc;
+
+ /* We build DFA states which corresponds to the destination nodes
+ from `state'. `dests_node[i]' represents the nodes which i-th
+ destination state contains, and `dests_ch[i]' represents the
+ characters which i-th destination state accepts. */
+ if (__libc_use_alloca (sizeof (struct dests_alloc)))
+ dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
+ else
+ {
+ dests_alloc = re_malloc (struct dests_alloc, 1);
+ if (BE (dests_alloc == NULL, 0))
+ return 0;
+ dests_node_malloced = true;
+ }
+ dests_node = dests_alloc->dests_node;
+ dests_ch = dests_alloc->dests_ch;
+
+ /* Initialize transition table. */
+ state->word_trtable = state->trtable = NULL;
+
+ /* At first, group all nodes belonging to `state' into several
+ destinations. */
+ ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
+ if (BE (ndests <= 0, 0))
+ {
+ if (dests_node_malloced)
+ free (dests_alloc);
+ /* Return 0 in case of an error, 1 otherwise. */
+ if (ndests == 0)
+ {
+ state->trtable = (re_dfastate_t **)
+ calloc (sizeof (re_dfastate_t *), SBC_MAX);
+ return 1;
+ }
+ return 0;
+ }
+
+ err = re_node_set_alloc (&follows, ndests + 1);
+ if (BE (err != REG_NOERROR, 0))
+ goto out_free;
+
+ if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
+ + ndests * 3 * sizeof (re_dfastate_t *)))
+ dest_states = (re_dfastate_t **)
+ alloca (ndests * 3 * sizeof (re_dfastate_t *));
+ else
+ {
+ dest_states = (re_dfastate_t **)
+ malloc (ndests * 3 * sizeof (re_dfastate_t *));
+ if (BE (dest_states == NULL, 0))
+ {
+out_free:
+ if (dest_states_malloced)
+ free (dest_states);
+ re_node_set_free (&follows);
+ for (i = 0; i < ndests; ++i)
+ re_node_set_free (dests_node + i);
+ if (dests_node_malloced)
+ free (dests_alloc);
+ return 0;
+ }
+ dest_states_malloced = true;
+ }
+ dest_states_word = dest_states + ndests;
+ dest_states_nl = dest_states_word + ndests;
+ bitset_empty (acceptable);
+
+ /* Then build the states for all destinations. */
+ for (i = 0; i < ndests; ++i)
+ {
+ int next_node;
+ re_node_set_empty (&follows);
+ /* Merge the follows of this destination states. */
+ for (j = 0; j < dests_node[i].nelem; ++j)
+ {
+ next_node = dfa->nexts[dests_node[i].elems[j]];
+ if (next_node != -1)
+ {
+ err = re_node_set_merge (&follows, dfa->eclosures + next_node);
+ if (BE (err != REG_NOERROR, 0))
+ goto out_free;
+ }
+ }
+ dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
+ if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
+ goto out_free;
+ /* If the new state has context constraint,
+ build appropriate states for these contexts. */
+ if (dest_states[i]->has_constraint)
+ {
+ dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
+ CONTEXT_WORD);
+ if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
+ goto out_free;
+
+ if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
+ need_word_trtable = 1;
+
+ dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
+ CONTEXT_NEWLINE);
+ if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
+ goto out_free;
+ }
+ else
+ {
+ dest_states_word[i] = dest_states[i];
+ dest_states_nl[i] = dest_states[i];
+ }
+ bitset_merge (acceptable, dests_ch[i]);
+ }
+
+ if (!BE (need_word_trtable, 0))
+ {
+ /* We don't care about whether the following character is a word
+ character, or we are in a single-byte character set so we can
+ discern by looking at the character code: allocate a
+ 256-entry transition table. */
+ trtable = state->trtable =
+ (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
+ if (BE (trtable == NULL, 0))
+ goto out_free;
+
+ /* For all characters ch...: */
+ for (i = 0; i < BITSET_WORDS; ++i)
+ for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
+ elem;
+ mask <<= 1, elem >>= 1, ++ch)
+ if (BE (elem & 1, 0))
+ {
+ /* There must be exactly one destination which accepts
+ character ch. See group_nodes_into_DFAstates. */
+ for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
+ ;
+
+ /* j-th destination accepts the word character ch. */
+ if (dfa->word_char[i] & mask)
+ trtable[ch] = dest_states_word[j];
+ else
+ trtable[ch] = dest_states[j];
+ }
+ }
+ else
+ {
+ /* We care about whether the following character is a word
+ character, and we are in a multi-byte character set: discern
+ by looking at the character code: build two 256-entry
+ transition tables, one starting at trtable[0] and one
+ starting at trtable[SBC_MAX]. */
+ trtable = state->word_trtable =
+ (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
+ if (BE (trtable == NULL, 0))
+ goto out_free;
+
+ /* For all characters ch...: */
+ for (i = 0; i < BITSET_WORDS; ++i)
+ for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
+ elem;
+ mask <<= 1, elem >>= 1, ++ch)
+ if (BE (elem & 1, 0))
+ {
+ /* There must be exactly one destination which accepts
+ character ch. See group_nodes_into_DFAstates. */
+ for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
+ ;
+
+ /* j-th destination accepts the word character ch. */
+ trtable[ch] = dest_states[j];
+ trtable[ch + SBC_MAX] = dest_states_word[j];
+ }
+ }
+
+ /* new line */
+ if (bitset_contain (acceptable, NEWLINE_CHAR))
+ {
+ /* The current state accepts newline character. */
+ for (j = 0; j < ndests; ++j)
+ if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
+ {
+ /* k-th destination accepts newline character. */
+ trtable[NEWLINE_CHAR] = dest_states_nl[j];
+ if (need_word_trtable)
+ trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
+ /* There must be only one destination which accepts
+ newline. See group_nodes_into_DFAstates. */
+ break;
+ }
+ }
+
+ if (dest_states_malloced)
+ free (dest_states);
+
+ re_node_set_free (&follows);
+ for (i = 0; i < ndests; ++i)
+ re_node_set_free (dests_node + i);
+
+ if (dests_node_malloced)
+ free (dests_alloc);
+
+ return 1;
+}
+
+/* Group all nodes belonging to STATE into several destinations.
+ Then for all destinations, set the nodes belonging to the destination
+ to DESTS_NODE[i] and set the characters accepted by the destination
+ to DESTS_CH[i]. This function returns the number of destinations
+ (possibly 0), or -1 if a node-set operation fails; in that case every
+ node set already stored in DESTS_NODE has been freed before returning.
+ Only CHARACTER, SIMPLE_BRACKET, OP_PERIOD and (with RE_ENABLE_I18N)
+ OP_UTF8_PERIOD nodes contribute; nodes of any other type are skipped.
+ DESTS_NODE and DESTS_CH are arrays supplied by the caller. */
+
+static int
+internal_function
+group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
+ re_node_set *dests_node, bitset_t *dests_ch)
+{
+ reg_errcode_t err;
+ int result; /* Status of re_node_set_insert; negative means failure. */
+ int i, j, k;
+ int ndests; /* Number of the destinations from `state'. */
+ bitset_t accepts; /* Characters a node can accept. */
+ const re_node_set *cur_nodes = &state->nodes;
+ bitset_empty (accepts);
+ ndests = 0;
+
+ /* For all the nodes belonging to `state', */
+ for (i = 0; i < cur_nodes->nelem; ++i)
+ {
+ re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
+ re_token_type_t type = node->type;
+ unsigned int constraint = node->constraint;
+
+ /* Enumerate all single byte character this node can accept. */
+ if (type == CHARACTER)
+ bitset_set (accepts, node->opr.c);
+ else if (type == SIMPLE_BRACKET)
+ {
+ bitset_merge (accepts, node->opr.sbcset);
+ }
+ else if (type == OP_PERIOD)
+ {
+#ifdef RE_ENABLE_I18N
+ if (dfa->mb_cur_max > 1)
+ bitset_merge (accepts, dfa->sb_char);
+ else
+#endif
+ bitset_set_all (accepts);
+ if (!(dfa->syntax & RE_DOT_NEWLINE))
+ bitset_clear (accepts, '\n');
+ if (dfa->syntax & RE_DOT_NOT_NULL)
+ bitset_clear (accepts, '\0');
+ }
+#ifdef RE_ENABLE_I18N
+ else if (type == OP_UTF8_PERIOD)
+ {
+ memset (accepts, '\xff', sizeof (bitset_t) / 2); /* Sets the first half of the bitset's bytes; presumably the codes 0x00-0x7f -- TODO confirm against bitset_t's layout. */
+ if (!(dfa->syntax & RE_DOT_NEWLINE))
+ bitset_clear (accepts, '\n');
+ if (dfa->syntax & RE_DOT_NOT_NULL)
+ bitset_clear (accepts, '\0');
+ }
+#endif
+ else
+ continue; /* Any other node type contributes no characters. */
+
+ /* Check the `accepts' and sift out the characters which do not
+ match in the node's context. */
+ if (constraint)
+ {
+ if (constraint & NEXT_NEWLINE_CONSTRAINT)
+ {
+ bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
+ bitset_empty (accepts);
+ if (accepts_newline)
+ bitset_set (accepts, NEWLINE_CHAR);
+ else
+ continue;
+ }
+ if (constraint & NEXT_ENDBUF_CONSTRAINT) /* Such a node is dropped: `accepts' is emptied and no destination is made for it. */
+ {
+ bitset_empty (accepts);
+ continue;
+ }
+
+ if (constraint & NEXT_WORD_CONSTRAINT)
+ {
+ bitset_word_t any_set = 0; /* Stays 0 iff `accepts' becomes empty after masking. */
+ if (type == CHARACTER && !node->word_char)
+ {
+ bitset_empty (accepts);
+ continue;
+ }
+#ifdef RE_ENABLE_I18N
+ if (dfa->mb_cur_max > 1)
+ for (j = 0; j < BITSET_WORDS; ++j)
+ any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
+ else
+#endif
+ for (j = 0; j < BITSET_WORDS; ++j)
+ any_set |= (accepts[j] &= dfa->word_char[j]);
+ if (!any_set)
+ continue;
+ }
+ if (constraint & NEXT_NOTWORD_CONSTRAINT)
+ {
+ bitset_word_t any_set = 0; /* Stays 0 iff `accepts' becomes empty after masking. */
+ if (type == CHARACTER && node->word_char)
+ {
+ bitset_empty (accepts);
+ continue;
+ }
+#ifdef RE_ENABLE_I18N
+ if (dfa->mb_cur_max > 1)
+ for (j = 0; j < BITSET_WORDS; ++j)
+ any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
+ else
+#endif
+ for (j = 0; j < BITSET_WORDS; ++j)
+ any_set |= (accepts[j] &= ~dfa->word_char[j]);
+ if (!any_set)
+ continue;
+ }
+ }
+
+ /* Then divide `accepts' into DFA states, or create a new
+ state. Above, we make sure that accepts is not empty. */
+ for (j = 0; j < ndests; ++j) /* Note: ndests can grow inside this loop when a destination is split. */
+ {
+ bitset_t intersec; /* Intersection sets, see below. */
+ bitset_t remains; /* Characters of destination j that `accepts' lacks. */
+ /* Flags, see below. */
+ bitset_word_t has_intersec, not_subset, not_consumed;
+
+ /* Optimization, skip if this state doesn't accept the character. */
+ if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
+ continue;
+
+ /* Enumerate the intersection set of this state and `accepts'. */
+ has_intersec = 0;
+ for (k = 0; k < BITSET_WORDS; ++k)
+ has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
+ /* And skip if the intersection set is empty. */
+ if (!has_intersec)
+ continue;
+
+ /* Then check if this state is a subset of `accepts'. */
+ not_subset = not_consumed = 0;
+ for (k = 0; k < BITSET_WORDS; ++k)
+ {
+ not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
+ not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k]; /* dests_ch[j] still holds its old value here; it is narrowed below. */
+ }
+
+ /* If this state isn't a subset of `accepts', create a
+ new group state, which has the `remains'. */
+ if (not_subset)
+ {
+ bitset_copy (dests_ch[ndests], remains);
+ bitset_copy (dests_ch[j], intersec);
+ err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
+ if (BE (err != REG_NOERROR, 0))
+ goto error_return;
+ ++ndests;
+ }
+
+ /* Put the position in the current group. */
+ result = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
+ if (BE (result < 0, 0))
+ goto error_return;
+
+ /* If all characters are consumed, go to next node. */
+ if (!not_consumed)
+ break;
+ }
+ /* Some characters remain, create a new group. */
+ if (j == ndests) /* True only when the loop above finished without `break'. */
+ {
+ bitset_copy (dests_ch[ndests], accepts);
+ err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
+ if (BE (err != REG_NOERROR, 0))
+ goto error_return;
+ ++ndests;
+ bitset_empty (accepts);
+ }
+ }
+ return ndests;
+ error_return: /* Release the ndests node sets counted so far, then report failure. */
+ for (j = 0; j < ndests; ++j)
+ re_node_set_free (dests_node + j);
+ return -1;
+}
+
+#ifdef RE_ENABLE_I18N
+/* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
+ Return the number of bytes the node accepts at index STR_IDX of INPUT,
+ or 0 if it accepts nothing there.
+
+ This function handles the nodes which can accept one whole character
+ or one collating element, like '.' and '[a-z]', as opposed to the
+ other nodes, which can only accept one byte. Input that is only one
+ byte long at STR_IDX is never accepted here: the function returns 0
+ for it. Node types other than OP_UTF8_PERIOD, OP_PERIOD and
+ COMPLEX_BRACKET also yield 0. */
+
+static int
+internal_function
+check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
+ const re_string_t *input, int str_idx)
+{
+ const re_token_t *node = dfa->nodes + node_idx;
+ int char_len, elem_len;
+ int i;
+
+ if (BE (node->type == OP_UTF8_PERIOD, 0))
+ {
+ unsigned char c = re_string_byte_at (input, str_idx), d; /* c: first byte; d: a following byte. */
+ if (BE (c < 0xc2, 1)) /* Bytes below 0xc2 never start a multi-byte sequence accepted here. */
+ return 0;
+
+ if (str_idx + 2 > input->len) /* At least a second byte is required. */
+ return 0;
+
+ d = re_string_byte_at (input, str_idx + 1);
+ if (c < 0xe0)
+ return (d < 0x80 || d > 0xbf) ? 0 : 2;
+ else if (c < 0xf0)
+ {
+ char_len = 3;
+ if (c == 0xe0 && d < 0xa0) /* Second byte too small for this first byte -- presumably an overlong encoding. */
+ return 0;
+ }
+ else if (c < 0xf8)
+ {
+ char_len = 4;
+ if (c == 0xf0 && d < 0x90) /* Same kind of check for the 4-byte form. */
+ return 0;
+ }
+ else if (c < 0xfc)
+ {
+ char_len = 5;
+ if (c == 0xf8 && d < 0x88) /* Same kind of check for the 5-byte form. */
+ return 0;
+ }
+ else if (c < 0xfe)
+ {
+ char_len = 6;
+ if (c == 0xfc && d < 0x84) /* Same kind of check for the 6-byte form. */
+ return 0;
+ }
+ else
+ return 0;
+
+ if (str_idx + char_len > input->len) /* The sequence would run past the end of the input. */
+ return 0;
+
+ for (i = 1; i < char_len; ++i) /* Every byte after the first must lie in 0x80..0xbf. */
+ {
+ d = re_string_byte_at (input, str_idx + i);
+ if (d < 0x80 || d > 0xbf)
+ return 0;
+ }
+ return char_len;
+ }
+
+ char_len = re_string_char_size_at (input, str_idx);
+ if (node->type == OP_PERIOD)
+ {
+ if (char_len <= 1)
+ return 0;
+ /* FIXME: I don't think this if is needed, as both '\n'
+ and '\0' are char_len == 1. */
+ /* '.' accepts any one character except the following two cases. */
+ if ((!(dfa->syntax & RE_DOT_NEWLINE) &&
+ re_string_byte_at (input, str_idx) == '\n') ||
+ ((dfa->syntax & RE_DOT_NOT_NULL) &&
+ re_string_byte_at (input, str_idx) == '\0'))
+ return 0;
+ return char_len;
+ }
+
+ elem_len = re_string_elem_size_at (input, str_idx);
+ if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
+ return 0;
+
+ if (node->type == COMPLEX_BRACKET)
+ {
+ const re_charset_t *cset = node->opr.mbcset;
+# ifdef _LIBC
+ const unsigned char *pin
+ = ((const unsigned char *) re_string_get_buffer (input) + str_idx);
+ int j;
+ uint32_t nrules;
+# endif /* _LIBC */
+ int match_len = 0; /* Bytes matched by the bracket's members; 0 means no member matched. */
+ wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
+ ? re_string_wchar_at (input, str_idx) : 0); /* Only fetched when some test below needs it. */
+
+ /* match with multibyte character? */
+ for (i = 0; i < cset->nmbchars; ++i)
+ if (wc == cset->mbchars[i])
+ {
+ match_len = char_len;
+ goto check_node_accept_bytes_match;
+ }
+ /* match with character_class? */
+ for (i = 0; i < cset->nchar_classes; ++i)
+ {
+ wctype_t wt = cset->char_classes[i];
+ if (__iswctype (wc, wt))
+ {
+ match_len = char_len;
+ goto check_node_accept_bytes_match;
+ }
+ }
+
+# ifdef _LIBC
+ nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+ if (nrules != 0)
+ {
+ unsigned int in_collseq = 0;
+ const int32_t *table, *indirect;
+ const unsigned char *weights, *extra;
+ const char *collseqwc;
+ /* This #include defines a local function! */
+# include <locale/weight.h>
+
+ /* match with collating_symbol? */
+ if (cset->ncoll_syms)
+ extra = (const unsigned char *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
+ for (i = 0; i < cset->ncoll_syms; ++i)
+ {
+ const unsigned char *coll_sym = extra + cset->coll_syms[i];
+ /* Compare the length of input collating element and
+ the length of current collating element. */
+ if (*coll_sym != elem_len)
+ continue;
+ /* Compare each bytes. */
+ for (j = 0; j < *coll_sym; j++)
+ if (pin[j] != coll_sym[1 + j])
+ break;
+ if (j == *coll_sym)
+ {
+ /* Match if every bytes is equal. */
+ match_len = j;
+ goto check_node_accept_bytes_match;
+ }
+ }
+
+ if (cset->nranges)
+ {
+ if (elem_len <= char_len)
+ {
+ collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
+ in_collseq = __collseq_table_lookup (collseqwc, wc);
+ }
+ else
+ in_collseq = find_collation_sequence_value (pin, elem_len);
+ }
+ /* match with range expression? */
+ for (i = 0; i < cset->nranges; ++i)
+ if (cset->range_starts[i] <= in_collseq
+ && in_collseq <= cset->range_ends[i])
+ {
+ match_len = elem_len;
+ goto check_node_accept_bytes_match;
+ }
+
+ /* match with equivalence_class? */
+ if (cset->nequiv_classes)
+ {
+ const unsigned char *cp = pin;
+ table = (const int32_t *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
+ weights = (const unsigned char *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
+ extra = (const unsigned char *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
+ indirect = (const int32_t *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
+ int32_t idx = findidx (&cp);
+ if (idx > 0)
+ for (i = 0; i < cset->nequiv_classes; ++i)
+ {
+ int32_t equiv_class_idx = cset->equiv_classes[i];
+ size_t weight_len = weights[idx & 0xffffff];
+ if (weight_len == weights[equiv_class_idx & 0xffffff]
+ && (idx >> 24) == (equiv_class_idx >> 24))
+ {
+ int cnt = 0;
+
+ idx &= 0xffffff;
+ equiv_class_idx &= 0xffffff;
+
+ while (cnt <= weight_len
+ && (weights[equiv_class_idx + 1 + cnt]
+ == weights[idx + 1 + cnt]))
+ ++cnt;
+ if (cnt > weight_len) /* The whole weight sequence compared equal. */
+ {
+ match_len = elem_len;
+ goto check_node_accept_bytes_match;
+ }
+ }
+ }
+ }
+ }
+ else
+# endif /* _LIBC */
+ {
+ /* match with range expression? */
+#if __GNUC__ >= 2
+ wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
+#else
+ wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
+ cmp_buf[2] = wc;
+#endif
+ for (i = 0; i < cset->nranges; ++i) /* cmp_buf holds three one-character strings: range start, wc, range end. */
+ {
+ cmp_buf[0] = cset->range_starts[i];
+ cmp_buf[4] = cset->range_ends[i];
+ if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
+ && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
+ {
+ match_len = char_len;
+ goto check_node_accept_bytes_match;
+ }
+ }
+ }
+ check_node_accept_bytes_match: /* Reached both on a match (via goto) and when no member matched (fall through). */
+ if (!cset->non_match)
+ return match_len;
+ else
+ {
+ if (match_len > 0)
+ return 0;
+ else
+ return (elem_len > char_len) ? elem_len : char_len; /* Negated bracket: consume the longer of the element and the character. */
+ }
+ }
+ return 0;
+}
+
+# ifdef _LIBC
+static unsigned int
+internal_function
+find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
+{ /* Return the collation sequence value for the MBS_LEN bytes at MBS,
+ or UINT_MAX when no value can be found for them. */
+ uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+ if (nrules == 0)
+ {
+ if (mbs_len == 1)
+ {
+ /* No valid character. Match it as a single byte character. */
+ const unsigned char *collseq = (const unsigned char *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
+ return collseq[mbs[0]];
+ }
+ return UINT_MAX;
+ }
+ else
+ {
+ int32_t idx;
+ const unsigned char *extra = (const unsigned char *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
+ int32_t extrasize = (const unsigned char *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra; /* NOTE(review): assumes the next locale item starts right after this table -- confirm. */
+
+ for (idx = 0; idx < extrasize;) /* idx is advanced by hand past each field of an entry. */
+ {
+ int mbs_cnt, found = 0;
+ int32_t elem_mbs_len;
+ /* Skip the name of collating element name. */
+ idx = idx + extra[idx] + 1;
+ elem_mbs_len = extra[idx++];
+ if (mbs_len == elem_mbs_len)
+ {
+ for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
+ if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
+ break;
+ if (mbs_cnt == elem_mbs_len)
+ /* Found the entry. */
+ found = 1;
+ }
+ /* Skip the byte sequence of the collating element. */
+ idx += elem_mbs_len;
+ /* Adjust for the alignment. */
+ idx = (idx + 3) & ~3;
+ /* Skip the collation sequence value. */
+ idx += sizeof (uint32_t);
+ /* Skip the wide char sequence of the collating element. */
+ idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
+ /* If we found the entry, return the sequence value. */
+ if (found)
+ return *(uint32_t *) (extra + idx);
+ /* Skip the collation sequence value. */
+ idx += sizeof (uint32_t);
+ }
+ return UINT_MAX;
+ }
+}
+# endif /* _LIBC */
+#endif /* RE_ENABLE_I18N */
+
+/* Decide whether NODE accepts the single byte found at index IDX of the
+   input held in MCTX.  Return 1 if the byte is accepted and any context
+   constraint attached to NODE is satisfied at IDX, 0 otherwise.
+   Only CHARACTER, SIMPLE_BRACKET, OP_PERIOD and (with RE_ENABLE_I18N)
+   OP_UTF8_PERIOD nodes can accept a byte; every other type yields 0.  */
+
+static int
+internal_function
+check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
+                   int idx)
+{
+  const unsigned char cur = re_string_byte_at (&mctx->input, idx);
+  unsigned int context;
+
+  switch (node->type)
+    {
+    case SIMPLE_BRACKET:
+      if (!bitset_contain (node->opr.sbcset, cur))
+        return 0;
+      break;
+
+    case CHARACTER:
+      if (cur != node->opr.c)
+        return 0;
+      break;
+
+#ifdef RE_ENABLE_I18N
+    case OP_UTF8_PERIOD:
+      /* Bytes with the high bit set are never accepted one at a time.  */
+      if (cur >= 0x80)
+        return 0;
+      /* FALLTHROUGH */
+#endif
+    case OP_PERIOD:
+      /* '.' refuses a newline unless RE_DOT_NEWLINE is set ...  */
+      if (cur == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
+        return 0;
+      /* ... and refuses NUL when RE_DOT_NOT_NULL is set.  */
+      if (cur == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL))
+        return 0;
+      break;
+
+    default:
+      return 0;
+    }
+
+  /* A node without constraints accepts as soon as the byte matches.  */
+  if (!node->constraint)
+    return 1;
+
+  /* Otherwise the context at IDX must satisfy the node's constraints.  */
+  context = re_string_context_at (&mctx->input, idx, mctx->eflags);
+  return NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context) ? 0 : 1;
+}
+
+/* Enlarge the buffers of MCTX once they have run out: request twice the
+   current length for the input string buffers, resize the state log (when
+   there is one) accordingly, and rebuild the buffer contents.
+   Return REG_NOERROR on success, REG_ESPACE if the state log cannot be
+   resized, or the error reported by the helper that failed.  */
+
+static reg_errcode_t
+internal_function
+extend_buffers (re_match_context_t *mctx)
+{
+  re_string_t *const input = &mctx->input;
+  reg_errcode_t status;
+
+  /* Ask for double the present length of the string buffers.  */
+  status = re_string_realloc_buffers (input, input->bufs_len * 2);
+  if (BE (status != REG_NOERROR, 0))
+    return status;
+
+  if (mctx->state_log != NULL)
+    {
+      /* Resize state_log to bufs_len + 1 entries (bufs_len is presumably
+         the new, doubled length at this point).  */
+      /* XXX We have no indication of the size of this buffer.  If this
+         allocation fails we have no indication that the state_log array
+         does not have the right size.  */
+      re_dfastate_t **grown = re_realloc (mctx->state_log, re_dfastate_t *,
+                                          input->bufs_len + 1);
+      if (BE (grown == NULL, 0))
+        return REG_ESPACE;
+      mctx->state_log = grown;
+    }
+
+  /* Finally rebuild the contents of the buffers.  */
+  if (!input->icase)
+    {
+#ifdef RE_ENABLE_I18N
+      if (input->mb_cur_max > 1)
+        build_wcs_buffer (input);
+      else
+#endif /* RE_ENABLE_I18N */
+        {
+          if (input->trans != NULL)
+            re_string_translate_buffer (input);
+        }
+      return REG_NOERROR;
+    }
+
+  /* Case-insensitive matching works on an upper-cased buffer.  */
+#ifdef RE_ENABLE_I18N
+  if (input->mb_cur_max > 1)
+    return build_wcs_upper_buffer (input);
+#endif /* RE_ENABLE_I18N */
+  build_upper_buffer (input);
+  return REG_NOERROR;
+}
+
+
+/* Functions for matching context. */
+
+/* Prepare MCTX for matching with execution flags EFLAGS.  When N is
+   positive, room for N back-reference cache entries and N sub-match tops
+   is allocated.  Return REG_ESPACE if either allocation fails (both
+   pointers are stored in MCTX first, whatever their value), REG_NOERROR
+   otherwise.  The caller must have zeroed MCTX beforehand.  */
+
+static reg_errcode_t
+internal_function
+match_ctx_init (re_match_context_t *mctx, int eflags, int n)
+{
+  mctx->eflags = eflags;
+  mctx->match_last = -1;
+
+  if (n > 0)
+    {
+      /* Perform both allocations before testing either result.  */
+      mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
+      mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
+      if (BE (mctx->sub_tops == NULL || mctx->bkref_ents == NULL, 0))
+        return REG_ESPACE;
+    }
+  /* With N <= 0 nothing is allocated; bkref_ents, nbkref_ents and
+     nsub_tops keep the zero values the caller has already stored.  */
+
+  mctx->asub_tops = n;
+  mctx->abkref_ents = n;
+  mctx->max_mb_elem_len = 1;
+  return REG_NOERROR;
+}
+
+/* Discard everything in MCTX that depends on the current input: every
+   registered sub-match top together with its lasts and paths is freed,
+   and the back-reference cache is marked empty.  The arrays themselves
+   stay allocated.  This function must be invoked whenever the matcher
+   changes the start index of the input, or changes the input string.  */
+
+static void
+internal_function
+match_ctx_clean (re_match_context_t *mctx)
+{
+  int top_no = 0;
+
+  while (top_no < mctx->nsub_tops)
+    {
+      re_sub_match_top_t *const subtop = mctx->sub_tops[top_no++];
+      int last_no;
+
+      /* Free each last recorded for this top, then the array holding them.  */
+      for (last_no = 0; last_no < subtop->nlasts; ++last_no)
+        {
+          re_sub_match_last_t *const sublast = subtop->lasts[last_no];
+          re_free (sublast->path.array);
+          re_free (sublast);
+        }
+      re_free (subtop->lasts);
+
+      /* The top's own path is optional.  */
+      if (subtop->path != NULL)
+        {
+          re_free (subtop->path->array);
+          re_free (subtop->path);
+        }
+      free (subtop);
+    }
+
+  mctx->nbkref_ents = 0;
+  mctx->nsub_tops = 0;
+}
+
+/* Release all the memory owned by MCTX: the sub-match records, then the
+   SUB_TOPS and BKREF_ENTS arrays.  */
+
+static void
+internal_function
+match_ctx_free (re_match_context_t *mctx)
+{
+  /* The sub-match records are reached through SUB_TOPS, so they must be
+     released before that array is.  */
+  match_ctx_clean (mctx);
+  re_free (mctx->bkref_ents);
+  re_free (mctx->sub_tops);
+}
+
+/* Add a new backreference entry to MCTX: back-reference node NODE, seen at
+   string index STR_IDX, refers to the subexpression match [FROM, TO).
+   The cache is doubled in size when it is full.
+   Return REG_NOERROR on success, or REG_ESPACE if the cache cannot be
+   enlarged; in that case MCTX is left unchanged and still owns its old
+   cache, which match_ctx_free releases.
+   Note that we assume that the caller never calls this function with a
+   duplicate entry, and always calls it with a STR_IDX which isn't smaller
+   than that of any existing entry.  */
+
+static reg_errcode_t
+internal_function
+match_ctx_add_entry (re_match_context_t *mctx, int node, int str_idx, int from,
+                     int to)
+{
+  if (mctx->nbkref_ents >= mctx->abkref_ents)
+    {
+      struct re_backref_cache_entry* new_entry;
+      new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
+                              mctx->abkref_ents * 2);
+      if (BE (new_entry == NULL, 0))
+        {
+          /* A failed realloc leaves the old array allocated and valid.
+             Do not free it here: MCTX->BKREF_ENTS would be left dangling
+             and match_ctx_free would free it a second time.  */
+          return REG_ESPACE;
+        }
+      mctx->bkref_ents = new_entry;
+      /* Clear the newly gained second half of the array.  */
+      memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
+              sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
+      mctx->abkref_ents *= 2;
+    }
+
+  /* Entries sharing a STR_IDX are chained: flag the previous entry as
+     having a successor for the same index.  */
+  if (mctx->nbkref_ents > 0
+      && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
+    mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
+
+  mctx->bkref_ents[mctx->nbkref_ents].node = node;
+  mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
+  mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
+  mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
+
+  /* This is a cache that saves negative results of check_dst_limits_calc_pos.
+     If bit N is clear, means that this entry won't epsilon-transition to
+     an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression.  If
+     it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
+     such node.
+
+     A backreference does not epsilon-transition unless it is empty, so set
+     to all zeros if FROM != TO.  */
+  mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
+    = (from == to ? ~0 : 0);
+
+  /* The new entry is the last one for its STR_IDX so far.  */
+  mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
+
+  /* Remember the longest back-referenced span seen so far.  */
+  if (mctx->max_mb_elem_len < to - from)
+    mctx->max_mb_elem_len = to - from;
+  return REG_NOERROR;
+}
+
+/* Return the index of the first back-reference cache entry of MCTX whose
+   str_idx equals STR_IDX, or -1 if there is none.  The search relies on
+   MCTX->BKREF_ENTS already being sorted by the entries' str_idx.  */
+
+static int
+internal_function
+search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx)
+{
+  const int count = mctx->nbkref_ents;
+  int lo = 0;
+  int hi = count;
+
+  /* Binary search for the first entry whose str_idx is not below STR_IDX.  */
+  while (lo < hi)
+    {
+      const int mid = (lo + hi) / 2;
+      if (mctx->bkref_ents[mid].str_idx >= str_idx)
+        hi = mid;
+      else
+        lo = mid + 1;
+    }
+
+  /* LO is a hit only if it is inside the array and matches exactly.  */
+  if (lo >= count || mctx->bkref_ents[lo].str_idx != str_idx)
+    return -1;
+  return lo;
+}
+
+/* Record in MCTX that NODE, whose type is OP_OPEN_SUBEXP, matches at
+   STR_IDX.  The array of tops is doubled when it is full.  Return
+   REG_NOERROR on success, or REG_ESPACE if memory runs out.  */
+
+static reg_errcode_t
+internal_function
+match_ctx_add_subtop (re_match_context_t *mctx, int node, int str_idx)
+{
+  re_sub_match_top_t *fresh;
+
+#ifdef DEBUG
+  assert (mctx->sub_tops != NULL);
+  assert (mctx->asub_tops > 0);
+#endif
+  if (BE (mctx->nsub_tops == mctx->asub_tops, 0))
+    {
+      /* No free slot is left: double the capacity.  */
+      int doubled = mctx->asub_tops * 2;
+      re_sub_match_top_t **grown = re_realloc (mctx->sub_tops,
+                                               re_sub_match_top_t *,
+                                               doubled);
+      if (BE (grown == NULL, 0))
+        return REG_ESPACE;
+      mctx->sub_tops = grown;
+      mctx->asub_tops = doubled;
+    }
+
+  /* The slot receives the calloc result even when it is NULL; the entry
+     count is only bumped on success.  */
+  fresh = calloc (1, sizeof (re_sub_match_top_t));
+  mctx->sub_tops[mctx->nsub_tops] = fresh;
+  if (BE (fresh == NULL, 0))
+    return REG_ESPACE;
+
+  fresh->node = node;
+  fresh->str_idx = str_idx;
+  ++mctx->nsub_tops;
+  return REG_NOERROR;
+}
+
+/* Record that NODE, whose type is OP_CLOSE_SUBEXP, matches at STR_IDX,
+   its corresponding OP_OPEN_SUBEXP being described by SUBTOP.  Return the
+   newly created, zero-initialized entry, or NULL if memory runs out.  */
+
+static re_sub_match_last_t *
+internal_function
+match_ctx_add_sublast (re_sub_match_top_t *subtop, int node, int str_idx)
+{
+  re_sub_match_last_t *entry;
+
+  if (BE (subtop->nlasts == subtop->alasts, 0))
+    {
+      /* Grow to 2 * alasts + 1 slots, which also covers a capacity of 0.  */
+      int grown_size = 2 * subtop->alasts + 1;
+      re_sub_match_last_t **grown = re_realloc (subtop->lasts,
+                                                re_sub_match_last_t *,
+                                                grown_size);
+      if (BE (grown == NULL, 0))
+        return NULL;
+      subtop->lasts = grown;
+      subtop->alasts = grown_size;
+    }
+
+  entry = calloc (1, sizeof (re_sub_match_last_t));
+  if (BE (entry == NULL, 0))
+    return NULL;
+
+  entry->node = node;
+  entry->str_idx = str_idx;
+  subtop->lasts[subtop->nlasts++] = entry;
+  return entry;
+}
+
+/* Fill in the sift context SCTX: store the state arrays SIFTED_STS and
+   LIMITED_STS and the position (LAST_NODE, LAST_STR_IDX), and start with
+   an empty set of limits.  */
+
+static void
+internal_function
+sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
+               re_dfastate_t **limited_sts, int last_node, int last_str_idx)
+{
+  re_node_set_init_empty (&sctx->limits);
+  sctx->last_str_idx = last_str_idx;
+  sctx->last_node = last_node;
+  sctx->limited_states = limited_sts;
+  sctx->sifted_states = sifted_sts;
+}
diff --git a/html.c b/html.c
new file mode 100644
index 0000000..6801d37
--- /dev/null
+++ b/html.c
@@ -0,0 +1,49 @@
+/*
+* $Id: html.c 443 2006-05-30 04:37:13Z darren $
+*
+* Copyright (c) 2003, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* This module contains functions for generating tags for HTML language
+* files.
+*/
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+#include "parse.h"
+
+/*
+* FUNCTION DEFINITIONS
+*/
+
+/* Register the tag regular expressions of the HTML parser for LANGUAGE:
+ * one for named anchors and one for JavaScript function definitions.
+ */
+static void installHtmlRegex (const langType language)
+{
+#define POSSIBLE_ATTRIBUTES "([ \t]+[a-z]+=\"?[^>\"]*\"?)*"
+	/* <a ... name="value" ...>: group 1 is the leading attribute list, so
+	 * the value of the name attribute is group 2. */
+	static const char anchorPattern [] =
+		"<a"
+		POSSIBLE_ATTRIBUTES
+		"[ \t]+name=\"?([^>\"]+)\"?"
+		POSSIBLE_ATTRIBUTES
+		"[ \t]*>";
+	/* A line starting with "function", an identifier (group 1) and "(". */
+	static const char functionPattern [] =
+		"^[ \t]*function[ \t]*([A-Za-z0-9_]+)[ \t]*\\(";
+
+	/* The "i" flag presumably requests case-insensitive matching. */
+	addTagRegex (language, anchorPattern,
+		"\\2", "a,anchor,named anchors", "i");
+	addTagRegex (language, functionPattern,
+		"\\1", "f,function,JavaScript functions", NULL);
+}
+
+/* Create the parser definition structure for HTML: a regex-based parser
+ * for files with the extensions "htm" and "html", initialized by
+ * installHtmlRegex.
+ */
+extern parserDefinition* HtmlParser (void)
+{
+	static const char *const extensions [] = { "htm", "html", NULL };
+	parserDefinition *const parser = parserNew ("HTML");
+
+	parser->regex      = TRUE;
+	parser->initialize = installHtmlRegex;
+	parser->extensions = extensions;
+	return parser;
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/jscript.c b/jscript.c
new file mode 100644
index 0000000..c4e5b1a
--- /dev/null
+++ b/jscript.c
@@ -0,0 +1,1572 @@
+/*
+ * $Id: jscript.c 666 2008-05-15 17:47:31Z dfishburn $
+ *
+ * Copyright (c) 2003, Darren Hiebert
+ *
+ * This source code is released for free distribution under the terms of the
+ * GNU General Public License.
+ *
+ * This module contains functions for generating tags for JavaScript language
+ * files.
+ *
+ * This is a good reference for different forms of the function statement:
+ * http://www.permadi.com/tutorial/jsFunc/
+ * Another good reference:
+ * http://developer.mozilla.org/en/docs/Core_JavaScript_1.5_Guide
+ */
+
+/*
+ * INCLUDE FILES
+ */
+#include "general.h" /* must always come first */
+#include <ctype.h> /* to define isalpha () */
+#include <setjmp.h>
+#ifdef DEBUG
+#include <stdio.h>
+#endif
+
+#include "debug.h"
+#include "entry.h"
+#include "keyword.h"
+#include "parse.h"
+#include "read.h"
+#include "routines.h"
+#include "vstring.h"
+
+/*
+ * MACROS
+ */
+#define isType(token,t) (boolean) ((token)->type == (t))
+#define isKeyword(token,k) (boolean) ((token)->keyword == (k))
+
+/*
+ * DATA DECLARATIONS
+ */
+
+typedef enum eException { ExceptionNone, ExceptionEOF } exception_t;
+
+/*
+ * Tracks class and function names already created
+ */
+static stringList *ClassNames;
+static stringList *FunctionNames;
+
+/* Used to specify type of keyword.
+*/
+typedef enum eKeywordId {
+ KEYWORD_NONE = -1,
+ KEYWORD_function,
+ KEYWORD_capital_function,
+ KEYWORD_object,
+ KEYWORD_capital_object,
+ KEYWORD_prototype,
+ KEYWORD_var,
+ KEYWORD_new,
+ KEYWORD_this,
+ KEYWORD_for,
+ KEYWORD_while,
+ KEYWORD_do,
+ KEYWORD_if,
+ KEYWORD_else,
+ KEYWORD_switch,
+ KEYWORD_try,
+ KEYWORD_catch,
+ KEYWORD_finally
+} keywordId;
+
+/* Used to determine whether keyword is valid for the token language and
+ * what its ID is.
+ */
+typedef struct sKeywordDesc {
+ const char *name;
+ keywordId id;
+} keywordDesc;
+
+typedef enum eTokenType {
+ TOKEN_UNDEFINED,
+ TOKEN_CHARACTER,
+ TOKEN_CLOSE_PAREN,
+ TOKEN_SEMICOLON,
+ TOKEN_COLON,
+ TOKEN_COMMA,
+ TOKEN_KEYWORD,
+ TOKEN_OPEN_PAREN,
+ TOKEN_OPERATOR,
+ TOKEN_IDENTIFIER,
+ TOKEN_STRING,
+ TOKEN_PERIOD,
+ TOKEN_OPEN_CURLY,
+ TOKEN_CLOSE_CURLY,
+ TOKEN_EQUAL_SIGN,
+ TOKEN_FORWARD_SLASH,
+ TOKEN_OPEN_SQUARE,
+ TOKEN_CLOSE_SQUARE
+} tokenType;
+
+/* A lexical token together with the parsing state that travels with it. */
+typedef struct sTokenInfo {
+ tokenType type; /* kind of token; set by readToken() */
+ keywordId keyword; /* keyword ID, or KEYWORD_NONE when not a keyword */
+ vString * string; /* token text (identifier or string contents) */
+ vString * scope; /* dotted scope prefix prepended when a tag is made */
+ unsigned long lineNumber; /* source line recorded when the token was read */
+ fpos_t filePosition; /* input file position recorded with lineNumber */
+ int nestLevel; /* block depth; adjusted by parseBlock() */
+ boolean ignoreTag; /* when TRUE, no tags are emitted for this token */
+} tokenInfo;
+
+/*
+ * DATA DEFINITIONS
+ */
+
+static langType Lang_js;
+
+static jmp_buf Exception;
+
+/*
+ * Tag kinds produced by this parser. The enumerators are used as indexes
+ * into JsKinds, so both lists must be kept in the same order; initialize()
+ * asserts that the table holds exactly JSTAG_COUNT entries.
+ */
+typedef enum {
+ JSTAG_FUNCTION,
+ JSTAG_CLASS,
+ JSTAG_METHOD,
+ JSTAG_PROPERTY,
+ JSTAG_VARIABLE,
+ JSTAG_COUNT
+} jsKind;
+
+/*
+ * One entry per jsKind: enabled flag, kind letter, kind name and a longer
+ * descriptive string (presumably shown in help output -- confirm against
+ * the kindOption declaration).
+ */
+static kindOption JsKinds [] = {
+ { TRUE, 'f', "function", "functions" },
+ { TRUE, 'c', "class", "classes" },
+ { TRUE, 'm', "method", "methods" },
+ { TRUE, 'p', "property", "properties" },
+ { TRUE, 'v', "variable", "global variables" }
+};
+
+static const keywordDesc JsKeywordTable [] = {
+ /* keyword keyword ID */
+ { "function", KEYWORD_function },
+ { "Function", KEYWORD_capital_function },
+ { "object", KEYWORD_object },
+ { "Object", KEYWORD_capital_object },
+ { "prototype", KEYWORD_prototype },
+ { "var", KEYWORD_var },
+ { "new", KEYWORD_new },
+ { "this", KEYWORD_this },
+ { "for", KEYWORD_for },
+ { "while", KEYWORD_while },
+ { "do", KEYWORD_do },
+ { "if", KEYWORD_if },
+ { "else", KEYWORD_else },
+ { "switch", KEYWORD_switch },
+ { "try", KEYWORD_try },
+ { "catch", KEYWORD_catch },
+ { "finally", KEYWORD_finally }
+};
+
+/*
+ * FUNCTION DEFINITIONS
+ */
+
+/* Recursive functions */
+static void parseFunction (tokenInfo *const token);
+static boolean parseBlock (tokenInfo *const token, tokenInfo *const parent);
+static boolean parseLine (tokenInfo *const token, boolean is_inside_class);
+
+/*
+ * Tells whether "c" may be part of an identifier as far as this parser is
+ * concerned: any alphanumeric character, or one of '$', '@', '_' and '#'.
+ */
+static boolean isIdentChar (const int c)
+{
+	const int isSpecial = (c == '$' || c == '@' || c == '_' || c == '#');
+
+	return (boolean) (isalnum (c) || isSpecial);
+}
+
+/* Registers every entry of JsKeywordTable as a keyword of Lang_js. */
+static void buildJsKeywordHash (void)
+{
+	const keywordDesc *entry = JsKeywordTable;
+	const keywordDesc *const end = JsKeywordTable +
+		sizeof (JsKeywordTable) / sizeof (JsKeywordTable [0]);
+
+	for ( ; entry != end ; ++entry)
+		addKeyword (entry->name, Lang_js, (int) entry->id);
+}
+
+/*
+ * Allocates a token in its initial state: undefined type, no keyword, empty
+ * text and scope, nesting level zero, tags enabled, and positioned at the
+ * current source line / input file position.  Release it with deleteToken().
+ */
+static tokenInfo *newToken (void)
+{
+	tokenInfo *const fresh = xMalloc (1, tokenInfo);
+
+	/* Owned strings. */
+	fresh->string = vStringNew ();
+	fresh->scope = vStringNew ();
+
+	/* Classification. */
+	fresh->keyword = KEYWORD_NONE;
+	fresh->type = TOKEN_UNDEFINED;
+
+	/* Parsing state. */
+	fresh->ignoreTag = FALSE;
+	fresh->nestLevel = 0;
+
+	/* Location in the input. */
+	fresh->lineNumber = getSourceLineNumber ();
+	fresh->filePosition = getInputFilePosition ();
+
+	return fresh;
+}
+
+/* Frees a token obtained from newToken(), including both of its strings. */
+static void deleteToken (tokenInfo *const token)
+{
+	vStringDelete (token->scope);
+	vStringDelete (token->string);
+	eFree (token);
+}
+
+/*
+ * Tag generation functions
+ */
+
+/*
+ * Emits a tag of the given kind, named exactly after the token's text and
+ * located at the token's recorded line and file position.  Nothing is
+ * emitted when the kind is disabled or the token is flagged ignoreTag.
+ */
+static void makeConstTag (tokenInfo *const token, const jsKind kind)
+{
+	const kindOption *const option = &JsKinds [kind];
+	tagEntryInfo entry;
+
+	if (! option->enabled || token->ignoreTag)
+		return;
+
+	initTagEntry (&entry, vStringValue (token->string));
+
+	entry.kind         = option->letter;
+	entry.kindName     = option->name;
+	entry.filePosition = token->filePosition;
+	entry.lineNumber   = token->lineNumber;
+
+	makeTagEntry (&entry);
+}
+
+/*
+ * Emits a tag of the given kind for "token".  When the token carries a
+ * scope, the token's text is first rewritten in place to "scope.text", so
+ * the caller's token is modified.  Nothing happens when the kind is disabled
+ * or the token is flagged ignoreTag.
+ */
+static void makeJsTag (tokenInfo *const token, const jsKind kind)
+{
+	if (! JsKinds [kind].enabled || token->ignoreTag)
+		return;
+
+	if (vStringLength (token->scope) > 0)
+	{
+		/* Build "scope.name" in a scratch string, then store it back. */
+		vString *const qualified = vStringNewCopy (token->scope);
+
+		vStringCatS (qualified, ".");
+		vStringCatS (qualified, vStringValue (token->string));
+		vStringTerminate (qualified);
+
+		vStringCopy (token->string, qualified);
+		vStringDelete (qualified);
+	}
+
+	makeConstTag (token, kind);
+}
+
+/*
+ * Emits a class tag for "token" unless a class with the same fully
+ * qualified name ("scope.name", or just "name" without a scope) has been
+ * recorded in ClassNames already.  Newly tagged names are added to that
+ * list.  Tokens flagged ignoreTag are skipped entirely.
+ */
+static void makeClassTag (tokenInfo *const token)
+{
+	vString *qualified;
+
+	if (token->ignoreTag)
+		return;
+
+	qualified = vStringNew ();
+	if (vStringLength (token->scope) == 0)
+		vStringCopy (qualified, token->string);
+	else
+	{
+		vStringCopy (qualified, token->scope);
+		vStringCatS (qualified, ".");
+		vStringCatS (qualified, vStringValue (token->string));
+	}
+	vStringTerminate (qualified);
+
+	if (! stringListHas (ClassNames, vStringValue (qualified)))
+	{
+		/* The list receives its own copy; ours is released below. */
+		stringListAdd (ClassNames, vStringNewCopy (qualified));
+		makeJsTag (token, JSTAG_CLASS);
+	}
+
+	vStringDelete (qualified);
+}
+
+/*
+ * Emits a function tag for "token" unless a function with the same fully
+ * qualified name ("scope.name", or just "name" without a scope) has been
+ * recorded in FunctionNames already.  Newly tagged names are added to that
+ * list.  Tokens flagged ignoreTag are skipped entirely.
+ */
+static void makeFunctionTag (tokenInfo *const token)
+{
+	vString *qualified;
+
+	if (token->ignoreTag)
+		return;
+
+	qualified = vStringNew ();
+	if (vStringLength (token->scope) == 0)
+		vStringCopy (qualified, token->string);
+	else
+	{
+		vStringCopy (qualified, token->scope);
+		vStringCatS (qualified, ".");
+		vStringCatS (qualified, vStringValue (token->string));
+	}
+	vStringTerminate (qualified);
+
+	if (! stringListHas (FunctionNames, vStringValue (qualified)))
+	{
+		/* The list receives its own copy; ours is released below. */
+		stringListAdd (FunctionNames, vStringNewCopy (qualified));
+		makeJsTag (token, JSTAG_FUNCTION);
+	}
+
+	vStringDelete (qualified);
+}
+
+/*
+ * Parsing functions
+ */
+
+/*
+ * Reads the remainder of a string literal into "string".
+ *
+ * The opening quote has already been consumed by the caller; "delimiter" is
+ * that quote character.  Characters are appended until the matching
+ * unescaped delimiter or the end of input is reached; the delimiter itself
+ * is not stored.  A backslash is dropped and the character following it is
+ * stored verbatim, so an escaped quote does not end the string.
+ */
+static void parseString (vString *const string, const int delimiter)
+{
+	boolean end = FALSE;
+	while (! end)
+	{
+		int c = fileGetc ();
+		if (c == EOF)
+			end = TRUE;
+		else if (c == '\\')
+		{
+			c = fileGetc (); /* This maybe a ' or ". */
+			if (c == EOF)
+				end = TRUE; /* input ended right after the backslash:
+				             * do not store EOF as if it were a character */
+			else
+				vStringPut (string, c);
+		}
+		else if (c == delimiter)
+			end = TRUE;
+		else
+			vStringPut (string, c);
+	}
+	vStringTerminate (string);
+}
+
+/* Collects the identifier that begins with "firstChar" into "string".
+ * Characters are read for as long as isIdentChar() accepts them.  The
+ * character that ends the identifier is pushed back onto the input unless
+ * it is whitespace, in which case it is simply consumed.
+ */
+static void parseIdentifier (vString *const string, const int firstChar)
+{
+	int next;
+
+	Assert (isIdentChar (firstChar));
+
+	/* The first character is stored unconditionally. */
+	vStringPut (string, firstChar);
+	while (isIdentChar (next = fileGetc ()))
+		vStringPut (string, next);
+	vStringTerminate (string);
+
+	if (isspace (next))
+		return;
+	fileUngetc (next); /* unget non-identifier character */
+}
+
+/*
+ * Reads the next token from the input into "token".
+ *
+ * The token's type and keyword are reset and its string is cleared, then
+ * blanks, tabs, newlines and comments are skipped and the following token is
+ * classified.  The token text is filled in only for string literals (without
+ * the quotes) and identifiers/keywords.  The scope, nestLevel and ignoreTag
+ * fields are left untouched.
+ *
+ * On end of input this function does not return: it longjmp()s to
+ * "Exception" with the value ExceptionEOF.
+ */
+static void readToken (tokenInfo *const token)
+{
+ int c;
+
+ token->type = TOKEN_UNDEFINED;
+ token->keyword = KEYWORD_NONE;
+ vStringClear (token->string);
+
+getNextChar: /* re-entered after a comment has been skipped */
+ do
+ {
+ c = fileGetc ();
+ token->lineNumber = getSourceLineNumber ();
+ token->filePosition = getInputFilePosition ();
+ }
+ while (c == '\t' || c == ' ' || c == '\n');
+
+ switch (c)
+ {
+ case EOF: longjmp (Exception, (int)ExceptionEOF); break;
+ case '(': token->type = TOKEN_OPEN_PAREN; break;
+ case ')': token->type = TOKEN_CLOSE_PAREN; break;
+ case ';': token->type = TOKEN_SEMICOLON; break;
+ case ',': token->type = TOKEN_COMMA; break;
+ case '.': token->type = TOKEN_PERIOD; break;
+ case ':': token->type = TOKEN_COLON; break;
+ case '{': token->type = TOKEN_OPEN_CURLY; break;
+ case '}': token->type = TOKEN_CLOSE_CURLY; break;
+ case '=': token->type = TOKEN_EQUAL_SIGN; break;
+ case '[': token->type = TOKEN_OPEN_SQUARE; break;
+ case ']': token->type = TOKEN_CLOSE_SQUARE; break;
+
+ case '\'':
+ case '"':
+ /* String literal: the position is re-recorded after the closing quote */
+ token->type = TOKEN_STRING;
+ parseString (token->string, c);
+ token->lineNumber = getSourceLineNumber ();
+ token->filePosition = getInputFilePosition ();
+ break;
+
+ case '\\':
+ /*
+ * Backslash outside a string: a following backslash, double quote
+ * or whitespace character is swallowed; anything else is pushed back.
+ */
+ c = fileGetc ();
+ if (c != '\\' && c != '"' && !isspace (c))
+ fileUngetc (c);
+ token->type = TOKEN_CHARACTER;
+ token->lineNumber = getSourceLineNumber ();
+ token->filePosition = getInputFilePosition ();
+ break;
+
+ case '/':
+ {
+ int d = fileGetc ();
+ if ( (d != '*') && /* is this the start of a comment? */
+ (d != '/') ) /* is a one line comment? */
+ {
+ /* A lone slash; the look-ahead character is pushed back */
+ token->type = TOKEN_FORWARD_SLASH;
+ fileUngetc (d);
+ }
+ else
+ {
+ if (d == '*')
+ {
+ /*
+ * Block comment: hop from '*' to '*' until one is directly
+ * followed by '/'. The loop also stops at EOF or a NUL.
+ */
+ do
+ {
+ fileSkipToCharacter ('*');
+ c = fileGetc ();
+ if (c == '/')
+ break;
+ else
+ fileUngetc (c);
+ } while (c != EOF && c != '\0');
+ goto getNextChar;
+ }
+ else if (d == '/') /* is this the start of a comment? */
+ {
+ /* Line comment: skip to the end of the line */
+ fileSkipToCharacter ('\n');
+ goto getNextChar;
+ }
+ }
+ break;
+ }
+
+ default:
+ if (! isIdentChar (c))
+ token->type = TOKEN_UNDEFINED;
+ else
+ {
+ /* Identifier or keyword, decided by the keyword lookup */
+ parseIdentifier (token->string, c);
+ token->lineNumber = getSourceLineNumber ();
+ token->filePosition = getInputFilePosition ();
+ token->keyword = analyzeToken (token->string, Lang_js);
+ if (isKeyword (token, KEYWORD_NONE))
+ token->type = TOKEN_IDENTIFIER;
+ else
+ token->type = TOKEN_KEYWORD;
+ }
+ break;
+ }
+}
+
+/*
+ * Copies the text, scope, classification, position and nesting level of
+ * "src" into "dest".  The ignoreTag flag of "dest" is left as it is.
+ */
+static void copyToken (tokenInfo *const dest, tokenInfo *const src)
+{
+	/* Text (deep copies into dest's own strings). */
+	vStringCopy (dest->string, src->string);
+	vStringCopy (dest->scope, src->scope);
+
+	/* Classification. */
+	dest->type = src->type;
+	dest->keyword = src->keyword;
+
+	/* Position and nesting. */
+	dest->lineNumber = src->lineNumber;
+	dest->filePosition = src->filePosition;
+	dest->nestLevel = src->nestLevel;
+}
+
+/*
+ * Token parsing functions
+ */
+
+/*
+ * Skips a parenthesised list, nested parentheses included.
+ *
+ * Does nothing unless "token" is an opening parenthesis.  Otherwise tokens
+ * are consumed up to the matching closing parenthesis and "token" is left
+ * on the token that follows it.
+ */
+static void skipArgumentList (tokenInfo *const token)
+{
+	int depth;
+
+	if (! isType (token, TOKEN_OPEN_PAREN))
+		return;
+
+	depth = 1;	/* the parenthesis we are standing on */
+	do
+	{
+		readToken (token);
+		if (isType (token, TOKEN_OPEN_PAREN))
+			++depth;
+		else if (isType (token, TOKEN_CLOSE_PAREN) && depth > 0)
+			--depth;
+	} while (! isType (token, TOKEN_CLOSE_PAREN) || depth != 0);
+
+	readToken (token);	/* step past the closing parenthesis */
+}
+
+/*
+ * Skips a bracketed list such as the "[1]" in "var name[1]", nested square
+ * brackets included.
+ *
+ * Does nothing unless "token" is an opening square bracket.  Otherwise
+ * tokens are consumed up to the matching closing bracket and "token" is
+ * left on the token that follows it.
+ */
+static void skipArrayList (tokenInfo *const token)
+{
+	int depth;
+
+	if (! isType (token, TOKEN_OPEN_SQUARE))
+		return;
+
+	depth = 1;	/* the bracket we are standing on */
+	do
+	{
+		readToken (token);
+		if (isType (token, TOKEN_OPEN_SQUARE))
+			++depth;
+		else if (isType (token, TOKEN_CLOSE_SQUARE) && depth > 0)
+			--depth;
+	} while (! isType (token, TOKEN_CLOSE_SQUARE) || depth != 0);
+
+	readToken (token);	/* step past the closing bracket */
+}
+
+/*
+ * Appends the text of "child" to the text of "parent", separated by a
+ * period when "parent" is not empty (e.g. "a" + "b" gives "a.b").
+ */
+static void addContext (tokenInfo* const parent, const tokenInfo* const child)
+{
+	vString *const path = parent->string;
+
+	if (vStringLength (path) > 0)
+		vStringCatS (path, ".");
+	vStringCatS (path, vStringValue (child->string));
+	vStringTerminate (path);
+}
+
+/*
+ * Appends "extra" to the scope of "token", separated by a period when the
+ * scope is not empty.
+ */
+static void addToScope (tokenInfo* const token, vString* const extra)
+{
+	vString *const scope = token->scope;
+
+	if (vStringLength (scope) > 0)
+		vStringCatS (scope, ".");
+	vStringCatS (scope, vStringValue (extra));
+	vStringTerminate (scope);
+}
+
+/*
+ * Scanning functions
+ */
+
+/*
+ * Advances "token" to the end of the current statement, i.e. until it is a
+ * semicolon or a closing brace.  Returns immediately when "token" already
+ * is one of those.  Brace blocks and parenthesised lists met on the way are
+ * handed to parseBlock() and skipArgumentList() respectively.
+ */
+static void findCmdTerm (tokenInfo *const token)
+{
+	for (;;)
+	{
+		switch (token->type)
+		{
+			case TOKEN_SEMICOLON:
+			case TOKEN_CLOSE_CURLY:
+				return;		/* terminator reached */
+
+			case TOKEN_OPEN_CURLY:
+				parseBlock (token, token);
+				break;
+
+			case TOKEN_OPEN_PAREN:
+				skipArgumentList (token);
+				break;
+
+			default:
+				readToken (token);
+				break;
+		}
+	}
+}
+
+/*
+ * Parses a switch statement, entered with "token" on the "switch" keyword:
+ *
+ *     switch (expression) {
+ *         case value1: statement; break;
+ *         default:     statement;
+ *     }
+ *
+ * The parenthesised expression is skipped and the body, when present, is
+ * parsed as an ordinary block.
+ */
+static void parseSwitch (tokenInfo *const token)
+{
+	readToken (token);
+
+	/* The controlling expression; a no-op unless token is '('. */
+	skipArgumentList (token);
+
+	/* The body holding the case labels. */
+	if (isType (token, TOKEN_OPEN_CURLY))
+		parseBlock (token, token);
+}
+
+/*
+ * Parses a loop statement, entered with "token" on "for", "while" or "do":
+ *
+ *     for (x=0; x<3; x++) statement;
+ *     for (x=0; x<3; x++) { ... }
+ *     while (number<5) { ... }
+ *     do { ... } while (number<5);
+ *
+ * The loop header is skipped; the body is parsed as a block when braced and
+ * as a single line otherwise.  Any other keyword is ignored.
+ */
+static void parseLoop (tokenInfo *const token)
+{
+	switch (token->keyword)
+	{
+		case KEYWORD_for:
+		case KEYWORD_while:
+			readToken (token);
+
+			/* Loop header. */
+			if (isType (token, TOKEN_OPEN_PAREN))
+				skipArgumentList (token);
+
+			/* Loop body. */
+			if (isType (token, TOKEN_OPEN_CURLY))
+				parseBlock (token, token);
+			else
+				parseLine (token, FALSE);
+			break;
+
+		case KEYWORD_do:
+			readToken (token);
+
+			/* Loop body comes first here. */
+			if (isType (token, TOKEN_OPEN_CURLY))
+				parseBlock (token, token);
+			else
+				parseLine (token, FALSE);
+
+			/* Trailing "while (condition)". */
+			readToken (token);
+			if (isKeyword (token, KEYWORD_while))
+			{
+				readToken (token);
+				if (isType (token, TOKEN_OPEN_PAREN))
+					skipArgumentList (token);
+			}
+			break;
+
+		default:
+			break;
+	}
+}
+
+/*
+ * Parses an if/else style statement (parseLine() also routes try, catch and
+ * finally here), entered with "token" on the keyword.
+ *
+ * Handled shapes:
+ *
+ *     if ( ... ) one_line;
+ *
+ *     if ( ... ) statement; else statement
+ *
+ *     if ( ... ) { multiple; statements; }
+ *
+ * A single-statement branch may lack its terminating semicolon when it is
+ * the last thing before a closing brace, e.g.
+ *
+ *     function a(flag){
+ *         if(flag)
+ *             test(1);
+ *         else
+ *             test(2)
+ *     }
+ *
+ * Returns FALSE in that case, i.e. when parsing stopped on a closing brace
+ * that belongs to the enclosing block, telling the caller not to read
+ * another token; returns TRUE otherwise.
+ *
+ * TODO: Deal with statements that can optionally end without a semicolon.
+ *       Currently this messes up the parsing of blocks.  Need to somehow
+ *       detect this has happened, and either back up a token, or skip
+ *       reading the next token if that is possible from all code locations.
+ */
+static boolean parseIf (tokenInfo *const token)
+{
+	readToken (token);
+
+	/* "else if": consume the "if" as well. */
+	if (isKeyword (token, KEYWORD_if))
+		readToken (token);
+
+	/* The condition. */
+	if (isType (token, TOKEN_OPEN_PAREN))
+		skipArgumentList (token);
+
+	/* Braced branch: parsed as a block, nothing more to decide. */
+	if (isType (token, TOKEN_OPEN_CURLY))
+	{
+		parseBlock (token, token);
+		return TRUE;
+	}
+
+	/* Single statement branch. */
+	findCmdTerm (token);
+	if (isType (token, TOKEN_CLOSE_CURLY))
+		return FALSE;	/* the statement had no line terminator */
+
+	/* Look at what follows the semicolon. */
+	readToken (token);
+	if (isType (token, TOKEN_CLOSE_CURLY))
+		return FALSE;	/* already on the enclosing block's brace */
+
+	/* An "else" part has the same two shapes, so recurse for it. */
+	if (isKeyword (token, KEYWORD_else))
+		return parseIf (token);
+
+	return TRUE;
+}
+
+/*
+ * Parses a function declaration, entered with "token" on the "function"
+ * keyword:
+ *
+ *     function validFunctionTwo(a,b) {}
+ *
+ * The name may be dotted.  When a body follows, it is parsed and a class
+ * tag is made if parseBlock() reports the body as a class; a function tag
+ * otherwise.  Finally "token" is advanced to the end of the statement.
+ */
+static void parseFunction (tokenInfo *const token)
+{
+	tokenInfo *const name = newToken ();
+
+	readToken (name);
+	/* Add scope in case this is an INNER function */
+	addToScope (name, token->scope);
+
+	/* Gather the remaining components of a dotted name. */
+	readToken (token);
+	while (isType (token, TOKEN_PERIOD))
+	{
+		readToken (token);
+		if (isKeyword (token, KEYWORD_NONE))
+		{
+			addContext (name, token);
+			readToken (token);
+		}
+	}
+
+	/* Parameter list. */
+	if (isType (token, TOKEN_OPEN_PAREN))
+		skipArgumentList (token);
+
+	/* Body: its contents decide between class and function. */
+	if (isType (token, TOKEN_OPEN_CURLY))
+	{
+		if (parseBlock (token, name))
+			makeClassTag (name);
+		else
+			makeFunctionTag (name);
+	}
+
+	findCmdTerm (token);
+
+	deleteToken (name);
+}
+
+/*
+ * Parses the statements of a brace-delimited block.
+ *
+ * token:  the current token; when it is the opening brace it is consumed
+ *         first. Its nestLevel is incremented for the duration of the call.
+ * parent: token whose text names the enclosing function or class. That text
+ *         is temporarily appended to the scope of "token" while a statement
+ *         starting with "this", "var" or "function" is parsed.
+ *
+ * Returns TRUE when a statement of this block started with the "this"
+ * keyword, which callers take to mean that the enclosing function is a
+ * class. The result of nested blocks is not propagated.
+ */
+static boolean parseBlock (tokenInfo *const token, tokenInfo *const parent)
+{
+ boolean is_class = FALSE;
+ boolean read_next_token = TRUE;
+ vString * saveScope = vStringNew ();
+
+ token->nestLevel++;
+ /*
+ * Make this routine a bit more forgiving.
+ * If called on an open_curly advance it
+ */
+ if ( isType (token, TOKEN_OPEN_CURLY) &&
+ isKeyword(token, KEYWORD_NONE) )
+ readToken(token);
+
+ /* An empty block "{}" has nothing to parse */
+ if (! isType (token, TOKEN_CLOSE_CURLY))
+ {
+ /*
+ * Read until we find the closing brace,
+ * any nested braces will be handled within
+ */
+ do
+ {
+ read_next_token = TRUE;
+ if (isKeyword (token, KEYWORD_this))
+ {
+ /*
+ * Means we are inside a class and have found
+ * a class, not a function
+ */
+ is_class = TRUE;
+ vStringCopy(saveScope, token->scope);
+ addToScope (token, parent->string);
+
+ /*
+ * Ignore the remainder of the line
+ * findCmdTerm(token);
+ */
+ parseLine (token, is_class);
+
+ vStringCopy(token->scope, saveScope);
+ }
+ else if (isKeyword (token, KEYWORD_var))
+ {
+ /*
+ * Potentially we have found an inner function.
+ * Set something to indicate the scope
+ */
+ vStringCopy(saveScope, token->scope);
+ addToScope (token, parent->string);
+ parseLine (token, is_class);
+ vStringCopy(token->scope, saveScope);
+ }
+ else if (isKeyword (token, KEYWORD_function))
+ {
+ /* Inner function: tagged within the scope of the parent */
+ vStringCopy(saveScope, token->scope);
+ addToScope (token, parent->string);
+ parseFunction (token);
+ vStringCopy(token->scope, saveScope);
+ }
+ else if (isType (token, TOKEN_OPEN_CURLY))
+ {
+ /* Handle nested blocks */
+ parseBlock (token, parent);
+ }
+ else
+ {
+ /*
+ * It is possible for a line to have no terminator
+ * if the following line is a closing brace.
+ * parseLine will detect this case and indicate
+ * whether we should read an additional token.
+ */
+ read_next_token = parseLine (token, is_class);
+ }
+
+ /*
+ * Always read a new token unless we find a statement without
+ * a ending terminator
+ */
+ if( read_next_token )
+ readToken(token);
+
+ /*
+ * If we find a statement without a terminator consider the
+ * block finished, otherwise the stack will be off by one.
+ */
+ } while (! isType (token, TOKEN_CLOSE_CURLY) && read_next_token );
+ }
+
+ vStringDelete(saveScope);
+ token->nestLevel--;
+
+ return is_class;
+}
+
+/*
+ * Parses the comma separated members of an object literal and tags them.
+ *
+ * token: the current token; the first member is read from the input after
+ *        it, so it is expected to be the opening brace of the literal.
+ * class: token whose text is added to the scope of every member tagged.
+ *
+ * A member of the form "name : function (...) { ... }" produces a method
+ * tag and its body is parsed with the member as parent; a member whose value
+ * is not introduced by the "function" keyword produces a property tag. A
+ * "function" value without a brace-delimited body produces no tag. After
+ * the last member "token" is advanced to the end of the statement.
+ */
+static void parseMethods (tokenInfo *const token, tokenInfo *const class)
+{
+ tokenInfo *const name = newToken ();
+
+ /*
+ * This deals with these formats
+ * validProperty : 2,
+ * validMethod : function(a,b) {}
+ * 'validMethod2' : function(a,b) {}
+ * container.dirtyTab = {'url': false, 'title':false, 'snapshot':false, '*': false}
+ */
+
+ do
+ {
+ readToken (token);
+ /* A member name is either a quoted string or a non-keyword token */
+ if (isType (token, TOKEN_STRING) || isKeyword(token, KEYWORD_NONE))
+ {
+ /* copyToken also replaces the scope left from the previous member */
+ copyToken(name, token);
+
+ readToken (token);
+ if ( isType (token, TOKEN_COLON) )
+ {
+ readToken (token);
+ if ( isKeyword (token, KEYWORD_function) )
+ {
+ readToken (token);
+ if ( isType (token, TOKEN_OPEN_PAREN) )
+ {
+ skipArgumentList(token);
+ }
+
+ if (isType (token, TOKEN_OPEN_CURLY))
+ {
+ addToScope (name, class->string);
+ makeJsTag (name, JSTAG_METHOD);
+ parseBlock (token, name);
+
+ /*
+ * Read to the closing curly, check next
+ * token, if a comma, we must loop again
+ */
+ readToken (token);
+ }
+ }
+ else
+ {
+ addToScope (name, class->string);
+ makeJsTag (name, JSTAG_PROPERTY);
+
+ /*
+ * Read the next token, if a comma
+ * we must loop again
+ */
+ readToken (token);
+ }
+ }
+ }
+ } while ( isType(token, TOKEN_COMMA) );
+
+ findCmdTerm (token);
+
+ deleteToken (name);
+}
+
+/*
+ * Parses one statement and creates the tags it gives rise to: functions,
+ * classes, methods, properties and global variables.
+ *
+ * token:           the first token of the statement.
+ * is_inside_class: TRUE when the statement belongs to a class body; a
+ *                  function assigned in such a statement is tagged as a
+ *                  method.
+ *
+ * Returns FALSE only when the scan for the statement's name stopped on a
+ * closing brace, i.e. the statement had no terminator of its own; TRUE in
+ * every other case.
+ *
+ * NOTE(review): saveScope starts out empty and is only filled in by the two
+ * dotted-name branches below, yet the scope of "token" is unconditionally
+ * overwritten with it at cleanUp. Unless one of those branches ran, the
+ * scope of "token" is therefore empty on return -- confirm this is intended.
+ */
+static boolean parseStatement (tokenInfo *const token, boolean is_inside_class)
+{
+ tokenInfo *const name = newToken ();
+ tokenInfo *const secondary_name = newToken ();
+ vString * saveScope = vStringNew ();
+ boolean is_class = FALSE;
+ boolean is_terminated = TRUE;
+ boolean is_global = FALSE;
+ boolean is_prototype = FALSE;
+ vString * fulltag;
+
+ vStringClear(saveScope);
+ /*
+ * Functions can be named or unnamed.
+ * This deals with these formats:
+ * Function
+ * validFunctionOne = function(a,b) {}
+ * testlib.validFunctionFive = function(a,b) {}
+ * var innerThree = function(a,b) {}
+ * var innerFour = (a,b) {}
+ * var D2 = secondary_fcn_name(a,b) {}
+ * var D3 = new Function("a", "b", "return a+b;");
+ * Class
+ * testlib.extras.ValidClassOne = function(a,b) {
+ * this.a = a;
+ * }
+ * Class Methods
+ * testlib.extras.ValidClassOne.prototype = {
+ * 'validMethodOne' : function(a,b) {},
+ * 'validMethodTwo' : function(a,b) {}
+ * }
+ * ValidClassTwo = function ()
+ * {
+ * this.validMethodThree = function() {}
+ * // unnamed method
+ * this.validMethodFour = () {}
+ * }
+ * Database.prototype.validMethodThree = Database_getTodaysDate;
+ */
+
+ if ( is_inside_class )
+ is_class = TRUE;
+ /*
+ * var can precede an inner function
+ */
+ if ( isKeyword(token, KEYWORD_var) )
+ {
+ /*
+ * Only create variables for global scope
+ */
+ if ( token->nestLevel == 0 )
+ {
+ is_global = TRUE;
+ }
+ readToken(token);
+ }
+
+ /* Skip a leading "this." so that the name starts at the member */
+ if ( isKeyword(token, KEYWORD_this) )
+ {
+ readToken(token);
+ if (isType (token, TOKEN_PERIOD))
+ {
+ readToken(token);
+ }
+ }
+
+ copyToken(name, token);
+
+ /* Scan the left-hand side up to '=', ';' or '}' */
+ while (! isType (token, TOKEN_CLOSE_CURLY) &&
+ ! isType (token, TOKEN_SEMICOLON) &&
+ ! isType (token, TOKEN_EQUAL_SIGN) )
+ {
+ /* Potentially the name of the function */
+ readToken (token);
+ if (isType (token, TOKEN_PERIOD))
+ {
+ /*
+ * Cannot be a global variable if it has dot references in the name
+ */
+ is_global = FALSE;
+ do
+ {
+ readToken (token);
+ if ( isKeyword(token, KEYWORD_NONE) )
+ {
+ if ( is_class )
+ {
+ vStringCopy(saveScope, token->scope);
+ addToScope(token, name->string);
+ }
+ else
+ addContext (name, token);
+ }
+ else if ( isKeyword(token, KEYWORD_prototype) )
+ {
+ /*
+ * When we reach the "prototype" tag, we infer:
+ * "BindAgent" is a class
+ * "build" is a method
+ *
+ * function BindAgent( repeatableIdName, newParentIdName ) {
+ * }
+ *
+ * CASE 1
+ * Specified function name: "build"
+ * BindAgent.prototype.build = function( mode ) {
+ * ignore everything within this function
+ * }
+ *
+ * CASE 2
+ * Prototype listing
+ * ValidClassOne.prototype = {
+ * 'validMethodOne' : function(a,b) {},
+ * 'validMethodTwo' : function(a,b) {}
+ * }
+ *
+ */
+ makeClassTag (name);
+ is_class = TRUE;
+ is_prototype = TRUE;
+
+ /*
+ * There should a ".function_name" next.
+ */
+ readToken (token);
+ if (isType (token, TOKEN_PERIOD))
+ {
+ /*
+ * Handle CASE 1
+ */
+ readToken (token);
+ if ( isKeyword(token, KEYWORD_NONE) )
+ {
+ vStringCopy(saveScope, token->scope);
+ addToScope(token, name->string);
+
+ makeJsTag (token, JSTAG_METHOD);
+ /*
+ * We can read until the end of the block / statement.
+ * We need to correctly parse any nested blocks, but
+ * we do NOT want to create any tags based on what is
+ * within the blocks.
+ */
+ token->ignoreTag = TRUE;
+ /*
+ * Find to the end of the statement
+ */
+ findCmdTerm (token);
+ token->ignoreTag = FALSE;
+ is_terminated = TRUE;
+ goto cleanUp;
+ }
+ }
+ else if (isType (token, TOKEN_EQUAL_SIGN))
+ {
+ readToken (token);
+ if (isType (token, TOKEN_OPEN_CURLY))
+ {
+ /*
+ * Handle CASE 2
+ *
+ * Creates tags for each of these class methods
+ * ValidClassOne.prototype = {
+ * 'validMethodOne' : function(a,b) {},
+ * 'validMethodTwo' : function(a,b) {}
+ * }
+ */
+ parseMethods(token, name);
+ /*
+ * Find to the end of the statement
+ */
+ findCmdTerm (token);
+ token->ignoreTag = FALSE;
+ is_terminated = TRUE;
+ goto cleanUp;
+ }
+ }
+ }
+ readToken (token);
+ } while (isType (token, TOKEN_PERIOD));
+ }
+
+ if ( isType (token, TOKEN_OPEN_PAREN) )
+ skipArgumentList(token);
+
+ if ( isType (token, TOKEN_OPEN_SQUARE) )
+ skipArrayList(token);
+
+ /*
+ if ( isType (token, TOKEN_OPEN_CURLY) )
+ {
+ is_class = parseBlock (token, name);
+ }
+ */
+ }
+
+ if ( isType (token, TOKEN_CLOSE_CURLY) )
+ {
+ /*
+ * Reaching this section without having
+ * processed an open curly brace indicates
+ * the statement is most likely not terminated.
+ */
+ is_terminated = FALSE;
+ goto cleanUp;
+ }
+
+ if ( isType (token, TOKEN_SEMICOLON) )
+ {
+ /*
+ * Only create variables for global scope
+ */
+ if ( token->nestLevel == 0 && is_global )
+ {
+ /*
+ * Handles this syntax:
+ * var g_var2;
+ */
+ if (isType (token, TOKEN_SEMICOLON))
+ makeJsTag (name, JSTAG_VARIABLE);
+ }
+ /*
+ * Statement has ended.
+ * This deals with calls to functions, like:
+ * alert(..);
+ */
+ goto cleanUp;
+ }
+
+ /* From here on the right-hand side of an assignment is examined */
+ if ( isType (token, TOKEN_EQUAL_SIGN) )
+ {
+ readToken (token);
+
+ if ( isKeyword (token, KEYWORD_function) )
+ {
+ readToken (token);
+
+ if ( isKeyword (token, KEYWORD_NONE) &&
+ ! isType (token, TOKEN_OPEN_PAREN) )
+ {
+ /*
+ * Functions of this format:
+ * var D2A = function theAdd(a, b)
+ * {
+ * return a+b;
+ * }
+ * Are really two separate defined functions and
+ * can be referenced in two ways:
+ * alert( D2A(1,2) ); // produces 3
+ * alert( theAdd(1,2) ); // also produces 3
+ * So it must have two tags:
+ * D2A
+ * theAdd
+ * Save the reference to the name for later use, once
+ * we have established this is a valid function we will
+ * create the secondary reference to it.
+ */
+ copyToken(secondary_name, token);
+ readToken (token);
+ }
+
+ if ( isType (token, TOKEN_OPEN_PAREN) )
+ skipArgumentList(token);
+
+ if (isType (token, TOKEN_OPEN_CURLY))
+ {
+ /*
+ * This will be either a function or a class.
+ * We can only determine this by checking the body
+ * of the function. If we find a "this." we know
+ * it is a class, otherwise it is a function.
+ */
+ if ( is_inside_class )
+ {
+ makeJsTag (name, JSTAG_METHOD);
+ if ( vStringLength(secondary_name->string) > 0 )
+ makeFunctionTag (secondary_name);
+ parseBlock (token, name);
+ }
+ else
+ {
+ is_class = parseBlock (token, name);
+ if ( is_class )
+ makeClassTag (name);
+ else
+ makeFunctionTag (name);
+
+ if ( vStringLength(secondary_name->string) > 0 )
+ makeFunctionTag (secondary_name);
+
+ /*
+ * Find to the end of the statement
+ */
+ goto cleanUp;
+ }
+ }
+ }
+ else if (isType (token, TOKEN_OPEN_PAREN))
+ {
+ /*
+ * Handle nameless functions
+ * this.method_name = () {}
+ */
+ skipArgumentList(token);
+
+ if (isType (token, TOKEN_OPEN_CURLY))
+ {
+ /*
+ * Nameless functions are only setup as methods.
+ */
+ makeJsTag (name, JSTAG_METHOD);
+ parseBlock (token, name);
+ }
+ }
+ else if (isType (token, TOKEN_OPEN_CURLY))
+ {
+ /*
+ * Creates tags for each of these class methods
+ * ValidClassOne.prototype = {
+ * 'validMethodOne' : function(a,b) {},
+ * 'validMethodTwo' : function(a,b) {}
+ * }
+ */
+ parseMethods(token, name);
+ if (isType (token, TOKEN_CLOSE_CURLY))
+ {
+ /*
+ * Assume the closing brace terminates
+ * this statement.
+ */
+ is_terminated = TRUE;
+ }
+ }
+ else if (isKeyword (token, KEYWORD_new))
+ {
+ /* "new Function(...)" or "new Object(...)", in either spelling */
+ readToken (token);
+ if ( isKeyword (token, KEYWORD_function) ||
+ isKeyword (token, KEYWORD_capital_function) ||
+ isKeyword (token, KEYWORD_object) ||
+ isKeyword (token, KEYWORD_capital_object) )
+ {
+ if ( isKeyword (token, KEYWORD_object) ||
+ isKeyword (token, KEYWORD_capital_object) )
+ is_class = TRUE;
+
+ readToken (token);
+ if ( isType (token, TOKEN_OPEN_PAREN) )
+ skipArgumentList(token);
+
+ if (isType (token, TOKEN_SEMICOLON))
+ {
+ if ( token->nestLevel == 0 )
+ {
+ if ( is_class )
+ {
+ makeClassTag (name);
+ } else {
+ makeFunctionTag (name);
+ }
+ }
+ }
+ }
+ }
+ else if (isKeyword (token, KEYWORD_NONE))
+ {
+ /*
+ * Only create variables for global scope
+ */
+ if ( token->nestLevel == 0 && is_global )
+ {
+ /*
+ * A pointer can be created to the function.
+ * If we recognize the function/class name ignore the variable.
+ * This format looks identical to a variable definition.
+ * A variable defined outside of a block is considered
+ * a global variable:
+ * var g_var1 = 1;
+ * var g_var2;
+ * This is not a global variable:
+ * var g_var = function;
+ * This is a global variable:
+ * var g_var = different_var_name;
+ */
+ fulltag = vStringNew ();
+ if (vStringLength (token->scope) > 0)
+ {
+ vStringCopy(fulltag, token->scope);
+ vStringCatS (fulltag, ".");
+ vStringCatS (fulltag, vStringValue(token->string));
+ }
+ else
+ {
+ vStringCopy(fulltag, token->string);
+ }
+ vStringTerminate(fulltag);
+ if ( ! stringListHas(FunctionNames, vStringValue (fulltag)) &&
+ ! stringListHas(ClassNames, vStringValue (fulltag)) )
+ {
+ findCmdTerm (token);
+ if (isType (token, TOKEN_SEMICOLON))
+ makeJsTag (name, JSTAG_VARIABLE);
+ }
+ vStringDelete (fulltag);
+ }
+ }
+ }
+ findCmdTerm (token);
+
+ /*
+ * Statements can be optionally terminated in the case of
+ * statement prior to a close curly brace as in the
+ * document.write line below:
+ *
+ * function checkForUpdate() {
+ * if( 1==1 ) {
+ * document.write("hello from checkForUpdate<br>")
+ * }
+ * return 1;
+ * }
+ *
+ * NOTE(review): the assignment below can never change anything, since
+ * it only runs when is_terminated is already FALSE. The intended
+ * condition is unclear from this file -- confirm before changing.
+ */
+ if ( ! is_terminated && isType (token, TOKEN_CLOSE_CURLY))
+ is_terminated = FALSE;
+
+
+cleanUp:
+ vStringCopy(token->scope, saveScope);
+ deleteToken (name);
+ deleteToken (secondary_name);
+ vStringDelete(saveScope);
+
+ return is_terminated;
+}
+
+/*
+ * Parses one statement, dispatching on its leading keyword.
+ *
+ * The common control statements (if, while, for, do, ...) are recognised
+ * here because the last statement within a block "{}" may legally omit its
+ * terminator.
+ *
+ * Returns FALSE when the statement turned out not to be terminated, which
+ * tells the caller not to read an additional token looking for the end of
+ * the statement.  Only if-like statements and statements that do not start
+ * with a keyword can report that; every other case returns TRUE.
+ */
+static boolean parseLine (tokenInfo *const token, boolean is_inside_class)
+{
+	/*
+	 * Special case where single line statements may not be
+	 * SEMICOLON terminated.  parseBlock needs to know this
+	 * so that it does not read the next token.
+	 */
+	if (! isType (token, TOKEN_KEYWORD))
+		return parseStatement (token, is_inside_class);
+
+	switch (token->keyword)
+	{
+		case KEYWORD_if:
+		case KEYWORD_else:
+		case KEYWORD_try:
+		case KEYWORD_catch:
+		case KEYWORD_finally:
+			/* Common semantics */
+			return parseIf (token);
+
+		case KEYWORD_for:
+		case KEYWORD_while:
+		case KEYWORD_do:
+			parseLoop (token);
+			return TRUE;
+
+		case KEYWORD_switch:
+			parseSwitch (token);
+			return TRUE;
+
+		default:
+			/* The result is deliberately not used for other keywords. */
+			parseStatement (token, is_inside_class);
+			return TRUE;
+	}
+}
+
+/*
+ * Top-level parsing loop: reads a token and parses the construct it starts,
+ * over and over.  The loop has no exit of its own; it is left through the
+ * longjmp() that readToken() performs at the end of the input.
+ */
+static void parseJsFile (tokenInfo *const token)
+{
+	for (;;)
+	{
+		readToken (token);
+
+		if (isType (token, TOKEN_KEYWORD) &&
+			isKeyword (token, KEYWORD_function))
+			parseFunction (token);
+		else
+			parseLine (token, FALSE);
+	}
+}
+
+/*
+ * Parser initialisation hook: remembers the language ID assigned to this
+ * parser and registers its keywords.
+ */
+static void initialize (const langType language)
+{
+	/* JsKinds is indexed by jsKind, so their sizes must agree. */
+	Assert (sizeof (JsKinds) / sizeof (*JsKinds) == JSTAG_COUNT);
+
+	/* Must be set before the keywords are registered for it. */
+	Lang_js = language;
+	buildJsKeywordHash ();
+}
+
+/*
+ * Parser entry point: generates the tags for the current input file.
+ *
+ * The lists of class and function names already tagged live only for the
+ * duration of this call.  parseJsFile() never returns normally; parsing
+ * ends when readToken() longjmp()s back here at the end of the input.
+ *
+ * Tokens allocated by the nested parse functions that are still active at
+ * that moment are not released, because the longjmp() bypasses their
+ * deleteToken() calls.
+ */
+static void findJsTags (void)
+{
+	tokenInfo *const token = newToken ();
+
+	ClassNames = stringListNew ();
+	FunctionNames = stringListNew ();
+
+	/*
+	 * ISO C only permits setjmp() in a few contexts, such as an operand of
+	 * a comparison that forms the whole controlling expression of a
+	 * selection statement; assigning its result to a variable is not one of
+	 * them.  A non-zero result means we got here through longjmp().
+	 */
+	if (setjmp (Exception) == ExceptionNone)
+		parseJsFile (token);
+
+	stringListDelete (ClassNames);
+	stringListDelete (FunctionNames);
+	ClassNames = NULL;
+	FunctionNames = NULL;
+	deleteToken (token);
+}
+
+/* Create parser definition structure */
+extern parserDefinition* JavaScriptParser (void)
+{
+	/* File name extensions handled by this parser (NULL terminated). */
+	static const char *const extensions [] = { "js", NULL };
+	parserDefinition *const parser = parserNew ("JavaScript");
+
+	/* A hand-written token parser is used here rather than regexes. */
+	parser->initialize = initialize;
+	parser->parser     = findJsTags;
+	parser->kinds      = JsKinds;
+	parser->kindCount  = KIND_COUNT (JsKinds);
+	parser->extensions = extensions;
+
+	return parser;
+}
+/* vi:set tabstop=4 shiftwidth=4 noexpandtab: */
diff --git a/keyword.c b/keyword.c
new file mode 100644
index 0000000..2a549d9
--- /dev/null
+++ b/keyword.c
@@ -0,0 +1,258 @@
+/*
+* $Id: keyword.c 715 2009-07-06 03:31:00Z dhiebert $
+*
+* Copyright (c) 1998-2002, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* Manages a keyword hash.
+*/
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#include <string.h>
+
+#include "debug.h"
+#include "keyword.h"
+#include "options.h"
+#include "routines.h"
+
+/*
+* MACROS
+*/
+#define HASH_EXPONENT 7 /* must be less than 17 */
+
+/*
+* DATA DECLARATIONS
+*/
+typedef struct sHashEntry {
+ struct sHashEntry *next;
+ const char *string;
+ langType language;
+ int value;
+} hashEntry;
+
+/*
+* DATA DEFINITIONS
+*/
+static const unsigned int TableSize = 1 << HASH_EXPONENT;
+static hashEntry **HashTable = NULL;
+
+/*
+* FUNCTION DEFINITIONS
+*/
+
+/* Returns the bucket array, allocating it and clearing every bucket on
+ * first use.
+ *
+ * Allocation is keyed on HashTable itself rather than on a separate
+ * function-local flag: a flag that is never reset would keep handing out
+ * the old pointer even when HashTable no longer refers to a live table.
+ */
+static hashEntry **getHashTable (void)
+{
+	if (HashTable == NULL)
+	{
+		unsigned int i;
+
+		HashTable = xMalloc (TableSize, hashEntry*);
+
+		for (i = 0 ; i < TableSize ; ++i)
+			HashTable [i] = NULL;
+	}
+	return HashTable;
+}
+
+/* Returns the head of the collision chain stored in bucket `hashedValue',
+ * or NULL when that bucket is empty.
+ */
+static hashEntry *getHashTableEntry (unsigned long hashedValue)
+{
+	hashEntry **const buckets = getHashTable ();
+
+	Assert (hashedValue < TableSize);
+	return buckets [hashedValue];
+}
+
+/* Maps a NUL-terminated keyword to a bucket index that fits in
+ * HASH_EXPONENT bits.
+ */
+static unsigned long hashValue (const char *const string)
+{
+	const unsigned char *cursor;
+	unsigned long hash = 0;
+
+	Assert (string != NULL);
+
+	/* Fold the bytes together using the multiword-key method described on
+	 * page 512 of Vol. 3 of "The Art of Computer Programming": rotate left
+	 * within 8 bits, then mix in the next byte.
+	 */
+	cursor = (const unsigned char *) string;
+	while (*cursor != '\0')
+	{
+		hash <<= 1;
+		if (hash & 0x00000100L)
+			hash = (hash & 0x000000ffL) + 1L;
+		hash ^= *cursor;
+		++cursor;
+	}
+
+	/* Multiplicative hashing from page 509 of the same volume: "hash" is
+	 * treated as a 16-bit integer plus 16-bit fraction.
+	 */
+	hash *= 40503L;                /* = 2^16 * 0.6180339887 ("golden ratio") */
+	hash &= 0x0000ffffL;           /* keep fractional part */
+	hash >>= 16 - HASH_EXPONENT;   /* keep the top HASH_EXPONENT bits */
+
+	return hash;
+}
+
+/* Allocates an unlinked chain node describing one keyword. The keyword text
+ * is not copied: the node stores the `string' pointer as given.
+ */
+static hashEntry *newEntry (
+		const char *const string, langType language, int value)
+{
+	hashEntry *const fresh = xMalloc (1, hashEntry);
+
+	fresh->string   = string;
+	fresh->language = language;
+	fresh->value    = value;
+	fresh->next     = NULL;
+
+	return fresh;
+}
+
+/* Registers `string' as a keyword of `language' with the given `value' by
+ * appending it to the end of its bucket's chain. Adding the same
+ * keyword/language pair twice trips an assertion.
+ *
+ * Note that it is assumed that a "value" of zero means an undefined keyword
+ * and clients of this function should observe this. Also, all keywords added
+ * should be added in lower case. If we encounter a case-sensitive language
+ * whose keywords are in upper case, we will need to redesign this.
+ */
+extern void addKeyword (const char *const string, langType language, int value)
+{
+	const unsigned long hashedValue = hashValue (string);
+	hashEntry *entry = getHashTableEntry (hashedValue);
+	hashEntry *tail = NULL;
+
+	/* Walk to the end of the chain, checking for duplicates on the way. */
+	for ( ; entry != NULL ; entry = entry->next)
+	{
+		if (language == entry->language &&
+			strcmp (string, entry->string) == 0)
+		{
+			Assert (("Already in table" == NULL));
+		}
+		tail = entry;
+	}
+
+	if (tail == NULL)
+	{
+		/* Empty bucket: the new node becomes its head. */
+		hashEntry **const table = getHashTable ();
+		table [hashedValue] = newEntry (string, language, value);
+	}
+	else
+		tail->next = newEntry (string, language, value);
+}
+
+/* Looks up `string' among the keywords registered for `language'.
+ * Returns the value stored by addKeyword(), or -1 if there is no match.
+ * The comparison is case-sensitive.
+ */
+extern int lookupKeyword (const char *const string, langType language)
+{
+	const hashEntry *candidate;
+
+	for (candidate = getHashTableEntry (hashValue (string)) ;
+		 candidate != NULL ;
+		 candidate = candidate->next)
+	{
+		if (candidate->language == language &&
+			strcmp (string, candidate->string) == 0)
+			return candidate->value;
+	}
+	return -1;
+}
+
+/* Releases every chain node and then the bucket array itself. The keyword
+ * strings referenced by the nodes are not freed here. Safe to call when no
+ * table exists, and safe to call more than once.
+ */
+extern void freeKeywordTable (void)
+{
+	if (HashTable != NULL)
+	{
+		unsigned int i;
+
+		for (i = 0 ; i < TableSize ; ++i)
+		{
+			hashEntry *entry = HashTable [i];
+
+			while (entry != NULL)
+			{
+				hashEntry *next = entry->next;
+				eFree (entry);
+				entry = next;
+			}
+		}
+		eFree (HashTable);
+		/* Drop the stale pointer so that a second call (or any later
+		 * NULL check on HashTable) cannot touch freed memory.
+		 */
+		HashTable = NULL;
+	}
+}
+
+/* Case-insensitive front end to lookupKeyword(): looks up a lower-cased
+ * copy of `name' and returns the result unchanged. `name' is not modified.
+ */
+extern int analyzeToken (vString *const name, langType language)
+{
+	vString *const lowered = vStringNew ();
+	int id;
+
+	vStringCopyToLower (lowered, name);
+	id = lookupKeyword (vStringValue (lowered), language);
+	vStringDelete (lowered);
+
+	return id;
+}
+
+#ifdef DEBUG
+
+/* Debug helper: prints one keyword together with the name of its language. */
+static void printEntry (const hashEntry *const entry)
+{
+	const char *const languageName = getLanguageName (entry->language);
+
+	printf (" %-15s %-7s\n", entry->string, languageName);
+}
+
+/* Debug helper: prints bucket `i' with every keyword chained in it.
+ * Returns 2^n - 1 for a chain of n entries (0 for an empty bucket), which
+ * penalises long chains in the overall "spread measure".
+ */
+static unsigned int printBucket (const unsigned int i)
+{
+	hashEntry **const table = getHashTable ();
+	hashEntry *entry = table [i];
+	unsigned int measure = 1;
+	boolean first = TRUE;
+
+	printf ("%2u:", i);	/* `i' is unsigned, so %u rather than %d */
+	if (entry == NULL)
+		printf ("\n");
+	else while (entry != NULL)
+	{
+		if (! first)
+			printf (" ");
+		else
+		{
+			printf (" ");
+			first = FALSE;
+		}
+		printEntry (entry);
+		entry = entry->next;
+		measure = 2 * measure;
+	}
+	return measure - 1;
+}
+
+/* Debug helper: dumps the whole keyword table, followed by the summed
+ * per-bucket measure and the number of empty buckets.
+ */
+extern void printKeywordTable (void)
+{
+	unsigned long emptyBucketCount = 0;
+	unsigned long measure = 0;
+	unsigned int i;
+
+	for (i = 0 ; i < TableSize ; ++i)
+	{
+		const unsigned int pass = printBucket (i);
+
+		measure += pass;
+		if (pass == 0)
+			++emptyBucketCount;
+	}
+
+	/* Both counters are unsigned long, so they need %lu, not %ld. */
+	printf ("spread measure = %lu\n", measure);
+	printf ("%lu empty buckets\n", emptyBucketCount);
+}
+
+#endif
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/keyword.h b/keyword.h
new file mode 100644
index 0000000..e10bbfd
--- /dev/null
+++ b/keyword.h
@@ -0,0 +1,34 @@
+/*
+* $Id: keyword.h 658 2008-04-20 23:21:35Z elliotth $
+*
+* Copyright (c) 1998-2002, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* External interface to keyword.c
+*/
+#ifndef _KEYWORD_H
+#define _KEYWORD_H
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#include "parse.h"
+
+/*
+* FUNCTION PROTOTYPES
+*/
+extern void addKeyword (const char *const string, langType language, int value);
+extern int lookupKeyword (const char *const string, langType language);
+extern void freeKeywordTable (void);
+#ifdef DEBUG
+extern void printKeywordTable (void);
+#endif
+extern int analyzeToken (vString *const name, langType language);
+
+#endif /* _KEYWORD_H */
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/lisp.c b/lisp.c
new file mode 100644
index 0000000..6fdc4dd
--- /dev/null
+++ b/lisp.c
@@ -0,0 +1,139 @@
+/*
+* $Id: lisp.c 717 2009-07-07 03:40:50Z dhiebert $
+*
+* Copyright (c) 2000-2002, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* This module contains functions for generating tags for LISP files.
+*/
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#include "parse.h"
+#include "read.h"
+#include "vstring.h"
+
+/*
+* DATA DEFINITIONS
+*/
+typedef enum {
+ K_FUNCTION
+} lispKind;
+
+static kindOption LispKinds [] = {
+ { TRUE, 'f', "function", "functions" }
+};
+
+/*
+* FUNCTION DEFINITIONS
+*/
+
+/*
+ * lisp tag functions
+ * look for (def or (DEF, quote or QUOTE
+ *
+ * L_isdef: returns 1 when the three characters after strp[0] spell "def" in
+ * any mix of cases, 0 otherwise. strp[0] itself is not examined. Checking
+ * stops at the first mismatch, so a short string is never read past its NUL.
+ */
+static int L_isdef (const unsigned char *strp)
+{
+	if (strp [1] != 'd' && strp [1] != 'D')
+		return 0;
+	if (strp [2] != 'e' && strp [2] != 'E')
+		return 0;
+	return (strp [3] == 'f' || strp [3] == 'F');
+}
+
+/* Returns 1 when the characters after strp[0] spell "quote" (each letter in
+ * either case) followed by a whitespace character, 0 otherwise. strp[0] is
+ * not examined. Checking stops at the first mismatch, so a short string is
+ * never read past its NUL.
+ */
+static int L_isquote (const unsigned char *strp)
+{
+	static const char lower [] = "quote";
+	static const char upper [] = "QUOTE";
+	unsigned int i;
+
+	for (i = 0 ; i < 5 ; ++i)
+	{
+		const unsigned char c = strp [i + 1];
+
+		if (c != (unsigned char) lower [i] && c != (unsigned char) upper [i])
+			return 0;
+	}
+	return isspace (strp [6]) ? 1 : 0;
+}
+
+/* Emits a function tag for the symbol starting at `dbp'.
+ *
+ * A leading ' or a leading "(quote " wrapper is skipped first. The symbol
+ * then runs up to the next whitespace, parenthesis or end of line. Nothing
+ * is emitted for an empty symbol. `name' is scratch space and is left empty.
+ */
+static void L_getit (vString *const name, const unsigned char *dbp)
+{
+	const unsigned char *cp;
+
+	if (*dbp == '\'')                           /* Skip prefix quote */
+		++dbp;
+	else if (*dbp == '(' && L_isquote (dbp))    /* Skip "(quote " */
+	{
+		dbp += 7;
+		while (isspace (*dbp))
+			++dbp;
+	}
+
+	cp = dbp;
+	while (*cp != '\0' && *cp != '(' && *cp != ')' && !isspace ((int) *cp))
+	{
+		vStringPut (name, *cp);
+		++cp;
+	}
+	vStringTerminate (name);
+
+	if (vStringLength (name) > 0)
+		makeSimpleTag (name, LispKinds, K_FUNCTION);
+	vStringClear (name);
+}
+
+/* Parser callback: reads the input line by line and tags the symbol that
+ * follows any form whose head starts with "def" in any case, e.g.
+ * "(defun name". Package-qualified heads such as "(foo::defmumble name" are
+ * recognised too. Only lines whose first character is '(' are examined.
+ *
+ * Algorithm adapted from GNU etags.
+ */
+static void findLispTags (void)
+{
+ vString *name = vStringNew ();
+ const unsigned char* p;
+
+
+ while ((p = fileReadLine ()) != NULL)
+ {
+ if (*p == '(')
+ {
+ if (L_isdef (p))
+ {
+ while (*p != '\0' && !isspace ((int) *p)) /* skip the rest of the "(def..." word */
+ p++;
+ while (isspace ((int) *p)) /* skip the blanks before the defined name */
+ p++;
+ L_getit (name, p);
+ }
+ else
+ {
+ /* Check for (foo::defmumble name-defined ... */
+ do
+ p++;
+ while (*p != '\0' && !isspace ((int) *p)
+ && *p != ':' && *p != '(' && *p != ')');
+ if (*p == ':')
+ {
+ do
+ p++;
+ while (*p == ':');
+
+ if (L_isdef (p - 1)) /* L_isdef() inspects offsets 1..3, so pass the last ':' */
+ {
+ while (*p != '\0' && !isspace ((int) *p))
+ p++;
+ while (isspace (*p))
+ p++;
+ L_getit (name, p);
+ }
+ }
+ }
+ }
+ }
+ vStringDelete (name);
+}
+
+/* Builds the parser definition for Lisp: one "function" kind, the file
+ * extensions listed below, and findLispTags() as the parser callback.
+ */
+extern parserDefinition* LispParser (void)
+{
+	static const char *const extensions [] = {
+		"cl", "clisp", "el", "l", "lisp", "lsp", NULL
+	};
+	parserDefinition* const def = parserNew ("Lisp");
+
+	def->parser     = findLispTags;
+	def->extensions = extensions;
+	def->kinds      = LispKinds;
+	def->kindCount  = KIND_COUNT (LispKinds);
+
+	return def;
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/lregex.c b/lregex.c
new file mode 100644
index 0000000..59f5df6
--- /dev/null
+++ b/lregex.c
@@ -0,0 +1,704 @@
+/*
+* $Id: lregex.c 576 2007-06-30 04:16:23Z elliotth $
+*
+* Copyright (c) 2000-2003, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* This module contains functions for applying regular expression matching.
+*
+* The code for utilizing the Gnu regex package with regards to processing the
+* regex option and checking for regex matches was adapted from routines in
+* Gnu etags.
+*/
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#include <string.h>
+
+#ifdef HAVE_REGCOMP
+# include <ctype.h>
+# include <stddef.h>
+# ifdef HAVE_SYS_TYPES_H
+# include <sys/types.h> /* declare off_t (not known to regex.h on FreeBSD) */
+# endif
+# include <regex.h>
+#endif
+
+#include "debug.h"
+#include "entry.h"
+#include "parse.h"
+#include "read.h"
+#include "routines.h"
+
+#ifdef HAVE_REGEX
+
+/*
+* MACROS
+*/
+
+/* Back-references \0 through \9 */
+#define BACK_REFERENCE_COUNT 10
+
+#if defined (HAVE_REGCOMP) && !defined (REGCOMP_BROKEN)
+# define POSIX_REGEX
+#endif
+
+#define REGEX_NAME "Regex"
+
+/*
+* DATA DECLARATIONS
+*/
+#if defined (POSIX_REGEX)
+
+struct sKind {
+ boolean enabled;
+ char letter;
+ char* name;
+ char* description;
+};
+
+enum pType { PTRN_TAG, PTRN_CALLBACK };
+
+typedef struct {
+ regex_t *pattern;
+ enum pType type;
+ union {
+ struct {
+ char *name_pattern;
+ struct sKind kind;
+ } tag;
+ struct {
+ regexCallback function;
+ } callback;
+ } u;
+} regexPattern;
+
+#endif
+
+typedef struct {
+ regexPattern *patterns;
+ unsigned int count;
+} patternSet;
+
+/*
+* DATA DEFINITIONS
+*/
+
+static boolean regexBroken = FALSE;
+
+/* Array of pattern sets, indexed by language */
+static patternSet* Sets = NULL;
+static int SetUpper = -1; /* upper language index in list */
+
+/*
+* FUNCTION DEFINITIONS
+*/
+
+/* Releases every pattern registered for `language' and leaves its set
+ * empty. For tag patterns the name pattern, kind name and optional
+ * description are freed as well. Does nothing if `language' is above the
+ * highest language index that has a set.
+ */
+static void clearPatternSet (const langType language)
+{
+	patternSet *set;
+	unsigned int idx;
+
+	if (language > SetUpper)
+		return;
+
+	set = Sets + language;
+	for (idx = 0 ; idx < set->count ; ++idx)
+	{
+		regexPattern *const pat = set->patterns + idx;
+
+#if defined (POSIX_REGEX)
+		regfree (pat->pattern);
+#endif
+		eFree (pat->pattern);
+		pat->pattern = NULL;
+
+		if (pat->type != PTRN_TAG)
+			continue;
+
+		eFree (pat->u.tag.name_pattern);
+		pat->u.tag.name_pattern = NULL;
+		eFree (pat->u.tag.kind.name);
+		pat->u.tag.kind.name = NULL;
+		if (pat->u.tag.kind.description != NULL)
+		{
+			eFree (pat->u.tag.kind.description);
+			pat->u.tag.kind.description = NULL;
+		}
+	}
+
+	if (set->patterns != NULL)
+		eFree (set->patterns);
+	set->patterns = NULL;
+	set->count = 0;
+}
+
+/*
+* Regex pseudo-parser
+*/
+
+/* Emits a tag called `name' using the letter and name of `kind', unless
+ * that kind is disabled. `name' must be non-empty.
+ */
+static void makeRegexTag (
+		const vString* const name, const struct sKind* const kind)
+{
+	/* Validate the arguments before `kind' is dereferenced; checking it
+	 * only after reading kind->enabled would be too late to be useful.
+	 */
+	Assert (kind != NULL);
+	Assert (name != NULL && vStringLength (name) > 0);
+	if (kind->enabled)
+	{
+		tagEntryInfo e;
+		initTagEntry (&e, vStringValue (name));
+		e.kind = kind->letter;
+		e.kindName = kind->name;
+		makeTagEntry (&e);
+	}
+}
+
+/*
+* Regex pattern definition
+*/
+
+/* Take a string like "/blah/" and turn it into "blah", making sure
+ * that the first and last characters are the same, and handling
+ * quoted separator characters. Actually, stops on the occurrence of
+ * an unquoted separator. Also turns "\t" into a Tab character.
+ * Returns pointer to terminating separator. Works in place. Null
+ * terminates name string.
+ *
+ * Details: name[0] is taken as the separator. The unescaped text is
+ * written back starting at name[0] itself, so on return the caller's
+ * original pointer addresses the extracted field. If no closing separator
+ * is found, the returned pointer addresses the terminating NUL instead. A
+ * backslash followed by any character other than the separator or 't' is
+ * kept together with that character. A backslash that ends the string is
+ * dropped.
+ */
+static char* scanSeparators (char* name)
+{
+ char sep = name [0];
+ char *copyto = name;
+ boolean quoted = FALSE; /* TRUE while the previous character was an unconsumed backslash */
+
+ for (++name ; *name != '\0' ; ++name)
+ {
+ if (quoted)
+ {
+ if (*name == sep)
+ *copyto++ = sep;
+ else if (*name == 't')
+ *copyto++ = '\t';
+ else
+ {
+ /* Something else is quoted, so preserve the quote. */
+ *copyto++ = '\\';
+ *copyto++ = *name;
+ }
+ quoted = FALSE;
+ }
+ else if (*name == '\\')
+ quoted = TRUE;
+ else if (*name == sep)
+ {
+ break;
+ }
+ else
+ *copyto++ = *name;
+ }
+ *copyto = '\0';
+ return name;
+}
+
+/* Parse `regexp', in form "/regex/name/[k,Kind/]flags" (where the separator
+ * character is whatever the first character of `regexp' is), by breaking it
+ * up into null terminated strings, removing the separators, and expanding
+ * '\t' into tabs. When complete, `regexp' points to the line matching
+ * pattern, a pointer to the name matching pattern is written to `name', a
+ * pointer to the kinds is written to `kinds' (possibly NULL), and a pointer
+ * to the trailing flags is written to `flags'. If the pattern is not in the
+ * correct format, a false value is returned.
+ *
+ * `kinds' and `flags' are only written when TRUE is returned. An empty name
+ * pattern or one ending in a backslash is reported with a warning but does
+ * not by itself make the function fail.
+ */
+static boolean parseTagRegex (
+		char* const regexp, char** const name,
+		char** const kinds, char** const flags)
+{
+	boolean result = FALSE;
+	const int separator = (unsigned char) regexp [0];
+
+	*name = scanSeparators (regexp);
+	if (*regexp == '\0')
+		error (WARNING, "empty regexp");
+	else if (**name != separator)
+		error (WARNING, "%s: incomplete regexp", regexp);
+	else
+	{
+		char* const third = scanSeparators (*name);
+		if (**name == '\0')
+			error (WARNING, "%s: regexp missing name pattern", regexp);
+		/* Only look at the last character when there is one: for an empty
+		 * name strlen() - 1 would index one byte before the string.
+		 */
+		else if ((*name) [strlen (*name) - 1] == '\\')
+			error (WARNING, "error in name pattern: \"%s\"", *name);
+		if (*third != separator)
+			error (WARNING, "%s: regexp missing final separator", regexp);
+		else
+		{
+			char* const fourth = scanSeparators (third);
+			if (*fourth == separator)
+			{
+				/* Four fields: regex, name, kinds, flags. */
+				*kinds = third;
+				scanSeparators (fourth);
+				*flags = fourth;
+			}
+			else
+			{
+				/* Three fields: regex, name, flags. */
+				*flags = third;
+				*kinds = NULL;
+			}
+			result = TRUE;
+		}
+	}
+	return result;
+}
+
+/* Appends a tag-producing pattern to the set of `language', growing the
+ * per-language table first if that language has no slot yet. The pointers
+ * `pattern', `name', `kindName' and `description' are stored as given, not
+ * copied. The new kind starts out enabled.
+ */
+static void addCompiledTagPattern (
+		const langType language, regex_t* const pattern,
+		char* const name, const char kind, char* const kindName,
+		char *const description)
+{
+	patternSet* set;
+	regexPattern *slot;
+
+	if (language > SetUpper)
+	{
+		Sets = xRealloc (Sets, (language + 1), patternSet);
+		while (SetUpper < language)
+		{
+			++SetUpper;
+			Sets [SetUpper].patterns = NULL;
+			Sets [SetUpper].count = 0;
+		}
+	}
+
+	set = &Sets [language];
+	set->patterns = xRealloc (set->patterns, (set->count + 1), regexPattern);
+	slot = &set->patterns [set->count];
+	++set->count;
+
+	slot->type = PTRN_TAG;
+	slot->pattern = pattern;
+	slot->u.tag.name_pattern = name;
+	slot->u.tag.kind.letter = kind;
+	slot->u.tag.kind.name = kindName;
+	slot->u.tag.kind.description = description;
+	slot->u.tag.kind.enabled = TRUE;
+}
+
+/* Appends a callback pattern to the set of `language', growing the
+ * per-language table first if that language has no slot yet. The `pattern'
+ * pointer is stored as given, not copied.
+ */
+static void addCompiledCallbackPattern (
+		const langType language, regex_t* const pattern,
+		const regexCallback callback)
+{
+	patternSet* set;
+	regexPattern *slot;
+
+	if (language > SetUpper)
+	{
+		Sets = xRealloc (Sets, (language + 1), patternSet);
+		while (SetUpper < language)
+		{
+			++SetUpper;
+			Sets [SetUpper].patterns = NULL;
+			Sets [SetUpper].count = 0;
+		}
+	}
+
+	set = &Sets [language];
+	set->patterns = xRealloc (set->patterns, (set->count + 1), regexPattern);
+	slot = &set->patterns [set->count];
+	++set->count;
+
+	slot->type = PTRN_CALLBACK;
+	slot->pattern = pattern;
+	slot->u.callback.function = callback;
+}
+
+#if defined (POSIX_REGEX)
+
+/* Compiles `regexp' into a newly allocated regex_t.
+ *
+ * `flags' may be NULL or a string of option letters:
+ *   b  basic regex syntax (clears REG_EXTENDED)
+ *   e  extended regex syntax (already the default)
+ *   i  case-insensitive matching
+ * Any other letter is reported with a warning and otherwise ignored.
+ *
+ * Returns the compiled pattern, or NULL (after a warning) if regcomp()
+ * rejects the expression.
+ */
+static regex_t* compileRegex (const char* const regexp, const char* const flags)
+{
+	int cflags = REG_EXTENDED | REG_NEWLINE;
+	regex_t *result = NULL;
+	int errcode;
+	int i;
+	for (i = 0 ; flags != NULL && flags [i] != '\0' ; ++i)
+	{
+		switch ((int) flags [i])
+		{
+			case 'b': cflags &= ~REG_EXTENDED; break;
+			case 'e': cflags |= REG_EXTENDED; break;
+			case 'i': cflags |= REG_ICASE; break;
+			/* Report the offending letter, not the first one in the list. */
+			default: error (WARNING, "unknown regex flag: '%c'", flags [i]); break;
+		}
+	}
+	result = xMalloc (1, regex_t);
+	errcode = regcomp (result, regexp, cflags);
+	if (errcode != 0)
+	{
+		char errmsg[256];
+		regerror (errcode, result, errmsg, sizeof errmsg);
+		error (WARNING, "regcomp %s: %s", regexp, errmsg);
+		regfree (result);
+		eFree (result);
+		result = NULL;
+	}
+	return result;
+}
+
+#endif
+
+/* Splits a kind specification of the form "k", "k,name" or
+ * "k,name,description" (the leading letter may be omitted).
+ *
+ * Outputs:
+ *   *kind         the kind letter; 'r' when none is given
+ *   *kindName     newly allocated kind name; "regex" when none is given
+ *   *description  newly allocated description, or NULL when absent or empty
+ *
+ * A NULL or empty `kinds' yields 'r' / "regex" / NULL.
+ */
+static void parseKinds (
+		const char* const kinds, char* const kind, char** const kindName,
+		char **description)
+{
+	const char *cursor;
+	const char *comma;
+	size_t nameLength;
+
+	*kind = '\0';
+	*kindName = NULL;
+	*description = NULL;
+
+	if (kinds == NULL || kinds [0] == '\0')
+	{
+		*kind = 'r';
+		*kindName = eStrdup ("regex");
+		return;
+	}
+
+	/* A single leading character counts as the kind letter only when it
+	 * stands alone, i.e. is followed by ',' or the end of the string.
+	 */
+	cursor = kinds;
+	if (cursor [0] != ',' && (cursor [1] == ',' || cursor [1] == '\0'))
+	{
+		*kind = cursor [0];
+		++cursor;
+	}
+	else
+		*kind = 'r';
+
+	if (*cursor == ',')
+		++cursor;
+
+	if (*cursor == '\0')
+	{
+		*kindName = eStrdup ("regex");
+		return;
+	}
+
+	comma = strchr (cursor, ',');
+	if (comma == NULL)
+	{
+		*kindName = eStrdup (cursor);
+		return;
+	}
+
+	/* "name,description": copy the name, then keep whatever follows. */
+	nameLength = comma - cursor;
+	*kindName = (char*) eMalloc (nameLength + 1);
+	memcpy (*kindName, cursor, nameLength);
+	(*kindName) [nameLength] = '\0';
+	if (comma [1] != '\0')
+		*description = eStrdup (comma + 1);
+}
+
+/* Prints one line describing the kind of tag pattern `pat [i]': its letter
+ * ('?' if unset), its description (or its name when there is none) and an
+ * " [off]" marker when the kind is disabled. The pattern must be a tag
+ * pattern.
+ */
+static void printRegexKind (const regexPattern *pat, unsigned int i, boolean indent)
+{
+	const regexPattern *const entry = pat + i;
+	const struct sKind *const kind = &entry->u.tag.kind;
+	const char *label;
+	int letter;
+
+	Assert (entry->type == PTRN_TAG);
+
+	letter = kind->letter != '\0' ? kind->letter : '?';
+	label = kind->description != NULL ? kind->description : kind->name;
+	printf ("%s%c %s %s\n", indent ? " " : "", letter, label,
+			kind->enabled ? "" : " [off]");
+}
+
+/* Handles the argument of a --regex-<LANG> option:
+ *   NULL or ""   remove all patterns defined for `language'
+ *   "@file"      read one pattern definition per line from `file'
+ *   other        treat the argument itself as a pattern definition
+ */
+static void processLanguageRegex (const langType language,
+		const char* const parameter)
+{
+	if (parameter == NULL || parameter [0] == '\0')
+		clearPatternSet (language);
+	else if (parameter [0] != '@')
+		addLanguageRegex (language, parameter);
+	else if (! doesFileExist (parameter + 1))
+		error (WARNING, "cannot open regex file");
+	else
+	{
+		const char* regexfile = parameter + 1;
+		FILE* const fp = fopen (regexfile, "r");
+		if (fp == NULL)
+			/* Never use a user-supplied file name as the format string:
+			 * a '%' in the name would be interpreted as a conversion.
+			 */
+			error (WARNING | PERROR, "%s", regexfile);
+		else
+		{
+			vString* const regex = vStringNew ();
+			while (readLine (regex, fp))
+				addLanguageRegex (language, vStringValue (regex));
+			fclose (fp);
+			vStringDelete (regex);
+		}
+	}
+}
+
+/*
+* Regex pattern matching
+*/
+
+#if defined (POSIX_REGEX)
+
+/* Expands the name pattern `out' against a successful match on `in'.
+ *
+ * "\1" to "\9" are replaced by the text of the corresponding sub-match;
+ * groups that did not take part in the match, group numbers not below
+ * `nmatch', and "\0" expand to nothing. A backslash before any other
+ * character is dropped and the character kept. CR and LF are never copied.
+ * A backslash at the very end of `out' is ignored.
+ *
+ * Returns a newly allocated vString.
+ */
+static vString* substitute (
+		const char* const in, const char* out,
+		const int nmatch, const regmatch_t* const pmatch)
+{
+	vString* result = vStringNew ();
+	const char* p;
+	for (p = out ; *p != '\0' ; p++)
+	{
+		if (*p == '\\')
+		{
+			/* Stop at a dangling backslash: stepping over it would move
+			 * `p' onto the NUL and the loop increment past the buffer.
+			 */
+			if (p [1] == '\0')
+				break;
+			++p;
+			/* <ctype.h> functions need an unsigned char value. */
+			if (isdigit ((unsigned char) *p))
+			{
+				const int dig = *p - '0';
+				if (0 < dig && dig < nmatch && pmatch [dig].rm_so != -1)
+				{
+					const int diglen = pmatch [dig].rm_eo - pmatch [dig].rm_so;
+					vStringNCatS (result, in + pmatch [dig].rm_so, diglen);
+				}
+				continue;
+			}
+		}
+		if (*p != '\n' && *p != '\r')
+			vStringPut (result, *p);
+	}
+	vStringTerminate (result);
+	return result;
+}
+
+/* Builds the tag name for a matched tag pattern by expanding its name
+ * pattern against `line', strips surrounding whitespace and emits the tag.
+ * A warning is issued instead if the expansion turns out empty.
+ */
+static void matchTagPattern (const vString* const line,
+		const regexPattern* const patbuf,
+		const regmatch_t* const pmatch)
+{
+	const char *const namePattern = patbuf->u.tag.name_pattern;
+	vString *const tagName = substitute (vStringValue (line),
+			namePattern, BACK_REFERENCE_COUNT, pmatch);
+
+	vStringStripLeading (tagName);
+	vStringStripTrailing (tagName);
+
+	if (vStringLength (tagName) == 0)
+		error (WARNING, "%s:%ld: null expansion of name pattern \"%s\"",
+			getInputFileName (), getInputLineNumber (),
+			patbuf->u.tag.name_pattern);
+	else
+		makeRegexTag (tagName, &patbuf->u.tag.kind);
+
+	vStringDelete (tagName);
+}
+
+/* Converts the leading run of valid sub-matches (index 0 upward, stopping
+ * at the first unused group) into start/length pairs and passes them, with
+ * the matched line, to the pattern's callback.
+ */
+static void matchCallbackPattern (
+		const vString* const line, const regexPattern* const patbuf,
+		const regmatch_t* const pmatch)
+{
+	regexMatch matches [BACK_REFERENCE_COUNT];
+	unsigned int count = 0;
+
+	while (count < BACK_REFERENCE_COUNT && pmatch [count].rm_so != -1)
+	{
+		matches [count].start  = pmatch [count].rm_so;
+		matches [count].length = pmatch [count].rm_eo - pmatch [count].rm_so;
+		++count;
+	}
+	patbuf->u.callback.function (vStringValue (line), matches, count);
+}
+
+/* Runs one compiled pattern against `line'. On a match the pattern's
+ * action (tag creation or callback) is carried out and TRUE is returned.
+ * FALSE means no match, or a pattern of unknown type.
+ */
+static boolean matchRegexPattern (const vString* const line,
+		const regexPattern* const patbuf)
+{
+	regmatch_t pmatch [BACK_REFERENCE_COUNT];
+
+	if (regexec (patbuf->pattern, vStringValue (line),
+				BACK_REFERENCE_COUNT, pmatch, 0) != 0)
+		return FALSE;
+
+	switch (patbuf->type)
+	{
+		case PTRN_TAG:
+			matchTagPattern (line, patbuf, pmatch);
+			return TRUE;
+		case PTRN_CALLBACK:
+			matchCallbackPattern (line, patbuf, pmatch);
+			return TRUE;
+		default:
+			Assert ("invalid pattern type" == NULL);
+			return FALSE;
+	}
+}
+
+#endif
+
+/* PUBLIC INTERFACE */
+
+/* Match against all patterns for specified language. Returns true if at least
+ * one pattern matched. Every pattern is tried even after a first match, so
+ * one line can produce several tags.
+ */
+extern boolean matchRegex (const vString* const line, const langType language)
+{
+	boolean matched = FALSE;
+	const patternSet* set;
+	unsigned int i;
+
+	if (language == LANG_IGNORE || language > SetUpper)
+		return FALSE;
+
+	set = Sets + language;
+	for (i = 0 ; i < set->count ; ++i)
+	{
+		if (matchRegexPattern (line, set->patterns + i))
+			matched = TRUE;
+	}
+	return matched;
+}
+
+/* Parser callback for regex-only languages: it merely reads every line of
+ * the file and discards it (presumably the patterns are applied as a side
+ * effect of reading -- confirm in fileReadLine()).
+ */
+extern void findRegexTags (void)
+{
+	const unsigned char *line;
+
+	do
+		line = fileReadLine ();
+	while (line != NULL);
+}
+
+#endif /* HAVE_REGEX */
+
+/* Registers a tag-producing pattern for `language'.
+ *
+ *   regex  line-matching expression (must not be NULL)
+ *   name   name pattern, may contain \1..\9 (must not be NULL; it is copied)
+ *   kinds  optional "k,name,description" specification
+ *   flags  optional compile flags, see compileRegex()
+ *
+ * Nothing is registered if the expression does not compile or regex support
+ * has been marked broken. Without HAVE_REGEX this is a no-op.
+ */
+extern void addTagRegex (
+		const langType language __unused__,
+		const char* const regex __unused__,
+		const char* const name __unused__,
+		const char* const kinds __unused__,
+		const char* const flags __unused__)
+{
+#ifdef HAVE_REGEX
+	regex_t* compiled;
+	char kind;
+	char* kindName;
+	char* description;
+
+	Assert (regex != NULL);
+	Assert (name != NULL);
+	if (regexBroken)
+		return;
+
+	compiled = compileRegex (regex, flags);
+	if (compiled == NULL)
+		return;
+
+	parseKinds (kinds, &kind, &kindName, &description);
+	addCompiledTagPattern (language, compiled, eStrdup (name),
+			kind, kindName, description);
+#endif
+}
+
+/* Registers a pattern for `language' whose matches are reported to
+ * `callback' instead of producing tags directly. Nothing is registered if
+ * the expression does not compile or regex support has been marked broken.
+ * Without HAVE_REGEX this is a no-op.
+ */
+extern void addCallbackRegex (
+		const langType language __unused__,
+		const char* const regex __unused__,
+		const char* const flags __unused__,
+		const regexCallback callback __unused__)
+{
+#ifdef HAVE_REGEX
+	regex_t* compiled;
+
+	Assert (regex != NULL);
+	if (regexBroken)
+		return;
+
+	compiled = compileRegex (regex, flags);
+	if (compiled != NULL)
+		addCompiledCallbackPattern (language, compiled, callback);
+#endif
+}
+
+/* Parses a complete pattern definition of the form
+ * "/regex/name/[kinds/]flags" and registers it for `language'. A malformed
+ * definition is reported by parseTagRegex() and then ignored. Without
+ * HAVE_REGEX this is a no-op.
+ */
+extern void addLanguageRegex (
+		const langType language __unused__, const char* const regex __unused__)
+{
+#ifdef HAVE_REGEX
+	if (! regexBroken)
+	{
+		/* parseTagRegex() splits its argument in place, so work on a copy. */
+		char *const regex_pat = eStrdup (regex);
+		char *name, *kinds, *flags;
+		if (parseTagRegex (regex_pat, &name, &kinds, &flags))
+			addTagRegex (language, regex_pat, name, kinds, flags);
+		/* Release the scratch copy on both paths; freeing it only after a
+		 * successful parse leaked it for every malformed definition.
+		 */
+		eFree (regex_pat);
+	}
+#endif
+}
+
+/*
+* Regex option parsing
+*/
+
+/* Handles an option of the form "regex-<LANG>" with its parameter.
+ * Returns TRUE if the option was recognised as a regex option, even when it
+ * could not be honoured (unknown language, or no regex support compiled
+ * in). Returns FALSE if the option is not a regex option.
+ *
+ * NOTE(review): only the characters before the first '-' are compared, so
+ * any prefix of "regex" (even an empty one) is accepted -- confirm that
+ * this abbreviation behaviour is intended.
+ */
+extern boolean processRegexOption (const char *const option,
+		const char *const parameter __unused__)
+{
+	const char* const dash = strchr (option, '-');
+
+	if (dash == NULL || strncmp (option, "regex", dash - option) != 0)
+		return FALSE;
+
+#ifdef HAVE_REGEX
+	{
+		const langType language = getNamedLanguage (dash + 1);
+
+		if (language == LANG_IGNORE)
+			error (WARNING, "unknown language \"%s\" in --%s option", (dash + 1), option);
+		else
+			processLanguageRegex (language, parameter);
+	}
+#else
+	error (WARNING, "regex support not available; required for --%s option",
+		option);
+#endif
+	return TRUE;
+}
+
+/* Switches off the kind of every tag pattern defined for `language'.
+ * Callback patterns are left untouched. Without HAVE_REGEX this is a no-op.
+ */
+extern void disableRegexKinds (const langType language __unused__)
+{
+#ifdef HAVE_REGEX
+	if (language <= SetUpper)
+	{
+		patternSet* const set = Sets + language;
+		unsigned int i;
+
+		for (i = 0 ; i < set->count ; ++i)
+		{
+			regexPattern *const pat = &set->patterns [i];
+
+			if (pat->type == PTRN_TAG)
+				pat->u.tag.kind.enabled = FALSE;
+		}
+	}
+#endif
+}
+
+/* Sets the enabled state of every tag pattern of `language' whose kind
+ * letter equals `kind' to `mode'. Returns TRUE if at least one pattern was
+ * affected. Without HAVE_REGEX it always returns FALSE.
+ */
+extern boolean enableRegexKind (
+		const langType language __unused__,
+		const int kind __unused__, const boolean mode __unused__)
+{
+	boolean found = FALSE;
+#ifdef HAVE_REGEX
+	if (language <= SetUpper)
+	{
+		patternSet* const set = Sets + language;
+		unsigned int i;
+
+		for (i = 0 ; i < set->count ; ++i)
+		{
+			regexPattern *const pat = &set->patterns [i];
+
+			if (pat->type != PTRN_TAG || pat->u.tag.kind.letter != kind)
+				continue;
+			pat->u.tag.kind.enabled = mode;
+			found = TRUE;
+		}
+	}
+#endif
+	return found;
+}
+
+/* Prints one line per tag pattern defined for `language', in registration
+ * order. Callback patterns are skipped. Without HAVE_REGEX this is a no-op.
+ */
+extern void printRegexKinds (const langType language __unused__, boolean indent __unused__)
+{
+#ifdef HAVE_REGEX
+	if (language <= SetUpper)
+	{
+		patternSet* const set = Sets + language;
+		unsigned int i;
+
+		for (i = 0 ; i < set->count ; ++i)
+		{
+			if (set->patterns [i].type != PTRN_TAG)
+				continue;
+			printRegexKind (set->patterns, i, indent);
+		}
+	}
+#endif
+}
+
+/* Releases the pattern sets of all languages and the table that holds
+ * them, returning the module to its initial, empty state. Without
+ * HAVE_REGEX this is a no-op.
+ */
+extern void freeRegexResources (void)
+{
+#ifdef HAVE_REGEX
+	int lang = 0;
+
+	while (lang <= SetUpper)
+	{
+		clearPatternSet (lang);
+		++lang;
+	}
+	if (Sets != NULL)
+		eFree (Sets);
+	Sets = NULL;
+	SetUpper = -1;
+#endif
+}
+
+/* Check for broken regcomp() on Cygwin: compiles a trivial probe pattern
+ * and, if that fails, disables all regex processing for this run. Only
+ * active when both HAVE_REGEX and CHECK_REGCOMP are defined.
+ */
+extern void checkRegex (void)
+{
+#if defined (HAVE_REGEX) && defined (CHECK_REGCOMP)
+	regex_t patbuf;
+	if (regcomp (&patbuf, "/hello/", 0) != 0)
+	{
+		error (WARNING, "Disabling broken regex");
+		regexBroken = TRUE;
+	}
+	else
+		regfree (&patbuf);	/* the probe pattern is not needed any further */
+#endif
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/lua.c b/lua.c
new file mode 100644
index 0000000..d385544
--- /dev/null
+++ b/lua.c
@@ -0,0 +1,133 @@
+/*
+* $Id: lua.c 443 2006-05-30 04:37:13Z darren $
+*
+* Copyright (c) 2000-2001, Max Ischenko <mfi@ukr.net>.
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* This module contains functions for generating tags for Lua language.
+*/
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#include <string.h>
+
+#include "options.h"
+#include "parse.h"
+#include "read.h"
+#include "vstring.h"
+
+/*
+* DATA DEFINITIONS
+*/
+typedef enum {
+ K_FUNCTION
+} luaKind;
+
+static kindOption LuaKinds [] = {
+ { TRUE, 'f', "function", "functions" }
+};
+
+/*
+* FUNCTION DEFINITIONS
+*/
+
+/* for debugging purposes: writes the characters in [p, q) to errout,
+ * followed by a newline.
+ */
+static void __unused__ print_string (char *p, char *q)
+{
+	while (p != q)
+	{
+		fprintf (errout, "%c", *p);
+		++p;
+	}
+	fprintf (errout, "\n");
+}
+
+/*
+ * Helper function.
+ * Returns TRUE if line looks like a line of Lua code, i.e. it is neither
+ * blank nor a comment starting with "--" after optional leading whitespace.
+ *
+ * TODO: Recognize UNIX bang notation.
+ * (Lua treat first line as a comment if it starts with #!)
+ *
+ */
+static boolean is_a_code_line (const unsigned char *line)
+{
+	const unsigned char *p = line;
+
+	while (isspace ((int) *p))
+		++p;
+
+	if (*p == '\0')
+		return FALSE;	/* blank line */
+	if (p [0] == '-' && p [1] == '-')
+		return FALSE;	/* comment */
+	return TRUE;
+}
+
+/* Emits a function tag for the text in [begin, end) with surrounding
+ * whitespace removed. Nothing is emitted if either pointer is NULL, the
+ * range is empty, or it holds only whitespace. `name' is scratch space and
+ * is left empty.
+ */
+static void extract_name (const char *begin, const char *end, vString *name)
+{
+	if (begin != NULL && end != NULL && begin < end)
+	{
+		const char *cp;
+
+		/* <ctype.h> functions need an unsigned char value. */
+		while (begin < end && isspace ((unsigned char) *begin))
+			begin++;
+		/* `end' points just past the name (at the delimiter), so the
+		 * character to test when trimming is the one before it. Testing
+		 * *end itself never trimmed anything and left trailing blanks in
+		 * the tag name.
+		 */
+		while (end > begin && isspace ((unsigned char) end [-1]))
+			end--;
+		if (begin < end)
+		{
+			for (cp = begin ; cp != end; cp++)
+				vStringPut (name, (int) *cp);
+			vStringTerminate (name);
+
+			makeSimpleTag (name, LuaKinds, K_FUNCTION);
+			vStringClear (name);
+		}
+	}
+}
+
+/* Parser callback: scans the input line by line for the word "function".
+ *
+ * Two shapes are recognised on lines that are not blank or comments:
+ *   function NAME (...)     -> NAME is the text between the keyword and '('
+ *   NAME = function (...)   -> NAME is everything before the first '='
+ */
+static void findLuaTags (void)
+{
+	vString *name = vStringNew ();
+	const unsigned char *line;
+
+	while ((line = fileReadLine ()) != NULL)
+	{
+		const char *p, *q;
+
+		if (! is_a_code_line (line))
+			continue;
+
+		p = (const char*) strstr ((const char*) line, "function");
+		if (p == NULL)
+			continue;
+
+		q = strchr ((const char*) line, '=');
+
+		if (q == NULL) {
+			/* Skip exactly the keyword. Skipping one extra character
+			 * would step past the terminating NUL when the line ends
+			 * right after "function"; the blank that normally follows
+			 * is dropped by extract_name() anyway.
+			 */
+			p = p + strlen ("function");
+			q = strchr ((const char*) p, '(');
+			extract_name (p, q, name);
+		} else {
+			p = (const char*) &line[0];
+			extract_name (p, q, name);
+		}
+	}
+	vStringDelete (name);
+}
+
+/* Builds the parser definition for Lua: one "function" kind, the "lua"
+ * file extension, and findLuaTags() as the parser callback.
+ */
+extern parserDefinition* LuaParser (void)
+{
+	static const char* const extensions [] = { "lua", NULL };
+	parserDefinition* const def = parserNew ("Lua");
+
+	def->parser     = findLuaTags;
+	def->extensions = extensions;
+	def->kinds      = LuaKinds;
+	def->kindCount  = KIND_COUNT (LuaKinds);
+
+	return def;
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/mac.c b/mac.c
new file mode 100644
index 0000000..af4d16f
--- /dev/null
+++ b/mac.c
@@ -0,0 +1,273 @@
+/*
+* $Id: mac.c 443 2006-05-30 04:37:13Z darren $
+*
+* Copyright (c) 2001, Maarten L. Hekkelman
+*
+* Author: Maarten L. Hekkelman <maarten@hekkelman.com>
+* http://www.hekkelman.com
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License. It is provided on an as-is basis and no
+* responsibility is accepted for its failure to perform as expected.
+*
+* This module contains support functions for Exuberant Ctags on Macintosh.
+*/
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h"
+
+#include <Files.h>
+#include <TextUtils.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+/*
+* FUNCTION DEFINITIONS
+*/
+
+/* Converts a Unix-style path into a length-prefixed, colon-separated path.
+ *
+ *   - out_mac_path[0] receives the length, the text starts at [1]
+ *     (no terminating NUL is written)
+ *   - a path without a leading '/' gets a leading ':'
+ *   - "../" becomes ':', "./" is dropped, runs of '/' become a single ':'
+ *
+ * Returns 0 on success, or -1 (leaving the output untouched) if the input
+ * is longer than 254 characters.
+ */
+static int get_path(const char* in_unix_path, unsigned char* out_mac_path)
+{
+	const char *src = in_unix_path;
+	char *dst = (char*)out_mac_path + 1;
+	int len = strlen(in_unix_path);
+
+	if (len > 254)
+		return -1;
+
+	if (*src == '/')
+		++src;
+	else
+		*dst++ = ':';
+
+	while (*src != '\0')
+	{
+		if (src[0] == '.' && src[1] == '.' && src[2] == '/')
+		{
+			*dst++ = ':';
+			src += 3;
+		}
+		else if (src[0] == '.' && src[1] == '/')
+			src += 2;
+		else if (src[0] == '/')
+		{
+			*dst++ = ':';
+			do
+				++src;
+			while (*src == '/');
+		}
+		else
+			*dst++ = *src++;
+	}
+
+	out_mac_path[0] = (dst - (char*)out_mac_path) - 1;
+	return 0;
+}
+
+/* Macintosh replacement for opendir(): returns a newly allocated DIR for
+ * `dirname', or NULL if memory is exhausted, the path is too long to
+ * convert, the catalog lookup fails, or the path is not a directory.
+ * "." is looked up with a NULL name pointer.
+ */
+DIR *opendir(const char *dirname)
+{
+	DIR* dirp = (DIR*)calloc(1, sizeof(DIR));
+
+	if (dirp != NULL)
+	{
+		OSErr err;
+		Str255 s;
+		CInfoPBRec pb = { 0 };
+
+		if (strcmp(dirname, "."))
+		{
+			/* get_path() leaves `s' untouched when the path does not
+			 * fit; do not hand uninitialised bytes to the file manager.
+			 */
+			if (get_path(dirname, s) != 0)
+			{
+				free(dirp);
+				return NULL;
+			}
+			pb.hFileInfo.ioNamePtr = s;
+		}
+		else
+			pb.hFileInfo.ioNamePtr = NULL;
+
+		err = PBGetCatInfoSync(&pb);
+		if (err != noErr || (pb.hFileInfo.ioFlAttrib & ioDirMask) == 0)
+		{
+			free(dirp);
+			dirp = NULL;
+		}
+		else
+		{
+			dirp->file.vRefNum = pb.hFileInfo.ioVRefNum;
+			dirp->file.parID = pb.hFileInfo.ioDirID;
+			dirp->file.name[0] = '\0';
+			dirp->index = 1;	/* catalog indices start at 1 */
+		}
+	}
+
+	return dirp;
+}
+
+/* Macintosh replacement for readdir(): fetches the next catalog entry of
+ * the directory and returns it as a NUL-terminated name inside dirp->ent.
+ * Returns NULL if `dirp' is NULL or the catalog lookup fails (presumably
+ * this is also how the end of the directory shows up -- confirm).
+ */
+struct dirent *readdir(DIR *dirp)
+{
+	CInfoPBRec pb = { 0 };
+
+	if (dirp == NULL)
+		return NULL;
+
+	pb.hFileInfo.ioVRefNum = dirp->file.vRefNum;
+	pb.hFileInfo.ioDirID = dirp->file.parID;
+	pb.hFileInfo.ioFDirIndex = dirp->index++;
+	pb.hFileInfo.ioNamePtr = dirp->file.name;
+
+	if (PBGetCatInfoSync(&pb) != noErr)
+		return NULL;
+
+	/* file.name is length-prefixed: copy the text, then terminate it. */
+	memcpy(dirp->ent.d_name, dirp->file.name + 1, dirp->file.name[0]);
+	dirp->ent.d_name[dirp->file.name[0]] = 0;
+	return &dirp->ent;
+}
+
+/* Macintosh replacement for closedir(): releases the DIR allocated by
+ * opendir(). A NULL argument is accepted. Always returns 0.
+ */
+int closedir(DIR *dirp)
+{
+	free(dirp);	/* free(NULL) is a no-op */
+	return 0;
+}
+
+/* Macintosh replacement for rewinddir(): makes the next readdir() start
+ * again at the first entry. A NULL argument is ignored.
+ */
+void rewinddir(DIR *dirp)
+{
+	if (dirp == NULL)
+		return;
+	dirp->index = 1;
+}
+
+/* Macintosh replacement for stat(): fills `st' from the catalog
+ * information of `file'. "." is reported as a directory without consulting
+ * the catalog. Returns 0 on success, -1 if the path is too long or the
+ * catalog lookup fails (`st' is left untouched in that case).
+ */
+int mstat(const char* file, struct stat* st)
+{
+	CInfoPBRec pb;
+	unsigned char path[256];
+	int status;
+
+	memset(&pb, 0, sizeof pb);
+
+	if (strcmp(file, ".") == 0)
+	{
+		memset(st, 0, sizeof *st);
+		st->st_mode = S_IFDIR;
+		st->st_ino = -1;
+		return 0;
+	}
+
+	status = get_path(file, path);
+	if (status != 0)
+		return status;
+
+	pb.hFileInfo.ioNamePtr = path;
+	if (PBGetCatInfoSync(&pb) != noErr)
+		return -1;
+
+	memset(st, 0, sizeof *st);
+	st->st_mode = (pb.hFileInfo.ioFlAttrib & ioDirMask) ? S_IFDIR : S_IFREG;
+	st->st_ino = pb.hFileInfo.ioFlStBlk;
+	st->st_dev = pb.hFileInfo.ioVRefNum;
+	st->st_nlink = 1;
+	st->st_size = pb.hFileInfo.ioFlLgLen;
+	st->st_atime = pb.hFileInfo.ioFlMdDat;	/* access time mirrors the modification date */
+	st->st_mtime = pb.hFileInfo.ioFlMdDat;
+	st->st_ctime = pb.hFileInfo.ioFlCrDat;
+
+	return 0;
+}
+
+#undef fopen
+
+/* fopen() wrapper taking a Unix-style path: converts it with get_path(),
+ * turns the length-prefixed result into a C string in place and opens it.
+ * Returns NULL if the path is too long or fopen() fails.
+ */
+FILE* mfopen(const char* file, const char* mode)
+{
+	unsigned char path[256];
+	int len;
+
+	if (get_path(file, path) != 0)
+		return NULL;
+
+	/* Shift the text over the length byte and terminate it. */
+	len = path[0];
+	memmove(path, path + 1, len);
+	path[len] = 0;
+
+	return fopen((char*)path, mode);
+}
+
+char* getcwd(char* out_path, int out_path_len)
+{	/* Macintosh replacement for getcwd(): writes a '/'-separated path of the
+	 * current directory to out_path and returns it. With out_path == NULL a
+	 * buffer of at least PATH_MAX bytes is allocated with malloc().
+	 *
+	 * NOTE(review): the malloc() result is not checked before it is written.
+	 * NOTE(review): the result of FSMakeFSSpec() is stored in `err' but never
+	 * examined before `cwd' is used.
+	 * NOTE(review): out_path_len is not consulted when the caller supplies the
+	 * buffer; the copies below assume it is large enough.
+	 */
+ OSErr err = noErr;
+ CInfoPBRec pb;
+ FSSpec cwd;
+
+ if (out_path == NULL)
+ {
+ if (out_path_len < PATH_MAX)
+ out_path_len = PATH_MAX;
+ out_path = (char*)malloc(out_path_len);
+ }
+
+ err = FSMakeFSSpec(0, 0, "\p:", &cwd);
+
+ if (cwd.parID == fsRtParID)
+ {
+ *out_path = '/'; /* the current directory is a volume root: result is "/" + its name */
+ memcpy(out_path + 1, cwd.name + 1, cwd.name[0]);
+ out_path[1 + cwd.name[0]] = 0;
+ }
+ else
+ {
+ /* The object isn't a volume */
+
+ /* Is the object a file or a directory? */
+
+ char t[PATH_MAX];
+ char* s;
+
+ s = t + PATH_MAX - cwd.name[0] - 1; /* the path is assembled right to left at the end of t */
+ memcpy(s, cwd.name + 1, cwd.name[0]);
+ s[cwd.name[0]] = 0;
+
+ /* Get the ancestor directory names */
+ pb.dirInfo.ioNamePtr = cwd.name; /* presumably each lookup overwrites cwd.name with the ancestor's name -- confirm */
+ pb.dirInfo.ioVRefNum = cwd.vRefNum;
+ pb.dirInfo.ioDrParID = cwd.parID;
+ do /* loop until we have an error or find the root directory */
+ {
+ pb.dirInfo.ioFDirIndex = -1;
+ pb.dirInfo.ioDrDirID = pb.dirInfo.ioDrParID;
+ err = PBGetCatInfoSync(&pb);
+ if ( err == noErr )
+ {
+ *--s = '/'; /* NOTE(review): s is moved back before the "s > t + 1" test below runs -- confirm it cannot pass the start of t */
+ s -= cwd.name[0];
+ memcpy(s, cwd.name + 1, cwd.name[0]);
+ }
+ }
+ while (err == noErr && pb.dirInfo.ioDrDirID != fsRtDirID && s > t + 1);
+
+ if (s > t + 1)
+ {
+ *--s = '/';
+ strcpy(out_path, s);
+ }
+ else
+ strcpy(out_path, "."); /* no room left for the leading '/': fall back to "." */
+ }
+
+ return out_path;
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/magic.diff b/magic.diff
new file mode 100644
index 0000000..0457756
--- /dev/null
+++ b/magic.diff
@@ -0,0 +1,21 @@
+This file contains a patch to the Linux file /usr/share/magic which will
+allow the "file" command to properly identify tags file produced by
+Exuberant Ctags. To apply the patch, issue the following command as root:
+
+ patch -p0 < magic.diff
+
+*** /usr/share/magic.orig Wed Feb 16 19:04:09 2000
+--- /usr/share/magic Mon Aug 14 20:27:01 2000
+***************
+*** 1155,1160 ****
+--- 1155,1164 ----
+ >84 belong&0x18000000 =0x18000000 undefined fpmode
+
+ #------------------------------------------------------------------------------
++ # ctags: file (1) magic for Exuberant Ctags files
++ 0 string !_TAG Exuberant Ctags tag file
++
++ #------------------------------------------------------------------------------
+ # database: file(1) magic for various databases
+ #
+ # extracted from header/code files by Graeme Wilford (eep2gw@ee.surrey.ac.uk)
diff --git a/main.c b/main.c
new file mode 100644
index 0000000..79948fe
--- /dev/null
+++ b/main.c
@@ -0,0 +1,579 @@
+/*
+* $Id: main.c 536 2007-06-02 06:09:00Z elliotth $
+*
+* Copyright (c) 1996-2003, Darren Hiebert
+*
+* Author: Darren Hiebert <dhiebert@users.sourceforge.net>
+* http://ctags.sourceforge.net
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License. It is provided on an as-is basis and no
+* responsibility is accepted for its failure to perform as expected.
+*
+* This is a reimplementation of the ctags (1) program. It is an attempt to
+* provide a fully featured ctags program which is free of the limitations
+* which most (all?) others are subject to.
+*
+* This module contains the start-up code and routines to determine the list
+* of files to be parsed for tags.
+*/
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#include <string.h>
+
+/* To provide timing features, if available.
+ */
+#ifdef HAVE_CLOCK
+# ifdef HAVE_TIME_H
+# include <time.h>
+# endif
+#else
+# ifdef HAVE_TIMES
+# ifdef HAVE_SYS_TIMES_H
+# include <sys/times.h>
+# endif
+# endif
+#endif
+
+/* To provide directory searching for recursion feature.
+ */
+#ifdef AMIGA
+# include <dos/dosasl.h> /* for struct AnchorPath */
+# include <clib/dos_protos.h> /* function prototypes */
+# define ANCHOR_BUF_SIZE 512
+# define ANCHOR_SIZE (sizeof (struct AnchorPath) + ANCHOR_BUF_SIZE)
+# ifdef __SASC
+ extern struct DosLibrary *DOSBase;
+# include <pragmas/dos_pragmas.h>
+# endif
+#endif
+
+#ifdef HAVE_DIRENT_H
+# ifdef __BORLANDC__
+# define boolean BORLAND_boolean
+# endif
+# ifdef HAVE_SYS_TYPES_H
+# include <sys/types.h> /* required by dirent.h */
+# endif
+# include <dirent.h> /* to declare opendir() */
+# undef boolean
+#endif
+#ifdef HAVE_DIRECT_H
+# include <direct.h> /* to _getcwd() */
+#endif
+#ifdef HAVE_DOS_H
+# include <dos.h> /* to declare FA_DIREC */
+#endif
+#ifdef HAVE_DIR_H
+# include <dir.h> /* to declare findfirst() and findnext */
+#endif
+#ifdef HAVE_IO_H
+# include <io.h> /* to declare _findfirst() */
+#endif
+
+
+#include "debug.h"
+#include "keyword.h"
+#include "main.h"
+#include "options.h"
+#include "read.h"
+#include "routines.h"
+
+/*
+* MACROS
+*/
+#define plural(value) (((unsigned long)(value) == 1L) ? "" : "s")
+
+/*
+* DATA DEFINITIONS
+*/
+static struct { long files, lines, bytes; } Totals = { 0, 0, 0 };
+
+#ifdef AMIGA
+# include "ctags.h"
+ static const char *VERsion = "$VER: "PROGRAM_NAME" "PROGRAM_VERSION" "
+# ifdef __SASC
+ __AMIGADATE__
+# else
+ __DATE__
+# endif
+ " "AUTHOR_NAME" $";
+#endif
+
+/*
+* FUNCTION PROTOTYPES
+*/
+static boolean createTagsForEntry (const char *const entryName);
+
+/*
+* FUNCTION DEFINITIONS
+*/
+
+/* Accumulate per-file statistics into the file-scope Totals counters,
+ * which printTotals() later reports.
+ */
+extern void addTotals (
+		const unsigned int files, const long unsigned int lines,
+		const long unsigned int bytes)
+{
+	Totals.bytes += bytes;
+	Totals.lines += lines;
+	Totals.files += files;
+}
+
+/* Tell whether tag output is directed to standard output.  That is the case
+ * in xref or filter mode, or when the tag file name is "-" or the
+ * platform's name for the standard output device.
+ */
+extern boolean isDestinationStdout (void)
+{
+	const char *const name = Option.tagFileName;
+
+	if (Option.xref || Option.filter)
+		return TRUE;
+	if (name == NULL)
+		return FALSE;
+	if (strcmp (name, "-") == 0)
+		return TRUE;
+#if defined (VMS)
+	if (strcmp (name, "sys$output") == 0)
+		return TRUE;
+#else
+	if (strcmp (name, "/dev/stdout") == 0)
+		return TRUE;
+#endif
+	return FALSE;
+}
+
+#if defined (HAVE_OPENDIR)
+/* Walk the entries of directory `dirName` with opendir()/readdir() and hand
+ * each one except "." and ".." to createTagsForEntry().  Entries found in
+ * "." are passed by bare name; otherwise the directory is prefixed with
+ * combinePathAndFile().  Returns the OR of the per-entry results.  A
+ * directory that cannot be opened produces a warning and FALSE.
+ */
+static boolean recurseUsingOpendir (const char *const dirName)
+{
+	boolean resize = FALSE;
+	struct dirent *entry;
+	DIR *const dir = opendir (dirName);
+
+	if (dir == NULL)
+	{
+		error (WARNING | PERROR, "cannot recurse into directory \"%s\"", dirName);
+		return resize;
+	}
+
+	while ((entry = readdir (dir)) != NULL)
+	{
+		const char *const name = entry->d_name;
+		vString *filePath;
+
+		if (strcmp (name, ".") == 0 || strcmp (name, "..") == 0)
+			continue;
+
+		if (strcmp (dirName, ".") == 0)
+			filePath = vStringNewInit (name);
+		else
+			filePath = combinePathAndFile (dirName, name);
+		resize |= createTagsForEntry (vStringValue (filePath));
+		vStringDelete (filePath);
+	}
+	closedir (dir);
+	return resize;
+}
+
+#elif defined (HAVE_FINDFIRST) || defined (HAVE__FINDFIRST)
+
+/* Process one name produced by a wildcard search.  The full path is formed
+ * from the first `dirLength` characters of `pattern` followed by
+ * `entryName`, and is passed to createTagsForEntry().  The names "." and
+ * ".." are ignored so that the search never recurses into them.
+ */
+static boolean createTagsForWildcardEntry (
+		const char *const pattern, const size_t dirLength,
+		const char *const entryName)
+{
+	boolean resize;
+	vString *filePath;
+
+	if (strcmp (entryName, ".") == 0 || strcmp (entryName, "..") == 0)
+		return FALSE;
+
+	filePath = vStringNew ();
+	vStringNCopyS (filePath, pattern, dirLength);
+	vStringCatS (filePath, entryName);
+	resize = createTagsForEntry (vStringValue (filePath));
+	vStringDelete (filePath);
+	return resize;
+}
+
+/* Expand the wildcard `pattern` with findfirst()/findnext() or with
+ * _findfirst()/_findnext(), whichever was configured, and pass every match
+ * to createTagsForWildcardEntry().  Returns the OR of the per-entry results.
+ */
+static boolean createTagsForWildcardUsingFindfirst (const char *const pattern)
+{
+ boolean resize = FALSE;
+ /* Number of leading characters of pattern that precede the position
+ * returned by baseFilename(); presumably the directory part -- confirm
+ * against baseFilename(). */
+ const size_t dirLength = baseFilename (pattern) - pattern;
+#if defined (HAVE_FINDFIRST)
+ struct ffblk fileInfo;
+ int result = findfirst (pattern, &fileInfo, FA_DIREC);
+ while (result == 0)
+ {
+ const char *const entry = (const char *) fileInfo.ff_name;
+ resize |= createTagsForWildcardEntry (pattern, dirLength, entry);
+ result = findnext (&fileInfo);
+ }
+#elif defined (HAVE__FINDFIRST)
+ struct _finddata_t fileInfo;
+ findfirst_t hFile = _findfirst (pattern, &fileInfo);
+ /* -1 is treated as "no match"; the handle is only closed when valid. */
+ if (hFile != -1L)
+ {
+ do
+ {
+ const char *const entry = (const char *) fileInfo.name;
+ resize |= createTagsForWildcardEntry (pattern, dirLength, entry);
+ } while (_findnext (hFile, &fileInfo) == 0);
+ _findclose (hFile);
+ }
+#endif
+ return resize;
+}
+
+#elif defined (AMIGA)
+
+/* Expand the AmigaDOS wildcard `pattern` with MatchFirst()/MatchNext() and
+ * pass the path found in the anchor's buffer to createTagsForEntry() for
+ * each match.  Returns the OR of the per-entry results.
+ */
+static boolean createTagsForAmigaWildcard (const char *const pattern)
+{
+ boolean resize = FALSE;
+ /* The AnchorPath is allocated together with ANCHOR_BUF_SIZE extra bytes,
+ * whose size is announced to the matcher through ap_Strlen below. */
+ struct AnchorPath *const anchor =
+ (struct AnchorPath *) eMalloc ((size_t) ANCHOR_SIZE);
+ LONG result;
+
+ memset (anchor, 0, (size_t) ANCHOR_SIZE);
+ anchor->ap_Strlen = ANCHOR_BUF_SIZE;
+ /* Allow '.' for current directory */
+#ifdef APF_DODOT
+ anchor->ap_Flags = APF_DODOT | APF_DOWILD;
+#else
+ /* Alternative spelling of the same flags, used when APF_DODOT is not
+ * defined by the system headers. */
+ anchor->ap_Flags = APF_DoDot | APF_DoWild;
+#endif
+ result = MatchFirst ((UBYTE *) pattern, anchor);
+ while (result == 0)
+ {
+ resize |= createTagsForEntry ((char *) anchor->ap_Buf);
+ result = MatchNext (anchor);
+ }
+ /* MatchEnd() is called whether or not anything matched. */
+ MatchEnd (anchor);
+ eFree (anchor);
+ return resize;
+}
+#endif
+
+/* Generate tags for the contents of directory `dirName`, using whichever
+ * directory-scanning facility was configured.  Nothing is scanned (and FALSE
+ * is returned) when isRecursiveLink() reports the directory as a recursive
+ * link or when Option.recurse is off; both cases are reported via verbose().
+ * Otherwise returns the result of the platform-specific scan.
+ */
+static boolean recurseIntoDirectory (const char *const dirName)
+{
+ boolean resize = FALSE;
+ if (isRecursiveLink (dirName))
+ verbose ("ignoring \"%s\" (recursive link)\n", dirName);
+ else if (! Option.recurse)
+ verbose ("ignoring \"%s\" (directory)\n", dirName);
+ else
+ {
+ verbose ("RECURSING into directory \"%s\"\n", dirName);
+#if defined (HAVE_OPENDIR)
+ resize = recurseUsingOpendir (dirName);
+#elif defined (HAVE_FINDFIRST) || defined (HAVE__FINDFIRST)
+ {
+ /* Search pattern: <dirName><separator>*.* */
+ vString *const pattern = vStringNew ();
+ vStringCopyS (pattern, dirName);
+ vStringPut (pattern, OUTPUT_PATH_SEPARATOR);
+ vStringCatS (pattern, "*.*");
+ resize = createTagsForWildcardUsingFindfirst (vStringValue (pattern));
+ vStringDelete (pattern);
+ }
+#elif defined (AMIGA)
+ {
+ /* Search pattern: "#?" alone for the empty name or ".", otherwise
+ * <dirName>/#? with the '/' added only when dirName lacks one. */
+ vString *const pattern = vStringNew ();
+ if (*dirName != '\0' && strcmp (dirName, ".") != 0)
+ {
+ vStringCopyS (pattern, dirName);
+ if (dirName [strlen (dirName) - 1] != '/')
+ vStringPut (pattern, '/');
+ }
+ vStringCatS (pattern, "#?");
+ resize = createTagsForAmigaWildcard (vStringValue (pattern));
+ vStringDelete (pattern);
+ }
+#endif
+ }
+ return resize;
+}
+
+/* Generate tags for one file-system entry.
+ *
+ * The entry is dispatched on its status as reported by eStat(): excluded
+ * files, symbolic links (unless Option.followLinks is set) and special
+ * files are skipped with a verbose() note, a missing entry produces a
+ * warning, a directory is handed to recurseIntoDirectory(), and a normal
+ * file is handed to parseFile().  Returns the result of the directory scan
+ * or of parseFile(), and FALSE in every other case.
+ */
+static boolean createTagsForEntry (const char *const entryName)
+{
+	boolean resize = FALSE;
+	fileStatus *status;
+
+	/* Check the argument before eStat() is given the pointer; the original
+	 * order made the assertion run only after the name had been used. */
+	Assert (entryName != NULL);
+	status = eStat (entryName);
+
+	if (isExcludedFile (entryName))
+		verbose ("excluding \"%s\"\n", entryName);
+	else if (status->isSymbolicLink && ! Option.followLinks)
+		verbose ("ignoring \"%s\" (symbolic link)\n", entryName);
+	else if (! status->exists)
+		error (WARNING | PERROR, "cannot open source file \"%s\"", entryName);
+	else if (status->isDirectory)
+		resize = recurseIntoDirectory (entryName);
+	else if (! status->isNormalFile)
+		verbose ("ignoring \"%s\" (special file)\n", entryName);
+	else
+		resize = parseFile (entryName);
+
+	eStatFree (status);
+	return resize;
+}
+
+#ifdef MANUAL_GLOBBING
+
+/* Generate tags for a command-line argument that may contain wildcards
+ * which the program has to expand itself (MANUAL_GLOBBING builds).
+ * Returns the result of the wildcard expansion, or FALSE when no
+ * findfirst-style facility is configured.
+ */
+static boolean createTagsForWildcardArg (const char *const arg)
+{
+	boolean resize = FALSE;
+	vString *const pattern = vStringNewInit (arg);
+
+#if defined (HAVE_FINDFIRST) || defined (HAVE__FINDFIRST)
+	/* We must transform the "." and ".." forms into something that can
+	 * be expanded by the findfirst/_findfirst functions.
+	 */
+	if (Option.recurse &&
+		(strcmp (vStringValue (pattern), ".") == 0 ||
+		 strcmp (vStringValue (pattern), "..") == 0))
+	{
+		vStringPut (pattern, OUTPUT_PATH_SEPARATOR);
+		vStringCatS (pattern, "*.*");
+	}
+	/* The string value is fetched again here rather than cached before the
+	 * appends above: a pointer taken earlier may no longer refer to the
+	 * string's storage once the vString has been extended. */
+	resize |= createTagsForWildcardUsingFindfirst (vStringValue (pattern));
+#endif
+	vStringDelete (pattern);
+	return resize;
+}
+
+#endif
+
+/* Generate tags for every remaining command-line argument.  After each
+ * argument is consumed, parseOptions() is run again on the argument list
+ * before the next argument is looked at.  Returns the OR of the
+ * per-argument results.
+ */
+static boolean createTagsForArgs (cookedArgs *const args)
+{
+	boolean resize = FALSE;
+
+	for ( ; ! cArgOff (args) ; parseOptions (args))
+	{
+		const char *const arg = cArgItem (args);
+
+#ifdef MANUAL_GLOBBING
+		resize |= createTagsForWildcardArg (arg);
+#else
+		resize |= createTagsForEntry (arg);
+#endif
+		cArgForth (args);
+	}
+	return resize;
+}
+
+/* Read a list of file names from the already opened stream `fp` and
+ * generate tags for each of them.  When `filter` is set, the filter
+ * terminator (if one is configured) is written to stdout and stdout is
+ * flushed after every entry.  A NULL stream yields FALSE.
+ */
+static boolean createTagsFromFileInput (FILE *const fp, const boolean filter)
+{
+	boolean resize = FALSE;
+	cookedArgs *args;
+
+	if (fp == NULL)
+		return resize;
+
+	args = cArgNewFromLineFile (fp);
+	for (parseOptions (args) ; ! cArgOff (args) ; parseOptions (args))
+	{
+		resize |= createTagsForEntry (cArgItem (args));
+		if (filter)
+		{
+			if (Option.filterTerminator != NULL)
+				fputs (Option.filterTerminator, stdout);
+			fflush (stdout);
+		}
+		cArgForth (args);
+	}
+	cArgDelete (args);
+	return resize;
+}
+
+/* Generate tags for the file names listed in `fileName`; the name "-"
+ * selects standard input.  Failure to open the list file is reported
+ * through error() with FATAL severity.
+ */
+static boolean createTagsFromListFile (const char *const fileName)
+{
+	boolean resize;
+	FILE *fp;
+
+	Assert (fileName != NULL);
+	if (strcmp (fileName, "-") == 0)
+		return createTagsFromFileInput (stdin, FALSE);
+
+	fp = fopen (fileName, "r");
+	if (fp == NULL)
+		error (FATAL | PERROR, "cannot open list file \"%s\"", fileName);
+	resize = createTagsFromFileInput (fp, FALSE);
+	fclose (fp);
+	return resize;
+}
+
+#if defined (HAVE_CLOCK)
+# define CLOCK_AVAILABLE
+# ifndef CLOCKS_PER_SEC
+# define CLOCKS_PER_SEC 1000000
+# endif
+#elif defined (HAVE_TIMES)
+# define CLOCK_AVAILABLE
+# define CLOCKS_PER_SEC 60
+/* Stand-in for clock() built on times(): the sum of the user and system
+ * CPU times that times() reports for the process.
+ */
+static clock_t clock (void)
+{
+	struct tms cpu;
+	clock_t user, sys;
+
+	times (&cpu);
+	user = cpu.tms_utime;
+	sys = cpu.tms_stime;
+	return (user + sys);
+}
+#else
+# define clock() (clock_t)0
+#endif
+
+/* Write the run statistics to errout: files/lines/kB scanned, tags added,
+ * and tags sorted.  `timeStamps` holds three clock() readings; [0]..[1]
+ * brackets the scanning phase and [1]..[2] the phase reported as sorting.
+ * Timing figures are only printed when CLOCK_AVAILABLE is defined.
+ */
+static void printTotals (const clock_t *const timeStamps)
+{
+ const unsigned long totalTags = TagFile.numTags.added +
+ TagFile.numTags.prev;
+
+ fprintf (errout, "%ld file%s, %ld line%s (%ld kB) scanned",
+ Totals.files, plural (Totals.files),
+ Totals.lines, plural (Totals.lines),
+ Totals.bytes/1024L);
+#ifdef CLOCK_AVAILABLE
+ {
+ const double interval = ((double) (timeStamps [1] - timeStamps [0])) /
+ CLOCKS_PER_SEC;
+
+ fprintf (errout, " in %.01f seconds", interval);
+ /* Throughput is omitted for a zero interval to avoid dividing by 0. */
+ if (interval != (double) 0.0)
+ fprintf (errout, " (%lu kB/s)",
+ (unsigned long) (Totals.bytes / interval) / 1024L);
+ }
+#endif
+ fputc ('\n', errout);
+
+ fprintf (errout, "%lu tag%s added to tag file",
+ TagFile.numTags.added, plural (TagFile.numTags.added));
+ /* The running total is only shown when appending to a tag file. */
+ if (Option.append)
+ fprintf (errout, " (now %lu tags)", totalTags);
+ fputc ('\n', errout);
+
+ if (totalTags > 0 && Option.sorted != SO_UNSORTED)
+ {
+ fprintf (errout, "%lu tag%s sorted", totalTags, plural (totalTags));
+#ifdef CLOCK_AVAILABLE
+ fprintf (errout, " in %.02f seconds",
+ ((double) (timeStamps [2] - timeStamps [1])) / CLOCKS_PER_SEC);
+#endif
+ fputc ('\n', errout);
+ }
+
+#ifdef DEBUG
+ fprintf (errout, "longest tag line = %lu\n",
+ (unsigned long) TagFile.max.line);
+#endif
+}
+
+/* True when etags mode is on and an etags include setting is present. */
+static boolean etagsInclude (void)
+{
+	if (! Option.etags)
+		return FALSE;
+	if (Option.etagsInclude == NULL)
+		return FALSE;
+	return TRUE;
+}
+
+/* Top-level driver: generate tags from every configured input source.
+ *
+ * Sources are handled in this order: remaining command-line arguments, the
+ * list file named by Option.fileList, and names read from stdin in filter
+ * mode.  When none of these is present and Option.recurse is set, the
+ * current directory is scanned instead.  Unless running as a filter, the
+ * tag file is opened before and closed after the scan; closeTagFile() is
+ * given the accumulated `resize` result.
+ */
+static void makeTags (cookedArgs *args)
+{
+ clock_t timeStamps [3];
+ boolean resize = FALSE;
+ /* TRUE when at least one explicit input source was given. */
+ boolean files = (boolean)(! cArgOff (args) || Option.fileList != NULL
+ || Option.filter);
+
+ if (! files)
+ {
+ if (filesRequired ())
+ error (FATAL, "No files specified. Try \"%s --help\".",
+ getExecutableName ());
+ else if (! Option.recurse && ! etagsInclude ())
+ return;
+ }
+
+ /* Record clock() in slot n, or 0 when totals are not going to be printed. */
+#define timeStamp(n) timeStamps[(n)]=(Option.printTotals ? clock():(clock_t)0)
+ if (! Option.filter)
+ openTagFile ();
+
+ timeStamp (0);
+
+ if (! cArgOff (args))
+ {
+ verbose ("Reading command line arguments\n");
+ resize = createTagsForArgs (args);
+ }
+ if (Option.fileList != NULL)
+ {
+ verbose ("Reading list file\n");
+ resize = (boolean) (createTagsFromListFile (Option.fileList) || resize);
+ }
+ if (Option.filter)
+ {
+ verbose ("Reading filter input\n");
+ resize = (boolean) (createTagsFromFileInput (stdin, TRUE) || resize);
+ }
+ /* No explicit inputs: fall back to scanning the current directory. */
+ if (! files && Option.recurse)
+ resize = recurseIntoDirectory (".");
+
+ timeStamp (1);
+
+ if (! Option.filter)
+ closeTagFile (resize);
+
+ timeStamp (2);
+
+ if (Option.printTotals)
+ printTotals (timeStamps);
+#undef timeStamp
+}
+
+/*
+ * Start up code
+ */
+
+/* Program entry point.
+ *
+ * Performs platform-specific argument preprocessing, records the current
+ * directory and the executable name, initializes the parsers and options,
+ * parses the leading command-line options, generates the tags with
+ * makeTags(), and finally releases the resources held by each module.
+ * Terminates through exit (0).
+ */
+extern int main (int __unused__ argc, char **argv)
+{
+ cookedArgs *args;
+#ifdef VMS
+ extern int getredirection (int *ac, char ***av);
+
+ /* do wildcard expansion and I/O redirection */
+ getredirection (&argc, &argv);
+#endif
+
+#ifdef AMIGA
+ /* This program doesn't work when started from the Workbench */
+ if (argc == 0)
+ exit (1);
+#endif
+
+#ifdef __EMX__
+ _wildcard (&argc, &argv); /* expand wildcards in argument list */
+#endif
+
+#if defined (macintosh) && BUILD_MPW_TOOL == 0
+ argc = ccommand (&argv);
+#endif
+
+ setCurrentDirectory ();
+ /* argv is advanced past the program name here, so the argument list
+ * built below starts at the first real argument. */
+ setExecutableName (*argv++);
+ checkRegex ();
+
+ args = cArgNewFromArgv (argv);
+ previewFirstOption (args);
+ testEtagsInvocation ();
+ initializeParsing ();
+ initOptions ();
+ readOptionConfiguration ();
+ verbose ("Reading initial options from command line\n");
+ parseOptions (args);
+ checkOptions ();
+ makeTags (args);
+
+ /* Clean up.
+ */
+ cArgDelete (args);
+ freeKeywordTable ();
+ freeRoutineResources ();
+ freeSourceFileResources ();
+ freeTagFileResources ();
+ freeOptionResources ();
+ freeParserResources ();
+ freeRegexResources ();
+
+ /* The return statement after exit() is never reached; presumably it is
+ * kept to satisfy compilers that expect main to return a value. */
+ exit (0);
+ return 0;
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/main.h b/main.h
new file mode 100644
index 0000000..ad9a8e6
--- /dev/null
+++ b/main.h
@@ -0,0 +1,32 @@
+/*
+* $Id: main.h 443 2006-05-30 04:37:13Z darren $
+*
+* Copyright (c) 1998-2002, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* External interface to main.c
+*/
+#ifndef _MAIN_H
+#define _MAIN_H
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#include <stdio.h>
+
+#include "vstring.h"
+
+/*
+* FUNCTION PROTOTYPES
+*/
+extern void addTotals (const unsigned int files, const long unsigned int lines, const long unsigned int bytes);
+extern boolean isDestinationStdout (void);
+extern int main (int argc, char **argv);
+
+#endif /* _MAIN_H */
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/maintainer.mak b/maintainer.mak
new file mode 100644
index 0000000..6c76c2c
--- /dev/null
+++ b/maintainer.mak
@@ -0,0 +1,476 @@
+# $Id: maintainer.mak 722 2009-07-09 16:10:35Z dhiebert $
+#
+# Copyright (c) 1996-2009, Darren Hiebert
+#
+# Development makefile for Exuberant Ctags. Also used to build releases.
+# Requires GNU make.
+
+OBJEXT := o
+
+include source.mak
+
+DSOURCES := $(SOURCES) debug.c
+
+VERSION_FILES:= ctags.h ctags.1 NEWS
+
+LIB_FILES := readtags.c readtags.h
+
+ENVIRONMENT_MAKEFILES := \
+ mk_bc3.mak mk_bc5.mak mk_djg.mak mk_manx.mak mk_mingw.mak \
+ mk_mpw.mak mk_mvc.mak mk_os2.mak mk_qdos.mak mk_sas.mak \
+
+COMMON_FILES := COPYING EXTENDING.html FAQ INSTALL.oth MAINTAINERS NEWS README \
+ $(ENVIRONMENT_MAKEFILES) source.mak \
+ $(DSOURCES) $(HEADERS) $(LIB_FILES) \
+ $(ENVIRONMENT_SOURCES) $(ENVIRONMENT_HEADERS)
+
+UNIX_FILES := $(COMMON_FILES) \
+ .indent.pro INSTALL configure.ac \
+ Makefile.in maintainer.mak \
+ descrip.mms mkinstalldirs magic.diff \
+ ctags.spec ctags.1
+
+REGEX_DIR := gnu_regex
+
+WIN_FILES := $(COMMON_FILES) $(VERSION_FILES)
+
+SVN_FILES := $(UNIX_FILES)
+
+DEP_DIR := .deps
+
+OBJECTS := $(patsubst %.c,%.o,$(notdir $(SOURCES)))
+DOBJECTS := $(patsubst %.c,%.od,$(notdir $(DSOURCES)))
+DEPS := $(patsubst %.c,$(DEP_DIR)/%.d,$(notdir $(SOURCES)))
+
+WARNINGS := -Wall -W -Wpointer-arith -Wcast-align -Wwrite-strings \
+ -Wmissing-prototypes -Wmissing-declarations \
+ -Wnested-externs -Wcast-qual -Wshadow -pedantic \
+ -Wstrict-prototypes \
+ # -Wtraditional -Wconversion -Werror
+
+PRODUCER := Darren B. Hiebert
+EMAIL := dhiebert@users.sourceforge.net
+CTAGS_WEBSITE := http://ctags.sourceforge.net
+RPM_ROOT := rpms
+RPM_ABS_ROOT := $(PWD)/$(RPM_ROOT)
+WINDOWS_DIR := win32
+RELEASE_DIR := releases
+CTAGS_WEBDIR := website
+win_version = $(subst .,,$(version))
+HOST_ARCH := $(shell uname -p)
+
+ifneq ($(findstring $(HOST_ARCH),i386 i686),)
+COMP_ARCH := -march=i686
+endif
+
+CC := gcc
+INCLUDE := -I.
+DEFS := -DHAVE_CONFIG_H
+COMP_FLAGS := $(INCLUDE) $(DEFS) $(CFLAGS)
+PROF_OPT := -O3 $(COMP_ARCH)
+#OPT := $(PROF_OPT) -fomit-frame-pointer
+OPT := $(PROF_OPT)
+DCFLAGS := $(COMP_FLAGS) -DDEBUG -DINTERNAL_SORT
+LD := gcc
+LDFLAGS :=
+RPM_FLAGS := -O3 $(COMP_ARCH)
+
+AUTO_GEN := configure config.h.in
+CONFIG_GEN := config.cache config.log config.status config.run config.h Makefile
+PROF_GEN := gmon.out
+COV_GEN := *.da *.gcov
+
+UNIX2DOS := perl -pe 's/(\r\n|\n|\r)/\r\n/g'
+MAN2HTML := tbl | groff -Wall -mtty-char -mandoc -Thtml -c
+
+#
+# Targets
+#
+default: all
+ifeq ($(findstring clean,$(MAKECMDGOALS)),)
+ifeq ($(wildcard config.h),)
+ctags dctags ctags.prof ctags.cov:
+ $(MAKE) config.h
+ $(MAKE) $(MAKECMDGOALS)
+else
+all: dctags tags syntax.vim
+
+-include $(DEPS) $(DEP_DIR)/readtags.d
+
+#
+# Executable targets
+#
+ctags: $(OBJECTS)
+ @ echo "-- Linking $@"
+ @ $(LD) -o $@ $(LDFLAGS) $^
+
+dctags: $(DOBJECTS) debug.od
+ @ echo "-- Building $@"
+ $(LD) -o $@ $(LDFLAGS) $^
+
+ctags.prof: $(SOURCES) $(HEADERS) Makefile
+ $(CC) -pg $(PROF_OPT) $(COMP_FLAGS) $(WARNINGS) $(SOURCES) -o $@
+
+ctags.cov: $(SOURCES) $(HEADERS) Makefile
+ $(CC) -fprofile-arcs -ftest-coverage $(COMP_FLAGS) $(WARNINGS) $(SOURCES) -o $@
+
+gcov: $(SOURCES:.c=.c.gcov)
+
+readtags: readtags.[ch]
+ $(CC) -g $(COMP_FLAGS) -DDEBUG -DREADTAGS_MAIN -o $@ readtags.c
+
+readtags.o: readtags.c readtags.h
+ $(CC) $(COMP_FLAGS) -c readtags.c
+
+etyperef: etyperef.o keyword.o routines.o strlist.o vstring.o
+ $(CC) -o $@ $^
+
+etyperef.o: eiffel.c
+ $(CC) -DTYPE_REFERENCE_TOOL $(OPT) $(COMP_FLAGS) -o $@ -c $<
+
+endif
+endif
+
+#
+# Support targets
+#
+FORCE:
+
+config.h.in: configure.ac
+ autoheader
+ @ touch $@
+
+configure: configure.ac
+ autoconf
+
+config.status: configure
+ ./config.status --recheck
+
+config.h: config.h.in config.status
+ ./config.status
+ touch $@
+
+depclean:
+ rm -f $(DEPS)
+
+profclean:
+ rm -f $(PROF_GEN)
+
+gcovclean:
+ rm -f $(COV_GEN)
+
+clean: depclean profclean gcovclean clean-test
+ rm -f *.[ois] *.o[dm] ctags dctags ctags*.exe readtags etyperef \
+ ctags.man ctags.html ctags.prof ctags.cov *.bb *.bbg tags TAGS syntax.vim
+
+distclean: clean
+ rm -f $(CONFIG_GEN)
+
+maintainer-clean maintclean: distclean
+ rm -f $(AUTO_GEN)
+
+# Render a man page as plain text.  The sed stage removes overstrike
+# sequences (any character followed by a backspace) from the groff output;
+# the backspace is produced with printf so that no literal control
+# character has to be stored in this file.
+%.man: %.1 Makefile
+ tbl $< | groff -Wall -mtty-char -mandoc -Tascii -c | sed "s/.$$(printf '\b')//g" > $@
+
+%.html: %.1 Makefile
+ cat $< | $(MAN2HTML) > $@
+
+tags: $(DSOURCES) $(HEADERS) $(LIB_FILES) Makefile *.mak
+ @ echo "-- Building tag file"
+ @ ctags *
+
+#
+# Create a Vim syntax file for all typedefs
+#
+syntax: syntax.vim
+syntax.vim: $(DSOURCES) $(HEADERS) $(LIB_FILES)
+ @ echo "-- Generating syntax file"
+ @ ctags --c-types=cgstu --file-scope -o- $^ |\
+ awk '{print $$1}' | sort -u | fmt |\
+ awk '{printf("syntax keyword Typedef\t%s\n", $$0)}' > $@
+
+#
+# Testing
+#
+-include testing.mak
+
+#
+# Help
+#
+help:
+ @ echo "Major targets:"
+ @ echo "default : Build dctags, with debugging support"
+ @ echo "ctags : Build optimized binary"
+ @ echo "help-release: Print help on releasing ctags"
+
+#
+# Release management
+#
+
+help-release:
+ @ echo "1. make release-svn-X.Y"
+ @ echo "2. make release-source-X.Y"
+ @ echo "3. move ctags-X.Y.tar.gz to Linux"
+ @ echo "4. On Linux: Extract tar; make -f maintainer.mak release-rpm-X.Y"
+ @ echo "5. On Windows: cd $(WINDOWS_DIR)/winXY; nmake -f mk_mvc.mak ctags.exe mostlyclean"
+ @ echo "6. make version=X.Y win-zip"
+ @ echo "7. make website-X.Y"
+
+.SECONDARY:
+
+RPM_ARCH := i386
+RPM_SUBDIRS := BUILD SOURCES SPECS SRPMS RPMS
+RPM_DIRS := $(addprefix $(RPM_ROOT)/,$(RPM_SUBDIRS))
+
+$(RELEASE_DIR)/ctags-%-1.$(RPM_ARCH).rpm: \
+ $(RPM_ROOT)/RPMS/$(RPM_ARCH)/ctags-%-1.$(RPM_ARCH).rpm \
+ | $(RELEASE_DIR)
+ ln -f $< $@
+ chmod 644 $@
+
+$(RELEASE_DIR)/ctags-%-1.src.rpm: \
+ $(RPM_ROOT)/SRPMS/ctags-%-1.src.rpm \
+ | $(RELEASE_DIR)
+ ln -f $< $@
+ chmod 644 $@
+
+$(eval $(RPM_DIRS) $(RELEASE_DIR): ; mkdir -p $$@)
+
+$(RPM_ROOT)/SRPMS/ctags-%-1.src.rpm \
+$(RPM_ROOT)/RPMS/$(RPM_ARCH)/ctags-%-1.$(RPM_ARCH).rpm: \
+ $(RPM_ROOT)/SOURCES/ctags-%.tar.gz \
+ $(RPM_ROOT)/SPECS/ctags-%.spec \
+ | $(RPM_DIRS)
+ rpmbuild --define '_topdir $(RPM_ABS_ROOT)' --define 'optflags $(RPM_FLAGS)' --define 'packager $(PRODUCER) $(CTAGS_WEBSITE)' -ba $(RPM_ROOT)/SPECS/ctags-$*.spec
+ rm -fr $(RPM_ROOT)/BUILD/ctags-$*
+
+$(RPM_ROOT)/rpmrc: rpmmacros maintainer.mak
+ echo "optflags: $(RPM_ARCH) $(RPM_FLAGS)" > $@
+ echo "macrofiles: $(PWD)/rpmmacros" >> $@
+
+$(RPM_ROOT)/rpmmacros: maintainer.mak
+ echo "%_topdir $(RPM_ABS_ROOT)" > $@
+ echo '%_gpg_name "$(PRODUCER) <$(EMAIL)>"' >> $@
+ echo "%packager $(PRODUCER) $(CTAGS_WEBSITE)" >> $@
+ echo "%_i18ndomains %{nil}" >> $@
+ echo "%debug_package %{nil}" >> $@
+
+$(RPM_ROOT)/SPECS/ctags-%.spec: ctags.spec | $(RPM_ROOT)/SPECS
+ sed -e "s/@VERSION@/$*/" ctags.spec > $(RPM_ROOT)/SPECS/ctags-$*.spec
+
+$(RPM_ROOT)/SOURCES/ctags-%.tar.gz: $(RELEASE_DIR)/ctags-%.tar.gz | $(RPM_ROOT)/SOURCES
+ ln -f $< $@
+
+$(RELEASE_DIR)/ctags-%.tar.gz: $(UNIX_FILES) | $(RELEASE_DIR)
+ @ echo "---------- Building tar ball"
+ if [ -d $(@D)/dirs/ctags-$* ]; then rm -fr $(@D)/dirs/ctags-$*; fi
+ mkdir -p $(@D)/dirs/ctags-$*
+ cp -pr $(UNIX_FILES) $(REGEX_DIR) $(@D)/dirs/ctags-$*/
+ sed -e 's/\(PROGRAM_VERSION\) "\([^ ]*\)"/\1 "$*"/' ctags.h > $(@D)/dirs/ctags-$*/ctags.h
+ sed -e 's/"\(Version\) \([^ ]*\)"/"\1 $*"/' ctags.1 > $(@D)/dirs/ctags-$*/ctags.1
+ sed -e 's/\(Current Version:\) [^ ]*/\1 $*/' -e 's/@VERSION@/$*/' -e "s/@DATE@/`date +'%d %b %Y'`/" NEWS > $(@D)/dirs/ctags-$*/NEWS
+ (cd $(@D)/dirs/ctags-$* ;\
+ chmod 644 * ;\
+ chmod 755 mkinstalldirs ;\
+ chmod 755 $(REGEX_DIR) ;\
+ chmod 644 $(REGEX_DIR)/* ;\
+ autoheader ;\
+ chmod 644 config.h.in ;\
+ autoconf ;\
+ chmod 755 configure ;\
+ rm -fr autom4te.cache ;\
+ cat ctags.1 | $(MAN2HTML) > ctags.html ;\
+ )
+ cd $(@D)/dirs && tar -zcf ../$(@F) ctags-$*
+ chmod 644 $@
+
+clean-rpm:
+ rm -fr $(RPM_ROOT)
+
+# Goals named win-* (win-source, win-zip) need 'version=X.Y' on the command
+# line; stop early with a clear message when it is missing.  Only goals that
+# START with "win-" are checked: release-win-X.Y supplies the version itself
+# when it re-invokes make.  The second ifeq argument must be empty -- with
+# an extra comma the comparison is made against a literal "," and never
+# succeeds.
+ifneq ($(filter win-%,$(MAKECMDGOALS)),)
+ifeq ($(version),)
+$(error $(MAKECMDGOALS) target requires value for 'version')
+endif
+endif
+
+check-version-%:
+ @ if [ -z "$(version)" ]; then echo "target requires value for 'version'" >&2; exit 1; fi
+
+$(WINDOWS_DIR)/ctags$(win_version): \
+ $(RELEASE_DIR)/ctags-$(version).tar.gz maintainer.mak \
+ | $(WINDOWS_DIR)
+ @ echo "---------- Building Win32 release directory"
+ rm -fr "$(WINDOWS_DIR)/ctags$(win_version)"
+ mkdir -p "$(WINDOWS_DIR)/ctags$(win_version)"
+ for file in $(WIN_FILES) ctags.html; do \
+ $(UNIX2DOS) < "$(RELEASE_DIR)/dirs/ctags-$(version)/$${file}" > $@/$${file} ;\
+ done
+ mkdir $@/$(REGEX_DIR)
+ for file in $(REGEX_DIR)/*; do \
+ $(UNIX2DOS) < "$${file}" > $@/$(REGEX_DIR)/`basename $${file}` ;\
+ done
+ chmod 644 $@/*
+ chmod 755 $@/$(REGEX_DIR)
+ chmod 644 $@/$(REGEX_DIR)/*
+
+$(RELEASE_DIR)/ctags%.zip: \
+ check-version-% \
+ $(WINDOWS_DIR)/ctags% \
+ $(WINDOWS_DIR)/ctags%/ctags.exe
+ cd $(WINDOWS_DIR) && zip -r ../$@ ctags$*
+
+win-source: $(WINDOWS_DIR)/ctags$(win_version)
+
+win-zip: $(RELEASE_DIR)/ctags$(win_version).zip
+
+release-win-%:
+ $(MAKE) version="$*" win-source
+
+release-tar-%: $(RELEASE_DIR)/ctags-%.tar.gz
+ :
+
+release-rpm-%: \
+ $(RELEASE_DIR)/ctags-%-1.$(RPM_ARCH).rpm \
+ $(RELEASE_DIR)/ctags-%-1.src.rpm
+ :
+
+release-source-%: $(RELEASE_DIR)/ctags-%.tar.gz
+ $(MAKE) version="$*" win-source
+
+release-bin-%: release-rpm-%
+ $(MAKE) version="$*" win-zip
+
+$(WINDOWS_DIR):
+ mkdir -p $@
+
+#
+# Web site files
+#
+website-%: website-man-% website-index-% website-news-% \
+ $(CTAGS_WEBDIR)/EXTENDING.html
+ :
+
+website-man-%: ctags.1 Makefile
+ @ echo "---------- Generating $(CTAGS_WEBDIR)/ctags.html"
+ umask 022 ; \
+ sed -e 's/"\(Version\) \([^ ]*\)"/"\1 $*"/' ctags.1 |\
+ $(MAN2HTML) > $(CTAGS_WEBDIR)/ctags.html
+
+website-index-%: index.html Makefile
+ @ echo "---------- Generating $(CTAGS_WEBDIR)/index.html"
+ umask 022 ; \
+ sed -e "s/@VERSION@/$*/g" \
+ -e "s/@DOS_VERSION@/`echo $* | sed 's/\.//g'`/g" \
+ -e "s/@DATE@/`date +'%d %B %Y'`/" \
+ $< > $(CTAGS_WEBDIR)/index.html
+
+website-news-%: NEWS maintainer.mak
+ @ echo "---------- Generating $(CTAGS_WEBDIR)/news.html"
+ umask 022 ; \
+ sed -e 's/\(Current Version:\) [^ ]*/\1 $*/' \
+ -e 's/@VERSION@/$*/' \
+ -e "s/@DATE@/`date +'%d %b %Y'`/" \
+ -e 's/</\&lt;/g' -e 's/>/\&gt;/g' \
+ -e 's@^Current Version:.*$$@<html><head><title>Exuberant Ctags: Change Notes</title></head><body><h1>Change Notes</h1><pre>@' \
+ -e 's@\(^ctags-.* (.*)\)$$@<b>\1</b>@' \
+ -e 's@^vim:.*$$@</pre><hr><a href="http:index.html">Back to <strong>Exuberant Ctags</strong></a></body></html>@' \
+ $< > $(CTAGS_WEBDIR)/news.html
+
+$(CTAGS_WEBDIR)/EXTENDING.html: EXTENDING.html
+ @ echo "---------- Generating $(CTAGS_WEBDIR)/EXTENDING.html"
+ cp $< $@ && chmod 644 $@
+
+#
+# SVN management
+#
+svn_url := https://ctags.svn.sourceforge.net/svnroot/ctags
+
+release-svn-%: svn-tagcheck-%
+ @ echo "---------- Tagging release $*"
+ svn copy -m'Release of ctags-$*' $(svn_url)/trunk $(svn_url)/tags/ctags-$*
+
+rerelease-svn-%:
+ @ echo "---------- Tagging release $*"
+ svn remove -m'Regenerating release of ctags-$*' $(svn_url)/tags/ctags-$*
+ svn copy -m'Release of ctags-$*' $(svn_url)/trunk $(svn_url)/tags/ctags-$*
+
+# Refuse to tag a release whose tag already exists in the repository; the
+# message points at the rerelease-svn-X.Y target defined above.
+svn-tagcheck-%:
+ if svn list $(svn_url)/tags/ | grep -q 'ctags-$*/$$' >/dev/null 2>&1 ;then \
+ echo "ctags-$* already exists; use rerelease-svn-$*" >&2 ;\
+ exit 1 ;\
+ fi
+
+svn-files:
+ @ls -1 $(SVN_FILES)
+
+#
+# Dependency file generation
+#
+$(DEPS): %.c maintainer.mak
+ @ if [ ! -d $(DEP_DIR) ] ;then mkdir -p $(DEP_DIR) ;fi
+ @ $(CC) -M $(DCFLAGS) $< | sed 's/\($*\.o\)\([ :]\)/\1 $*.od $(@F)\2/g' > $@
+
+
+%.inc: %.c Makefile
+ -@ $(CC) -MM $(DCFLAGS) $<
+
+#
+# Compilation rules
+#
+%.o %.od: gnu_regex/%.c
+# @ echo "-- Compiling $<"
+ $(CC) $(CFLAGS) -D__USE_GNU -Dbool=int -Dfalse=0 -Dtrue=1 -I$(REGEX_DIR) $(OPT) -c $<
+
+%.o: %.c
+ @ echo "-- Compiling $<"
+ @ $(CC) $(COMP_FLAGS) -DEXTERNAL_SORT $(OPT) $(WARNINGS) -Wuninitialized -c $<
+
+%.od: %.c
+ @ echo "-- Compiling (debug) $<"
+ @ $(CC) -g $(DCFLAGS) $(WARNINGS) -o $*.od -c $<
+
+%.i: %.c FORCE
+ $(CC) $(DCFLAGS) $(WARNINGS) -Wuninitialized -O -E $< > $@
+
+%.ic: %.c FORCE
+ $(CC) $(DCFLAGS) $(WARNINGS) -Wuninitialized -O -E $< | sed '/^[ ]*$/d' > $@
+
+%.s: %.c FORCE
+ $(CC) $(DCFLAGS) $(WARNINGS) -S $< > $@
+
+readtags.err: DCFLAGS += -DREADTAGS_MAIN
+
+%.err: %.c
+ @ $(CC) $(DCFLAGS) $(WARNINGS) -Wuninitialized -O -c $<
+ @ rm $*.o
+
+%.c.gcov: %.da
+ @ gcov $*.c
+
+%.sproto: %.c
+ @ genproto -s -m __ARGS $<
+
+%.proto: %.c
+ @ genproto -e -m __ARGS $<
+
+# Print out the value of a variable
+# From http://www.cmcrossroads.com/ubbthreads/showflat.php?Cat=0&Board=cmbasics&Number=28829
+print-%:
+ @echo $* = $($*)
+
+# Print out the expanded values of all variables
+# From http://www.cmcrossroads.com/ubbthreads/showflat.php?Cat=0&Number=29581
+.PHONY: print-vars
+print-vars:
+ @$(foreach V,$(sort $(.VARIABLES)), \
+ $(if $(filter-out environment% default automatic, \
+ $(origin $V)),$(warning $V=$($V))))
+
+# Print out the declared values of all variables
+.PHONY: print-vars-decl
+print-vars-decl:
+ @$(foreach V,$(sort $(.VARIABLES)), \
+ $(if $(filter-out environment% default automatic, \
+ $(origin $V)),$(warning $V=$(value $V))))
+
+# vi:ts=4 sw=4
diff --git a/make.c b/make.c
new file mode 100644
index 0000000..f468b5a
--- /dev/null
+++ b/make.c
@@ -0,0 +1,217 @@
+/*
+* $Id: make.c 681 2008-10-12 22:43:00Z dhiebert $
+*
+* Copyright (c) 2000-2005, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* This module contains functions for generating tags for makefiles.
+*/
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#include <string.h>
+#include <ctype.h>
+
+#include "options.h"
+#include "parse.h"
+#include "read.h"
+#include "vstring.h"
+
+/*
+* DATA DEFINITIONS
+*/
+typedef enum {
+ K_MACRO
+} shKind;
+
+static kindOption MakeKinds [] = {
+ { TRUE, 'm', "macro", "macros"}
+};
+
+/*
+* FUNCTION DEFINITIONS
+*/
+
+/* Return the next input character.  A backslash is never returned itself:
+ * the character following it is read, and when that character is a newline
+ * (a continued line) the character after the newline is returned instead.
+ */
+static int nextChar (void)
+{
+	int ch = fileGetc ();
+
+	if (ch != '\\')
+		return ch;
+
+	ch = fileGetc ();
+	return (ch == '\n') ? fileGetc () : ch;
+}
+
+/* Consume input up to the end of the current line.  The terminating
+ * newline is pushed back so that the caller still sees it; at end of file
+ * nothing is pushed back.
+ */
+static void skipLine (void)
+{
+	int ch = nextChar ();
+
+	while (ch != EOF && ch != '\n')
+		ch = nextChar ();
+	if (ch == '\n')
+		fileUngetc (ch);
+}
+
+/* Read past white space and return the first character that is either a
+ * newline or not white space at all.
+ */
+static int skipToNonWhite (void)
+{
+	int ch;
+
+	for (;;)
+	{
+		ch = nextChar ();
+		if (ch == '\n' || ! isspace (ch))
+			break;
+	}
+	return ch;
+}
+
+/* True for characters accepted in a name: alphanumerics and '.', '-', '_'.
+ * NUL is rejected explicitly because strchr() would otherwise match the
+ * terminator of the character set.
+ */
+static boolean isIdentifier (int c)
+{
+	if (c == '\0')
+		return FALSE;
+	if (isalnum (c))
+		return TRUE;
+	if (strchr (".-_", c) != NULL)
+		return TRUE;
+	return FALSE;
+}
+
+/* Collect an identifier into `id`, starting with the already read
+ * character `first` and continuing while isIdentifier() accepts the input.
+ * The first rejected character is pushed back onto the input.
+ */
+static void readIdentifier (const int first, vString *const id)
+{
+	int ch;
+
+	vStringClear (id);
+	for (ch = first ; isIdentifier (ch) ; ch = nextChar ())
+		vStringPut (id, ch);
+	fileUngetc (ch);
+	vStringTerminate (id);
+}
+
+/* Skip input up to the bracket that closes an already consumed opening
+ * bracket.  `pair` holds the opening character followed by the closing one
+ * (e.g. "()" or "{}"); nested pairs are counted.  Skipping also stops at
+ * the end of the line and at end of file; the latter is reported through
+ * verbose() together with the line on which the skip began.
+ */
+static void skipToMatch (const char *const pair)
+{
+	const int begin = pair [0], end = pair [1];
+	const unsigned long inputLineNumber = getInputLineNumber ();
+	int matchLevel = 1;
+	int c = '\0';
+
+	/* The EOF test is what lets the loop finish on an unterminated bracket
+	 * at the end of the file; without it the diagnostic below could never
+	 * be reached. */
+	while (matchLevel > 0  &&  c != EOF)
+	{
+		c = nextChar ();
+		if (c == begin)
+			++matchLevel;
+		else if (c == end)
+			--matchLevel;
+		else if (c == '\n')
+			break;
+	}
+	if (c == EOF)
+		verbose ("%s: failed to find match for '%c' at line %lu\n",
+				getInputFileName (), begin, inputLineNumber);
+}
+
+/* Scan the current input file as a makefile and emit a K_MACRO tag for
+ * every name that is defined with "define NAME" or assigned with '=',
+ * ":=" or "?=".  Names assigned with "+=" are not tagged.
+ *
+ * State carried across characters:
+ *   newline           - the previous character ended a line
+ *   in_define         - inside a define ... endef body
+ *   in_rule           - a ':' has been seen; following lines that start
+ *                       with a tab are skipped as rule commands
+ *   variable_possible - an identifier at this position may still be the
+ *                       name of a variable
+ */
+static void findMakeTags (void)
+{
+ vString *name = vStringNew ();
+ boolean newline = TRUE;
+ boolean in_define = FALSE;
+ boolean in_rule = FALSE;
+ boolean variable_possible = TRUE;
+ int c;
+
+ while ((c = nextChar ()) != EOF)
+ {
+ if (newline)
+ {
+ if (in_rule)
+ {
+ if (c == '\t')
+ {
+ skipLine (); /* skip rule */
+ continue;
+ }
+ else
+ in_rule = FALSE;
+ }
+ variable_possible = (boolean)(!in_rule);
+ newline = FALSE;
+ }
+ if (c == '\n')
+ newline = TRUE;
+ else if (isspace (c))
+ continue;
+ else if (c == '#')
+ skipLine ();
+ else if (c == '(')
+ skipToMatch ("()");
+ else if (c == '{')
+ skipToMatch ("{}");
+ else if (c == ':')
+ {
+ /* Text after a ':' may hold an assignment, so names are allowed again. */
+ variable_possible = TRUE;
+ in_rule = TRUE;
+ }
+ else if (variable_possible && isIdentifier (c))
+ {
+ readIdentifier (c, name);
+ if (strcmp (vStringValue (name), "endef") == 0)
+ in_define = FALSE;
+ else if (in_define)
+ skipLine ();
+ /* NOTE(review): readIdentifier() takes its first argument by value, so
+ * c still holds the first character of the word just read and the
+ * isIdentifier (c) tests below are always true at this point. */
+ else if (strcmp (vStringValue (name), "define") == 0 &&
+ isIdentifier (c))
+ {
+ in_define = TRUE;
+ c = skipToNonWhite ();
+ readIdentifier (c, name);
+ makeSimpleTag (name, MakeKinds, K_MACRO);
+ skipLine ();
+ }
+ else {
+ /* "export NAME ...": the name of interest is the word after it. */
+ if (strcmp(vStringValue (name), "export") == 0 &&
+ isIdentifier (c))
+ {
+ c = skipToNonWhite ();
+ readIdentifier (c, name);
+ }
+ c = skipToNonWhite ();
+ if (strchr (":?+", c) != NULL)
+ {
+ /* Possible two-character operator: ":=", "?=" or "+=". */
+ boolean append = (boolean)(c == '+');
+ if (c == ':')
+ in_rule = TRUE;
+ c = nextChar ();
+ if (c != '=')
+ fileUngetc (c);
+ else if (append)
+ {
+ /* "+=" extends an existing variable: no tag is made. */
+ skipLine ();
+ continue;
+ }
+ }
+ if (c == '=')
+ {
+ makeSimpleTag (name, MakeKinds, K_MACRO);
+ in_rule = FALSE;
+ skipLine ();
+ }
+ }
+ }
+ else
+ variable_possible = FALSE;
+ }
+ vStringDelete (name);
+}
+
+/* Build the parser definition for makefiles: selected by the file name
+ * patterns and extensions listed below and parsed by findMakeTags().
+ */
+extern parserDefinition* MakefileParser (void)
+{
+	static const char *const extensions [] = { "mak", "mk", NULL };
+	static const char *const patterns [] = { "[Mm]akefile", "GNUmakefile", NULL };
+	parserDefinition* const parser = parserNew ("Make");
+
+	parser->parser     = findMakeTags;
+	parser->extensions = extensions;
+	parser->patterns   = patterns;
+	parser->kinds      = MakeKinds;
+	parser->kindCount  = KIND_COUNT (MakeKinds);
+	return parser;
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/matlab.c b/matlab.c
new file mode 100644
index 0000000..0811457
--- /dev/null
+++ b/matlab.c
@@ -0,0 +1,44 @@
+/*
+* $Id$
+*
+* Copyright (c) 2008, David Fishburn
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* This module contains functions for generating tags for MATLAB language files.
+*/
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#include <string.h>
+#include "parse.h"
+
+/*
+* FUNCTION DEFINITIONS
+*/
+
+/* Register the three regular expressions that recognise MATLAB function
+ * headers; each one captures the function name as group 1 and tags it with
+ * kind 'f'. */
+static void installMatLabRegex (const langType language)
+{
+    static const char *const tagName = "\\1";
+    static const char *const tagKind = "f,function";
+
+    /* several outputs:  function [x,y,z] = asdf */
+    addTagRegex (language, "^function[ \t]*\\[.*\\][ \t]*=[ \t]*([a-zA-Z0-9_]+)", tagName, tagKind, NULL);
+    /* single output:    function x = asdf */
+    addTagRegex (language, "^function[ \t]*[a-zA-Z0-9_]+[ \t]*=[ \t]*([a-zA-Z0-9_]+)", tagName, tagKind, NULL);
+    /* no output:        function asdf */
+    addTagRegex (language, "^function[ \t]*([a-zA-Z0-9_]+)[^=]*$", tagName, tagKind, NULL);
+}
+
+/* Build the parser definition for MATLAB: a purely regex-driven parser for
+ * "*.m" files whose patterns are installed by installMatLabRegex.
+ * Declared with an explicit (void) parameter list so that the definition is
+ * a real prototype, consistent with the other parser factories. */
+extern parserDefinition* MatLabParser (void)
+{
+    static const char *const extensions [] = { "m", NULL };
+    parserDefinition* const def = parserNew ("MatLab");
+    def->extensions = extensions;
+    def->initialize = installMatLabRegex;
+    def->regex      = TRUE;
+    return def;
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/mk_bc3.mak b/mk_bc3.mak
new file mode 100644
index 0000000..e4da935
--- /dev/null
+++ b/mk_bc3.mak
@@ -0,0 +1,46 @@
+# $Id: mk_bc3.mak 278 2003-02-24 02:27:53Z darren $
+#
+# Simple makefile for Borland C++ 3.1
+
+!include source.mak
+
+# Adjust the paths to your location of the borland C files
+BCCLOC = c:\borlandc
+CC = $(BCCLOC)\bin\bcc
+INC = -I$(BCCLOC)\include
+LIB = -L$(BCCLOC)\lib
+
+# Add this file for wildcard expansion (does NOT work with 4.0!)
+#EXTRA = $(BCCLOC)\lib\wildargs.obj
+
+# The following compile options can be changed for better machines.
+# replace -1- with -2 to produce code for a 80286 or higher
+# replace -1- with -3 to produce code for a 80386 or higher
+# add -v for source debugging
+OPTIMIZE= -1- -O1
+
+CFLAGS = -DMSDOS -ml -d -w-ccc -w-par -w-pia -w-rch -w-sus $(INC)
+LFLAGS = $(LIB) $(EXTRA)
+EXTRA_LIBS =
+
+ctags.exe: $(SOURCES) respbc3
+ $(CC) $(OPTIMIZE) -e$@ @respbc3
+
+debug: dctags.exe
+
+dctags.exe: $(SOURCES) respbc3 debug.c
+ $(CC) -DDEBUG -v -e$@ @respbc3 debug.c
+
+respbc3: mk_bc3.mak
+ copy &&|
+$(CFLAGS)
+$(LFLAGS)
+$(SOURCES)
+$(EXTRA_LIBS)
+| $@
+
+clean:
+ del *.exe
+ del *.obj
+ del respbc3
+ del tags
diff --git a/mk_bc5.mak b/mk_bc5.mak
new file mode 100644
index 0000000..20a2946
--- /dev/null
+++ b/mk_bc5.mak
@@ -0,0 +1,49 @@
+# $Id: mk_bc5.mak 623 2007-09-10 02:52:22Z dhiebert $
+#
+# Makefile for Win32 using Borland C++ compiler, version 5.5 (free version)
+
+!include source.mak
+
+REGEX_DEFINE = -DHAVE_REGCOMP -DREGEX_MALLOC -DSTDC_HEADERS=1
+DEFINES = -DWIN32 $(REGEX_DEFINE)
+INCLUDES = -I. -Ignu_regex
+WARNINGS = -w-aus -w-par -w-pia -w-pro -w-sus
+CFLAGS = -d -DSTRICT -lTpe -lap
+BCC = bcc32
+
+# Optimizations if your platform supports all of them.
+OPT = -O2 -OS -lGt
+
+# Allows multithreading
+#MT_OPT = -tWM -lcw32mt
+
+ctags: ctags.exe
+
+ctags.exe: respbc5
+ $(BCC) $(OPT) $(MT_OPT) -e$@ $(LDFLAGS) @respbc5
+
+readtags.exe: readtags.c
+ $(BCC) $(CFLAGS) $(OPT) $(MT_OPT) -e$@ $(DEFINES) -DREADTAGS_MAIN readtags.c $(LDFLAGS)
+
+# Debug version
+dctags.exe: respbc5
+ $(BCC) -DDEBUG -e$@ $(LDFLAGS) @respbc5 debug.c
+
+# Compile the bundled GNU regex implementation. The source file must be named
+# explicitly: without it the compiler is invoked with nothing to compile.
+regex.obj:
+	$(BCC) -c -o$@ -w- $(DEFINES) -Dconst= $(INCLUDES) gnu_regex/regex.c
+
+respbc5: $(SOURCES) $(REGEX_SOURCES) $(HEADERS) $(REGEX_HEADERS) mk_bc5.mak
+ echo $(DEFINES) $(INCLUDES) > $@
+ echo $(WARNINGS) >> $@
+ echo $(CFLAGS) >> $@
+ echo $(SOURCES) $(REGEX_SOURCES) >> $@
+
+mostlyclean:
+ - del *.obj
+ - del *.tds
+ - del dctags.exe
+ - del respbc5
+ - del tags
+
+clean: mostlyclean
+ - del ctags.exe
diff --git a/mk_djg.mak b/mk_djg.mak
new file mode 100644
index 0000000..8ea9313
--- /dev/null
+++ b/mk_djg.mak
@@ -0,0 +1,18 @@
+# $Id: mk_djg.mak 307 2003-03-31 04:53:22Z darren $
+#
+# The most simplistic Makefile, for DJGPP Version 2 on Windows
+#
+# Rather than using this makefile, it is preferable to run "configure", then
+# "make" under BASH on DJGPP (i.e. the standard means of building a package on
+# Unix), but you have to have a fuller complement of DJGPP packages installed
+# to do this.
+
+include source.mak
+
+CFLAGS = -O2 -Wall -DMSDOS
+
+ctags.exe: $(SOURCES)
+ gcc $(CFLAGS) -s -o ctags.exe $(SOURCES) -lpc
+
+clean:
+ del ctags.exe
diff --git a/mk_manx.mak b/mk_manx.mak
new file mode 100644
index 0000000..e1f513c
--- /dev/null
+++ b/mk_manx.mak
@@ -0,0 +1,65 @@
+# $Id: mk_manx.mak 264 2003-02-13 02:59:30Z darren $
+#
+# Makefile for ctags on the Amiga, using Aztec/Manx C 5.0 or later
+
+OBJEXT = o
+
+OBJECTS = \
+ args.$(OBJEXT) \
+ asm.$(OBJEXT) \
+ asp.$(OBJEXT) \
+ awk.$(OBJEXT) \
+ beta.$(OBJEXT) \
+ c.$(OBJEXT) \
+ cobol.$(OBJEXT) \
+ eiffel.$(OBJEXT) \
+ entry.$(OBJEXT) \
+ erlang.$(OBJEXT) \
+ fortran.$(OBJEXT) \
+ get.$(OBJEXT) \
+ keyword.$(OBJEXT) \
+ lisp.$(OBJEXT) \
+ lregex.$(OBJEXT) \
+ lua.$(OBJEXT) \
+ main.$(OBJEXT) \
+ make.$(OBJEXT) \
+ options.$(OBJEXT) \
+ parse.$(OBJEXT) \
+ pascal.$(OBJEXT) \
+ perl.$(OBJEXT) \
+ php.$(OBJEXT) \
+ python.$(OBJEXT) \
+ read.$(OBJEXT) \
+ rexx.$(OBJEXT) \
+ routines.$(OBJEXT) \
+ ruby.$(OBJEXT) \
+ scheme.$(OBJEXT) \
+ sh.$(OBJEXT) \
+ slang.$(OBJEXT) \
+ sort.$(OBJEXT) \
+ sml.$(OBJEXT) \
+ sql.$(OBJEXT) \
+ strlist.$(OBJEXT) \
+ tcl.$(OBJEXT) \
+ verilog.$(OBJEXT) \
+ vim.$(OBJEXT) \
+ vstring.$(OBJEXT) \
+ yacc.$(OBJEXT)
+
+CC = cc
+
+#>>>>> choose between debugging (-bs) or optimizing (-so)
+OPTIONS = -so
+#OPTIONS = -bs
+
+#>>>>>> choose -g for debugging
+LN_DEBUG =
+#LN_DEBUG = -g
+
+CFLAGS = $(OPTIONS) -wapruq -ps -qf -DAMIGA -Dconst=
+
+Ctags: $(OBJECTS)
+ ln +q -m $(LN_DEBUG) -o Ctags $(OBJECTS) -lc16 -lm16
+
+.c.o:
+ $(CC) $(CFLAGS) -o $*.o $*.c
diff --git a/mk_mingw.mak b/mk_mingw.mak
new file mode 100644
index 0000000..32a61ae
--- /dev/null
+++ b/mk_mingw.mak
@@ -0,0 +1,31 @@
+# $Id: mk_mingw.mak 723 2009-07-09 20:53:19Z dhiebert $
+#
+# Makefile for Exuberant Ctags under Win32 with MinGW compiler
+#
+
+include source.mak
+
+REGEX_DEFINES = -DHAVE_REGCOMP -D__USE_GNU -Dbool=int -Dfalse=0 -Dtrue=1 -Dstrcasecmp=stricmp
+
+CFLAGS = -Wall
+DEFINES = -DWIN32 $(REGEX_DEFINES)
+INCLUDES = -I. -Ignu_regex
+CC = gcc
+
+ctags.exe: OPT = -O4
+dctags.exe: OPT = -g
+dctags.exe: DEBUG = -DDEBUG
+dctags.exe: SOURCES += debug.c
+
+ctags: ctags.exe
+
+ctags.exe dctags.exe: $(SOURCES) $(REGEX_SOURCES) $(HEADERS) $(REGEX_HEADERS)
+ $(CC) $(OPT) $(CFLAGS) $(DEFINES) $(INCLUDES) -o $@ $(SOURCES) $(REGEX_SOURCES)
+
+readtags.exe: readtags.c
+ $(CC) $(OPT) $(CFLAGS) -DREADTAGS_MAIN $(DEFINES) $(INCLUDES) -o $@ $<
+
+clean:
+ - rm -f ctags.exe
+ - rm -f dctags.exe
+ - rm -f tags
diff --git a/mk_mpw.mak b/mk_mpw.mak
new file mode 100644
index 0000000..20c49e5
--- /dev/null
+++ b/mk_mpw.mak
@@ -0,0 +1,130 @@
+# $Id: mk_mpw.mak 264 2003-02-13 02:59:30Z darren $
+#
+# Makefile for Macintosh using MPW
+#
+# Created by: Maarten L. Hekkelman <maarten@hekkelman.com>
+
+HEADERS = ¶
+ args.h ctags.h debug.h entry.h general.h get.h keyword.h ¶
+ main.h options.h parse.h parsers.h read.h routines.h sort.h ¶
+ strlist.h vstring.h mac.h
+
+SOURCES = ¶
+ args.c ¶
+ asm.c ¶
+ asp.c ¶
+ awk.c ¶
+ beta.c ¶
+ c.c ¶
+ cobol.c ¶
+ eiffel.c ¶
+ entry.c ¶
+ erlang.c ¶
+ fortran.c ¶
+ get.c ¶
+ keyword.c ¶
+ lisp.c ¶
+ lregex.c ¶
+ lua.c ¶
+ main.c ¶
+ make.c ¶
+ options.c ¶
+ parse.c ¶
+ pascal.c ¶
+ perl.c ¶
+ php.c ¶
+ python.c ¶
+ read.c ¶
+ rexx.c ¶ ¶
+ routines.c ¶
+ ruby.c ¶
+ scheme.c ¶
+ sh.c ¶
+ slang.c ¶
+ sort.c ¶
+ sml.c ¶
+ sql.c ¶
+ strlist.c ¶
+ tcl.c ¶
+ verilog.c ¶
+ vim.c ¶
+ vstring.c ¶
+ yacc.c ¶
+ mac.c
+
+OBJ = ¶
+ args.o ¶
+ asm.o ¶
+ asp.o ¶
+ awk.o ¶
+ beta.o ¶
+ c.o ¶
+ cobol.o ¶
+ eiffel.o ¶
+ entry.o ¶
+ erlang.o ¶
+ fortran.o ¶
+ get.o ¶
+ keyword.o ¶
+ lisp.o ¶
+ lregex.o ¶
+ lua.o ¶
+ main.o ¶
+ make.o ¶
+ options.o ¶
+ parse.o ¶
+ pascal.o ¶
+ perl.o ¶
+ php.o ¶
+ python.o ¶
+ read.o ¶
+ rexx.o ¶
+ routines.o ¶
+ ruby.o ¶
+ scheme.o ¶
+ sh.o ¶
+ slang.o ¶
+ sort.o ¶
+ sml.o ¶
+ sql.o ¶
+ strlist.o ¶
+ tcl.o ¶
+ verilog.o ¶
+ vim.o ¶
+ vstring.o ¶
+ yacc.o ¶
+ mac.o
+
+LIBS = ¶
+ {PPCLibraries}PPCToolLibs.o ¶
+ {SharedLibraries}MathLib ¶
+ {SharedLibraries}InterfaceLib ¶
+ {SharedLibraries}StdCLib ¶
+ {MWPPCLibraries}'MSL StdCRuntime.Lib'
+
+CC = mwcppc
+LD = mwlinkppc
+
+# Using -nodefaults to avoid having {MWCIncludes} in our include paths
+# Needed since we're building a MPW Tool and not an application.
+COptions = -nodefaults -i : -i- -i {CIncludes} -opt full
+LOptions = -xm m -stacksize 128
+
+all Ä CTags
+
+CTags Ä TurnOfEcho {OBJ}
+ {LD} {LOptions} -o CTags {OBJ} {LIBS}
+
+{OBJ} Ä {HEADERS}
+
+tags Ä CTags
+ :CTags -p. {SOURCES} {HEADERS}
+
+# Remove the object files, the built CTags tool and the generated tags file.
+# CTags is a literal file name here (the link target above), not a variable.
+clean Ä
+	Delete -y -i {OBJ} CTags tags
+
+.o Ä .c
+ {CC} {depDir}{default}.c -o {targDir}{default}.o {COptions}
+
+TurnOfEcho Ä
+ set echo 0
diff --git a/mk_mvc.mak b/mk_mvc.mak
new file mode 100644
index 0000000..80a128e
--- /dev/null
+++ b/mk_mvc.mak
@@ -0,0 +1,40 @@
+# $Id: mk_mvc.mak 724 2009-07-09 20:54:01Z dhiebert $
+#
+# Makefile for Win32 using Microsoft Visual C++ compiler
+
+include source.mak
+
+REGEX_DEFINES = -DHAVE_REGCOMP -D__USE_GNU -Dbool=int -Dfalse=0 -Dtrue=1 -Dstrcasecmp=stricmp
+DEFINES = -DWIN32 $(REGEX_DEFINES)
+INCLUDES = -I. -Ignu_regex
+OPT = /O2
+
+ctags: ctags.exe
+
+ctags.exe: respmvc
+ cl $(OPT) /Fe$@ @respmvc /link setargv.obj
+
+readtags.exe: readtags.c
+ cl /clr $(OPT) /Fe$@ $(DEFINES) -DREADTAGS_MAIN readtags.c /link setargv.obj
+
+# Debug version
+dctags.exe: respmvc
+ cl /Zi -DDEBUG /Fe$@ @respmvc debug.c /link setargv.obj
+
+regex.obj:
+ cl /c $(OPT) /Fo$@ $(INCLUDES) $(DEFINES) gnu_regex/regex.c
+
+respmvc: $(SOURCES) $(REGEX_SOURCES) $(HEADERS) $(REGEX_HEADERS) mk_mvc.mak
+ echo $(DEFINES) > $@
+ echo $(INCLUDES) >> $@
+ echo $(SOURCES) >> $@
+ echo $(REGEX_SOURCES) >> $@
+
+mostlyclean:
+ - del *.obj
+ - del dctags.exe
+ - del respmvc
+ - del tags
+
+clean: mostlyclean
+ - del ctags.exe
diff --git a/mk_os2.mak b/mk_os2.mak
new file mode 100644
index 0000000..403a496
--- /dev/null
+++ b/mk_os2.mak
@@ -0,0 +1,104 @@
+# $Id: mk_os2.mak 74 2002-01-27 21:20:55Z darren $
+#
+# A Makefile for OS/2 using EMX/gcc
+# You may want to use the OS/2 port of autoconf for building
+# and comment-out the according statements in this Makefile.
+# You need a library to provide regex support.
+# libExt might do this, but currently (2/2001) it doesn't work well
+# together with ctags ...
+#
+# Provided and supported by
+# Alexander Mai
+# <st002279@hrzpub.tu-darmstadt.de> or <amai@users.sf.net>
+
+default:
+ @echo "Enter $(MAKE) -f mk_os2.mak target"
+ @echo "where target is one of:"
+ @echo " small (small executable req. EMX runtime)"
+ @echo " debug (executable for debugging purposes)"
+ @echo " release (stand-alone executable)"
+ @echo " clean (remove all files built)"
+
+# Use this to create a small binary
+# (requires EMX runtime libraries)
+small:
+ $(MAKE) -f mk_os2.mak all \
+ CC="gcc" \
+ CFLAGS="-O5 -mpentium -Wall" \
+ LFLAGS="-Zcrtdll -s" \
+ LIBS="-lregex" \
+ OBJEXT="o"
+
+# Use this to create a binary for debugging purposes
+# (requires EMX runtime libraries)
+debug:
+ $(MAKE) -f mk_os2.mak all \
+ CC="gcc" \
+ CFLAGS="-O0 -Wall -g" \
+ LFLAGS="-Zcrtdll -g" \
+ LIBS="-lregex" \
+ OBJEXT="o"
+
+# Use this to create a stand-alone binary for distribution
+# (requires link386 for linking but no EMX runtime libraries)
+release:
+ $(MAKE) -f mk_os2.mak all \
+ CC="gcc" \
+ CFLAGS="-g -O5 -mpentium -Wall" \
+ LFLAGS="-s -Zomf -Zsys -Zlinker /PM:VIO" \
+ LIBS="-lregex" \
+ OBJEXT="obj"
+
+# Use the line below if you have created config.h
+# (manually or by running configure)
+# Otherwise use built-in defaults (#ifdef OS2)!
+# DEFINES=-DHAVE_CONFIG_H
+DEFINES=-DOS2
+
+
+# General rules and definitions
+
+.SUFFIXES: .c .exe .h .o .obj
+
+include source.mak
+
+all: ctags.exe readtags.exe readtags.a readtags.lib
+ctags: ctags.exe
+etags: etags.exe
+
+ctags.exe: $(OBJECTS)
+ $(CC) $(CFLAGS) $(LFLAGS) -o $@ $^ $(LIBS)
+
+etags.exe: ctags.exe
+ @copy $< $@
+
+# The readtags executable
+readtags.exe: readtags.c
+ $(CC) $(CFLAGS) $(DEFINES) $(LFLAGS) -DREADTAGS_MAIN -o $@ $^ $(LIBS)
+
+# We build a.out and omf version of the readtags library
+readtags.o: readtags.c
+ $(CC) $(CFLAGS) $(DEFINES) -c -o $@ $^
+
+readtags.a: readtags.o
+ if exist $@ del $@
+ ar rc $@ $^
+
+readtags.lib: readtags.a
+ emxomf $<
+
+.c.o:
+ $(CC) $(CFLAGS) $(DEFINES) -I. -c $< -o $@
+
+.c.obj:
+ $(CC) $(CFLAGS) -Zomf $(DEFINES) -I. -c $< -o $@
+
+# Delete all files that are not part of the source distribution
+clean:
+ @if exist ctags.exe del ctags.exe
+ @if exist etags.exe del etags.exe
+ @if exist readtags.exe del readtags.exe
+ @if exist readtags.a del readtags.a
+ @if exist readtags.lib del readtags.lib
+ @if exist *.obj del *.obj
+ @if exist *.o del *.o
diff --git a/mk_qdos.mak b/mk_qdos.mak
new file mode 100644
index 0000000..b069040
--- /dev/null
+++ b/mk_qdos.mak
@@ -0,0 +1,100 @@
+# $Id: mk_qdos.mak 264 2003-02-13 02:59:30Z darren $
+#
+# Makefile for ctags on QDOS/SMS systems and C68 v4.24
+# Submitted by Thierry Godefroy <godefroy@imaginet.fr>
+
+# Directories:
+
+T = ram1_
+P = drv1_C68_
+
+# Programs name:
+
+CC = $(P)cc
+AS = $(P)as68
+ASM = $(P)qmac
+LD = $(P)ld
+
+# Programs flags:
+
+CCFLAGS = -tmp$(T) -v -Y$(P) -I$(P)include_ -O
+ASFLAGS = -V
+ASMFLAGS = -nolist
+LDFLAGS = -v -L$(P)lib_ -bufp150K\
+
+# Target name:
+
+EXEC = ctags
+
+# Additional libraries:
+
+LIBS =
+
+# Target dependencies:
+
+OBJEXT = o
+
+HEADERS = e_qdos.h \
+ args.h ctags.h debug.h entry.h general.h get.h keyword.h \
+ main.h options.h parse.h parsers.h read.h routines.h sort.h \
+ strlist.h vstring.h
+
+OBJECTS = qdos.$(OBJEXT) \
+ args.$(OBJEXT) \
+ asm.$(OBJEXT) \
+ asp.$(OBJEXT) \
+ awk.$(OBJEXT) \
+ beta.$(OBJEXT) \
+ c.$(OBJEXT) \
+ cobol.$(OBJEXT) \
+ eiffel.$(OBJEXT) \
+ entry.$(OBJEXT) \
+ erlang.$(OBJEXT) \
+ fortran.$(OBJEXT) \
+ get.$(OBJEXT) \
+ keyword.$(OBJEXT) \
+ lisp.$(OBJEXT) \
+ lregex.$(OBJEXT) \
+ lua.$(OBJEXT) \
+ main.$(OBJEXT) \
+ make.$(OBJEXT) \
+ options.$(OBJEXT) \
+ parse.$(OBJEXT) \
+ pascal.$(OBJEXT) \
+ perl.$(OBJEXT) \
+ php.$(OBJEXT) \
+ python.$(OBJEXT) \
+ read.$(OBJEXT) \
+ rexx.$(OBJEXT) \
+ routines.$(OBJEXT) \
+ ruby.$(OBJEXT) \
+ scheme.$(OBJEXT) \
+ sh.$(OBJEXT) \
+ slang.$(OBJEXT) \
+ sort.$(OBJEXT) \
+ sml.$(OBJEXT) \
+ sql.$(OBJEXT) \
+ strlist.$(OBJEXT) \
+ tcl.$(OBJEXT) \
+ verilog.$(OBJEXT) \
+ vim.$(OBJEXT) \
+ vstring.$(OBJEXT) \
+ yacc.$(OBJEXT)
+
+$(EXEC) : $(OBJECTS)
+ $(LD) -o$(EXEC) $(LDFLAGS) $(OBJECTS) $(LIBS)
+
+$(OBJECTS): $(HEADERS)
+
+# Construction rules:
+
+_c_o :
+ $(CC) -c $(CCFLAGS) $<
+
+_s_o :
+ $(AS) $(ASFLAGS) $< $@
+
+_asm_rel :
+ $(ASM) $< $(ASMFLAGS)
+
+#end
diff --git a/mk_sas.mak b/mk_sas.mak
new file mode 100644
index 0000000..476f5a6
--- /dev/null
+++ b/mk_sas.mak
@@ -0,0 +1,63 @@
+# $Id: mk_sas.mak 264 2003-02-13 02:59:30Z darren $
+#
+# Makefile for SAS/C Amiga Compiler
+# Submitted by Stefan Haubenthal <polluks@freeshell.org>
+
+CFLAGS= def AMIGA opt parm r sint
+
+OBJEXT = o
+
+OBJECTS = \
+ args.$(OBJEXT) \
+ asm.$(OBJEXT) \
+ asp.$(OBJEXT) \
+ awk.$(OBJEXT) \
+ beta.$(OBJEXT) \
+ c.$(OBJEXT) \
+ cobol.$(OBJEXT) \
+ eiffel.$(OBJEXT) \
+ entry.$(OBJEXT) \
+ erlang.$(OBJEXT) \
+ fortran.$(OBJEXT) \
+ get.$(OBJEXT) \
+ keyword.$(OBJEXT) \
+ lisp.$(OBJEXT) \
+ lregex.$(OBJEXT) \
+ lua.$(OBJEXT) \
+ main.$(OBJEXT) \
+ make.$(OBJEXT) \
+ options.$(OBJEXT) \
+ parse.$(OBJEXT) \
+ pascal.$(OBJEXT) \
+ perl.$(OBJEXT) \
+ php.$(OBJEXT) \
+ python.$(OBJEXT) \
+ read.$(OBJEXT) \
+ rexx.$(OBJEXT) \
+ routines.$(OBJEXT) \
+ ruby.$(OBJEXT) \
+ scheme.$(OBJEXT) \
+ sh.$(OBJEXT) \
+ slang.$(OBJEXT) \
+ sort.$(OBJEXT) \
+ sml.$(OBJEXT) \
+ sql.$(OBJEXT) \
+ strlist.$(OBJEXT) \
+ tcl.$(OBJEXT) \
+ verilog.$(OBJEXT) \
+ vim.$(OBJEXT) \
+ vstring.$(OBJEXT) \
+ yacc.$(OBJEXT)
+
+ctags: $(OBJECTS)
+ sc link to $@ $(OBJECTS) math s sint
+
+.c.o:
+ $(CC) $(CFLAGS) -o $*.o $*.c
+
+clean:
+ -delete $(OBJECTS) ctags.lnk
+
+archive: clean
+ @-delete force RAM:ctags.lha
+ lha -r a RAM:ctags // ctags
diff --git a/mkinstalldirs b/mkinstalldirs
new file mode 100755
index 0000000..b937de2
--- /dev/null
+++ b/mkinstalldirs
@@ -0,0 +1,40 @@
+#! /bin/sh
+# mkinstalldirs --- make directory hierarchy
+# Author: Noah Friedman <friedman@prep.ai.mit.edu>
+# Created: 1993-05-16
+# Public domain
+
+# $Id: mkinstalldirs 2 2001-11-02 04:53:43Z darren $
+
+# Exit status of the script; overwritten with mkdir's status when a
+# directory could not be created.
+errstatus=0
+
+# Iterate over every path given on the command line.
+for file
+do
+ # Split the path into its components, one word each. The ":" prefix keeps
+ # echo from treating the path as an option; a leading "/" is temporarily
+ # turned into "#" so it survives the "/" -> " " substitution and is then
+ # restored. "fnord" is a placeholder first word for "set" that is
+ # immediately shifted away.
+ set fnord `echo ":$file" | sed -ne 's/^:\//#/;s/^://;s/\// /g;s/^#/\//;p'`
+ shift
+
+ # Path built up so far, one component per iteration.
+ pathcomp=
+ for d
+ do
+ pathcomp="$pathcomp$d"
+ # Prefix "./" so that a name starting with "-" is not taken as an option.
+ case "$pathcomp" in
+ -* ) pathcomp=./$pathcomp ;;
+ esac
+
+ if test ! -d "$pathcomp"; then
+ echo "mkdir $pathcomp" 1>&2
+
+ mkdir "$pathcomp" || lasterr=$?
+
+ # Only record a failure if the directory still does not exist after
+ # the mkdir attempt.
+ if test ! -d "$pathcomp"; then
+ errstatus=$lasterr
+ fi
+ fi
+
+ pathcomp="$pathcomp/"
+ done
+done
+
+exit $errstatus
+
+# mkinstalldirs ends here
diff --git a/ocaml.c b/ocaml.c
new file mode 100644
index 0000000..8fd6872
--- /dev/null
+++ b/ocaml.c
@@ -0,0 +1,1842 @@
+/*
+* Copyright (c) 2009, Vincent Berthoux
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* This module contains functions for generating tags for Objective Caml
+* language files.
+*/
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#include <string.h>
+
+#include "keyword.h"
+#include "entry.h"
+#include "options.h"
+#include "read.h"
+#include "routines.h"
+#include "vstring.h"
+
+/* To get rid of unused parameter warning in
+ * -Wextra */
+#ifdef UNUSED
+#elif defined(__GNUC__)
+# define UNUSED(x) UNUSED_ ## x __attribute__((unused))
+#elif defined(__LCLINT__)
+# define UNUSED(x) /*@unused@*/ x
+#else
+# define UNUSED(x) x
+#endif
+#define OCAML_MAX_STACK_SIZE 256
+
+typedef enum {
+ K_CLASS, /* Ocaml class, relatively rare */
+ K_METHOD, /* class method */
+ K_MODULE, /* Ocaml module OR functor */
+ K_VAR,
+ K_TYPE, /* name of an OCaml type */
+ K_FUNCTION,
+ K_CONSTRUCTOR, /* Constructor of a sum type */
+ K_RECORDFIELD,
+ K_EXCEPTION
+} ocamlKind;
+
+static kindOption OcamlKinds[] = {
+ {TRUE, 'c', "class", "classes"},
+ {TRUE, 'm', "method", "Object's method"},
+ {TRUE, 'M', "module", "Module or functor"},
+ {TRUE, 'v', "var", "Global variable"},
+ {TRUE, 't', "type", "Type name"},
+ {TRUE, 'f', "function", "A function"},
+ {TRUE, 'C', "Constructor", "A constructor"},
+ {TRUE, 'r', "Record field", "A 'structure' field"},
+ {TRUE, 'e', "Exception", "An exception"}
+};
+
+typedef enum {
+ OcaKEYWORD_and,
+ OcaKEYWORD_begin,
+ OcaKEYWORD_class,
+ OcaKEYWORD_do,
+ OcaKEYWORD_done,
+ OcaKEYWORD_else,
+ OcaKEYWORD_end,
+ OcaKEYWORD_exception,
+ OcaKEYWORD_for,
+ OcaKEYWORD_functor,
+ OcaKEYWORD_fun,
+ OcaKEYWORD_if,
+ OcaKEYWORD_in,
+ OcaKEYWORD_let,
+ OcaKEYWORD_match,
+ OcaKEYWORD_method,
+ OcaKEYWORD_module,
+ OcaKEYWORD_mutable,
+ OcaKEYWORD_object,
+ OcaKEYWORD_of,
+ OcaKEYWORD_rec,
+ OcaKEYWORD_sig,
+ OcaKEYWORD_struct,
+ OcaKEYWORD_then,
+ OcaKEYWORD_try,
+ OcaKEYWORD_type,
+ OcaKEYWORD_val,
+ OcaKEYWORD_virtual,
+ OcaKEYWORD_while,
+ OcaKEYWORD_with,
+
+ OcaIDENTIFIER,
+ Tok_PARL, /* '(' */
+ Tok_PARR, /* ')' */
+ Tok_BRL, /* '[' */
+ Tok_BRR, /* ']' */
+ Tok_CurlL, /* '{' */
+ Tok_CurlR, /* '}' */
+ Tok_Prime, /* '\'' */
+ Tok_Pipe, /* '|' */
+ Tok_EQ, /* '=' */
+ Tok_Val, /* string/number/poo */
+ Tok_Op, /* any operator recognized by the language */
+ Tok_semi, /* ';' */
+ Tok_comma, /* ',' */
+ Tok_To, /* '->' */
+ Tok_Sharp, /* '#' */
+ Tok_Backslash, /* '\\' */
+
+ Tok_EOF /* END of file */
+} ocamlKeyword;
+
+typedef struct sOcaKeywordDesc {
+ const char *name;
+ ocamlKeyword id;
+} ocaKeywordDesc;
+
+typedef ocamlKeyword ocaToken;
+
+/* Reserved words of the language and the token each one lexes to.
+ * The names are compared against identifiers produced by readIdentifier(),
+ * which never contain blanks, so every name must be written without padding:
+ * the word operators (mod, land, lor, lxor, lsl, lsr) previously carried a
+ * trailing space and therefore could never be recognised.
+ * NOTE(review): OcaKEYWORD_if is declared in the token enum but has no entry
+ * here - confirm whether that omission is intentional. */
+static const ocaKeywordDesc OcamlKeywordTable[] = {
+    { "and"       , OcaKEYWORD_and       },
+    { "begin"     , OcaKEYWORD_begin     },
+    { "class"     , OcaKEYWORD_class     },
+    { "do"        , OcaKEYWORD_do        },
+    { "done"      , OcaKEYWORD_done      },
+    { "else"      , OcaKEYWORD_else      },
+    { "end"       , OcaKEYWORD_end       },
+    { "exception" , OcaKEYWORD_exception },
+    { "for"       , OcaKEYWORD_for       },
+    { "fun"       , OcaKEYWORD_fun       },
+    { "function"  , OcaKEYWORD_fun       },
+    { "functor"   , OcaKEYWORD_functor   },
+    { "in"        , OcaKEYWORD_in        },
+    { "let"       , OcaKEYWORD_let       },
+    { "match"     , OcaKEYWORD_match     },
+    { "method"    , OcaKEYWORD_method    },
+    { "module"    , OcaKEYWORD_module    },
+    { "mutable"   , OcaKEYWORD_mutable   },
+    { "object"    , OcaKEYWORD_object    },
+    { "of"        , OcaKEYWORD_of        },
+    { "rec"       , OcaKEYWORD_rec       },
+    { "sig"       , OcaKEYWORD_sig       },
+    { "struct"    , OcaKEYWORD_struct    },
+    { "then"      , OcaKEYWORD_then      },
+    { "try"       , OcaKEYWORD_try       },
+    { "type"      , OcaKEYWORD_type      },
+    { "val"       , OcaKEYWORD_val       },
+    { "value"     , OcaKEYWORD_let       }, /* just to handle revised syntax */
+    { "virtual"   , OcaKEYWORD_virtual   },
+    { "while"     , OcaKEYWORD_while     },
+    { "with"      , OcaKEYWORD_with      },
+
+    /* operators spelled as words, plus literal values */
+    { "or"        , Tok_Op               },
+    { "mod"       , Tok_Op               },
+    { "land"      , Tok_Op               },
+    { "lor"       , Tok_Op               },
+    { "lxor"      , Tok_Op               },
+    { "lsl"       , Tok_Op               },
+    { "lsr"       , Tok_Op               },
+    { "asr"       , Tok_Op               },
+    { "->"        , Tok_To               },
+    { "true"      , Tok_Val              },
+    { "false"     , Tok_Val              }
+};
+
+static langType Lang_Ocaml;
+
+boolean exportLocalInfo = FALSE;
+
+/*//////////////////////////////////////////////////////////////////
+//// lexingInit */
+typedef struct _lexingState {
+ vString *name; /* current parsed identifier/operator */
+ const unsigned char *cp; /* position in stream */
+} lexingState;
+
+/* array of the size of all possible value for a char */
+boolean isOperator[1 << (8 * sizeof (char))] = { FALSE };
+
+/* Register every entry of OcamlKeywordTable in the keyword hash under the
+ * OCaml language id. */
+static void initKeywordHash ( void )
+{
+    const ocaKeywordDesc *entry = OcamlKeywordTable;
+    const ocaKeywordDesc *const tableEnd =
+        OcamlKeywordTable + sizeof (OcamlKeywordTable) / sizeof (ocaKeywordDesc);
+
+    for (; entry != tableEnd; ++entry)
+        addKeyword (entry->name, Lang_Ocaml, (int) entry->id);
+}
+
+/* Mark every character that may appear in an OCaml operator.
+ * Some of these characters ('|', ':', '=', '~' and '?') additionally get
+ * special treatment because of their role in the OCaml grammar. */
+static void initOperatorTable ( void )
+{
+    static const char operatorChars[] = "!$%&*+-./:<=>?@^~|";
+    const char *op;
+
+    for (op = operatorChars; *op != '\0'; ++op)
+        isOperator[(unsigned char) *op] = TRUE;
+}
+
+/*//////////////////////////////////////////////////////////////////////
+//// Lexing */
+/* TRUE when c is an ASCII decimal digit. */
+static boolean isNum (char c)
+{
+    return (boolean) ('0' <= c && c <= '9');
+}
+/* TRUE when c is an ASCII lower-case letter. */
+static boolean isLowerAlpha (char c)
+{
+    return (boolean) ('a' <= c && c <= 'z');
+}
+
+/* TRUE when c is an ASCII upper-case letter. */
+static boolean isUpperAlpha (char c)
+{
+    return (boolean) ('A' <= c && c <= 'Z');
+}
+
+/* TRUE when c is an ASCII letter of either case. */
+static boolean isAlpha (char c)
+{
+    if (isLowerAlpha (c))
+        return TRUE;
+    return isUpperAlpha (c);
+}
+
+/* TRUE when c may appear inside an identifier: a digit, a letter, an
+ * underscore or a prime. */
+static boolean isIdent (char c)
+{
+    if (c == '_' || c == '\'')
+        return TRUE;
+    return (boolean) (isNum (c) || isAlpha (c));
+}
+
+/* TRUE when c is a blank, tab, carriage return or newline. */
+static boolean isSpace (char c)
+{
+    switch (c)
+    {
+    case ' ':
+    case '\t':
+    case '\r':
+    case '\n':
+        return TRUE;
+    default:
+        return FALSE;
+    }
+}
+
+/* Advance the read position past any run of white space. */
+static void eatWhiteSpace (lexingState * st)
+{
+    while (isSpace (*st->cp))
+        st->cp++;
+}
+
+/* Skip a string literal. On entry st->cp points at the opening quote; on
+ * return it points just past the closing quote, or at the end of the current
+ * line if the literal is not closed on it (tolerated rather than reported).
+ *
+ * A backslash escapes exactly the one character that follows it, so the
+ * escape state is cleared after that character. This makes a literal ending
+ * in an escaped backslash ("\\") terminate at its closing quote instead of
+ * running on to the end of the line. */
+static void eatString (lexingState * st)
+{
+    boolean escaped = FALSE;
+    const unsigned char *c = st->cp + 1;    /* step over the opening quote */
+
+    while (*c != '\0')
+    {
+        if (escaped)
+            escaped = FALSE;                /* escaped char: take it as is */
+        else if (*c == '\\')
+            escaped = TRUE;
+        else if (*c == '"')
+        {
+            c++;                            /* consume the closing quote */
+            break;
+        }
+        c++;
+    }
+
+    st->cp = c;
+}
+
+/* Skip a comment, reading further lines from the input as needed. On entry
+ * st->cp points at the opening "(*"; on return it points just past the
+ * matching "*)", or is NULL if the input ended inside the comment (the next
+ * lexing read then reports the end of file).
+ *
+ * OCaml comments nest, which is handled by recursion. After a nested comment
+ * scanning resumes exactly where the nested call stopped: stepping one more
+ * character there would miss a "*)" that follows immediately, or walk past
+ * the end of the line when the nested comment closes it. */
+static void eatComment (lexingState * st)
+{
+    boolean lastIsStar = FALSE;
+    const unsigned char *c = st->cp + 2;    /* step over the opening "(*" */
+
+    for (;;)
+    {
+        if (*c == '\0')
+        {
+            /* end of the current line: load the next one */
+            st->cp = fileReadLine ();
+            if (st->cp == NULL)
+                return;                     /* no more input */
+            c = st->cp;
+            continue;
+        }
+
+        if (*c == ')' && lastIsStar)
+        {
+            st->cp = c + 1;                 /* end of this comment */
+            return;
+        }
+
+        if (c[0] == '(' && c[1] == '*')
+        {
+            /* nested comment */
+            st->cp = c;
+            eatComment (st);
+            if (st->cp == NULL)
+                return;                     /* input ended inside it */
+            c = st->cp;
+            lastIsStar = FALSE;
+            continue;
+        }
+
+        lastIsStar = (boolean) (*c == '*');
+        c++;
+    }
+}
+
+/* Copy the identifier starting at st->cp into st->name and move st->cp past
+ * it. The first character is stored only if it is a letter or an underscore,
+ * but it is consumed either way; the following characters are taken as long
+ * as isIdent() accepts them. */
+static void readIdentifier (lexingState * st)
+{
+    const unsigned char *cursor = st->cp;
+
+    vStringClear (st->name);
+
+    if (isAlpha (*cursor) || *cursor == '_')
+        vStringPut (st->name, (int) *cursor);
+    ++cursor;
+
+    while (isIdent (*cursor))
+    {
+        vStringPut (st->name, (int) *cursor);
+        ++cursor;
+    }
+
+    st->cp = cursor;
+    vStringTerminate (st->name);
+}
+
+/* Skip a run of decimal digits and report it as a value token. */
+static ocamlKeyword eatNumber (lexingState * st)
+{
+    const unsigned char *digit = st->cp;
+
+    while (isNum (*digit))
+        ++digit;
+
+    st->cp = digit;
+    return Tok_Val;
+}
+
+/* Operators can be defined as functions in OCaml, so a whole run of operator
+ * characters is read as one token and its text is kept in st->name.
+ * A lone '|' or '=' and the two-character "->" get their own token kinds;
+ * everything else is a generic operator. */
+static ocamlKeyword eatOperator (lexingState * st)
+{
+    const unsigned char *const first = st->cp;
+    const unsigned char *p = first;
+    int length;
+
+    vStringClear (st->name);
+    while (isOperator[*p])
+    {
+        vStringPut (st->name, *p);
+        ++p;
+    }
+    vStringTerminate (st->name);
+
+    st->cp = p;
+    length = (int) (p - first);
+
+    if (length == 2 && first[0] == '-' && first[1] == '>')
+        return Tok_To;
+    if (length > 1)
+        return Tok_Op;
+
+    if (first[0] == '|')
+        return Tok_Pipe;
+    if (first[0] == '=')
+        return Tok_EQ;
+    return Tok_Op;
+}
+
+/* The lexer is in charge of reading the file.
+ * Some of the sub-lexers (like eatComment) also read the file.
+ * Lexing is finished when the lexer returns Tok_EOF.
+ *
+ * Returns the kind of the token found at st->cp and moves st->cp past it.
+ * For identifiers, keywords and operators the token text is left in
+ * st->name. White space and comments are skipped by lexing again. */
+static ocamlKeyword lex (lexingState * st)
+{
+ int retType;
+ /* handling data input here */
+ while (st->cp == NULL || st->cp[0] == '\0')
+ {
+ st->cp = fileReadLine ();
+ if (st->cp == NULL)
+ return Tok_EOF;
+ }
+
+ /* A word: either a keyword registered for this language or a plain
+ * identifier. */
+ if (isAlpha (*st->cp))
+ {
+ readIdentifier (st);
+ retType = lookupKeyword (vStringValue (st->name), Lang_Ocaml);
+
+ if (retType == -1) /* If it's not a keyword */
+ {
+ return OcaIDENTIFIER;
+ }
+ else
+ {
+ return retType;
+ }
+ }
+ else if (isNum (*st->cp))
+ return eatNumber (st);
+ else if (isSpace (*st->cp))
+ {
+ eatWhiteSpace (st);
+ return lex (st);
+ }
+ /* OCaml permits the definition of our own operators,
+ * so here we consume all the consecutive chars which
+ * are operator chars as a single token. */
+ else if (isOperator[*st->cp])
+ return eatOperator (st);
+ else
+ switch (*st->cp)
+ {
+ case '(':
+ if (st->cp[1] == '*') /* ergl, a comment */
+ {
+ eatComment (st);
+ return lex (st);
+ }
+ else
+ {
+ st->cp++;
+ return Tok_PARL;
+ }
+
+ case ')':
+ st->cp++;
+ return Tok_PARR;
+ case '[':
+ st->cp++;
+ return Tok_BRL;
+ case ']':
+ st->cp++;
+ return Tok_BRR;
+ case '{':
+ st->cp++;
+ return Tok_CurlL;
+ case '}':
+ st->cp++;
+ return Tok_CurlR;
+ case '\'':
+ st->cp++;
+ return Tok_Prime;
+ case ',':
+ st->cp++;
+ return Tok_comma;
+ /* NOTE(review): '=' is flagged in the operator table, so it is handled by
+ * the isOperator branch above and this case looks unreachable. */
+ case '=':
+ st->cp++;
+ return Tok_EQ;
+ case ';':
+ st->cp++;
+ return Tok_semi;
+ case '"':
+ eatString (st);
+ return Tok_Val;
+ /* isAlpha() does not accept '_', so a leading underscore arrives here and
+ * is consumed on its own as a value token. */
+ case '_':
+ st->cp++;
+ return Tok_Val;
+ case '#':
+ st->cp++;
+ return Tok_Sharp;
+ case '\\':
+ st->cp++;
+ return Tok_Backslash;
+
+ default:
+ st->cp++;
+ break;
+ }
+
+ /* default return if nothing is recognized,
+ * shouldn't happen, but at least, it will
+ * be handled without destroying the parsing. */
+ return Tok_Val;
+}
+
+/*//////////////////////////////////////////////////////////////////////
+//// Parsing */
+typedef void (*parseNext) (vString * const ident, ocaToken what);
+
+/********** Helpers */
+/* This variable hold the 'parser' which is going to
+ * handle the next token */
+parseNext toDoNext;
+
+/* Special variable used by parser eater to
+ * determine which action to put after their
+ * job is finished. */
+parseNext comeAfter;
+
+/* Token that puts an end to the current declaration/
+ * statement */
+ocaToken terminatingToken;
+
+/* Token to be searched by the different
+ * parser eater. */
+ocaToken waitedToken;
+
+/* name of the last class, used for
+ * context stacking. */
+vString *lastClass;
+
+vString *voidName;
+
+typedef enum _sContextKind {
+ ContextStrong,
+ ContextSoft
+} contextKind;
+
+typedef enum _sContextType {
+ ContextType,
+ ContextModule,
+ ContextClass,
+ ContextValue,
+ ContextFunction,
+ ContextMethod,
+ ContextBlock
+} contextType;
+
+typedef struct _sOcamlContext {
+ contextKind kind; /* well if the context is strong or not */
+ contextType type;
+ parseNext callback; /* what to do when a context is pop'd */
+ vString *contextName; /* name, if any, of the surrounding context */
+} ocamlContext;
+
+/* context stack, can be used to output scope information
+ * into the tag file. */
+ocamlContext stack[OCAML_MAX_STACK_SIZE];
+/* current position in the tag */
+int stackIndex;
+
+/* special function, often recalled, so putting it here */
+static void globalScope (vString * const ident, ocaToken what);
+
+/* Search the context stack from the top downwards for a context with a
+ * non-empty name.
+ * Returns its index, or -1 when no context on the stack is named. */
+static int getLastNamedIndex ( void )
+{
+    int idx = stackIndex;
+
+    while (--idx >= 0)
+    {
+        const char *const label = stack[idx].contextName->buffer;
+
+        if (label && label[0] != '\0')
+            return idx;
+    }
+
+    return -1;
+}
+
+/* Human readable label for a context type; NULL for a value outside the
+ * enumeration. */
+static const char *contextDescription (contextType t)
+{
+    const char *label = NULL;
+
+    switch (t)
+    {
+    case ContextType:     label = "type";      break;
+    case ContextModule:   label = "Module";    break;
+    case ContextClass:    label = "class";     break;
+    case ContextValue:    label = "value";     break;
+    case ContextFunction: label = "function";  break;
+    case ContextMethod:   label = "method";    break;
+    case ContextBlock:    label = "begin/end"; break;
+    }
+
+    return label;
+}
+
+/* Character placed between a parent context name and the name nested in it
+ * when a qualified context name is built; '$' for a value outside the
+ * enumeration. */
+static char contextTypeSuffix (contextType t)
+{
+    char separator = '$';
+
+    switch (t)
+    {
+    case ContextType:
+        separator = '.';
+        break;
+    case ContextClass:
+        separator = '#';
+        break;
+    case ContextBlock:
+        separator = ' ';
+        break;
+    case ContextModule:
+    case ContextValue:
+    case ContextFunction:
+    case ContextMethod:
+        separator = '/';
+        break;
+    }
+
+    return separator;
+}
+
+/* Push a new context on the stack.
+ *   kind, type   classification stored with the context
+ *   after        parser to resume with when the context is popped
+ *   contextName  name of the context, or NULL for an anonymous one
+ * A named context is stored qualified by the closest named context below it
+ * (parent name, the parent's type suffix, then contextName). When the stack
+ * is already full the request is only reported through verbose() and
+ * nothing is pushed. */
+static void pushContext (contextKind kind, contextType type, parseNext after,
+                         vString const *contextName)
+{
+    ocamlContext *slot;
+    int parentIndex;
+
+    if (stackIndex >= OCAML_MAX_STACK_SIZE)
+    {
+        verbose ("OCaml Maximum depth reached");
+        return;
+    }
+
+    slot = &stack[stackIndex];
+    slot->kind = kind;
+    slot->type = type;
+    slot->callback = after;
+
+    if (contextName == NULL)
+        vStringClear (slot->contextName);
+    else
+    {
+        parentIndex = getLastNamedIndex ();
+        if (parentIndex < 0)
+            vStringCopy (slot->contextName, contextName);
+        else
+        {
+            vStringCopy (slot->contextName, stack[parentIndex].contextName);
+            vStringPut (slot->contextName,
+                        contextTypeSuffix (stack[parentIndex].type));
+            vStringCat (slot->contextName, contextName);
+        }
+    }
+
+    stackIndex++;
+}
+
+/* Push a strong context called `name`; popping it resumes parsing in
+ * globalScope. */
+static void pushStrongContext (vString * name, contextType type)
+{
+    const parseNext resumeAt = globalScope;
+
+    pushContext (ContextStrong, type, resumeAt, name);
+}
+
+/* Push a soft context called `name`; `continuation` is the parser to resume
+ * with once the context is popped. */
+static void pushSoftContext (parseNext continuation,
+ vString * name, contextType type)
+{
+ pushContext (ContextSoft, type, continuation, name);
+}
+
+/* Push an anonymous soft context of type ContextValue; `continuation` is the
+ * parser to resume with once the context is popped. */
+static void pushEmptyContext (parseNext continuation)
+{
+ pushContext (ContextSoft, ContextValue, continuation, NULL);
+}
+
+/* Unwind the stack down to the last named context and discard that context
+ * as well. Used to handle
+ *     let f x y = ...
+ *     in ...
+ * where the context is reset after the `in`, however deeply contexts were
+ * nested before it. Parsing resumes with the callback of the discarded
+ * context, or in globalScope when no named context exists. */
+static void popLastNamed ( void )
+{
+    const int named = getLastNamedIndex ();
+
+    if (named < 0)
+    {
+        /* no named context found (should not happen) */
+        stackIndex = 0;
+        toDoNext = &globalScope;
+        return;
+    }
+
+    stackIndex = named;
+    toDoNext = stack[named].callback;
+    vStringClear (stack[named].contextName);
+}
+
+/* Pop the topmost context whatever it is and resume with its callback.
+ * With an empty stack parsing simply resumes in globalScope. */
+static void popSoftContext ( void )
+{
+    if (stackIndex > 0)
+    {
+        ocamlContext *const top = &stack[--stackIndex];
+
+        toDoNext = top->callback;
+        vStringClear (top->contextName);
+        return;
+    }
+
+    toDoNext = &globalScope;
+}
+
+/* Unwind the stack down to (and including) the closest strong context.
+ * A strong context can be:
+ *  - a module
+ *  - a class definition
+ *  - the initial global space
+ *  - a _global_ declaration (let at global scope or in a module).
+ * This is the quick way out of deeply nested contexts.
+ * Returns the type of the strong context that was removed, or -1 when the
+ * stack held none (the stack is then emptied and parsing goes back to
+ * globalScope). */
+static contextType popStrongContext ( void )
+{
+	int depth = stackIndex;
+
+	/* visits stackIndex - 1 down to 0 */
+	while (depth-- > 0)
+	{
+		if (stack[depth].kind != ContextStrong)
+			continue;
+
+		stackIndex = depth;
+		toDoNext = stack[depth].callback;
+		vStringClear (stack[depth].contextName);
+		return stack[depth].type;
+	}
+
+	/* ok, no strong context found... */
+	stackIndex = 0;
+	toDoNext = &globalScope;
+	return -1;
+}
+
+/* Skip tokens until waitedToken shows up, then continue with comeAfter.
+ * An 'end' keyword met while waiting pops the current strong context and
+ * sends the parser back to globalScope. */
+static void tillToken (vString * const UNUSED (ident), ocaToken what)
+{
+	if (what == waitedToken)
+	{
+		toDoNext = comeAfter;
+		return;
+	}
+
+	if (what != OcaKEYWORD_end)
+		return;
+
+	popStrongContext ();
+	toDoNext = &globalScope;
+}
+
+/* Skip tokens until waitedToken is seen outside of any (), [] or {} pair,
+ * then continue with comeAfter.
+ * The three counters go negative on an opening token and back up on the
+ * matching closing one, so "all zero" means "balanced". They are static
+ * because the function is called once per token.
+ * An 'end' keyword aborts the wait: the current strong context is popped,
+ * parsing resumes in globalScope and the counters are reset, so that an
+ * unbalanced construct such as "module M = (struct ... end)" cannot leave a
+ * stale depth behind for the next wait. */
+static void contextualTillToken (vString * const UNUSED (ident), ocaToken what)
+{
+	static int parentheses = 0;
+	static int bracket = 0;
+	static int curly = 0;
+
+	switch (what)
+	{
+	case Tok_PARL:
+		parentheses--;
+		break;
+	case Tok_PARR:
+		parentheses++;
+		break;
+	case Tok_CurlL:
+		curly--;
+		break;
+	case Tok_CurlR:
+		curly++;
+		break;
+	case Tok_BRL:
+		bracket--;
+		break;
+	case Tok_BRR:
+		bracket++;
+		break;
+
+	default:	/* other tokens do not affect nesting */
+		break;
+	}
+
+	if (what == waitedToken && parentheses == 0 && bracket == 0 && curly == 0)
+		toDoNext = comeAfter;
+
+	else if (what == OcaKEYWORD_end)
+	{
+		/* this state is being left early: forget the nesting depth */
+		parentheses = 0;
+		bracket = 0;
+		curly = 0;
+
+		popStrongContext ();
+		toDoNext = &globalScope;
+	}
+}
+
+/* Continue with comeAfter once waitedToken is seen; every other token is
+ * handed to globalScope so that new declarations are still noticed. */
+static void tillTokenOrFallback (vString * const ident, ocaToken what)
+{
+	if (what != waitedToken)
+	{
+		globalScope (ident, what);
+		return;
+	}
+
+	toDoNext = comeAfter;
+}
+
+/* Continue with comeAfter once waitedToken is seen, give up and return to
+ * globalScope on terminatingToken, and let globalScope examine any other
+ * token so that new declarations are still noticed. */
+static void tillTokenOrTerminatingOrFallback (vString * const ident,
+		ocaToken what)
+{
+	if (what == waitedToken)
+	{
+		toDoNext = comeAfter;
+		return;
+	}
+
+	if (what == terminatingToken)
+	{
+		toDoNext = globalScope;
+		return;
+	}
+
+	globalScope (ident, what);
+}
+
+/* Swallow exactly one token, whatever it is, then continue with the
+ * comeAfter state. */
+static void ignoreToken (vString * const UNUSED (ident), ocaToken UNUSED (what))
+{
+	parseNext const resumeWith = comeAfter;
+
+	toDoNext = resumeWith;
+}
+
+/********** Grammar */
+/* the purpose of each function is detailed next to its
+ * implementation */
+
+/* Pop the closest strong context. When the context removed was only a
+ * value, function, method or type declaration (i.e. not a block, module or
+ * class), pop the next strong context as well. */
+static void killCurrentState ( void )
+{
+	switch (popStrongContext ())
+	{
+	case ContextValue:
+	case ContextFunction:
+	case ContextMethod:
+	case ContextType:
+		/* not a really strong entity: pop again */
+		popStrongContext ();
+		break;
+
+	default:
+		/* ContextBlock, ContextModule, ContextClass or no strong
+		 * context at all: nothing more to do */
+		break;
+	}
+}
+
+/* Fill in tag for an OCaml entity called name of the given kind.
+ * When getLastNamedIndex () finds a context, its description and name are
+ * recorded as the scope of the tag. The tag is not emitted here. */
+static void prepareTag (tagEntryInfo * tag, vString const *name, ocamlKind kind)
+{
+	int scopeIndex;
+
+	initTagEntry (tag, vStringValue (name));
+	tag->kind = OcamlKinds[kind].letter;
+	tag->kindName = OcamlKinds[kind].name;
+
+	scopeIndex = getLastNamedIndex ();
+	if (scopeIndex < 0)
+		return;
+
+	tag->extensionFields.scope[0] =
+		contextDescription (stack[scopeIndex].type);
+	tag->extensionFields.scope[1] =
+		vStringValue (stack[scopeIndex].contextName);
+}
+
+/* Single entry point for emitting a tag: prepare an entry for ident with
+ * the given kind and write it out. Keeping this in one place makes it easy
+ * to attach more information to tags later. */
+static void addTag (vString * const ident, int kind)
+{
+	tagEntryInfo entry;
+
+	prepareTag (&entry, ident, kind);
+	makeTagEntry (&entry);
+}
+
+/* Set when a strong context has been pushed that must be popped before the
+ * next top-level declaration is parsed; consumed by
+ * cleanupPreviousParser (). */
+boolean needStrongPoping = FALSE;
+/* Ask for the closest strong context to be popped the next time
+ * cleanupPreviousParser () runs. */
+static void requestStrongPoping ( void )
+{
+ needStrongPoping = TRUE;
+}
+
+/* Honour a pending requestStrongPoping (): clear the request and pop the
+ * closest strong context. Does nothing when no request is pending. */
+static void cleanupPreviousParser ( void )
+{
+	if (!needStrongPoping)
+		return;
+
+	needStrongPoping = FALSE;
+	popStrongContext ();
+}
+
+/* Due to some circular dependencies, the following functions
+ * must be forward-declared. */
+static void letParam (vString * const ident, ocaToken what);
+static void localScope (vString * const ident, ocaToken what);
+static void mayRedeclare (vString * const ident, ocaToken what);
+static void typeSpecification (vString * const ident, ocaToken what);
+
+/*
+ * Parse the body of a record type:
+ * type ident = // parsed previously
+ * {
+ *     ident1: type1;
+ *     ident2: type2;
+ * }
+ * Each field name is tagged; the closing brace pops the strong context and
+ * returns to globalScope.
+ */
+static void typeRecord (vString * const ident, ocaToken what)
+{
+	if (what == OcaIDENTIFIER)
+	{
+		addTag (ident, K_RECORDFIELD);
+
+		/* skip the field type: resume here after the ';', or let
+		 * the fallback give up on the closing '}' */
+		toDoNext = &tillTokenOrTerminatingOrFallback;
+		comeAfter = &typeRecord;
+		waitedToken = Tok_semi;
+		terminatingToken = Tok_CurlR;
+	}
+	else if (what == Tok_CurlR)
+	{
+		popStrongContext ();
+		toDoNext = &globalScope;
+	}
+	/* 'mutable' and every other token are simply skipped */
+}
+
+/* Handle the token following the 'exception' keyword:
+ *     exception ExceptionName ...
+ * An identifier is tagged as an exception; anything else is ignored.
+ * Parsing goes back to globalScope in both cases. */
+static void exceptionDecl (vString * const ident, ocaToken what)
+{
+	toDoNext = &globalScope;
+
+	if (what != OcaIDENTIFIER)
+		return;		/* don't know what else to do here... */
+
+	addTag (ident, K_EXCEPTION);
+}
+
+/* Candidate constructor tag prepared by typeSpecification () and emitted
+ * (or dropped) by constructorValidation (); tempIdent owns the name the
+ * tag was prepared from. */
+tagEntryInfo tempTag;
+vString *tempIdent;
+
+/* Ensure a constructor is not a type path beginning
+ * with a module.
+ * Entered from typeSpecification () right after a capitalised identifier
+ * was stored in tempIdent/tempTag; 'what' is the token following it and
+ * decides whether the pending tag is emitted. */
+static void constructorValidation (vString * const ident, ocaToken what)
+{
+ switch (what)
+ {
+ case Tok_Op: /* if we got a '.' which is an operator */
+ /* the pending tag is dropped, the type context is popped here and
+ * the pending strong-pop request is cancelled */
+ toDoNext = &globalScope;
+ popStrongContext ();
+ needStrongPoping = FALSE;
+ break;
+
+ case OcaKEYWORD_of: /* OK, it must be a constructor :) */
+ makeTagEntry (&tempTag);
+ vStringClear (tempIdent);
+ /* skip the constructor arguments up to the next '|' */
+ toDoNext = &tillTokenOrFallback;
+ comeAfter = &typeSpecification;
+ waitedToken = Tok_Pipe;
+ break;
+
+ case Tok_Pipe: /* OK, it was a constructor :) */
+ makeTagEntry (&tempTag);
+ vStringClear (tempIdent);
+ toDoNext = &typeSpecification;
+ break;
+
+ default: /* and mean that we're not facing a module name */
+ makeTagEntry (&tempTag);
+ vStringClear (tempIdent);
+ toDoNext = &tillTokenOrFallback;
+ comeAfter = &typeSpecification;
+ waitedToken = Tok_Pipe;
+
+ /* nothing in the context, discard it */
+ popStrongContext ();
+
+ /* to be sure we use this token */
+ globalScope (ident, what);
+ }
+}
+
+
+/* Parse the head of a type definition:
+ *     type 'avar ident =
+ * or
+ *     type ('var1, 'var2) ident =
+ * The type name is tagged and becomes a strong context; the definition
+ * itself is left to typeSpecification once the '=' has been seen. */
+static void typeDecl (vString * const ident, ocaToken what)
+{
+	if (what == Tok_Prime)
+	{
+		/* single type parameter: drop the name following the quote */
+		toDoNext = &ignoreToken;
+		comeAfter = &typeDecl;
+	}
+	else if (what == Tok_PARL)
+	{
+		/* parameter list: skip up to the closing parenthesis */
+		toDoNext = &tillToken;
+		waitedToken = Tok_PARR;
+		comeAfter = &typeDecl;
+	}
+	else if (what == OcaIDENTIFIER)
+	{
+		addTag (ident, K_TYPE);
+		pushStrongContext (ident, ContextType);
+		requestStrongPoping ();
+
+		toDoNext = &tillTokenOrFallback;
+		waitedToken = Tok_EQ;
+		comeAfter = &typeSpecification;
+	}
+	else
+		globalScope (ident, what);
+}
+
+/* Parse the right-hand side of a type definition such as
+ *     type bidule = Ctor1 of ...
+ *                 | Ctor2
+ *                 | Ctor3 of ...
+ * or
+ *     type bidule = | Ctor1 of ... | Ctor2
+ *
+ * A '{' means a record type, which typeRecord takes over. */
+static void typeSpecification (vString * const ident, ocaToken what)
+{
+	if (what == OcaKEYWORD_and)
+	{
+		toDoNext = &typeDecl;
+		return;
+	}
+
+	if (what == Tok_CurlL)
+	{
+		toDoNext = &typeRecord;
+		return;
+	}
+
+	/* '[', ']' (revised syntax), '|' and anything else that is not an
+	 * identifier are ignored */
+	if (what != OcaIDENTIFIER)
+		return;
+
+	if (!isUpperAlpha (ident->buffer[0]))
+	{
+		/* not a constructor: skip up to the next alternative */
+		toDoNext = &tillTokenOrFallback;
+		comeAfter = &typeSpecification;
+		waitedToken = Tok_Pipe;
+		return;
+	}
+
+	/* In an alias such as
+	 *     type foo = AnotherModule.bar
+	 * AnotherModule could be mistaken for a constructor, so the tag is
+	 * only prepared here; constructorValidation decides, from the next
+	 * token, whether to emit it. */
+	vStringCopy (tempIdent, ident);
+	prepareTag (&tempTag, tempIdent, K_CONSTRUCTOR);
+	toDoNext = &constructorValidation;
+}
+
+
+/* Shared by parseLabel () and parseOptionnal (): cleared by letParam ()
+ * when a '~' or '?' parameter starts, set once the first identifier of
+ * that parameter has been seen. */
+static boolean dirtySpecialParam = FALSE;
+
+
+/* parse the ~label and ~label:type parameter */
+static void parseLabel (vString * const ident, ocaToken what)
+{
+ /* parenthesis depth inside the current labelled parameter */
+ static int parCount = 0;
+
+ switch (what)
+ {
+ case OcaIDENTIFIER:
+ /* only the first identifier is the label name */
+ if (!dirtySpecialParam)
+ {
+
+ if (exportLocalInfo)
+ addTag (ident, K_VAR);
+
+ dirtySpecialParam = TRUE;
+ }
+ break;
+
+ case Tok_PARL:
+ parCount++;
+ break;
+
+ case Tok_PARR:
+ parCount--;
+ if (parCount == 0)
+ toDoNext = &letParam;
+ break;
+
+ case Tok_Op:
+ if (ident->buffer[0] == ':')
+ {
+ /* ~label:type -- skip the token after the ':' */
+ toDoNext = &ignoreToken;
+ comeAfter = &letParam;
+ }
+ else if (parCount == 0 && dirtySpecialParam)
+ {
+ /* the label is over: let letParam handle this token */
+ toDoNext = &letParam;
+ letParam (ident, what);
+ }
+ break;
+
+ default:
+ if (parCount == 0 && dirtySpecialParam)
+ {
+ /* the label is over: let letParam handle this token */
+ toDoNext = &letParam;
+ letParam (ident, what);
+ }
+ break;
+ }
+}
+
+
+/* Parse an optional argument, written either
+ *     ?(foo = value)
+ * or
+ *     ?foo
+ * Only the first identifier is tagged (when local information is
+ * exported); control goes back to letParam once the parameter is over. */
+static void parseOptionnal (vString * const ident, ocaToken what)
+{
+	/* parenthesis depth inside the current optional parameter */
+	static int parCount = 0;
+
+	if (what == Tok_PARL)
+	{
+		parCount++;
+	}
+	else if (what == Tok_PARR)
+	{
+		parCount--;
+		if (parCount == 0)
+			toDoNext = &letParam;
+	}
+	else if (what == OcaIDENTIFIER && !dirtySpecialParam)
+	{
+		if (exportLocalInfo)
+			addTag (ident, K_VAR);
+
+		dirtySpecialParam = TRUE;
+
+		/* bare ?foo form: nothing else to wait for */
+		if (parCount == 0)
+			toDoNext = &letParam;
+	}
+	/* every other token is ignored */
+}
+
+
+/* Handle a 'let' found inside a function body (a local let, as the name
+ * says). Tags are only emitted when exportLocalInfo is set, but the bound
+ * name always becomes a soft context continued by mayRedeclare. */
+static void localLet (vString * const ident, ocaToken what)
+{
+	switch (what)
+	{
+	case Tok_PARL:		/* let (ident : type) = ... */
+	case OcaKEYWORD_rec:	/* let rec ident = ... */
+		/* skipped so that the identifier behind them is reached */
+		break;
+
+	case Tok_Op:
+		/* defining a new operator is defining a function */
+		if (exportLocalInfo)
+			addTag (ident, K_FUNCTION);
+
+		pushSoftContext (mayRedeclare, ident, ContextFunction);
+		toDoNext = &letParam;
+		break;
+
+	case Tok_Val:		/* can be a weird binding, or an '_' */
+	case OcaIDENTIFIER:
+		if (exportLocalInfo)
+			addTag (ident, K_VAR);
+
+		pushSoftContext (mayRedeclare, ident, ContextValue);
+		toDoNext = &letParam;
+		break;
+
+	case OcaKEYWORD_end:
+		popStrongContext ();
+		break;
+
+	default:
+		toDoNext = &localScope;
+		break;
+	}
+}
+
+/* Parse
+ *     | pattern pattern -> ...
+ * or
+ *     pattern pattern pattern -> ...
+ * Identifiers bound by the pattern are ignored because their scope is
+ * likely to be even narrower than that of let definitions.
+ * Used after "match ... with", "function ..." and "fun ...", which all
+ * share this syntax. */
+static void matchPattern (vString * const UNUSED (ident), ocaToken what)
+{
+	if (what == Tok_To)
+	{
+		/* the arm body starts: come back here when it is popped */
+		pushEmptyContext (&matchPattern);
+		toDoNext = &mayRedeclare;
+	}
+	else if (what == OcaKEYWORD_in)
+	{
+		popLastNamed ();
+	}
+	/* anything else belongs to the pattern and is skipped */
+}
+
+/* Used at the beginning of a new scope (begin of a
+ * definition, parenthesis...) to catch inner let
+ * definition that may be in.
+ * Tokens that do not open anything special are forwarded to localScope. */
+static void mayRedeclare (vString * const ident, ocaToken what)
+{
+ switch (what)
+ {
+ case OcaKEYWORD_let:
+ case OcaKEYWORD_val:
+ toDoNext = localLet;
+ break;
+
+ case OcaKEYWORD_object:
+ /* anonymous object: unnamed strong class context, whose body is
+ * parsed by globalScope */
+ vStringClear (lastClass);
+ pushContext (ContextStrong, ContextClass,
+ &localScope, NULL /*voidName */ );
+ needStrongPoping = FALSE;
+ toDoNext = &globalScope;
+ break;
+
+ case OcaKEYWORD_for:
+ case OcaKEYWORD_while:
+ /* skip the loop header up to 'do' */
+ toDoNext = &tillToken;
+ waitedToken = OcaKEYWORD_do;
+ comeAfter = &mayRedeclare;
+ break;
+
+ case OcaKEYWORD_try:
+ toDoNext = &mayRedeclare;
+ pushSoftContext (matchPattern, ident, ContextFunction);
+ break;
+
+ case OcaKEYWORD_fun:
+ toDoNext = &matchPattern;
+ break;
+
+ /* Handle the special ;; from the OCaml
+ * Top level */
+ case Tok_semi:
+ default:
+ /* switch state first, then replay the token in localScope */
+ toDoNext = &localScope;
+ localScope (ident, what);
+ }
+}
+
+/* parse :
+ * p1 p2 ... pn = ...
+ * or
+ * ?(p1=v) p2 ~p3 ~pn:ja ... = ...
+ * i.e. the parameters following the name bound by a let or method, up to
+ * the '=' that starts the body. */
+static void letParam (vString * const ident, ocaToken what)
+{
+ switch (what)
+ {
+ case Tok_EQ:
+ /* end of the parameters, the body begins */
+ toDoNext = &mayRedeclare;
+ break;
+
+ case OcaIDENTIFIER:
+ /* plain parameter: tagged only when local info is exported */
+ if (exportLocalInfo)
+ addTag (ident, K_VAR);
+ break;
+
+ case Tok_Op:
+ /* the first character of the operator tells what follows */
+ switch (ident->buffer[0])
+ {
+ case ':':
+ /*popSoftContext(); */
+ /* we got a type signature */
+ comeAfter = &mayRedeclare;
+ toDoNext = &tillTokenOrFallback;
+ waitedToken = Tok_EQ;
+ break;
+
+ /* parse something like
+ * ~varname:type
+ * or
+ * ~varname
+ * or
+ * ~(varname: long type) */
+ case '~':
+ toDoNext = &parseLabel;
+ dirtySpecialParam = FALSE;
+ break;
+
+ /* Optional argument with syntax like this :
+ * ?(bla = value)
+ * or
+ * ?bla */
+ case '?':
+ toDoNext = &parseOptionnal;
+ dirtySpecialParam = FALSE;
+ break;
+
+ default:
+ break;
+ }
+ break;
+
+ default: /* don't care */
+ break;
+ }
+}
+
+
+/* Look at the token following "class name =".
+ * 'object' confirms a real class definition, so the class recorded in
+ * lastClass becomes a strong context; anything else (e.g. an alias) makes
+ * the recorded class name be forgotten. Parsing continues in globalScope
+ * either way. */
+static void classSpecif (vString * const UNUSED (ident), ocaToken what)
+{
+	if (what == OcaKEYWORD_object)
+		pushStrongContext (lastClass, ContextClass);
+	else
+		vStringClear (lastClass);
+
+	toDoNext = &globalScope;
+}
+
+/* Handle what follows the 'method' keyword inside a class.
+ * Nearly the same as globalLet: the method name is tagged and becomes a
+ * strong context, then its parameters are parsed by letParam. */
+static void methodDecl (vString * const ident, ocaToken what)
+{
+	switch (what)
+	{
+	case Tok_PARL:
+	case OcaKEYWORD_mutable:
+	case OcaKEYWORD_virtual:
+	case OcaKEYWORD_rec:
+		/* skipped so that the name behind them is reached */
+		break;
+
+	case OcaIDENTIFIER:
+		addTag (ident, K_METHOD);
+		/* a strong context, so that inner tags get a proper scope */
+		pushStrongContext (ident, ContextMethod);
+		toDoNext = &letParam;
+		break;
+
+	case OcaKEYWORD_end:
+		popStrongContext ();
+		break;
+
+	default:
+		toDoNext = &globalScope;
+		break;
+	}
+}
+
+/* name of the last module, used for
+ * context stacking: set by moduleDecl (), pushed as a strong context (or
+ * cleared) by moduleSpecif (). */
+vString *lastModule;
+
+
+/* parse
+ * ... struct (* new global scope *) end
+ * or
+ * ... sig (* new global scope *) end
+ * or
+ * functor ... -> moduleSpecif
+ *
+ * Entered on the token following the '=' of a module declaration. */
+static void moduleSpecif (vString * const ident, ocaToken what)
+{
+ switch (what)
+ {
+ case OcaKEYWORD_functor:
+ /* skip the functor arguments up to '->' and look again */
+ toDoNext = &contextualTillToken;
+ waitedToken = Tok_To;
+ comeAfter = &moduleSpecif;
+ break;
+
+ case OcaKEYWORD_struct:
+ case OcaKEYWORD_sig:
+ pushStrongContext (lastModule, ContextModule);
+ toDoNext = &globalScope;
+ break;
+
+ case Tok_PARL: /* ( */
+ toDoNext = &contextualTillToken;
+ comeAfter = &globalScope;
+ waitedToken = Tok_PARR;
+ /* replay the '(' so that it is counted in the nesting depth */
+ contextualTillToken (ident, what);
+ break;
+
+ default:
+ vStringClear (lastModule);
+ toDoNext = &globalScope;
+ }
+}
+
+/* Parse
+ *     module name = ...
+ * The module name is tagged and remembered in lastModule; everything up to
+ * the '=' is skipped and the rest is handed to moduleSpecif. */
+static void moduleDecl (vString * const ident, ocaToken what)
+{
+	/* 'type' (as in "module type") and any other non-identifier token are
+	 * skipped: the name comes later */
+	if (what != OcaIDENTIFIER)
+		return;
+
+	addTag (ident, K_MODULE);
+	vStringCopy (lastModule, ident);
+
+	toDoNext = &contextualTillToken;
+	waitedToken = Tok_EQ;
+	comeAfter = &moduleSpecif;
+}
+
+/* Parse
+ *     class name = ...
+ * or
+ *     class virtual ['a,'b] classname = ...
+ * The class name is tagged and remembered in lastClass; classSpecif then
+ * examines what follows the '='. */
+static void classDecl (vString * const ident, ocaToken what)
+{
+	if (what == Tok_BRL)
+	{
+		/* type parameters: skip up to the closing bracket */
+		toDoNext = &tillToken;
+		waitedToken = Tok_BRR;
+		comeAfter = &classDecl;
+	}
+	else if (what == OcaIDENTIFIER)
+	{
+		addTag (ident, K_CLASS);
+		vStringCopy (lastClass, ident);
+
+		toDoNext = &contextualTillToken;
+		waitedToken = Tok_EQ;
+		comeAfter = &classSpecif;
+	}
+	/* every other token (e.g. 'virtual') is skipped */
+}
+
+/* Handle a global
+ *     let ident ...
+ * or
+ *     let rec ident ...
+ * The bound name is tagged and becomes a strong context; its parameters
+ * are then parsed by letParam. */
+static void globalLet (vString * const ident, ocaToken what)
+{
+	switch (what)
+	{
+	case Tok_PARL:		/* let (ident : type) = ... */
+	case OcaKEYWORD_mutable:
+	case OcaKEYWORD_virtual:
+	case OcaKEYWORD_rec:	/* let rec ident = ... */
+		/* skipped so that the identifier behind them is reached */
+		break;
+
+	case Tok_Op:
+		/* defining a new operator is defining a function */
+		addTag (ident, K_FUNCTION);
+		pushStrongContext (ident, ContextFunction);
+		toDoNext = &letParam;
+		break;
+
+	case OcaIDENTIFIER:
+		addTag (ident, K_VAR);
+		pushStrongContext (ident, ContextValue);
+		/* popped again when the next top-level declaration starts */
+		requestStrongPoping ();
+		toDoNext = &letParam;
+		break;
+
+	case OcaKEYWORD_end:
+		popStrongContext ();
+		break;
+
+	default:
+		toDoNext = &globalScope;
+		break;
+	}
+}
+
+/* Handle the "strong" top levels: all 'big' declarations (type, class,
+ * module, method, let/val, exception) are dispatched from here. Tokens that
+ * do not start a declaration are ignored. */
+static void globalScope (vString * const UNUSED (ident), ocaToken what)
+{
+	/* Do not touch, this is used only by the global scope
+	 * to handle an 'and': it remembers which declaration parser the
+	 * 'and' has to restart. */
+	static parseNext previousParser = NULL;
+
+	switch (what)
+	{
+	case OcaKEYWORD_and:
+		cleanupPreviousParser ();
+		/* previousParser is NULL until a declaration keyword has been
+		 * seen, and again after 'exception'. Jumping to it would make
+		 * the caller's token loop call through a NULL pointer, so a
+		 * stray 'and' leaves the current state untouched. */
+		if (previousParser != NULL)
+			toDoNext = previousParser;
+		break;
+
+	case OcaKEYWORD_type:
+		cleanupPreviousParser ();
+		toDoNext = &typeDecl;
+		previousParser = &typeDecl;
+		break;
+
+	case OcaKEYWORD_class:
+		cleanupPreviousParser ();
+		toDoNext = &classDecl;
+		previousParser = &classDecl;
+		break;
+
+	case OcaKEYWORD_module:
+		cleanupPreviousParser ();
+		toDoNext = &moduleDecl;
+		previousParser = &moduleDecl;
+		break;
+
+	case OcaKEYWORD_end:
+		needStrongPoping = FALSE;
+		killCurrentState ();
+		break;
+
+	case OcaKEYWORD_method:
+		cleanupPreviousParser ();
+		toDoNext = &methodDecl;
+		/* 'and' is not allowed in methods: previousParser is left
+		 * as it was */
+		break;
+
+	/* val is mixed with let as global
+	 * to be able to handle mli & new syntax */
+	case OcaKEYWORD_val:
+	case OcaKEYWORD_let:
+		cleanupPreviousParser ();
+		toDoNext = &globalLet;
+		previousParser = &globalLet;
+		break;
+
+	case OcaKEYWORD_exception:
+		cleanupPreviousParser ();
+		toDoNext = &exceptionDecl;
+		previousParser = NULL;
+		break;
+
+	/* must be a #line directive, discard the
+	 * whole line. */
+	case Tok_Sharp:
+		/* ignore */
+		break;
+
+	default:
+		/* we don't care */
+		break;
+	}
+}
+
+/* Parse expression. Well ignore it is more the case,
+ * ignore all tokens except "shocking" keywords.
+ * Opening tokens push an empty soft context and closing tokens pop one;
+ * unknown tokens are forwarded to globalScope. */
+static void localScope (vString * const ident, ocaToken what)
+{
+ switch (what)
+ {
+ case Tok_Pipe:
+ case Tok_PARR:
+ case Tok_BRR:
+ case Tok_CurlR:
+ popSoftContext ();
+ break;
+
+ /* Everything that `begin` has an `end`
+ * as end is overloaded and signal many end
+ * of things, we add an empty strong context to
+ * avoid problem with the end.
+ */
+ case OcaKEYWORD_begin:
+ pushContext (ContextStrong, ContextBlock, &mayRedeclare, NULL);
+ toDoNext = &mayRedeclare;
+ break;
+
+ case OcaKEYWORD_in:
+ popLastNamed ();
+ break;
+
+ /* Ok, we got a '{', which is much likely to create
+ * a record. We cannot treat it like other [ && (,
+ * because it may contain the 'with' keyword and screw
+ * everything else. */
+ case Tok_CurlL:
+ toDoNext = &contextualTillToken;
+ waitedToken = Tok_CurlR;
+ comeAfter = &localScope;
+ /* replay the '{' so that it is counted in the nesting depth */
+ contextualTillToken (ident, what);
+ break;
+
+ /* Yeah imperative feature of OCaml,
+ * a ';' like in C */
+ case Tok_semi:
+ toDoNext = &mayRedeclare;
+ break;
+
+ case Tok_PARL:
+ case Tok_BRL:
+ pushEmptyContext (&localScope);
+ toDoNext = &mayRedeclare;
+ break;
+
+ case OcaKEYWORD_and:
+ /* "let ... and ...": drop the previous binding, parse the next */
+ popLastNamed ();
+ toDoNext = &localLet;
+ break;
+
+ case OcaKEYWORD_else:
+ case OcaKEYWORD_then:
+ /* close the context of the previous branch/condition and open
+ * a new one for the branch that starts */
+ popSoftContext ();
+ pushEmptyContext (&localScope);
+ toDoNext = &mayRedeclare;
+ break;
+
+ case OcaKEYWORD_if:
+ pushEmptyContext (&localScope);
+ toDoNext = &mayRedeclare;
+ break;
+
+ case OcaKEYWORD_match:
+ pushEmptyContext (&localScope);
+ toDoNext = &mayRedeclare;
+ break;
+
+ case OcaKEYWORD_with:
+ /* popSoftContext () overwrites toDoNext, hence the assignment
+ * right after it */
+ popSoftContext ();
+ toDoNext = &matchPattern;
+ pushEmptyContext (&matchPattern);
+ break;
+
+ case OcaKEYWORD_end:
+ killCurrentState ();
+ break;
+
+
+ case OcaKEYWORD_fun:
+ /* skip the parameters of the anonymous function up to '->' */
+ comeAfter = &mayRedeclare;
+ toDoNext = &tillToken;
+ waitedToken = Tok_To;
+ break;
+
+ case OcaKEYWORD_done:
+ case OcaKEYWORD_val:
+ /* doesn't care */
+ break;
+
+ default:
+ /* maybe a top-level keyword: ask for the pending strong context
+ * to be popped, then let globalScope decide */
+ requestStrongPoping ();
+ globalScope (ident, what);
+ break;
+ }
+}
+
+/*////////////////////////////////////////////////////////////////
+//// Deal with the system */
+/* In OCaml the file name defines a module: the module is named after the
+ * file, without directory and extension, with its first letter in upper
+ * case. Emit the corresponding module tag.
+ *
+ * The stem is the part of the base name before its last '.', or the whole
+ * base name when it has no '.'. Only dots located after the last path
+ * separator ('/' or '\\') are considered, so a dot in a directory name is
+ * never taken for the extension separator. No tag is produced when the
+ * stem is empty. */
+static void computeModuleName ( void )
+{
+	const char *const filename = getSourceFileName ();
+	const char *base;
+	const char *dot = NULL;
+	const char *cursor;
+	size_t length;
+	vString *moduleName;
+
+	if (filename == NULL)
+		return;
+
+	/* single pass: remember where the base name starts and where its
+	 * last dot is */
+	base = filename;
+	for (cursor = filename; *cursor != '\0'; ++cursor)
+	{
+		if (*cursor == '/' || *cursor == '\\')
+		{
+			base = cursor + 1;
+			dot = NULL;
+		}
+		else if (*cursor == '.')
+			dot = cursor;
+	}
+
+	/* cursor now points at the terminating NUL */
+	length = (size_t) ((dot != NULL ? dot : cursor) - base);
+	if (length == 0)
+		return;
+
+	moduleName = vStringNew ();
+	vStringNCopyS (moduleName, base, length);
+	vStringTerminate (moduleName);
+
+	if (isLowerAlpha (moduleName->buffer[0]))
+		moduleName->buffer[0] += ('A' - 'a');
+
+	makeSimpleTag (moduleName, OcamlKinds, K_MODULE);
+	vStringDelete (moduleName);
+}
+
+/* Give every slot of the context stack its own, freshly allocated name
+ * string. */
+static void initStack ( void )
+{
+	int slot = 0;
+
+	while (slot < OCAML_MAX_STACK_SIZE)
+	{
+		stack[slot].contextName = vStringNew ();
+		++slot;
+	}
+}
+
+/* Release the name string of every slot of the context stack (the
+ * counterpart of initStack). */
+static void clearStack ( void )
+{
+	int slot = 0;
+
+	while (slot < OCAML_MAX_STACK_SIZE)
+	{
+		vStringDelete (stack[slot].contextName);
+		++slot;
+	}
+}
+
+/* Parser entry point: tag the module implied by the file name, then feed
+ * every token of the file to the current parser state (toDoNext), starting
+ * in globalScope. All working strings are allocated here and released
+ * before returning. */
+static void findOcamlTags (void)
+{
+	lexingState st;
+	ocaToken tok;
+
+	computeModuleName ();
+
+	/* start from a clean parser state: an unbalanced previous file must
+	 * not leave contexts or a pending pop request behind */
+	stackIndex = 0;
+	needStrongPoping = FALSE;
+	initStack ();
+
+	tempIdent = vStringNew ();
+	lastModule = vStringNew ();
+	lastClass = vStringNew ();
+	voidName = vStringNew ();
+	vStringCopyS (voidName, "_");
+
+	st.name = vStringNew ();
+	st.cp = fileReadLine ();
+	toDoNext = &globalScope;
+	tok = lex (&st);
+	while (tok != Tok_EOF)
+	{
+		(*toDoNext) (st.name, tok);
+		tok = lex (&st);
+	}
+
+	/* st.name was allocated above and is owned by this function */
+	vStringDelete (st.name);
+	vStringDelete (voidName);
+	vStringDelete (tempIdent);
+	vStringDelete (lastModule);
+	vStringDelete (lastClass);
+	clearStack ();
+}
+
+/* One-time parser set-up: record the language identifier given to this
+ * parser, then build the operator table and the keyword hash. */
+static void ocamlInitialize (const langType language)
+{
+	Lang_Ocaml = language;
+	initOperatorTable ();
+	initKeywordHash ();
+}
+
+/* Build the parser definition for OCaml: files ending in .ml or .mli are
+ * parsed by findOcamlTags, after ocamlInitialize has run. */
+extern parserDefinition *OcamlParser (void)
+{
+	static const char *const extensions[] = { "ml", "mli", NULL };
+	parserDefinition *const parser = parserNew ("OCaml");
+
+	parser->kinds = OcamlKinds;
+	parser->kindCount = KIND_COUNT (OcamlKinds);
+	parser->extensions = extensions;
+	parser->parser = findOcamlTags;
+	parser->initialize = ocamlInitialize;
+
+	return parser;
+}
diff --git a/options.c b/options.c
new file mode 100644
index 0000000..d26627f
--- /dev/null
+++ b/options.c
@@ -0,0 +1,1829 @@
+/*
+* $Id: options.c 576 2007-06-30 04:16:23Z elliotth $
+*
+* Copyright (c) 1996-2003, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* This module contains functions to process command line options.
+*/
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <ctype.h> /* to declare isspace () */
+
+#include "ctags.h"
+#include "debug.h"
+#include "main.h"
+#define OPTION_WRITE
+#include "options.h"
+#include "parse.h"
+#include "routines.h"
+
+/*
+* MACROS
+*/
+#define INVOCATION "Usage: %s [options] [file(s)]\n"
+
+#define CTAGS_ENVIRONMENT "CTAGS"
+#define ETAGS_ENVIRONMENT "ETAGS"
+
+#define CTAGS_FILE "tags"
+#define ETAGS_FILE "TAGS"
+
+#ifndef ETAGS
+# define ETAGS "etags" /* name which causes default use of to -e */
+#endif
+
+/* The following separators are permitted for list options.
+ */
+#define EXTENSION_SEPARATOR '.'
+#define PATTERN_START '('
+#define PATTERN_STOP ')'
+#define IGNORE_SEPARATORS ", \t\n"
+
+#ifndef DEFAULT_FILE_FORMAT
+# define DEFAULT_FILE_FORMAT 2
+#endif
+
+#if defined (HAVE_OPENDIR) || defined (HAVE_FINDFIRST) || defined (HAVE__FINDFIRST) || defined (AMIGA)
+# define RECURSE_SUPPORTED
+#endif
+
+/* TRUE when c is one of the single-letter options listed in the string
+ * below -- presumably the short options that take a parameter (TODO
+ * confirm against the option parser). */
+#define isCompoundOption(c) (boolean) (strchr ("fohiILpDb", (c)) != NULL)
+
+/*
+* Data declarations
+*/
+
+/* Numeric limits applied to option values. */
+enum eOptionLimits {
+ MaxHeaderExtensions = 100, /* maximum number of extensions in -h option */
+ MaxSupportedTagFormat = 2 /* presumably the highest level accepted by --format; verify at the use site */
+};
+
+/* One line of the --help text. */
+typedef struct sOptionDescription {
+ int usedByEtags; /* non-zero on most lines, 0 on some (see the table below); presumably 0 hides the line in etags mode -- TODO confirm */
+ const char *description; /* text of the line; NULL marks the end of the table */
+} optionDescription;
+
+typedef void (*parametricOptionHandler) (const char *const option, const char *const parameter);
+
+typedef const struct {
+ const char* name; /* name of option as specified by user */
+ parametricOptionHandler handler; /* routine to handle option */
+ boolean initOnly; /* option must be specified before any files */
+} parametricOption;
+
+typedef const struct {
+ const char* name; /* name of option as specified by user */
+ boolean* pValue; /* pointer to option value */
+ boolean initOnly; /* option must be specified before any files */
+} booleanOption;
+
+/*
+* DATA DEFINITIONS
+*/
+
+static boolean NonOptionEncountered;
+static stringList *OptionFiles;
+static stringList* Excluded;
+static boolean FilesRequired = TRUE;
+static boolean SkipConfiguration;
+
+static const char *const HeaderExtensions [] = {
+ "h", "H", "hh", "hpp", "hxx", "h++", "inc", "def", NULL
+};
+
+/* Default value of every option, in the field order of optionValues; the
+ * comment on each line names the command-line option setting that field. */
+optionValues Option = {
+ {
+ FALSE, /* --extra=f */
+ FALSE, /* --extra=q */
+ TRUE, /* --file-scope */
+ },
+ {
+ FALSE, /* --fields=a */
+ TRUE, /* --fields=f */
+ FALSE, /* --fields=m */
+ FALSE, /* --fields=i */
+ TRUE, /* --fields=k */
+ FALSE, /* --fields=z */
+ FALSE, /* --fields=K */
+ FALSE, /* --fields=l */
+ FALSE, /* --fields=n */
+ TRUE, /* --fields=s */
+ FALSE, /* --fields=S */
+ TRUE /* --fields=t */
+ },
+ NULL, /* -I */
+ FALSE, /* -a */
+ FALSE, /* -B */
+ FALSE, /* -e */
+#ifdef MACROS_USE_PATTERNS
+ EX_PATTERN, /* -n, --excmd */
+#else
+ EX_MIX, /* -n, --excmd */
+#endif
+ FALSE, /* -R */
+ SO_SORTED, /* -u, --sort */
+ FALSE, /* -V */
+ FALSE, /* -x */
+ NULL, /* -L */
+ NULL, /* -o */
+ NULL, /* -h */
+ NULL, /* --etags-include */
+ DEFAULT_FILE_FORMAT,/* --format */
+ FALSE, /* --if0 */
+ FALSE, /* --kind-long */
+ LANG_AUTO, /* --lang */
+ TRUE, /* --links */
+ FALSE, /* --filter */
+ NULL, /* --filter-terminator */
+ FALSE, /* --tag-relative */
+ FALSE, /* --totals */
+ FALSE, /* --line-directives */
+#ifdef DEBUG
+ 0, 0 /* -D, -b */
+#endif
+};
+
+/*
+- Locally used only
+*/
+
+/* Text of the --help output, one entry per printed line, terminated by an
+ * entry whose description is NULL. The first member is the usedByEtags flag
+ * of optionDescription.
+ * The default list shown for -h must be kept in sync with
+ * HeaderExtensions. */
+static optionDescription LongOptionDescription [] = {
+ {1," -a Append the tags to an existing tag file."},
+#ifdef DEBUG
+ {1," -b <line>"},
+ {1," Set break line."},
+#endif
+ {0," -B Use backward searching patterns (?...?)."},
+#ifdef DEBUG
+ {1," -D <level>"},
+ {1," Set debug level."},
+#endif
+ {0," -e Output tag file for use with Emacs."},
+ {1," -f <name>"},
+ {1," Write tags to specified file. Value of \"-\" writes tags to stdout"},
+ {1," [\"tags\"; or \"TAGS\" when -e supplied]."},
+ {0," -F Use forward searching patterns (/.../) (default)."},
+ {1," -h <list>"},
+ {1," Specify list of file extensions to be treated as include files."},
+ {1," [\".h.H.hh.hpp.hxx.h++.inc.def\"]."},
+ {1," -I <list|@file>"},
+ {1," A list of tokens to be specially handled is read from either the"},
+ {1," command line or the specified file."},
+ {1," -L <file>"},
+ {1," A list of source file names are read from the specified file."},
+ {1," If specified as \"-\", then standard input is read."},
+ {0," -n Equivalent to --excmd=number."},
+ {0," -N Equivalent to --excmd=pattern."},
+ {1," -o Alternative for -f."},
+#ifdef RECURSE_SUPPORTED
+ {1," -R Equivalent to --recurse."},
+#else
+ {1," -R Not supported on this platform."},
+#endif
+ {0," -u Equivalent to --sort=no."},
+ {1," -V Equivalent to --verbose."},
+ {1," -x Print a tabular cross reference file to standard output."},
+ {1," --append=[yes|no]"},
+ {1," Should tags be appended to existing tag file [no]?"},
+ {1," --etags-include=file"},
+ {1," Include reference to 'file' in Emacs-style tag file (requires -e)."},
+ {1," --exclude=pattern"},
+ {1," Exclude files and directories matching 'pattern'."},
+ {0," --excmd=number|pattern|mix"},
+#ifdef MACROS_USE_PATTERNS
+ {0," Uses the specified type of EX command to locate tags [pattern]."},
+#else
+ {0," Uses the specified type of EX command to locate tags [mix]."},
+#endif
+ {1," --extra=[+|-]flags"},
+ {1," Include extra tag entries for selected information (flags: \"fq\")."},
+ {1," --fields=[+|-]flags"},
+ {1," Include selected extension fields (flags: \"afmikKlnsStz\") [fks]."},
+ {1," --file-scope=[yes|no]"},
+ {1," Should tags scoped only for a single file (e.g. \"static\" tags)"},
+ {1," be included in the output [yes]?"},
+ {1," --filter=[yes|no]"},
+ {1," Behave as a filter, reading file names from standard input and"},
+ {1," writing tags to standard output [no]."},
+ {1," --filter-terminator=string"},
+ {1," Specify string to print to stdout following the tags for each file"},
+ {1," parsed when --filter is enabled."},
+ {0," --format=level"},
+#if DEFAULT_FILE_FORMAT == 1
+ {0," Force output of specified tag file format [1]."},
+#else
+ {0," Force output of specified tag file format [2]."},
+#endif
+ {1," --help"},
+ {1," Print this option summary."},
+ {1," --if0=[yes|no]"},
+ {1," Should C code within #if 0 conditional branches be parsed [no]?"},
+ {1," --<LANG>-kinds=[+|-]kinds"},
+ {1," Enable/disable tag kinds for language <LANG>."},
+ {1," --langdef=name"},
+ {1," Define a new language to be parsed with regular expressions."},
+ {1," --langmap=map(s)"},
+ {1," Override default mapping of language to source file extension."},
+ {1," --language-force=language"},
+ {1," Force all files to be interpreted using specified language."},
+ {1," --languages=[+|-]list"},
+ {1," Restrict files scanned for tags to those mapped to languages"},
+ {1," specified in the comma-separated 'list'. The list can contain any"},
+ {1," built-in or user-defined language [all]."},
+ {1," --license"},
+ {1," Print details of software license."},
+ {0," --line-directives=[yes|no]"},
+ {0," Should #line directives be processed [no]?"},
+ {1," --links=[yes|no]"},
+ {1," Indicate whether symbolic links should be followed [yes]."},
+ {1," --list-kinds=[language|all]"},
+ {1," Output a list of all tag kinds for specified language or all."},
+ {1," --list-languages"},
+ {1," Output list of supported languages."},
+ {1," --list-maps=[language|all]"},
+ {1," Output list of language mappings."},
+ {1," --options=file"},
+ {1," Specify file from which command line options should be read."},
+ {1," --recurse=[yes|no]"},
+#ifdef RECURSE_SUPPORTED
+ {1," Recurse into directories supplied on command line [no]."},
+#else
+ {1," Not supported on this platform."},
+#endif
+#ifdef HAVE_REGEX
+ {1," --regex-<LANG>=/line_pattern/name_pattern/[flags]"},
+ {1," Define regular expression for locating tags in specific language."},
+#endif
+ {0," --sort=[yes|no|foldcase]"},
+ {0," Should tags be sorted (optionally ignoring case) [yes]?"},
+ {0," --tag-relative=[yes|no]"},
+ {0," Should paths be relative to location of tag file [no; yes when -e]?"},
+ {1," --totals=[yes|no]"},
+ {1," Print statistics about source and tag files [no]."},
+ {1," --verbose=[yes|no]"},
+ {1," Enable verbose messages describing actions on each source file."},
+ {1," --version"},
+ {1," Print version identifier to standard output."},
+ {1, NULL}
+};
+
+/* First part of the license text shown by processLicenseOption(): the GPL
+ * redistribution terms.
+ */
+static const char* const License1 =
+"This program is free software; you can redistribute it and/or\n"
+"modify it under the terms of the GNU General Public License\n"
+"as published by the Free Software Foundation; either version 2\n"
+"of the License, or (at your option) any later version.\n"
+"\n";
+/* Second part of the license text shown by processLicenseOption(): the
+ * warranty disclaimer and where to obtain a copy of the GPL.
+ */
+static const char* const License2 =
+"This program is distributed in the hope that it will be useful,\n"
+"but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
+"MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n"
+"GNU General Public License for more details.\n"
+"\n"
+"You should have received a copy of the GNU General Public License\n"
+"along with this program; if not, write to the Free Software\n"
+"Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.\n";
+
+/* NULL-terminated list of strings naming the optional "features" compiled
+ * into the program. Each entry is present only when the corresponding
+ * preprocessor condition holds; printFeatureList() prints the entries.
+ */
+static const char *const Features [] = {
+#ifdef WIN32
+ "win32",
+#endif
+#ifdef DJGPP
+ "msdos_32",
+#else
+# ifdef MSDOS
+ "msdos_16",
+# endif
+#endif
+#ifdef OS2
+ "os2",
+#endif
+#ifdef AMIGA
+ "amiga",
+#endif
+#ifdef VMS
+ "vms",
+#endif
+#ifdef HAVE_FNMATCH
+ "wildcards",
+#endif
+#ifdef HAVE_REGEX
+ "regex",
+#endif
+#ifndef EXTERNAL_SORT
+ "internal-sort",
+#endif
+#ifdef CUSTOM_CONFIGURATION_FILE
+ "custom-conf",
+#endif
+#if (defined (MSDOS) || defined (WIN32) || defined (OS2)) && defined (UNIX_PATH_SEPARATOR)
+ "unix-path-separator",
+#endif
+#ifdef DEBUG
+ "debug",
+#endif
+ NULL
+};
+
+/*
+* FUNCTION PROTOTYPES
+*/
+/* Forward declaration: processOptionFile() calls this before its definition. */
+static boolean parseFileOptions (const char *const fileName);
+
+/*
+* FUNCTION DEFINITIONS
+*/
+
+/* Writes a printf-style message to standard output, but only when verbose
+ * mode (Option.verbose) is switched on; otherwise does nothing.
+ */
+extern void verbose (const char *const format, ...)
+{
+    va_list arguments;
+
+    if (! Option.verbose)
+        return;
+
+    va_start (arguments, format);
+    vprintf (format, arguments);
+    va_end (arguments);
+}
+
+/* Returns a newly allocated duplicate of `string', or NULL when `string' is
+ * NULL.
+ */
+static char *stringCopy (const char *const string)
+{
+    return (string == NULL) ? NULL : eStrdup (string);
+}
+
+/* Releases the string referenced by `pString' (if any) and resets the
+ * caller's pointer to NULL.
+ */
+static void freeString (char **const pString)
+{
+    if (*pString == NULL)
+        return;
+
+    eFree (*pString);
+    *pString = NULL;
+}
+
+/* Deletes the string list referenced by `pList' (if any) and resets the
+ * caller's pointer to NULL.
+ */
+extern void freeList (stringList** const pList)
+{
+    if (*pList == NULL)
+        return;
+
+    stringListDelete (*pList);
+    *pList = NULL;
+}
+
+/* Chooses the tag file name when none has been given: ETAGS_FILE in etags
+ * mode, CTAGS_FILE otherwise. A name that is already set is left untouched.
+ */
+extern void setDefaultTagFileName (void)
+{
+    if (Option.tagFileName == NULL)
+    {
+        if (Option.etags)
+            Option.tagFileName = stringCopy (ETAGS_FILE);
+        else
+            Option.tagFileName = stringCopy (CTAGS_FILE);
+    }
+}
+
+/* Reports whether source file names must be supplied: FALSE whenever
+ * recursion is enabled, otherwise the current value of FilesRequired.
+ */
+extern boolean filesRequired (void)
+{
+    if (Option.recurse)
+        return FALSE;
+    return FilesRequired;
+}
+
+/* Checks the collected options for conflicting combinations, issuing a
+ * warning and disabling the overridden setting where possible:
+ *   - xref output turns off file name tags;
+ *   - append mode cannot write tags to stdout (fatal);
+ *   - filter mode turns off totals and ignores an output tag file name.
+ */
+extern void checkOptions (void)
+{
+    if (Option.xref && Option.include.fileNames)
+    {
+        error (WARNING, "%s disables file name tags", "xref output");
+        Option.include.fileNames = FALSE;
+    }
+
+    if (Option.append && isDestinationStdout ())
+        error (FATAL, "%s tags to stdout", "append mode is not compatible with");
+
+    if (Option.filter)
+    {
+        const char* const mode = "filter mode";
+
+        if (Option.printTotals)
+        {
+            error (WARNING, "%s disables totals", mode);
+            Option.printTotals = FALSE;
+        }
+        if (Option.tagFileName != NULL)
+            error (WARNING, "%s ignores output tag file name", mode);
+    }
+}
+
+/* Switches to etags mode: besides setting the etags flag, the output becomes
+ * unsorted, #line directive processing is turned off, and tag-relative paths
+ * are turned on.
+ */
+static void setEtagsMode (void)
+{
+ Option.etags = TRUE;
+ Option.sorted = SO_UNSORTED;
+ Option.lineDirectives = FALSE;
+ Option.tagRelative = TRUE;
+}
+
+/* Enables etags mode when the executable name contains the ETAGS string.
+ * Where file names are case-insensitive, both strings are lower-cased before
+ * the comparison.
+ */
+extern void testEtagsInvocation (void)
+{
+    char* const program = eStrdup (getExecutableName ());
+    char* const marker = eStrdup (ETAGS);
+    boolean invokedAsEtags;
+
+#ifdef CASE_INSENSITIVE_FILENAMES
+    toLowerString (program);
+    toLowerString (marker);
+#endif
+    invokedAsEtags = (boolean) (strstr (program, marker) != NULL);
+    if (invokedAsEtags)
+    {
+        verbose ("Running in etags mode\n");
+        setEtagsMode ();
+    }
+    eFree (program);
+    eFree (marker);
+}
+
+/*
+ * Cooked argument parsing
+ */
+
+/* Takes the next letter from the pending short option string of `args' and
+ * records it as the current item, together with its parameter:
+ *   - "" for a letter that is not a compound option;
+ *   - the remainder of the word, when text follows a compound option letter;
+ *   - otherwise the next argument, or NULL if there is no next argument.
+ */
+static void parseShortOption (cookedArgs *const args)
+{
+ /* Copy the letter into a one-character string and step past it. */
+ args->simple [0] = *args->shortOptions++;
+ args->simple [1] = '\0';
+ args->item = args->simple;
+ if (! isCompoundOption (*args->simple))
+ args->parameter = "";
+ else if (*args->shortOptions == '\0')
+ {
+ /* Nothing follows the letter in this word: consume the next argument
+ * as the parameter.
+ */
+ argForth (args->args);
+ if (argOff (args->args))
+ args->parameter = NULL;
+ else
+ args->parameter = argItem (args->args);
+ args->shortOptions = NULL;
+ }
+ else
+ {
+ /* The rest of the word is the parameter; no short options remain. */
+ args->parameter = args->shortOptions;
+ args->shortOptions = NULL;
+ }
+}
+
+/* Splits a long option (given without its leading "--") at the first '='.
+ * The text before it becomes a newly allocated args->item; the text after it
+ * becomes args->parameter, which points into `item'. Without an '=', the
+ * whole of `item' is duplicated and the parameter is the empty string.
+ */
+static void parseLongOption (cookedArgs *const args, const char *item)
+{
+    const char* const separator = strchr (item, '=');
+
+    if (separator != NULL)
+    {
+        const size_t nameLength = separator - item;
+
+        args->item = xMalloc (nameLength + 1, char); /* FIXME: memory leak. */
+        strncpy (args->item, item, nameLength);
+        args->item [nameLength] = '\0';
+        args->parameter = separator + 1;
+    }
+    else
+    {
+        args->item = eStrdup (item); /* FIXME: memory leak. */
+        args->parameter = "";
+    }
+    Assert (args->item != NULL);
+    Assert (args->parameter != NULL);
+}
+
+/* Classifies the current raw argument of `current' and fills in the cooked
+ * fields: a word starting with "--" is a long option, any other word
+ * starting with '-' begins a group of short options, and anything else is a
+ * plain item with no parameter. Does nothing when the arguments are
+ * exhausted.
+ */
+static void cArgRead (cookedArgs *const current)
+{
+    char* word;
+
+    Assert (current != NULL);
+    if (argOff (current->args))
+        return;
+
+    word = argItem (current->args);
+    current->shortOptions = NULL;
+    Assert (word != NULL);
+
+    if (strncmp (word, "--", (size_t) 2) == 0)
+    {
+        current->isOption = TRUE;
+        current->longOption = TRUE;
+        parseLongOption (current, word + 2);
+        Assert (current->item != NULL);
+        Assert (current->parameter != NULL);
+    }
+    else if (word [0] == '-')
+    {
+        current->isOption = TRUE;
+        current->longOption = FALSE;
+        current->shortOptions = word + 1;
+        parseShortOption (current);
+    }
+    else
+    {
+        current->isOption = FALSE;
+        current->longOption = FALSE;
+        current->item = word;
+        current->parameter = NULL;
+    }
+}
+
+/* Creates a zero-initialized cooked argument reader over the arguments
+ * obtained from `string' and reads its first argument.
+ */
+extern cookedArgs* cArgNewFromString (const char* string)
+{
+    cookedArgs* const cooked = xMalloc (1, cookedArgs);
+
+    memset (cooked, 0, sizeof *cooked);
+    cooked->args = argNewFromString (string);
+    cArgRead (cooked);
+    return cooked;
+}
+
+/* Creates a zero-initialized cooked argument reader over the argument vector
+ * `argv' and reads its first argument.
+ */
+extern cookedArgs* cArgNewFromArgv (char* const* const argv)
+{
+    cookedArgs* const cooked = xMalloc (1, cookedArgs);
+
+    memset (cooked, 0, sizeof *cooked);
+    cooked->args = argNewFromArgv (argv);
+    cArgRead (cooked);
+    return cooked;
+}
+
+/* Creates a zero-initialized cooked argument reader over the arguments
+ * obtained from the open file `fp' and reads its first argument.
+ */
+extern cookedArgs* cArgNewFromFile (FILE* const fp)
+{
+    cookedArgs* const cooked = xMalloc (1, cookedArgs);
+
+    memset (cooked, 0, sizeof *cooked);
+    cooked->args = argNewFromFile (fp);
+    cArgRead (cooked);
+    return cooked;
+}
+
+/* Creates a zero-initialized cooked argument reader over the arguments
+ * obtained from the open file `fp' via argNewFromLineFile() and reads its
+ * first argument.
+ */
+extern cookedArgs* cArgNewFromLineFile (FILE* const fp)
+{
+    cookedArgs* const cooked = xMalloc (1, cookedArgs);
+
+    memset (cooked, 0, sizeof *cooked);
+    cooked->args = argNewFromLineFile (fp);
+    cArgRead (cooked);
+    return cooked;
+}
+
+/* Destroys a cooked argument reader: deletes the underlying raw arguments,
+ * wipes the structure, and frees it.
+ */
+extern void cArgDelete (cookedArgs* const current)
+{
+    Assert (current != NULL);
+
+    argDelete (current->args);
+    memset (current, 0, sizeof *current);
+    eFree (current);
+}
+
+/* Returns TRUE while unprocessed letters remain in the current group of
+ * short options.
+ */
+static boolean cArgOptionPending (cookedArgs* const current)
+{
+    const char* const remaining = current->shortOptions;
+
+    if (remaining == NULL)
+        return FALSE;
+    return (remaining [0] != '\0') ? TRUE : FALSE;
+}
+
+/* Returns TRUE once nothing is left to process: the raw arguments are
+ * exhausted and no short option letters are pending.
+ */
+extern boolean cArgOff (cookedArgs* const current)
+{
+    Assert (current != NULL);
+
+    if (! argOff (current->args))
+        return FALSE;
+    return (boolean) (! cArgOptionPending (current));
+}
+
+/* Returns the isOption flag recorded for the current cooked argument. */
+extern boolean cArgIsOption (cookedArgs* const current)
+{
+ Assert (current != NULL);
+ return current->isOption;
+}
+
+/* Returns the item (option name or plain word) recorded for the current
+ * cooked argument.
+ */
+extern const char* cArgItem (cookedArgs* const current)
+{
+ Assert (current != NULL);
+ return current->item;
+}
+
+/* Advances to the next cooked argument. Pending short option letters are
+ * consumed first; otherwise the raw argument list is advanced and the new
+ * argument is read. When the list runs out, all cooked fields are reset.
+ */
+extern void cArgForth (cookedArgs* const current)
+{
+    Assert (current != NULL);
+    Assert (! cArgOff (current));
+
+    if (cArgOptionPending (current))
+    {
+        parseShortOption (current);
+        return;
+    }
+
+    Assert (! argOff (current->args));
+    argForth (current->args);
+    if (argOff (current->args))
+    {
+        /* Ran off the end: leave no stale option state behind. */
+        current->isOption = FALSE;
+        current->longOption = FALSE;
+        current->shortOptions = NULL;
+        current->item = NULL;
+        current->parameter = NULL;
+        return;
+    }
+    cArgRead (current);
+}
+
+/*
+ * File extension and language mapping
+ */
+
+/* Adds the extensions named in `elist' to `slist'.
+ *
+ * `elist' is a list of extensions separated by EXTENSION_SEPARATOR; one
+ * leading separator is skipped. An empty element is added as an empty
+ * extension and reported as "(NONE)" in verbose output. When `clear' is
+ * true, `slist' is emptied first. A NULL or empty `elist' adds nothing.
+ *
+ * The list is split on a private copy, so `elist' itself is never modified.
+ */
+static void addExtensionList (
+        stringList *const slist, const char *const elist, const boolean clear)
+{
+    /* stringCopy() tolerates a NULL `elist'; the NULL test below shows that
+     * case is anticipated, so the copy must not be made with eStrdup().
+     */
+    char *const extensionList = stringCopy (elist);
+    const char *extension = NULL;
+    boolean first = TRUE;
+
+    if (clear)
+    {
+        verbose (" clearing\n");
+        stringListClear (slist);
+    }
+    verbose (" adding: ");
+    if (extensionList != NULL && *extensionList != '\0')
+    {
+        extension = extensionList;
+        if (extensionList [0] == EXTENSION_SEPARATOR)
+            ++extension;
+    }
+    while (extension != NULL)
+    {
+        /* Terminate the current element in place, then step to the next. */
+        char *separator = strchr (extension, EXTENSION_SEPARATOR);
+        if (separator != NULL)
+            *separator = '\0';
+        verbose ("%s%s", first ? "" : ", ",
+                *extension == '\0' ? "(NONE)" : extension);
+        stringListAdd (slist, vStringNewInit (extension));
+        first = FALSE;
+        if (separator == NULL)
+            extension = NULL;
+        else
+            extension = separator + 1;
+    }
+    if (Option.verbose)
+    {
+        printf ("\n now: ");
+        stringListPrint (slist);
+        putchar ('\n');
+    }
+    /* Guard the release the same way freeString() does. */
+    if (extensionList != NULL)
+        eFree (extensionList);
+}
+
+/* Returns TRUE if `parameter' is one of the accepted spellings of "false"
+ * ("0", "n", "no", "off"), compared without regard to case.
+ */
+static boolean isFalse (const char *parameter)
+{
+    static const char *const spellings [] = { "0", "n", "no", "off" };
+    size_t i;
+
+    for (i = 0 ; i < sizeof spellings / sizeof spellings [0] ; ++i)
+    {
+        if (strcasecmp (parameter, spellings [i]) == 0)
+            return TRUE;
+    }
+    return FALSE;
+}
+
+/* Returns TRUE if `parameter' is one of the accepted spellings of "true"
+ * ("1", "y", "yes", "on"), compared without regard to case.
+ */
+static boolean isTrue (const char *parameter)
+{
+    static const char *const spellings [] = { "1", "y", "yes", "on" };
+    size_t i;
+
+    for (i = 0 ; i < sizeof spellings / sizeof spellings [0] ; ++i)
+    {
+        if (strcasecmp (parameter, spellings [i]) == 0)
+            return TRUE;
+    }
+    return FALSE;
+}
+
+/* Decides whether `fileName' counts as a header file, which in turn governs
+ * whether the tags it contains are treated as global or static. The file's
+ * extension is matched against the header extension list; with no list
+ * installed the answer is always FALSE.
+ */
+extern boolean isIncludeFile (const char *const fileName)
+{
+    const char *const ext = fileExtension (fileName);
+
+    if (Option.headerExt == NULL)
+        return FALSE;
+    return stringListExtensionMatched (Option.headerExt, ext);
+}
+
+/*
+ * Specific option processing
+ */
+
+/* Handles the "--etags-include" option: appends `parameter' to the list of
+ * files to include and clears FilesRequired. It is a fatal error to use the
+ * option when etags mode is not enabled.
+ */
+static void processEtagsInclude (
+        const char *const option, const char *const parameter)
+{
+    vString *included;
+
+    if (! Option.etags)
+    {
+        error (FATAL, "Etags must be enabled to use \"%s\" option", option);
+        return;
+    }
+
+    included = vStringNewInit (parameter);
+    if (Option.etagsInclude == NULL)
+        Option.etagsInclude = stringListNew ();
+    stringListAdd (Option.etagsInclude, included);
+    FilesRequired = FALSE;
+}
+
+/* Handles the "--exclude" option:
+ *   - an empty parameter discards the current exclusion list;
+ *   - "@file" adds the patterns read from `file' (fatal if it cannot be
+ *     opened);
+ *   - anything else is added as a single exclusion pattern.
+ */
+static void processExcludeOption (
+        const char *const option __unused__, const char *const parameter)
+{
+    const char *const listFile = parameter + 1;
+
+    if (parameter [0] == '\0')
+    {
+        freeList (&Excluded);
+        return;
+    }
+
+    if (parameter [0] != '@')
+    {
+        vString *const pattern = vStringNewInit (parameter);
+
+        if (Excluded == NULL)
+            Excluded = stringListNew ();
+        stringListAdd (Excluded, pattern);
+        verbose (" adding exclude pattern: %s\n", parameter);
+        return;
+    }
+
+    {
+        stringList* const patterns = stringListNewFromFile (listFile);
+
+        if (patterns == NULL)
+            error (FATAL | PERROR, "cannot open \"%s\"", listFile);
+        if (Excluded != NULL)
+            stringListCombine (Excluded, patterns);
+        else
+            Excluded = patterns;
+        verbose (" adding exclude patterns from %s\n", listFile);
+    }
+}
+
+/* Returns TRUE if `name' matches the exclusion list. The base file name is
+ * tried first, then the name as given when the two differ. On AMIGA the tag
+ * file itself is additionally treated as excluded.
+ */
+extern boolean isExcludedFile (const char* const name)
+{
+    const char* const leaf = baseFilename (name);
+    boolean excluded = FALSE;
+
+    if (Excluded != NULL)
+    {
+        if (stringListFileMatched (Excluded, leaf))
+            excluded = TRUE;
+        else if (name != leaf)
+            excluded = stringListFileMatched (Excluded, name);
+    }
+#ifdef AMIGA
+    /* not a good solution, but the only one which works often */
+    if (! excluded)
+        excluded = (boolean) (strcmp (name, TagFile.name) == 0);
+#endif
+    return excluded;
+}
+
+/* Handles the "--excmd" option. Only the first character of `parameter' is
+ * examined: 'm' selects EX_MIX, 'n' EX_LINENUM and 'p' EX_PATTERN; anything
+ * else is a fatal error.
+ */
+static void processExcmdOption (
+        const char *const option, const char *const parameter)
+{
+    const char selector = parameter [0];
+
+    if (selector == 'm')
+        Option.locate = EX_MIX;
+    else if (selector == 'n')
+        Option.locate = EX_LINENUM;
+    else if (selector == 'p')
+        Option.locate = EX_PATTERN;
+    else
+        error (FATAL, "Invalid value for \"%s\" option", option);
+}
+
+/* Handles the "--extra" option. `parameter' is a sequence of flags ('f' for
+ * file name tags, 'q' for qualified tags) in which '+' and '-' switch between
+ * enabling and disabling the flags that follow. A sequence that does not
+ * begin with a sign first turns both flags off. Unknown flags draw a warning.
+ */
+static void processExtraTagsOption (
+        const char *const option, const char *const parameter)
+{
+    struct sInclude *const extras = &Option.include;
+    const char *cursor;
+    boolean enable = TRUE;
+
+    if (parameter [0] != '+' && parameter [0] != '-')
+    {
+        extras->fileNames = FALSE;
+        extras->qualifiedTags = FALSE;
+#if 0
+        extras->fileScope = FALSE;
+#endif
+    }
+
+    for (cursor = parameter ; *cursor != '\0' ; ++cursor)
+    {
+        const int flag = *cursor;
+
+        if (flag == '+')
+            enable = TRUE;
+        else if (flag == '-')
+            enable = FALSE;
+        else if (flag == 'f')
+            extras->fileNames = enable;
+        else if (flag == 'q')
+            extras->qualifiedTags = enable;
+#if 0
+        else if (flag == 'F')
+            extras->fileScope = enable;
+#endif
+        else
+            error(WARNING, "Unsupported parameter '%c' for \"%s\" option",
+                    flag, option);
+    }
+}
+
+/* Handles the "--fields" option, which selects the extension fields recorded
+ * for each tag.
+ *
+ * `parameter' is a sequence of one-letter field flags in which '+' and '-'
+ * switch between enabling and disabling the flags that follow. If the
+ * sequence does not begin with a sign, every field handled by this option is
+ * first turned off, so that only the flags listed end up enabled. Unknown
+ * flags produce a warning that names `option'.
+ */
+static void processFieldsOption (
+        const char *const option, const char *const parameter)
+{
+    struct sExtFields *field = &Option.extensionFields;
+    const char *p = parameter;
+    boolean mode = TRUE;
+    int c;
+
+    if (*p != '+' && *p != '-')
+    {
+        /* Absolute list: reset every field that the switch below can set.
+         * lineNumber ('n') and signature ('S') were previously left out, so
+         * an earlier "+n" or "+S" survived a later absolute list.
+         */
+        field->access = FALSE;
+        field->fileScope = FALSE;
+        field->implementation = FALSE;
+        field->inheritance = FALSE;
+        field->kind = FALSE;
+        field->kindKey = FALSE;
+        field->kindLong = FALSE;
+        field->language = FALSE;
+        field->lineNumber = FALSE;
+        field->scope = FALSE;
+        field->signature = FALSE;
+        field->typeRef = FALSE;
+    }
+    while ((c = *p++) != '\0') switch (c)
+    {
+        case '+': mode = TRUE; break;
+        case '-': mode = FALSE; break;
+
+        case 'a': field->access = mode; break;
+        case 'f': field->fileScope = mode; break;
+        case 'm': field->implementation = mode; break;
+        case 'i': field->inheritance = mode; break;
+        case 'k': field->kind = mode; break;
+        case 'K': field->kindLong = mode; break;
+        case 'l': field->language = mode; break;
+        case 'n': field->lineNumber = mode; break;
+        case 's': field->scope = mode; break;
+        case 'S': field->signature = mode; break;
+        case 'z': field->kindKey = mode; break;
+        case 't': field->typeRef = mode; break;
+
+        default: error(WARNING, "Unsupported parameter '%c' for \"%s\" option",
+                c, option);
+            break;
+    }
+}
+
+/* Handles the "--filter-terminator" option: replaces the stored terminator
+ * string with a copy of `parameter'.
+ */
+static void processFilterTerminatorOption (
+        const char *const option __unused__, const char *const parameter)
+{
+    if (Option.filterTerminator != NULL)
+    {
+        eFree (Option.filterTerminator);
+        Option.filterTerminator = NULL;
+    }
+    Option.filterTerminator = stringCopy (parameter);
+}
+
+/* Handles the "--format" option: parses `parameter' as an unsigned number
+ * and stores it as the tag file format. A parameter that is not a number, or
+ * a number above MaxSupportedTagFormat, is a fatal error.
+ */
+static void processFormatOption (
+        const char *const option, const char *const parameter)
+{
+    unsigned int level;
+
+    if (sscanf (parameter, "%u", &level) < 1)
+    {
+        error (FATAL, "Invalid value for \"%s\" option",option);
+        return;
+    }
+    if (level > (unsigned int) MaxSupportedTagFormat)
+    {
+        error (FATAL, "Unsupported value for \"%s\" option", option);
+        return;
+    }
+    Option.tagFileFormat = level;
+}
+
+/* Prints the INVOCATION text, using it as a printf format with the
+ * executable name as its argument.
+ */
+static void printInvocationDescription (void)
+{
+ printf (INVOCATION, getExecutableName ());
+}
+
+/* Prints each line of the option description table `optDesc', which ends at
+ * the entry whose description is NULL. In etags mode, only the lines marked
+ * as used by etags are printed.
+ */
+static void printOptionDescriptions (const optionDescription *const optDesc)
+{
+    const optionDescription *entry;
+
+    for (entry = optDesc ; entry->description != NULL ; ++entry)
+    {
+        if (Option.etags && ! entry->usedByEtags)
+            continue;
+        puts (entry->description);
+    }
+}
+
+/* Prints the compiled-in features on one line, each prefixed by '+' and
+ * separated by ", ". Prints nothing at all when the Features list is empty.
+ * With CUSTOM_CONFIGURATION_FILE defined, the "custom-conf" entry is followed
+ * by "=" and the configured file.
+ */
+static void printFeatureList (void)
+{
+    const char *const *feature;
+    int printed = 0;
+
+    for (feature = Features ; *feature != NULL ; ++feature)
+    {
+        if (printed == 0)
+            printf (" Optional compiled features: ");
+        printf ("%s+%s", (printed > 0 ? ", " : ""), *feature);
+#ifdef CUSTOM_CONFIGURATION_FILE
+        if (strcmp (*feature, "custom-conf") == 0)
+            printf ("=%s", CUSTOM_CONFIGURATION_FILE);
+#endif
+        ++printed;
+    }
+    if (printed > 0)
+        putchar ('\n');
+}
+
+/* Prints the program banner: name, version, copyright and author; the
+ * compilation date and time; the author's e-mail address and the program
+ * URL; and finally the list of compiled-in features.
+ */
+static void printProgramIdentification (void)
+{
+ printf ("%s %s, %s %s\n",
+ PROGRAM_NAME, PROGRAM_VERSION,
+ PROGRAM_COPYRIGHT, AUTHOR_NAME);
+ printf (" Compiled: %s, %s\n", __DATE__, __TIME__);
+ printf (" Addresses: <%s>, %s\n", AUTHOR_EMAIL, PROGRAM_URL);
+ printFeatureList ();
+}
+
+/* Handles the "--help" option: prints the program banner, the invocation
+ * summary and the long option descriptions, then exits with status 0.
+ * Neither argument is used.
+ */
+static void processHelpOption (
+ const char *const option __unused__,
+ const char *const parameter __unused__)
+{
+ printProgramIdentification ();
+ putchar ('\n');
+ printInvocationDescription ();
+ putchar ('\n');
+ printOptionDescriptions (LongOptionDescription);
+ exit (0);
+}
+
+/* Handles "--language-force" and its obsolete aliases "--lang" and
+ * "--language". The parameter "auto" (any case) selects LANG_AUTO; any other
+ * value is looked up by name, and an unknown name is a fatal error. The
+ * obsolete spellings draw a warning.
+ */
+static void processLanguageForceOption (
+        const char *const option, const char *const parameter)
+{
+    const boolean automatic = (boolean) (strcasecmp (parameter, "auto") == 0);
+    const langType language =
+        automatic ? LANG_AUTO : getNamedLanguage (parameter);
+    const boolean obsolete = (boolean) (
+        strcmp (option, "lang") == 0 || strcmp (option, "language") == 0);
+
+    if (obsolete)
+        error (WARNING,
+                "\"--%s\" option is obsolete; use \"--language-force\" instead",
+                option);
+
+    if (language != LANG_IGNORE)
+        Option.language = language;
+    else
+        error (FATAL, "Unknown language \"%s\" in \"%s\" option", parameter, option);
+}
+/* Returns a pointer to the first character at or after `p' that ends an
+ * extension map: EXTENSION_SEPARATOR, PATTERN_START, ',' or the terminating
+ * NUL.
+ */
+static char* skipPastMap (char* p)
+{
+    for ( ; *p != '\0' ; ++p)
+    {
+        if (*p == EXTENSION_SEPARATOR || *p == PATTERN_START || *p == ',')
+            break;
+    }
+    return p;
+}
+
+/* Parses the single mapping beginning at `map', adds it to the map of
+ * `language', and returns a pointer to the first character past the mapping.
+ *
+ * A mapping is either an extension map (EXTENSION_SEPARATOR followed by the
+ * extension) or a pattern map (text enclosed by PATTERN_START and
+ * PATTERN_STOP). Anything else, or a pattern with no PATTERN_STOP, is
+ * reported as a fatal error. The buffer at `map' is edited in place.
+ */
+static char* addLanguageMap (const langType language, char* map)
+{
+ char* p = NULL;
+ const char first = *map;
+ if (first == EXTENSION_SEPARATOR) /* extension map */
+ {
+ ++map;
+ p = skipPastMap (map);
+ if (*p == '\0')
+ {
+ /* The extension runs to the end of the string. */
+ verbose (" .%s", map);
+ addLanguageExtensionMap (language, map);
+ p = map + strlen (map);
+ }
+ else
+ {
+ /* Terminate the extension temporarily, then restore the character
+ * so the caller can continue parsing from it.
+ */
+ const char separator = *p;
+ *p = '\0';
+ verbose (" .%s", map);
+ addLanguageExtensionMap (language, map);
+ *p = separator;
+ }
+ }
+ else if (first == PATTERN_START) /* pattern map */
+ {
+ ++map;
+ for (p = map ; *p != PATTERN_STOP && *p != '\0' ; ++p)
+ {
+ /* A backslash-escaped PATTERN_STOP does not end the pattern. */
+ if (*p == '\\' && *(p + 1) == PATTERN_STOP)
+ ++p;
+ }
+ if (*p == '\0')
+ error (FATAL, "Unterminated file name pattern for %s language",
+ getLanguageName (language));
+ else
+ {
+ /* Overwrite PATTERN_STOP with a terminator and step past it. */
+ *p++ = '\0';
+ verbose (" (%s)", map);
+ addLanguagePatternMap (language, map);
+ }
+ }
+ else
+ error (FATAL, "Badly formed language map for %s language",
+ getLanguageName (language));
+ return p;
+}
+
+/* Processes one "language:mappings" entry at the start of `map', editing the
+ * buffer in place.
+ *
+ * The mappings replace the language's current map unless they begin with
+ * '+', in which case they are added to it. The word "default" (any case)
+ * restores the language's default map instead.
+ *
+ * Returns a pointer past the entry (and past one following ','), or NULL
+ * when `map' contains no ':' or names an unknown language.
+ */
+static char* processLanguageMap (char* map)
+{
+ char* const separator = strchr (map, ':');
+ char* result = NULL;
+ if (separator != NULL)
+ {
+ langType language;
+ char *list = separator + 1;
+ boolean clear = FALSE;
+ /* Split the language name from its mappings. */
+ *separator = '\0';
+ language = getNamedLanguage (map);
+ if (language != LANG_IGNORE)
+ {
+ const char *const deflt = "default";
+ char* p;
+ if (*list == '+')
+ ++list;
+ else
+ clear = TRUE;
+ /* Find the end of the first comma-delimited word in the list. */
+ for (p = list ; *p != ',' && *p != '\0' ; ++p) /*no-op*/ ;
+ if ((size_t) (p - list) == strlen (deflt) &&
+ strncasecmp (list, deflt, p - list) == 0)
+ {
+ verbose (" Restoring default %s language map: ", getLanguageName (language));
+ installLanguageMapDefault (language);
+ list = p;
+ }
+ else
+ {
+ if (clear)
+ {
+ verbose (" Setting %s language map:", getLanguageName (language));
+ clearLanguageMap (language);
+ }
+ else
+ verbose (" Adding to %s language map:", getLanguageName (language));
+ /* Consume consecutive mappings up to the next ',' or the end. */
+ while (list != NULL && *list != '\0' && *list != ',')
+ list = addLanguageMap (language, list);
+ verbose ("\n");
+ }
+ if (list != NULL && *list == ',')
+ ++list;
+ result = list;
+ }
+ }
+ return result;
+}
+
+/* Handles the "--langmap" option. The parameter "default" restores the
+ * default maps of all languages; otherwise each entry in the parameter is
+ * processed in turn on a private copy of the string. Processing stops, with
+ * a warning, at the first entry that cannot be handled.
+ */
+static void processLanguageMapOption (
+        const char *const option, const char *const parameter)
+{
+    char *const buffer = eStrdup (parameter);
+
+    if (strcmp (parameter, "default") == 0)
+    {
+        verbose (" Restoring default language maps:\n");
+        installLanguageMapDefaults ();
+    }
+    else
+    {
+        char *cursor = buffer;
+
+        while (cursor != NULL && *cursor != '\0')
+        {
+            cursor = processLanguageMap (cursor);
+            if (cursor == NULL)
+                error (WARNING, "Unknown language \"%s\" in \"%s\" option", parameter, option);
+        }
+    }
+    eFree (buffer);
+}
+
+/* Handles the "--languages" option, whose parameter is a comma-separated
+ * list of language names.
+ *
+ * A name prefixed with '+' or '-' switches to adding or removing languages;
+ * that mode stays in force for the names that follow. Until a sign is seen
+ * the list replaces the enabled set: all languages are disabled before the
+ * first name is applied. The name "all" stands for every language. Unknown
+ * names draw a warning. The list is split on a private copy of `parameter'.
+ */
+static void processLanguagesOption (
+ const char *const option, const char *const parameter)
+{
+ char *const langs = eStrdup (parameter);
+ enum { Add, Remove, Replace } mode = Replace;
+ boolean first = TRUE;
+ char *lang = langs;
+ const char* prefix = "";
+ verbose (" Enabled languages: ");
+ while (lang != NULL)
+ {
+ char *const end = strchr (lang, ',');
+ if (lang [0] == '+')
+ {
+ ++lang;
+ mode = Add;
+ prefix = "+ ";
+ }
+ else if (lang [0] == '-')
+ {
+ ++lang;
+ mode = Remove;
+ prefix = "- ";
+ }
+ /* Still replacing: start from an empty set of enabled languages. */
+ if (mode == Replace)
+ enableLanguages (FALSE);
+ if (end != NULL)
+ *end = '\0';
+ if (lang [0] != '\0')
+ {
+ if (strcmp (lang, "all") == 0)
+ enableLanguages ((boolean) (mode != Remove));
+ else
+ {
+ const langType language = getNamedLanguage (lang);
+ if (language == LANG_IGNORE)
+ error (WARNING, "Unknown language \"%s\" in \"%s\" option", lang, option);
+ else
+ enableLanguage (language, (boolean) (mode != Remove));
+ }
+ verbose ("%s%s%s", (first ? "" : ", "), prefix, lang);
+ prefix = "";
+ first = FALSE;
+ /* Later names in a replacing list are added to the first one. */
+ if (mode == Replace)
+ mode = Add;
+ }
+ lang = (end != NULL ? end + 1 : NULL);
+ }
+ verbose ("\n");
+ eFree (langs);
+}
+
+/* Handles the "--license" option: prints the program banner followed by the
+ * two parts of the license text, then exits with status 0. Neither argument
+ * is used.
+ */
+static void processLicenseOption (
+ const char *const option __unused__,
+ const char *const parameter __unused__)
+{
+ printProgramIdentification ();
+ puts ("");
+ puts (License1);
+ puts (License2);
+ exit (0);
+}
+
+/* Handles the "--list-kinds" option: prints the tag kinds of the language
+ * named by `parameter', or of all languages when the parameter is empty or
+ * "all" (any case), then exits with status 0. An unknown language name is a
+ * fatal error.
+ */
+static void processListKindsOption (
+        const char *const option, const char *const parameter)
+{
+    const boolean everything = (boolean) (
+        parameter [0] == '\0' || strcasecmp (parameter, "all") == 0);
+
+    if (everything)
+        printLanguageKinds (LANG_AUTO);
+    else
+    {
+        const langType language = getNamedLanguage (parameter);
+
+        if (language != LANG_IGNORE)
+            printLanguageKinds (language);
+        else
+            error (FATAL, "Unknown language \"%s\" in \"%s\" option", parameter, option);
+    }
+    exit (0);
+}
+
+/* Handles the "--list-maps" option: prints the file name mappings of the
+ * language named by `parameter', or of all languages when the parameter is
+ * empty or "all" (any case), then exits with status 0. An unknown language
+ * name is a fatal error.
+ *
+ * Both arguments are used, so neither carries the __unused__ marker.
+ */
+static void processListMapsOption (
+        const char *const option,
+        const char *const parameter)
+{
+    if (parameter [0] == '\0' || strcasecmp (parameter, "all") == 0)
+        printLanguageMaps (LANG_AUTO);
+    else
+    {
+        langType language = getNamedLanguage (parameter);
+        if (language == LANG_IGNORE)
+            error (FATAL, "Unknown language \"%s\" in \"%s\" option", parameter, option);
+        else
+            printLanguageMaps (language);
+    }
+    exit (0);
+}
+
+/* Handles the "--list-languages" option: prints the language list and exits
+ * with status 0. Neither argument is used.
+ */
+static void processListLanguagesOption (
+ const char *const option __unused__,
+ const char *const parameter __unused__)
+{
+ printLanguageList ();
+ exit (0);
+}
+
+/* Handles the "--options" option: reads further options from the file named
+ * by `parameter'. An empty name only draws a warning; a file that cannot be
+ * processed is a fatal error.
+ */
+static void processOptionFile (
+        const char *const option, const char *const parameter)
+{
+    if (parameter [0] == '\0')
+    {
+        error (WARNING, "no option file supplied for \"%s\"", option);
+        return;
+    }
+    if (! parseFileOptions (parameter))
+        error (FATAL | PERROR, "cannot open option file \"%s\"", parameter);
+}
+
+/* Handles the "--sort" option. A false value selects unsorted output, a true
+ * value sorted output, and "f", "fold" or "foldcase" (any case) case-folded
+ * sorting. Any other value is a fatal error.
+ */
+static void processSortOption (
+        const char *const option, const char *const parameter)
+{
+    if (isFalse (parameter))
+    {
+        Option.sorted = SO_UNSORTED;
+        return;
+    }
+    if (isTrue (parameter))
+    {
+        Option.sorted = SO_SORTED;
+        return;
+    }
+    if (strcasecmp (parameter, "f") == 0 ||
+        strcasecmp (parameter, "fold") == 0 ||
+        strcasecmp (parameter, "foldcase") == 0)
+    {
+        Option.sorted = SO_FOLDSORTED;
+        return;
+    }
+    error (FATAL, "Invalid value for \"%s\" option", option);
+}
+
+/* Installs the built-in list of header file extensions and, in verbose mode,
+ * prints the resulting list.
+ */
+static void installHeaderListDefaults (void)
+{
+    Option.headerExt = stringListNewFromArgv (HeaderExtensions);
+    if (! Option.verbose)
+        return;
+
+    printf (" Setting default header extensions: ");
+    stringListPrint (Option.headerExt);
+    putchar ('\n');
+}
+
+/* Handles the "-h" option, which sets the list of header file extensions.
+ * "default" restores the built-in list. Otherwise the extensions in
+ * `parameter' replace the current list, or are appended to it when the
+ * parameter begins with '+'.
+ */
+static void processHeaderListOption (const int option, const char *parameter)
+{
+    const char *extensions = parameter;
+    boolean replace = TRUE;
+
+    /* Guard against "ctags -h *.c": a list that names an existing file is
+     * almost certainly a forgotten argument.
+     */
+    if (doesFileExist (parameter))
+        error (FATAL, "-%c: Invalid list", option);
+
+    if (strcmp (parameter, "default") == 0)
+    {
+        installHeaderListDefaults ();
+        return;
+    }
+
+    if (extensions [0] == '+')
+    {
+        ++extensions;
+        replace = FALSE;
+    }
+    if (Option.headerExt == NULL)
+        Option.headerExt = stringListNew ();
+    verbose (" Header Extensions:\n");
+    addExtensionList (Option.headerExt, extensions, replace);
+}
+
+/*
+ * Token ignore processing
+ */
+
+/* Determines whether or not "name" should be ignored, per the ignore list.
+ *
+ * The first list entry that begins with `name' and has one of these forms
+ * decides the outcome:
+ *   "name"       - returns TRUE;
+ *   "name+"      - returns TRUE and sets *pIgnoreParens to TRUE;
+ *   "name=text"  - returns FALSE, but points *replacement at "text" inside
+ *                  the list entry.
+ * Otherwise FALSE is returned. `pIgnoreParens' and `replacement' may be NULL.
+ * When a list exists, *pIgnoreParens is first set to FALSE; *replacement is
+ * written only in the "name=text" case.
+ */
+extern boolean isIgnoreToken (
+ const char *const name, boolean *const pIgnoreParens,
+ const char **const replacement)
+{
+ boolean result = FALSE;
+
+ if (Option.ignore != NULL)
+ {
+ const size_t nameLen = strlen (name);
+ unsigned int i;
+
+ if (pIgnoreParens != NULL)
+ *pIgnoreParens = FALSE;
+
+ for (i = 0 ; i < stringListCount (Option.ignore) ; ++i)
+ {
+ vString *token = stringListItem (Option.ignore, i);
+
+ /* Only entries that start with the whole of `name' can match. */
+ if (strncmp (vStringValue (token), name, nameLen) == 0)
+ {
+ const size_t tokenLen = vStringLength (token);
+
+ if (nameLen == tokenLen)
+ {
+ result = TRUE;
+ break;
+ }
+ else if (tokenLen == nameLen + 1 &&
+ vStringChar (token, tokenLen - 1) == '+')
+ {
+ result = TRUE;
+ if (pIgnoreParens != NULL)
+ *pIgnoreParens = TRUE;
+ break;
+ }
+ else if (vStringChar (token, nameLen) == '=')
+ {
+ /* Hand back the text following '='; result stays FALSE. */
+ if (replacement != NULL)
+ *replacement = vStringValue (token) + nameLen + 1;
+ break;
+ }
+ }
+ }
+ }
+ return result;
+}
+
+/* Appends `ignoreToken' to the ignore list, creating the list on first use,
+ * and reports the token in verbose mode.
+ */
+static void saveIgnoreToken (vString *const ignoreToken)
+{
+ if (Option.ignore == NULL)
+ Option.ignore = stringListNew ();
+ stringListAdd (Option.ignore, ignoreToken);
+ verbose (" ignore token: %s\n", vStringValue (ignoreToken));
+}
+
+/* Splits `list' at the IGNORE_SEPARATORS characters and saves every
+ * resulting token in the ignore list. The splitting is done on a private
+ * copy, so `list' itself is not modified.
+ */
+static void readIgnoreList (const char *const list)
+{
+    char* const buffer = stringCopy (list);
+    const char *word;
+
+    for (word = strtok (buffer, IGNORE_SEPARATORS) ;
+         word != NULL ;
+         word = strtok (NULL, IGNORE_SEPARATORS))
+    {
+        saveIgnoreToken (vStringNewInit (word));
+    }
+    eFree (buffer);
+}
+
+/* Reads a list of tokens from `fileName' and merges it into the ignore list,
+ * adopting it as the list when none exists yet. Failure to open the file is
+ * a fatal error.
+ */
+static void addIgnoreListFromFile (const char *const fileName)
+{
+    stringList* const loaded = stringListNewFromFile (fileName);
+
+    if (loaded == NULL)
+        error (FATAL | PERROR, "cannot open \"%s\"", fileName);
+
+    if (Option.ignore != NULL)
+        stringListCombine (Option.ignore, loaded);
+    else
+        Option.ignore = loaded;
+}
+
+/* Handles the "-I" option, which maintains the list of tokens to ignore.
+ *
+ *   - A parameter beginning with '@', '.', '/' or '\' names a file of tokens
+ *     (a leading '@' is not part of the file name). On MSDOS, WIN32 and OS2 a
+ *     parameter beginning with a drive letter and ':' is a file name too.
+ *   - "-" clears the list.
+ *   - Anything else is a list of tokens given directly.
+ */
+static void processIgnoreOption (const char *const list)
+{
+    /* strchr() also matches the terminating NUL of its first argument, so an
+     * empty parameter must be excluded here or it is taken for a file name.
+     */
+    if (list [0] != '\0' && strchr ("@./\\", list [0]) != NULL)
+    {
+        const char* fileName = (*list == '@') ? list + 1 : list;
+        addIgnoreListFromFile (fileName);
+    }
+#if defined (MSDOS) || defined (WIN32) || defined (OS2)
+    /* <ctype.h> functions require a value representable as unsigned char. */
+    else if (isalpha ((unsigned char) list [0]) && list [1] == ':')
+        addIgnoreListFromFile (list);
+#endif
+    else if (strcmp (list, "-") == 0)
+    {
+        freeList (&Option.ignore);
+        verbose (" clearing list\n");
+    }
+    else
+        readIgnoreList (list);
+}
+
+/* Handles the "--version" option: prints the program banner and exits with
+ * status 0. Neither argument is used.
+ */
+static void processVersionOption (
+ const char *const option __unused__,
+ const char *const parameter __unused__)
+{
+ printProgramIdentification ();
+ exit (0);
+}
+
+/*
+ * Option tables
+ */
+
+/* Long options that take a parameter and are handled by a dedicated
+ * function. Each entry gives the option name (without the leading "--"), the
+ * handler, and an "init only" flag; when the flag is TRUE,
+ * processParametricOption() calls checkOptionOrder() before the handler,
+ * which rejects the option once a file name has been seen.
+ */
+static parametricOption ParametricOptions [] = {
+ { "etags-include", processEtagsInclude, FALSE },
+ { "exclude", processExcludeOption, FALSE },
+ { "excmd", processExcmdOption, FALSE },
+ { "extra", processExtraTagsOption, FALSE },
+ { "fields", processFieldsOption, FALSE },
+ { "filter-terminator", processFilterTerminatorOption, TRUE },
+ { "format", processFormatOption, TRUE },
+ { "help", processHelpOption, TRUE },
+ { "lang", processLanguageForceOption, FALSE },
+ { "language", processLanguageForceOption, FALSE },
+ { "language-force", processLanguageForceOption, FALSE },
+ { "languages", processLanguagesOption, FALSE },
+ { "langdef", processLanguageDefineOption, FALSE },
+ { "langmap", processLanguageMapOption, FALSE },
+ { "license", processLicenseOption, TRUE },
+ { "list-kinds", processListKindsOption, TRUE },
+ { "list-maps", processListMapsOption, TRUE },
+ { "list-languages", processListLanguagesOption, TRUE },
+ { "options", processOptionFile, FALSE },
+ { "sort", processSortOption, TRUE },
+ { "version", processVersionOption, TRUE },
+};
+
+/* Long options that simply switch a flag on or off. Each entry gives the
+ * option name (without the leading "--"), the address of the flag that
+ * processBooleanOption() writes, and an "init only" flag; when the latter is
+ * TRUE, checkOptionOrder() is called first, which rejects the option once a
+ * file name has been seen.
+ */
+static booleanOption BooleanOptions [] = {
+ { "append", &Option.append, TRUE },
+ { "file-scope", &Option.include.fileScope, FALSE },
+ { "file-tags", &Option.include.fileNames, FALSE },
+ { "filter", &Option.filter, TRUE },
+ { "if0", &Option.if0, FALSE },
+ { "kind-long", &Option.kindLong, TRUE },
+ { "line-directives",&Option.lineDirectives, FALSE },
+ { "links", &Option.followLinks, FALSE },
+#ifdef RECURSE_SUPPORTED
+ { "recurse", &Option.recurse, FALSE },
+#endif
+ { "tag-relative", &Option.tagRelative, TRUE },
+ { "totals", &Option.printTotals, TRUE },
+ { "verbose", &Option.verbose, FALSE },
+};
+
+/*
+ * Generic option parsing
+ */
+
+/* Reports a fatal error if `option' appears after a non-option argument has
+ * already been encountered (NonOptionEncountered is set).
+ * NOTE(review): the message always prefixes the name with a single '-', even
+ * when called for a long option -- confirm whether that is intended.
+ */
+static void checkOptionOrder (const char* const option)
+{
+ if (NonOptionEncountered)
+ error (FATAL, "-%s option may not follow a file name", option);
+}
+
+/* Looks `option' up in the ParametricOptions table and, if present, runs its
+ * handler with `parameter' (after the option-order check for entries marked
+ * init-only). Returns TRUE if the option was found in the table.
+ */
+static boolean processParametricOption (
+        const char *const option, const char *const parameter)
+{
+    const int count = sizeof (ParametricOptions) / sizeof (parametricOption);
+    int index;
+
+    for (index = 0 ; index < count ; ++index)
+    {
+        parametricOption* const candidate = &ParametricOptions [index];
+
+        if (strcmp (option, candidate->name) != 0)
+            continue;
+        if (candidate->initOnly)
+            checkOptionOrder (option);
+        (candidate->handler) (option, parameter);
+        return TRUE;
+    }
+    return FALSE;
+}
+
+/* Converts the parameter of a boolean option to its value. An empty
+ * parameter means TRUE; otherwise the parameter must be one of the accepted
+ * spellings of true or false, and anything else is a fatal error.
+ */
+static boolean getBooleanOption (
+        const char *const option, const char *const parameter)
+{
+    if (parameter [0] != '\0')
+    {
+        if (isFalse (parameter))
+            return FALSE;
+        if (! isTrue (parameter))
+            error (FATAL, "Invalid value for \"%s\" option", option);
+    }
+    return TRUE;
+}
+
+/* Looks `option' up in the BooleanOptions table and, if present, stores the
+ * value denoted by `parameter' in the associated flag (after the
+ * option-order check for entries marked init-only). Returns TRUE if the
+ * option was found in the table.
+ */
+static boolean processBooleanOption (
+        const char *const option, const char *const parameter)
+{
+    const int count = sizeof (BooleanOptions) / sizeof (booleanOption);
+    int index;
+
+    for (index = 0 ; index < count ; ++index)
+    {
+        booleanOption* const candidate = &BooleanOptions [index];
+
+        if (strcmp (option, candidate->name) != 0)
+            continue;
+        if (candidate->initOnly)
+            checkOptionOrder (option);
+        *candidate->pValue = getBooleanOption (option, parameter);
+        return TRUE;
+    }
+    return FALSE;
+}
+
+/* Processes one long option. `option' is the name without the leading "--"
+ * and `parameter' is the text following '=' (empty when there is none).
+ *
+ * The option is offered in turn to the boolean option table, the parametric
+ * option table, the kind option handler and the regex option handler. An
+ * option none of them recognizes is a fatal error, except that "recurse"
+ * only draws a warning on hosts without recursion support.
+ */
+static void processLongOption (
+        const char *const option, const char *const parameter)
+{
+    Assert (parameter != NULL);
+    /* `||', as in processShortOption(): with `&&' a NULL parameter was
+     * dereferenced and an empty one was still reported as "--option=".
+     */
+    if (parameter == NULL || parameter [0] == '\0')
+        verbose (" Option: --%s\n", option);
+    else
+        verbose (" Option: --%s=%s\n", option, parameter);
+
+    if (processBooleanOption (option, parameter))
+        ;
+    else if (processParametricOption (option, parameter))
+        ;
+    else if (processKindOption (option, parameter))
+        ;
+    else if (processRegexOption (option, parameter))
+        ;
+#ifndef RECURSE_SUPPORTED
+    else if (strcmp (option, "recurse") == 0)
+        error (WARNING, "%s option not supported on this host", option);
+#endif
+    else
+        error (FATAL, "Unknown option: --%s", option);
+}
+
+/* Processes one short (single letter) option. `option' is the letter as a
+ * string and `parameter' is its argument, which may be NULL or empty.
+ *
+ * A compound option without a parameter is a fatal error, as is an unknown
+ * letter. Options that must precede all file names go through
+ * checkOptionOrder() first.
+ */
+static void processShortOption (
+ const char *const option, const char *const parameter)
+{
+ if (parameter == NULL || parameter [0] == '\0')
+ verbose (" Option: -%s\n", option);
+ else
+ verbose (" Option: -%s %s\n", option, parameter);
+
+ if (isCompoundOption (*option) && (parameter == NULL || parameter [0] == '\0'))
+ error (FATAL, "Missing parameter for \"%s\" option", option);
+ else switch (*option)
+ {
+ case '?':
+ processHelpOption ("?", NULL);
+ exit (0);
+ break;
+ case 'a':
+ checkOptionOrder (option);
+ Option.append = TRUE;
+ break;
+#ifdef DEBUG
+ case 'b':
+ if (atol (parameter) < 0)
+ error (FATAL, "-%s: Invalid line number", option);
+ Option.breakLine = atol (parameter);
+ break;
+ case 'D':
+ Option.debugLevel = strtol (parameter, NULL, 0);
+ if (debug (DEBUG_STATUS))
+ Option.verbose = TRUE;
+ break;
+#endif
+ case 'B':
+ Option.backward = TRUE;
+ break;
+ case 'e':
+ checkOptionOrder (option);
+ setEtagsMode ();
+ break;
+ case 'f':
+ case 'o':
+ checkOptionOrder (option);
+ /* NOTE(review): the leading '-' check below is chained with `else',
+ * so it is skipped when the option is given more than once --
+ * confirm whether that is intended.
+ */
+ if (Option.tagFileName != NULL)
+ {
+ error (WARNING,
+ "-%s option specified more than once, last value used",
+ option);
+ freeString (&Option.tagFileName);
+ }
+ else if (parameter [0] == '-' && parameter [1] != '\0')
+ error (FATAL, "output file name may not begin with a '-'");
+ Option.tagFileName = stringCopy (parameter);
+ break;
+ case 'F':
+ Option.backward = FALSE;
+ break;
+ case 'h':
+ processHeaderListOption (*option, parameter);
+ break;
+ case 'I':
+ processIgnoreOption (parameter);
+ break;
+ case 'L':
+ if (Option.fileList != NULL)
+ {
+ error (WARNING,
+ "-%s option specified more than once, last value used",
+ option);
+ freeString (&Option.fileList);
+ }
+ Option.fileList = stringCopy (parameter);
+ break;
+ case 'n':
+ Option.locate = EX_LINENUM;
+ break;
+ case 'N':
+ Option.locate = EX_PATTERN;
+ break;
+ case 'R':
+#ifdef RECURSE_SUPPORTED
+ Option.recurse = TRUE;
+#else
+ error (WARNING, "-%s option not supported on this host", option);
+#endif
+ break;
+ case 'u':
+ checkOptionOrder (option);
+ Option.sorted = SO_UNSORTED;
+ break;
+ case 'V':
+ Option.verbose = TRUE;
+ break;
+ case 'w':
+ /* silently ignored */
+ break;
+ case 'x':
+ checkOptionOrder (option);
+ Option.xref = TRUE;
+ break;
+ default:
+ error (FATAL, "Unknown option: -%s", option);
+ break;
+ }
+}
+
+/* Processes the item at the current position of `args' if it is an option,
+ * then advances past it. A non-option item is left in place.
+ */
+extern void parseOption (cookedArgs* const args)
+{
+	Assert (! cArgOff (args));
+	if (args->isOption)
+	{
+		if (args->longOption)
+			processLongOption (args->item, args->parameter);
+		else
+		{
+			const char *parameter = args->parameter;
+			/* processShortOption() accepts a NULL parameter, so do not
+			 * dereference one here while skipping leading blanks.
+			 */
+			while (parameter != NULL && *parameter == ' ')
+				++parameter;
+			processShortOption (args->item, parameter);
+		}
+		cArgForth (args);
+	}
+}
+
+/* Consumes consecutive options from `args'. NonOptionEncountered is cleared
+ * before parsing starts and set only if parsing stopped at a non-option item
+ * rather than at the end of the arguments.
+ */
+extern void parseOptions (cookedArgs* const args)
+{
+	NonOptionEncountered = FALSE;
+	for (;;)
+	{
+		if (cArgOff (args))
+			return;
+		if (! cArgIsOption (args))
+			break;
+		parseOption (args);
+	}
+	NonOptionEncountered = TRUE;
+}
+
+/* File name that checkSameFile() compares its argument against. */
+static const char *CheckFile;
+
+/* Predicate: does `fileName' refer to the same file as CheckFile? */
+static boolean checkSameFile (const char *const fileName)
+{
+	const boolean same = isSameFile (CheckFile, fileName);
+	return same;
+}
+
+/* Reads options from the file `fileName' unless an identical file is already
+ * listed in OptionFiles. Returns TRUE if the file was opened and read.
+ */
+static boolean parseFileOptions (const char* const fileName)
+{
+	boolean fileFound = FALSE;
+	const char* const format = "Considering option file %s: %s\n";
+	CheckFile = fileName;  /* consulted by checkSameFile() */
+	if (stringListHasTest (OptionFiles, checkSameFile))
+		verbose (format, fileName, "already considered");
+	else
+	{
+		FILE* const fp = fopen (fileName, "r");
+		if (fp == NULL)
+			verbose (format, fileName, "not found");
+		else
+		{
+			cookedArgs* const args = cArgNewFromLineFile (fp);
+			vString* file = vStringNewInit (fileName);
+			/* Registered before parsing, so the "already considered" test
+			 * above sees this file while its own options are processed.
+			 */
+			stringListAdd (OptionFiles, file);
+			verbose (format, fileName, "reading...");
+			parseOptions (args);
+			if (NonOptionEncountered)
+				/* no trailing newline, like the other warnings in this file */
+				error (WARNING, "Ignoring non-option in %s", fileName);
+			cArgDelete (args);
+			fclose (fp);
+			fileFound = TRUE;
+		}
+	}
+	return fileFound;
+}
+
+/* Actions to be taken before reading any other options: consumes leading
+ * -V/--verbose options and a leading --options=NONE (which sets
+ * SkipConfiguration), stopping at the first item that is anything else.
+ */
+extern void previewFirstOption (cookedArgs* const args)
+{
+	/* Test cArgOff() first, as parseOptions() does, so that `args' is not
+	 * examined once the last argument has been consumed.
+	 */
+	while (! cArgOff (args) && cArgIsOption (args))
+	{
+		if (strcmp (args->item, "V") == 0 || strcmp (args->item, "verbose") == 0)
+			parseOption (args);
+		else if (strcmp (args->item, "options") == 0 &&
+				strcmp (args->parameter, "NONE") == 0)
+		{
+			fprintf (stderr, "No options will be read from files or environment\n");
+			SkipConfiguration = TRUE;
+			cArgForth (args);
+		}
+		else
+			break;
+	}
+}
+
+/* Reads options from the file `leafname' located in `directory'. */
+static void parseConfigurationFileOptionsInDirectoryWithLeafname (const char* directory, const char* leafname)
+{
+	vString* const fullPath = combinePathAndFile (directory, leafname);
+	const char* const name = vStringValue (fullPath);
+
+	parseFileOptions (name);
+	vStringDelete (fullPath);
+}
+
+/* Reads every known per-directory configuration file found in `directory':
+ * ".ctags" always, and "ctags.cnf" when MSDOS_STYLE_PATH is defined.
+ */
+static void parseConfigurationFileOptionsInDirectory (const char* directory)
+{
+	static const char* const leafnames [] = {
+		".ctags",
+#ifdef MSDOS_STYLE_PATH
+		"ctags.cnf",
+#endif
+	};
+	const unsigned int count = sizeof (leafnames) / sizeof (leafnames [0]);
+	unsigned int i;
+
+	for (i = 0 ; i < count ; ++i)
+		parseConfigurationFileOptionsInDirectoryWithLeafname (
+				directory, leafnames [i]);
+}
+
+/* Reads the configuration files in order: the fixed system-wide locations,
+ * then the user's home directory, then the current directory. We parse
+ * .ctags on all systems, and additionally ctags.cnf on DOS.
+ */
+static void parseConfigurationFileOptions (void)
+{
+	const char* const homeDir = getenv ("HOME");
+
+	/* fixed locations */
+#ifdef CUSTOM_CONFIGURATION_FILE
+	parseFileOptions (CUSTOM_CONFIGURATION_FILE);
+#endif
+#ifdef MSDOS_STYLE_PATH
+	parseFileOptions ("/ctags.cnf");
+#endif
+	parseFileOptions ("/etc/ctags.conf");
+	parseFileOptions ("/usr/local/etc/ctags.conf");
+
+	/* home directory */
+	if (homeDir != NULL)
+		parseConfigurationFileOptionsInDirectory (homeDir);
+#ifdef MSDOS_STYLE_PATH
+	else
+	{
+		/*
+		 * Windows users don't usually set HOME.
+		 * The OS sets HOMEDRIVE and HOMEPATH for them.
+		 */
+		const char* const drive = getenv ("HOMEDRIVE");
+		const char* const path = getenv ("HOMEPATH");
+		if (drive != NULL && path != NULL)
+		{
+			vString* const composed = vStringNew ();
+			vStringCatS (composed, drive);
+			vStringCatS (composed, path);
+			parseConfigurationFileOptionsInDirectory (vStringValue (composed));
+			vStringDelete (composed);
+		}
+	}
+#endif
+
+	/* current directory */
+	parseConfigurationFileOptionsInDirectory (".");
+}
+
+/* Reads options from the environment. In etags mode the variable named by
+ * ETAGS_ENVIRONMENT is consulted first; the one named by CTAGS_ENVIRONMENT is
+ * used when that is unset or when not in etags mode.
+ */
+static void parseEnvironmentOptions (void)
+{
+	const char *envOptions = NULL;
+	const char* var = NULL;
+
+	if (Option.etags)
+	{
+		var = ETAGS_ENVIRONMENT;
+		envOptions = getenv (var);
+	}
+	if (envOptions == NULL)
+	{
+		var = CTAGS_ENVIRONMENT;
+		envOptions = getenv (var);
+	}
+	if (envOptions != NULL && envOptions [0] != '\0')
+	{
+		cookedArgs* const args = cArgNewFromString (envOptions);
+		/* name the variable actually read, which may be the etags one */
+		verbose ("Reading options from $%s\n", var);
+		parseOptions (args);
+		cArgDelete (args);
+		if (NonOptionEncountered)
+			error (WARNING, "Ignoring non-option in %s variable", var);
+	}
+}
+
+/* Reads options from the configuration files and then from the environment,
+ * unless SkipConfiguration has been set.
+ */
+extern void readOptionConfiguration (void)
+{
+	if (SkipConfiguration)
+		return;
+
+	parseConfigurationFileOptions ();
+	parseEnvironmentOptions ();
+}
+
+/*
+* Option initialization
+*/
+
+/* Establishes the option defaults: creates the OptionFiles list, installs
+ * the default header list and language maps, and registers the default
+ * exclude patterns.
+ */
+extern void initOptions (void)
+{
+	/* always excluded by default */
+	static const char* const defaultExcludes [] = {
+		"{arch}", ".arch-ids", ".arch-inventory", "autom4te.cache",
+		"BitKeeper", ".bzr", ".bzrignore", "CVS", ".cvsignore", "_darcs",
+		".deps", "EIFGEN", ".git", ".hg", "PENDING", "RCS", "RESYNC",
+		"SCCS", ".svn"
+	};
+	const unsigned int excludeCount =
+		sizeof (defaultExcludes) / sizeof (defaultExcludes [0]);
+	unsigned int i;
+
+	OptionFiles = stringListNew ();
+	verbose ("Setting option defaults\n");
+	installHeaderListDefaults ();
+	verbose (" Installing default language mappings:\n");
+	installLanguageMapDefaults ();
+
+	verbose (" Installing default exclude patterns:\n");
+	for (i = 0 ; i < excludeCount ; ++i)
+		processExcludeOption (NULL, defaultExcludes [i]);
+}
+
+/* Releases the strings and string lists owned by the option module. */
+extern void freeOptionResources (void)
+{
+	char** const strings [] = {
+		&Option.tagFileName, &Option.fileList, &Option.filterTerminator
+	};
+	stringList** const lists [] = {
+		&Excluded, &Option.ignore, &Option.headerExt,
+		&Option.etagsInclude, &OptionFiles
+	};
+	unsigned int i;
+
+	for (i = 0 ; i < sizeof (strings) / sizeof (strings [0]) ; ++i)
+		freeString (strings [i]);
+	for (i = 0 ; i < sizeof (lists) / sizeof (lists [0]) ; ++i)
+		freeList (lists [i]);
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/options.h b/options.h
new file mode 100644
index 0000000..34150e7
--- /dev/null
+++ b/options.h
@@ -0,0 +1,154 @@
+/*
+* $Id: options.h 443 2006-05-30 04:37:13Z darren $
+*
+* Copyright (c) 1998-2003, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* Defines external interface to option processing.
+*/
+#ifndef _OPTIONS_H
+#define _OPTIONS_H
+
+#if defined(OPTION_WRITE) || defined(VAXC)
+# define CONST_OPTION
+#else
+# define CONST_OPTION const
+#endif
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#include <stdarg.h>
+
+#include "args.h"
+#include "parse.h"
+#include "strlist.h"
+#include "vstring.h"
+
+/*
+* DATA DECLARATIONS
+*/
+
+typedef enum { OPTION_NONE, OPTION_SHORT, OPTION_LONG } optionType;
+
+/* Cursor over a sequence of arguments, exposing the current item in
+ * "cooked" form: whether it is an option, of which kind, and its parameter.
+ */
+typedef struct sCookedArgs {
+ /* private */
+ Arguments* args;
+ char *shortOptions;
+ char simple[2];
+ boolean isOption; /* is the current item an option? */
+ boolean longOption; /* selects long rather than short option processing */
+ const char* parameter; /* parameter handed to the option processing routine */
+ /* public */
+ char* item; /* current item; for an option, its name without leading dashes */
+} cookedArgs;
+
+typedef enum eLocate {
+ EX_MIX, /* line numbers for defines, patterns otherwise */
+ EX_LINENUM, /* -n only line numbers in tag file */
+ EX_PATTERN /* -N only patterns in tag file */
+} exCmd;
+
+typedef enum sortType {
+ SO_UNSORTED,
+ SO_SORTED,
+ SO_FOLDSORTED
+} sortType;
+
+struct sInclude {
+ boolean fileNames; /* include tags for source file names */
+ boolean qualifiedTags; /* include tags for qualified class members */
+ boolean fileScope; /* include tags of file scope only */
+};
+
+struct sExtFields { /* extension field content control */
+ boolean access;
+ boolean fileScope;
+ boolean implementation;
+ boolean inheritance;
+ boolean kind;
+ boolean kindKey;
+ boolean kindLong;
+ boolean language;
+ boolean lineNumber;
+ boolean scope;
+ boolean signature;
+ boolean typeRef;
+};
+
+/* This stores the command line options.
+ */
+typedef struct sOptionValues {
+ struct sInclude include;/* --extra extra tag inclusion */
+ struct sExtFields extensionFields;/* --fields extension field control */
+ stringList* ignore; /* -I name of file containing tokens to ignore */
+ boolean append; /* -a append to "tags" file */
+ boolean backward; /* -B regexp patterns search backwards */
+ boolean etags; /* -e output Emacs style tags file */
+ exCmd locate; /* --excmd EX command used to locate tag */
+ boolean recurse; /* -R recurse into directories */
+ sortType sorted; /* -u,--sort sort tags */
+ boolean verbose; /* -V verbose */
+ boolean xref; /* -x generate xref output instead */
+ char *fileList; /* -L name of file containing names of files */
+ char *tagFileName; /* -o name of tags file */
+ stringList* headerExt; /* -h header extensions */
+ stringList* etagsInclude;/* --etags-include list of TAGS files to include*/
+ unsigned int tagFileFormat;/* --format tag file format (level) */
+ boolean if0; /* --if0 examine code within "#if 0" branch */
+ boolean kindLong; /* --kind-long */
+ langType language; /* --lang specified language override */
+ boolean followLinks; /* --link follow symbolic links? */
+ boolean filter; /* --filter behave as filter: files in, tags out */
+ char* filterTerminator; /* --filter-terminator string to output */
+ boolean tagRelative; /* --tag-relative file paths relative to tag file */
+ boolean printTotals; /* --totals print cumulative statistics */
+ boolean lineDirectives; /* --linedirectives process #line directives */
+#ifdef DEBUG
+ long debugLevel; /* -D debugging output */
+ unsigned long breakLine;/* -b source line at which to call lineBreak() */
+#endif
+} optionValues;
+
+/*
+* GLOBAL VARIABLES
+*/
+extern CONST_OPTION optionValues Option;
+
+/*
+* FUNCTION PROTOTYPES
+*/
+extern void verbose (const char *const format, ...) __printf__ (1, 2);
+extern void freeList (stringList** const pString);
+extern void setDefaultTagFileName (void);
+extern void checkOptions (void);
+extern boolean filesRequired (void);
+extern void testEtagsInvocation (void);
+
+extern cookedArgs* cArgNewFromString (const char* string);
+extern cookedArgs* cArgNewFromArgv (char* const* const argv);
+extern cookedArgs* cArgNewFromFile (FILE* const fp);
+extern cookedArgs* cArgNewFromLineFile (FILE* const fp);
+extern void cArgDelete (cookedArgs* const current);
+extern boolean cArgOff (cookedArgs* const current);
+extern boolean cArgIsOption (cookedArgs* const current);
+extern const char* cArgItem (cookedArgs* const current);
+extern void cArgForth (cookedArgs* const current);
+
+extern boolean isExcludedFile (const char* const name);
+extern boolean isIncludeFile (const char *const fileName);
+extern boolean isIgnoreToken (const char *const name, boolean *const pIgnoreParens, const char **const replacement);
+extern void parseOption (cookedArgs* const cargs);
+extern void parseOptions (cookedArgs* const cargs);
+extern void previewFirstOption (cookedArgs* const cargs);
+extern void readOptionConfiguration (void);
+extern void initOptions (void);
+extern void freeOptionResources (void);
+
+#endif /* _OPTIONS_H */
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/parse.c b/parse.c
new file mode 100644
index 0000000..0b5e2c3
--- /dev/null
+++ b/parse.c
@@ -0,0 +1,677 @@
+/*
+* $Id: parse.c 597 2007-07-31 05:35:30Z dhiebert $
+*
+* Copyright (c) 1996-2003, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* This module contains functions for managing source languages and
+* dispatching files to the appropriate language parser.
+*/
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#include <string.h>
+
+#include "debug.h"
+#include "entry.h"
+#include "main.h"
+#define OPTION_WRITE
+#include "options.h"
+#include "parsers.h"
+#include "read.h"
+#include "routines.h"
+#include "vstring.h"
+
+/*
+* DATA DEFINITIONS
+*/
+static parserDefinitionFunc* BuiltInParsers[] = { PARSER_LIST };
+static parserDefinition** LanguageTable = NULL;
+static unsigned int LanguageCount = 0;
+
+/*
+* FUNCTION DEFINITIONS
+*/
+
+/* Makes a tag entry called `name' of kind `kinds [kind]', provided that kind
+ * is enabled and `name' is a non-empty string.
+ */
+extern void makeSimpleTag (
+		const vString* const name, kindOption* const kinds, const int kind)
+{
+	const kindOption* const selected = &kinds [kind];
+	tagEntryInfo entry;
+
+	if (! selected->enabled)
+		return;
+	if (name == NULL || ! (vStringLength (name) > 0))
+		return;
+
+	initTagEntry (&entry, vStringValue (name));
+	entry.kindName = selected->name;
+	entry.kind = selected->letter;
+	makeTagEntry (&entry);
+}
+
+/*
+* parserDescription mapping management
+*/
+
+/* Allocates a zero-filled parser definition owning a copy of `name'. */
+extern parserDefinition* parserNew (const char* name)
+{
+	parserDefinition* const def = xCalloc (1, parserDefinition);
+
+	def->name = eStrdup (name);
+	return def;
+}
+
+/* Returns the name of `language', or "unknown" for LANG_IGNORE. */
+extern const char *getLanguageName (const langType language)
+{
+	if (language == LANG_IGNORE)
+		return "unknown";
+
+	Assert (0 <= language && language < (int) LanguageCount);
+	return LanguageTable [language]->name;
+}
+
+/* Returns the first language whose name matches `name' without regard to
+ * case, or LANG_IGNORE if there is none.
+ */
+extern langType getNamedLanguage (const char *const name)
+{
+	unsigned int i;
+
+	Assert (name != NULL);
+	for (i = 0 ; i < LanguageCount ; ++i)
+	{
+		const char* const candidate = LanguageTable [i]->name;
+		if (candidate != NULL && strcasecmp (name, candidate) == 0)
+			return (langType) i;
+	}
+	return LANG_IGNORE;
+}
+
+/* Returns the first language whose current extension list matches
+ * `extension', or LANG_IGNORE if there is none.
+ */
+static langType getExtensionLanguage (const char *const extension)
+{
+	unsigned int i;
+
+	for (i = 0 ; i < LanguageCount ; ++i)
+	{
+		stringList* const list = LanguageTable [i]->currentExtensions;
+		if (list == NULL)
+			continue;
+		if (stringListExtensionMatched (list, extension))
+			return (langType) i;
+	}
+	return LANG_IGNORE;
+}
+
+/* Returns the first language one of whose current file name patterns matches
+ * the base name of `fileName', or LANG_IGNORE if there is none.
+ */
+static langType getPatternLanguage (const char *const fileName)
+{
+	const char* const leaf = baseFilename (fileName);
+	unsigned int i;
+
+	for (i = 0 ; i < LanguageCount ; ++i)
+	{
+		stringList* const list = LanguageTable [i]->currentPatterns;
+		if (list == NULL)
+			continue;
+		if (stringListFileMatched (list, leaf))
+			return (langType) i;
+	}
+	return LANG_IGNORE;
+}
+
+#ifdef SYS_INTERPRETER
+
+/* The name of the language interpreter, either directly or as the argument
+ * to "env". Returns a new vString holding the first whitespace-delimited
+ * word of `cmd' that is not "env"; it is empty if there is no such word.
+ */
+static vString* determineInterpreter (const char* const cmd)
+{
+	vString* const interpreter = vStringNew ();
+	const char* p = cmd;
+	do
+	{
+		vStringClear (interpreter);
+		/* The <ctype.h> functions require a value representable as unsigned
+		 * char, which a plain char holding a high-bit byte need not be.
+		 */
+		for ( ; isspace ((unsigned char) *p) ; ++p)
+			; /* no-op */
+		for ( ; *p != '\0' && ! isspace ((unsigned char) *p) ; ++p)
+			vStringPut (interpreter, (int) *p);
+		vStringTerminate (interpreter);
+	} while (strcmp (vStringValue (interpreter), "env") == 0);
+	return interpreter;
+}
+
+/* Determines a language from the "#!" line of `fileName', if it has one.
+ * The interpreter name is tried first as an extension and then as a language
+ * name. Returns LANG_IGNORE if the file cannot be opened, has no "#!" line,
+ * or names no known interpreter.
+ */
+static langType getInterpreterLanguage (const char *const fileName)
+{
+	langType result = LANG_IGNORE;
+	FILE* const fp = fopen (fileName, "r");
+	if (fp != NULL)
+	{
+		vString* const vLine = vStringNew ();
+		const char* const line = readLine (vLine, fp);
+		if (line != NULL && line [0] == '#' && line [1] == '!')
+		{
+			const char* path = line + 2;
+			const char* cmd;
+			const char* p;
+			vString* interpreter;
+
+			while (isspace ((unsigned char) *path))
+				++path;
+			/* Strip the directory part of the interpreter path only. A
+			 * slash in a later argument (e.g. "-I/some/dir") must not be
+			 * mistaken for the end of that path.
+			 */
+			cmd = path;
+			for (p = path ; *p != '\0' && ! isspace ((unsigned char) *p) ; ++p)
+				if (*p == '/')
+					cmd = p + 1;
+			interpreter = determineInterpreter (cmd);
+			result = getExtensionLanguage (vStringValue (interpreter));
+			if (result == LANG_IGNORE)
+				result = getNamedLanguage (vStringValue (interpreter));
+			vStringDelete (interpreter);
+		}
+		vStringDelete (vLine);
+		fclose (fp);
+	}
+	return result;
+}
+
+#endif
+
+/* Returns the language to use for `fileName'. An explicit Option.language is
+ * returned as is; for LANG_AUTO the file's extension is tried first, then the
+ * file name patterns and, with SYS_INTERPRETER, the interpreter line of an
+ * executable file.
+ */
+extern langType getFileLanguage (const char *const fileName)
+{
+	langType detected;
+
+	if (Option.language != LANG_AUTO)
+		return Option.language;
+
+	detected = getExtensionLanguage (fileExtension (fileName));
+	if (detected == LANG_IGNORE)
+		detected = getPatternLanguage (fileName);
+#ifdef SYS_INTERPRETER
+	if (detected == LANG_IGNORE)
+	{
+		fileStatus* const info = eStat (fileName);
+		if (info->isExecutable)
+			detected = getInterpreterLanguage (fileName);
+	}
+#endif
+	return detected;
+}
+
+/* Prints the current map of `language' on one line without a newline: each
+ * file name pattern as "(pattern)", then each extension as ".ext", separated
+ * by single spaces.
+ */
+extern void printLanguageMap (const langType language)
+{
+	boolean first = TRUE;
+	unsigned int i;
+	stringList* map;
+	/* validate the index before it is used on LanguageTable */
+	Assert (0 <= language && language < (int) LanguageCount);
+	map = LanguageTable [language]->currentPatterns;
+	for (i = 0 ; map != NULL && i < stringListCount (map) ; ++i)
+	{
+		printf ("%s(%s)", (first ? "" : " "),
+				vStringValue (stringListItem (map, i)));
+		first = FALSE;
+	}
+	map = LanguageTable [language]->currentExtensions;
+	for (i = 0 ; map != NULL && i < stringListCount (map) ; ++i)
+	{
+		printf ("%s.%s", (first ? "" : " "),
+				vStringValue (stringListItem (map, i)));
+		first = FALSE;
+	}
+}
+
+/* Replaces the current pattern and extension lists of `language' with fresh
+ * copies of the parser's defaults (empty lists where it declares none). In
+ * verbose mode the resulting map is printed.
+ */
+extern void installLanguageMapDefault (const langType language)
+{
+	parserDefinition* def;
+
+	Assert (0 <= language && language < (int) LanguageCount);
+	def = LanguageTable [language];
+
+	/* discard whatever is currently installed */
+	if (def->currentPatterns != NULL)
+		stringListDelete (def->currentPatterns);
+	if (def->currentExtensions != NULL)
+		stringListDelete (def->currentExtensions);
+
+	def->currentPatterns = (def->patterns != NULL)
+		? stringListNewFromArgv (def->patterns)
+		: stringListNew ();
+	def->currentExtensions = (def->extensions != NULL)
+		? stringListNewFromArgv (def->extensions)
+		: stringListNew ();
+
+	if (Option.verbose)
+		printLanguageMap (language);
+	verbose ("\n");
+}
+
+/* Installs the default map of every language, announcing each one in
+ * verbose mode.
+ */
+extern void installLanguageMapDefaults (void)
+{
+	unsigned int index = 0;
+
+	while (index < LanguageCount)
+	{
+		const langType language = (langType) index;
+		verbose (" %s: ", getLanguageName (language));
+		installLanguageMapDefault (language);
+		++index;
+	}
+}
+
+/* Empties the current pattern and extension lists of `language'. */
+extern void clearLanguageMap (const langType language)
+{
+	parserDefinition* def;
+
+	Assert (0 <= language && language < (int) LanguageCount);
+	def = LanguageTable [language];
+	stringListClear (def->currentPatterns);
+	stringListClear (def->currentExtensions);
+}
+
+/* Appends a copy of the file name pattern `ptrn' to the current patterns of
+ * `language', creating the list if the language has none yet.
+ */
+extern void addLanguagePatternMap (const langType language, const char* ptrn)
+{
+	parserDefinition* def;
+
+	Assert (0 <= language && language < (int) LanguageCount);
+	def = LanguageTable [language];
+	if (def->currentPatterns == NULL)
+		def->currentPatterns = stringListNew ();
+	stringListAdd (def->currentPatterns, vStringNewInit (ptrn));
+}
+
+/* Removes `extension' from the first language whose current extension list
+ * holds it. Returns TRUE if an extension was removed.
+ */
+extern boolean removeLanguageExtensionMap (const char *const extension)
+{
+	unsigned int i;
+
+	for (i = 0 ; i < LanguageCount ; ++i)
+	{
+		stringList* const list = LanguageTable [i]->currentExtensions;
+		if (list == NULL)
+			continue;
+		if (stringListRemoveExtension (list, extension))
+		{
+			verbose (" (removed from %s)", getLanguageName (i));
+			return TRUE;
+		}
+	}
+	return FALSE;
+}
+
+/* Maps `extension' to `language': the extension is first removed from any
+ * language currently holding it, then a copy is appended to the current
+ * extensions of `language', creating that list if it does not exist yet.
+ */
+extern void addLanguageExtensionMap (
+		const langType language, const char* extension)
+{
+	/* The copy is taken before the removal below.
+	 * NOTE(review): presumably because `extension' may point into a list
+	 * entry that the removal deletes -- confirm against callers.
+	 */
+	vString* const str = vStringNewInit (extension);
+	parserDefinition* lang;
+	Assert (0 <= language && language < (int) LanguageCount);
+	lang = LanguageTable [language];
+	removeLanguageExtensionMap (extension);
+	/* as in addLanguagePatternMap(), tolerate a language without a list */
+	if (lang->currentExtensions == NULL)
+		lang->currentExtensions = stringListNew ();
+	stringListAdd (lang->currentExtensions, str);
+}
+
+/* Sets the enabled flag of `language' to `state'. */
+extern void enableLanguage (const langType language, const boolean state)
+{
+	parserDefinition* def;
+
+	Assert (0 <= language && language < (int) LanguageCount);
+	def = LanguageTable [language];
+	def->enabled = state;
+}
+
+/* Sets the enabled flag of every language to `state'. */
+extern void enableLanguages (const boolean state)
+{
+	unsigned int index = 0;
+
+	while (index < LanguageCount)
+	{
+		enableLanguage ((langType) index, state);
+		++index;
+	}
+}
+
+/* Calls the initialization routine of each language that defines one,
+ * passing it the language's index.
+ */
+static void initializeParsers (void)
+{
+	unsigned int i;
+
+	for (i = 0 ; i < LanguageCount ; ++i)
+	{
+		const parserInitialize init = LanguageTable [i]->initialize;
+		if (init != NULL)
+			init ((langType) i);
+	}
+}
+
+/* Builds LanguageTable from the built-in parser list. Each definition must
+ * have a name and either be a regex parser (accepted only with HAVE_REGEX)
+ * or define exactly one of `parser' and `parser2'. Accepted parsers receive
+ * consecutive ids; afterwards all languages are enabled and the parsers'
+ * initialization routines are run.
+ */
+extern void initializeParsing (void)
+{
+	unsigned int builtInCount;
+	unsigned int i;
+
+	builtInCount = sizeof (BuiltInParsers) / sizeof (BuiltInParsers [0]);
+	LanguageTable = xMalloc (builtInCount, parserDefinition*);
+
+	verbose ("Installing parsers: ");
+	for (i = 0 ; i < builtInCount ; ++i)
+	{
+		parserDefinition* const def = (*BuiltInParsers [i]) ();
+		if (def != NULL)
+		{
+			boolean accepted = FALSE;
+			if (def->name == NULL || def->name[0] == '\0')
+				error (FATAL, "parser definition must contain name\n");
+			else if (def->regex)
+			{
+#ifdef HAVE_REGEX
+				def->parser = findRegexTags;
+				accepted = TRUE;
+#endif
+			}
+			else if ((def->parser == NULL) == (def->parser2 == NULL))
+				error (FATAL,
+		"%s parser definition must define one and only one parsing routine\n",
+					def->name);
+			else
+				accepted = TRUE;
+			if (accepted)
+			{
+				/* Separate from the previously listed parser, if any. The
+				 * loop index cannot be used for this, as earlier
+				 * definitions may have been rejected.
+				 */
+				verbose ("%s%s", LanguageCount > 0 ? ", " : "", def->name);
+				def->id = LanguageCount++;
+				LanguageTable [def->id] = def;
+			}
+			else
+			{
+				/* A rejected definition never enters LanguageTable, so
+				 * freeParserResources() will not see it; release it here
+				 * the same way.
+				 */
+				if (def->name != NULL)
+					eFree (def->name);
+				eFree (def);
+			}
+		}
+	}
+	verbose ("\n");
+	enableLanguages (TRUE);
+	initializeParsers ();
+}
+
+/* Releases every parser definition together with its name and current
+ * maps, then the language table itself, leaving the table empty.
+ */
+extern void freeParserResources (void)
+{
+	unsigned int index = 0;
+
+	while (index < LanguageCount)
+	{
+		parserDefinition* const def = LanguageTable [index];
+
+		freeList (&def->currentPatterns);
+		freeList (&def->currentExtensions);
+		eFree (def->name);
+		def->name = NULL;
+		eFree (def);
+		++index;
+	}
+
+	if (LanguageTable != NULL)
+		eFree (LanguageTable);
+	LanguageTable = NULL;
+	LanguageCount = 0;
+}
+
+/*
+* Option parsing
+*/
+
+/* Defines a new, enabled regex language named `parameter' and appends it to
+ * the language table. Warns and does nothing if the name is empty or already
+ * taken, or if regex support is not compiled in.
+ */
+extern void processLanguageDefineOption (
+		const char *const option, const char *const parameter __unused__)
+{
+#ifdef HAVE_REGEX
+	parserDefinition* def;
+	unsigned int slot;
+
+	if (parameter [0] == '\0')
+	{
+		error (WARNING, "No language specified for \"%s\" option", option);
+		return;
+	}
+	if (getNamedLanguage (parameter) != LANG_IGNORE)
+	{
+		error (WARNING, "Language \"%s\" already defined", parameter);
+		return;
+	}
+
+	slot = LanguageCount++;
+	def = parserNew (parameter);
+	def->parser = findRegexTags;
+	def->currentPatterns = stringListNew ();
+	def->currentExtensions = stringListNew ();
+	def->regex = TRUE;
+	def->enabled = TRUE;
+	def->id = slot;
+	LanguageTable = xRealloc (LanguageTable, slot + 1, parserDefinition*);
+	LanguageTable [slot] = def;
+#else
+	error (WARNING, "regex support not available; required for --%s option",
+		option);
+#endif
+}
+
+/* Returns the first kind of `language' whose letter is `flag', or NULL if
+ * the language has no such kind.
+ */
+static kindOption *langKindOption (const langType language, const int flag)
+{
+	const parserDefinition* def;
+	unsigned int i;
+
+	Assert (0 <= language && language < (int) LanguageCount);
+	def = LanguageTable [language];
+	for (i = 0 ; i < def->kindCount ; ++i)
+	{
+		kindOption* const candidate = &def->kinds [i];
+		if (candidate->letter == flag)
+			return candidate;
+	}
+	return NULL;
+}
+
+/* Disables every kind of `language'; regex languages are delegated to
+ * disableRegexKinds().
+ */
+static void disableLanguageKinds (const langType language)
+{
+	const parserDefinition* def;
+	unsigned int i;
+
+	Assert (0 <= language && language < (int) LanguageCount);
+	def = LanguageTable [language];
+	if (def->regex)
+	{
+		disableRegexKinds (language);
+		return;
+	}
+	for (i = 0 ; i < def->kindCount ; ++i)
+		def->kinds [i].enabled = FALSE;
+}
+
+/* Sets the enabled state of the kind with letter `kind' in `language' to
+ * `mode'. Regex languages are delegated to enableRegexKind(). Returns FALSE
+ * if a non-regex language has no kind with that letter.
+ */
+static boolean enableLanguageKind (
+		const langType language, const int kind, const boolean mode)
+{
+	kindOption* target;
+
+	if (LanguageTable [language]->regex)
+		return enableRegexKind (language, kind, mode);
+
+	target = langKindOption (language, kind);
+	if (target == NULL)
+		return FALSE;
+	target->enabled = mode;
+	return TRUE;
+}
+
+/* Applies the kind specification `parameter' to `language'. '+' and '-'
+ * switch to enabling and disabling mode; every other character is a kind
+ * letter set according to the mode in effect, which starts as enabling. If
+ * the specification does not begin with '+' or '-', all kinds are disabled
+ * first.
+ */
+static void processLangKindOption (
+		const langType language, const char *const option,
+		const char *const parameter)
+{
+	boolean enabling = TRUE;
+	const char *cursor;
+
+	Assert (0 <= language && language < (int) LanguageCount);
+	if (parameter [0] != '+' && parameter [0] != '-')
+		disableLanguageKinds (language);
+
+	for (cursor = parameter ; *cursor != '\0' ; ++cursor)
+	{
+		const int letter = *cursor;
+		if (letter == '+')
+			enabling = TRUE;
+		else if (letter == '-')
+			enabling = FALSE;
+		else if (! enableLanguageKind (language, letter, enabling))
+			error (WARNING, "Unsupported parameter '%c' for --%s option",
+				letter, option);
+	}
+}
+
+/* Handles options of the form "<lang>-kinds" and "<lang>-types", where the
+ * language name ends at the first '-'. Returns TRUE if `option' has that
+ * form, even when the language turns out to be unknown, and FALSE otherwise.
+ */
+extern boolean processKindOption (
+		const char *const option, const char *const parameter)
+{
+	const char* const dash = strchr (option, '-');
+	const char* suffix;
+	vString* langName;
+	langType language;
+
+	if (dash == NULL)
+		return FALSE;
+	suffix = dash + 1;
+	if (strcmp (suffix, "kinds") != 0 && strcmp (suffix, "types") != 0)
+		return FALSE;
+
+	langName = vStringNew ();
+	vStringNCopyS (langName, option, dash - option);
+	language = getNamedLanguage (vStringValue (langName));
+	if (language != LANG_IGNORE)
+		processLangKindOption (language, option, parameter);
+	else
+		error (WARNING, "Unknown language \"%s\" in \"%s\" option", vStringValue (langName), option);
+	vStringDelete (langName);
+	return TRUE;
+}
+
+/* Prints one kind as "<letter> <text>": the text is the kind's description,
+ * else its name, else nothing, and " [off]" follows a disabled kind.
+ */
+static void printLanguageKind (const kindOption* const kind, boolean indent)
+{
+	const char *prefix = "";
+	const char *text = "";
+	const char *suffix = "";
+
+	if (indent)
+		prefix = " ";
+	if (kind->description != NULL)
+		text = kind->description;
+	else if (kind->name != NULL)
+		text = kind->name;
+	if (! kind->enabled)
+		suffix = " [off]";
+
+	printf ("%s%c %s%s\n", prefix, kind->letter, text, suffix);
+}
+
+/* Prints the kinds of `language', followed by its regex kinds. Nothing is
+ * printed for a non-regex language without a kind table.
+ */
+static void printKinds (langType language, boolean indent)
+{
+	const parserDefinition* def;
+	unsigned int i;
+
+	Assert (0 <= language && language < (int) LanguageCount);
+	def = LanguageTable [language];
+	if (def->kinds == NULL && ! def->regex)
+		return;
+
+	for (i = 0 ; i < def->kindCount ; ++i)
+		printLanguageKind (&def->kinds [i], indent);
+	printRegexKinds (language, indent);
+}
+
+/* Prints the kinds of `language'. For LANG_AUTO every language is listed,
+ * each under a heading giving its name and whether it is disabled.
+ */
+extern void printLanguageKinds (const langType language)
+{
+	unsigned int i;
+
+	if (language != LANG_AUTO)
+	{
+		printKinds (language, FALSE);
+		return;
+	}
+
+	for (i = 0 ; i < LanguageCount ; ++i)
+	{
+		const parserDefinition* const def = LanguageTable [i];
+		const char* const status = def->enabled ? "" : " [disabled]";
+		printf ("%s%s\n", def->name, status);
+		printKinds (i, TRUE);
+	}
+}
+
+/* Prints one line for `language': its name, then its current extensions as
+ * "*.ext", then its current file name patterns.
+ */
+static void printMaps (const langType language)
+{
+	const parserDefinition* def;
+	const stringList* extensions;
+	const stringList* patterns;
+	unsigned int i;
+
+	Assert (0 <= language && language < (int) LanguageCount);
+	def = LanguageTable [language];
+	extensions = def->currentExtensions;
+	patterns = def->currentPatterns;
+
+	printf ("%-8s", def->name);
+	for (i = 0 ; extensions != NULL && i < stringListCount (extensions) ; ++i)
+		printf (" *.%s", vStringValue (stringListItem (extensions, i)));
+	for (i = 0 ; patterns != NULL && i < stringListCount (patterns) ; ++i)
+		printf (" %s", vStringValue (stringListItem (patterns, i)));
+	putchar ('\n');
+}
+
+/* Prints the map of `language', or of every language for LANG_AUTO. */
+extern void printLanguageMaps (const langType language)
+{
+	unsigned int i;
+
+	if (language != LANG_AUTO)
+	{
+		printMaps (language);
+		return;
+	}
+	for (i = 0 ; i < LanguageCount ; ++i)
+		printMaps (i);
+}
+
+/* Prints the name of `language', marked " [disabled]" when it is not
+ * enabled. A non-regex language without a kind table is not listed.
+ */
+static void printLanguage (const langType language)
+{
+	const parserDefinition* def;
+
+	Assert (0 <= language && language < (int) LanguageCount);
+	def = LanguageTable [language];
+	if (def->kinds == NULL && ! def->regex)
+		return;
+	printf ("%s%s\n", def->name, def->enabled ? "" : " [disabled]");
+}
+
+/* Prints the list of languages, one per line. */
+extern void printLanguageList (void)
+{
+	unsigned int index = 0;
+
+	while (index < LanguageCount)
+	{
+		printLanguage ((langType) index);
+		++index;
+	}
+}
+
+/*
+* File parsing
+*/
+
+/* Makes a tag of kind 'F' ("file") for the base name of `fileName', placed
+ * at line 1, when tags for source file names are being included.
+ */
+static void makeFileTag (const char *const fileName)
+{
+	tagEntryInfo entry;
+
+	if (! Option.include.fileNames)
+		return;
+
+	initTagEntry (&entry, baseFilename (fileName));
+	entry.kind = 'F';
+	entry.kindName = "file";
+	entry.lineNumber = 1;
+	entry.lineNumberEntry = TRUE;
+	entry.isFileEntry = TRUE;
+	makeTagEntry (&entry);
+}
+
+/* Runs the parser of `language' over `fileName' once. Returns the result of
+ * a rescanning parser (`parser2'), and FALSE for a simple parser or when the
+ * file could not be opened.
+ */
+static boolean createTagsForFile (
+		const char *const fileName, const langType language,
+		const unsigned int passCount)
+{
+	const parserDefinition* def;
+	boolean rescan = FALSE;
+
+	Assert (0 <= language && language < (int) LanguageCount);
+	if (! fileOpen (fileName, language))
+		return FALSE;
+
+	def = LanguageTable [language];
+	if (Option.etags)
+		beginEtagsFile ();
+
+	makeFileTag (fileName);
+
+	if (def->parser != NULL)
+		def->parser ();
+	else if (def->parser2 != NULL)
+		rescan = def->parser2 (passCount);
+
+	if (Option.etags)
+		endEtagsFile (getSourceFileTagPath ());
+
+	fileClose ();
+	return rescan;
+}
+
+/* Creates the tags for `fileName', repeating for as long as
+ * createTagsForFile() returns TRUE. Before each repeat the tag file position
+ * and the count of added tags are restored to their values on entry. Returns
+ * TRUE if any such repeat took place.
+ */
+static boolean createTagsWithFallback (
+		const char *const fileName, const langType language)
+{
+	const unsigned long savedCount = TagFile.numTags.added;
+	fpos_t savedPosition;
+	boolean rewound = FALSE;
+	unsigned int pass;
+
+	fgetpos (TagFile.fp, &savedPosition);
+	for (pass = 1 ; createTagsForFile (fileName, language, pass) ; ++pass)
+	{
+		/* Restore prior state of tag file.
+		 */
+		fsetpos (TagFile.fp, &savedPosition);
+		TagFile.numTags.added = savedCount;
+		rewound = TRUE;
+	}
+	return rewound;
+}
+
+/* Creates tags for `fileName' in the language selected by Option.language,
+ * or detected from the file for LANG_AUTO. Files of unknown or disabled
+ * languages are skipped. In filter mode the tag file is opened and closed
+ * around the file. Returns the result of createTagsWithFallback(), or FALSE
+ * when the file was skipped.
+ */
+extern boolean parseFile (const char *const fileName)
+{
+	langType language = Option.language;
+	boolean tagFileResized;
+
+	if (language == LANG_AUTO)
+		language = getFileLanguage (fileName);
+	Assert (language != LANG_AUTO);
+
+	if (language == LANG_IGNORE)
+	{
+		verbose ("ignoring %s (unknown language)\n", fileName);
+		return FALSE;
+	}
+	if (! LanguageTable [language]->enabled)
+	{
+		verbose ("ignoring %s (language disabled)\n", fileName);
+		return FALSE;
+	}
+
+	if (Option.filter)
+		openTagFile ();
+
+	tagFileResized = createTagsWithFallback (fileName, language);
+
+	if (Option.filter)
+		closeTagFile (tagFileResized);
+	addTotals (1, 0L, 0L);
+
+	return tagFileResized;
+}
+
+/* vi:set tabstop=4 shiftwidth=4 nowrap: */
diff --git a/parse.h b/parse.h
new file mode 100644
index 0000000..1dbff35
--- /dev/null
+++ b/parse.h
@@ -0,0 +1,129 @@
+/*
+* $Id: parse.h 443 2006-05-30 04:37:13Z darren $
+*
+* Copyright (c) 1998-2003, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* Private definitions for parsing support.
+*/
+#ifndef _PARSE_H
+#define _PARSE_H
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+#include "parsers.h" /* contains list of parsers */
+#include "strlist.h"
+
+/*
+* MACROS
+*/
+#define KIND_COUNT(kindTable) (sizeof(kindTable)/sizeof(kindOption))
+
+/* Special langType values; real languages are numbered from 0 upwards.
+ * LANG_AUTO: as Option.language, have getFileLanguage() detect the language.
+ * LANG_IGNORE: no language; returned by getNamedLanguage() when nothing matches.
+ */
+#define LANG_AUTO (-1)
+#define LANG_IGNORE (-2)
+
+/*
+* DATA DECLARATIONS
+*/
+typedef int langType;
+
+typedef void (*createRegexTag) (const vString* const name);
+typedef void (*simpleParser) (void);
+typedef boolean (*rescanParser) (const unsigned int passCount);
+typedef void (*parserInitialize) (langType language);
+
+typedef struct sKindOption {
+ boolean enabled; /* are tags for kind enabled? */
+ int letter; /* kind letter */
+ const char* name; /* kind name */
+ const char* description; /* displayed in --help output */
+} kindOption;
+
+typedef struct {
+ /* defined by parser */
+ char* name; /* name of language */
+ kindOption* kinds; /* tag kinds handled by parser */
+ unsigned int kindCount; /* size of `kinds' list */
+ const char *const *extensions; /* list of default extensions */
+ const char *const *patterns; /* list of default file name patterns */
+ parserInitialize initialize; /* initialization routine, if needed */
+ simpleParser parser; /* simple parser (common case) */
+ rescanParser parser2; /* rescanning parser (unusual case) */
+ boolean regex; /* is this a regex parser? */
+
+ /* used internally */
+ unsigned int id; /* id assigned to language */
+ boolean enabled; /* currently enabled? */
+ stringList* currentPatterns; /* current list of file name patterns */
+ stringList* currentExtensions; /* current list of extensions */
+} parserDefinition;
+
+typedef parserDefinition* (parserDefinitionFunc) (void);
+
+typedef struct {
+ size_t start; /* character index in line where match starts */
+ size_t length; /* length of match */
+} regexMatch;
+
+typedef void (*regexCallback) (const char *line, const regexMatch *matches, unsigned int count);
+
+/*
+* FUNCTION PROTOTYPES
+*/
+
+/* Each parser's definition function is called. The routine is expected to
+ * return a structure allocated using parserNew(). Unless it is a regex
+ * parser, this structure must set exactly one of `parser' and `parser2'.
+ */
+extern parserDefinitionFunc PARSER_LIST;
+
+/* Legacy interface */
+extern boolean includingDefineTags (void);
+
+/* Language processing and parsing */
+extern void makeSimpleTag (const vString* const name, kindOption* const kinds, const int kind);
+extern parserDefinition* parserNew (const char* name);
+extern const char *getLanguageName (const langType language);
+extern langType getNamedLanguage (const char *const name);
+extern langType getFileLanguage (const char *const fileName);
+extern void installLanguageMapDefault (const langType language);
+extern void installLanguageMapDefaults (void);
+extern void clearLanguageMap (const langType language);
+extern boolean removeLanguageExtensionMap (const char *const extension);
+extern void addLanguageExtensionMap (const langType language, const char* extension);
+extern void addLanguagePatternMap (const langType language, const char* ptrn);
+extern void printLanguageMap (const langType language);
+extern void printLanguageMaps (const langType language);
+extern void enableLanguages (const boolean state);
+extern void enableLanguage (const langType language, const boolean state);
+extern void initializeParsing (void);
+extern void freeParserResources (void);
+extern void processLanguageDefineOption (const char *const option, const char *const parameter);
+extern boolean processKindOption (const char *const option, const char *const parameter);
+extern void printKindOptions (void);
+extern void printLanguageKinds (const langType language);
+extern void printLanguageList (void);
+extern boolean parseFile (const char *const fileName);
+
+/* Regex interface */
+#ifdef HAVE_REGEX
+extern void findRegexTags (void);
+extern boolean matchRegex (const vString* const line, const langType language);
+#endif
+extern boolean processRegexOption (const char *const option, const char *const parameter);
+extern void addLanguageRegex (const langType language, const char* const regex);
+extern void addTagRegex (const langType language, const char* const regex, const char* const name, const char* const kinds, const char* const flags);
+extern void addCallbackRegex (const langType language, const char *const regex, const char *const flags, const regexCallback callback);
+extern void disableRegexKinds (const langType language);
+extern boolean enableRegexKind (const langType language, const int kind, const boolean mode);
+extern void printRegexKinds (const langType language, boolean indent);
+extern void freeRegexResources (void);
+extern void checkRegex (void);
+
+#endif /* _PARSE_H */
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/parsers.h b/parsers.h
new file mode 100644
index 0000000..3dcc8ae
--- /dev/null
+++ b/parsers.h
@@ -0,0 +1,63 @@
+/*
+* $Id: parsers.h 717 2009-07-07 03:40:50Z dhiebert $
+*
+* Copyright (c) 2000-2003, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* External interface to all language parsing modules.
+*
+* To add a new language parser, you need only modify this single source
+* file to add the name of the parser definition function.
+*/
+#ifndef _PARSERS_H
+#define _PARSERS_H
+
+/* Add the name of any new parser definition function here.
+ *
+ * PARSER_LIST expands to a comma-separated list of identifiers. Each one
+ * names a parser definition function such as PascalParser() or PerlParser(),
+ * which take no arguments and return a parserDefinition*. Every entry except
+ * the last must end with a comma and a line-continuation backslash so that
+ * the macro carries on to the next line; the last entry has neither.
+ * NOTE(review): the consumer of this list is not visible here; presumably
+ * it initialises a table of parser constructors - confirm.
+ */
+#define PARSER_LIST \
+ AntParser, \
+ AsmParser, \
+ AspParser, \
+ AwkParser, \
+ BasicParser, \
+ BetaParser, \
+ CParser, \
+ CppParser, \
+ CsharpParser, \
+ CobolParser, \
+ DosBatchParser, \
+ EiffelParser, \
+ ErlangParser, \
+ FlexParser, \
+ FortranParser, \
+ HtmlParser, \
+ JavaParser, \
+ JavaScriptParser, \
+ LispParser, \
+ LuaParser, \
+ MakefileParser, \
+ MatLabParser, \
+ OcamlParser, \
+ PascalParser, \
+ PerlParser, \
+ PhpParser, \
+ PythonParser, \
+ RexxParser, \
+ RubyParser, \
+ SchemeParser, \
+ ShParser, \
+ SlangParser, \
+ SmlParser, \
+ SqlParser, \
+ TclParser, \
+ TexParser, \
+ VeraParser, \
+ VerilogParser, \
+ VhdlParser, \
+ VimParser, \
+ YaccParser
+
+#endif /* _PARSERS_H */
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/pascal.c b/pascal.c
new file mode 100644
index 0000000..9a50ba7
--- /dev/null
+++ b/pascal.c
@@ -0,0 +1,267 @@
+/*
+* $Id: pascal.c 536 2007-06-02 06:09:00Z elliotth $
+*
+* Copyright (c) 2001-2002, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* This module contains functions for generating tags for the Pascal language,
+* including some extensions for Object Pascal.
+*/
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#include <string.h>
+
+#include "entry.h"
+#include "parse.h"
+#include "read.h"
+#include "vstring.h"
+
+/*
+* DATA DEFINITIONS
+*/
+/* Index into PascalKinds; the enumerator order must match the table rows. */
+typedef enum {
+ K_FUNCTION, K_PROCEDURE
+} pascalKind;
+
+/* One row per pascalKind. createPascalTag() reads the `enabled', `letter'
+ * and `name' members of a row; the initialisers below appear to supply them
+ * in the order enabled, letter, name, followed by a plural description
+ * (assumption - confirm against the kindOption declaration).
+ */
+static kindOption PascalKinds [] = {
+ { TRUE, 'f', "function", "functions"},
+ { TRUE, 'p', "procedure", "procedures"}
+};
+
+/*
+* FUNCTION DEFINITIONS
+*/
+
+/* Prepare `tag' for a Pascal routine called `name' of the given kind.
+ * If that kind is disabled, or no usable name was collected, the entry is
+ * initialised with a NULL name instead.
+ */
+static void createPascalTag (
+		tagEntryInfo* const tag, const vString* const name, const int kind)
+{
+	const kindOption *const option = &PascalKinds [kind];
+
+	if (! option->enabled || name == NULL || ! (vStringLength (name) > 0))
+	{
+		initTagEntry (tag, NULL);
+		return;
+	}
+	initTagEntry (tag, vStringValue (name));
+	tag->kindName = option->name;
+	tag->kind = option->letter;
+}
+
+/* Emit `tag', unless it carries no name (nothing to emit). */
+static void makePascalTag (const tagEntryInfo* const tag)
+{
+	if (tag->name == NULL)
+		return;
+	makeTagEntry (tag);
+}
+
+/* Read cursor into the current input line; advanced by tail() and by
+ * findPascalTags().
+ */
+static const unsigned char* dbp;
+
+/* Character classes used while scanning Pascal identifiers:
+ *   starttoken - may begin an identifier (letter or '_')
+ *   intoken    - may appear inside one (letter, digit, '_' or '.')
+ *   endtoken   - terminates one
+ * Every use of the parameter is parenthesised so that an arbitrary
+ * expression can be passed. The argument is evaluated more than once, so it
+ * must be free of side effects.
+ */
+#define starttoken(c) (isalpha ((int) (c)) || (int) (c) == '_')
+#define intoken(c) (isalnum ((int) (c)) || (int) (c) == '_' || (int) (c) == '.')
+#define endtoken(c) (! intoken (c) && ! isdigit ((int) (c)))
+
+/* Compare the text at dbp with `cp', ignoring case. When all of `cp' matches
+ * and the character following the match cannot continue a token, advance dbp
+ * past the match and return TRUE; otherwise leave dbp alone and return FALSE.
+ */
+static boolean tail (const char *cp)
+{
+	int matched = 0;
+
+	for ( ; *cp != '\0' ; cp++, matched++)
+	{
+		if (tolower ((int) *cp) != tolower ((int) dbp [matched]))
+			break;
+	}
+	if (*cp != '\0' || intoken (dbp [matched]))
+		return FALSE;
+
+	dbp += matched;
+	return TRUE;
+}
+
+/* Algorithm adapted from GNU etags.
+ * Locates tags for procedures & functions. Doesn't do any type- or
+ * var-definitions. It does look for the keyword "extern" or "forward"
+ * immediately following the procedure statement; if found, the tag is
+ * skipped.
+ *
+ * The input is consumed through the file-level cursor `dbp', one character
+ * at a time; tail() advances the same cursor when it recognises a keyword.
+ */
+static void findPascalTags (void)
+{
+	vString *name = vStringNew ();
+	tagEntryInfo tag;
+	pascalKind kind = K_FUNCTION;
+	/* each of these flags is TRUE iff: */
+	boolean incomment = FALSE;  /* point is inside a comment */
+	int comment_char = '\0';    /* type of current comment */
+	boolean inquote = FALSE;    /* point is inside '..' string */
+	boolean get_tagname = FALSE;/* point is after PROCEDURE/FUNCTION
+	                               keyword, so next item = potential tag */
+	boolean found_tag = FALSE;  /* point is after a potential tag */
+	boolean inparms = FALSE;    /* point is within parameter-list */
+	boolean verify_tag = FALSE;
+	/* point has passed the parm-list, so the next token will determine
+	 * whether this is a FORWARD/EXTERN to be ignored, or whether it is a
+	 * real tag
+	 */
+
+	dbp = fileReadLine ();
+	while (dbp != NULL)
+	{
+		int c = *dbp++;
+
+		if (c == '\0')  /* if end of line */
+		{
+			dbp = fileReadLine ();
+			if (dbp == NULL || *dbp == '\0')
+				continue;
+			if (!((found_tag && verify_tag) || get_tagname))
+				c = *dbp++;
+			/* only if don't need *dbp pointing to the beginning of
+			 * the name of the procedure or function
+			 */
+		}
+		if (incomment)
+		{
+			if (comment_char == '{' && c == '}')
+				incomment = FALSE;
+			else if (comment_char == '(' && c == '*' && *dbp == ')')
+			{
+				dbp++;
+				incomment = FALSE;
+			}
+			continue;
+		}
+		else if (inquote)
+		{
+			if (c == '\'')
+				inquote = FALSE;
+			continue;
+		}
+		else switch (c)
+		{
+			case '\'':
+				inquote = TRUE;  /* found first quote */
+				continue;
+			case '{':  /* found open { comment */
+				incomment = TRUE;
+				comment_char = c;
+				continue;
+			case '(':
+				if (*dbp == '*')  /* found open (* comment */
+				{
+					incomment = TRUE;
+					comment_char = c;
+					dbp++;
+				}
+				else if (found_tag)  /* found '(' after tag, i.e., parm-list */
+					inparms = TRUE;
+				continue;
+			case ')':  /* end of parms list */
+				if (inparms)
+					inparms = FALSE;
+				continue;
+			case ';':
+				if (found_tag && !inparms)  /* end of proc or fn stmt */
+				{
+					verify_tag = TRUE;
+					break;
+				}
+				continue;
+		}
+		if (found_tag && verify_tag && *dbp != ' ')
+		{
+			/* check if this is an "extern" declaration */
+			if (*dbp == '\0')
+				continue;
+			/* The comparison belongs outside the tolower() call. With it
+			 * inside, tolower() received the 0/1 result of the comparison
+			 * and an upper-case 'E' was never recognised.
+			 * NOTE(review): tail() requires the keyword to end here, so
+			 * this accepts "extern" but not "external".
+			 */
+			if (tolower ((int) *dbp) == 'e')
+			{
+				if (tail ("extern"))  /* superfluous, really! */
+				{
+					found_tag = FALSE;
+					verify_tag = FALSE;
+				}
+			}
+			else if (tolower ((int) *dbp) == 'f')
+			{
+				if (tail ("forward"))  /* check for forward reference */
+				{
+					found_tag = FALSE;
+					verify_tag = FALSE;
+				}
+			}
+			if (found_tag && verify_tag)  /* not external proc, so make tag */
+			{
+				found_tag = FALSE;
+				verify_tag = FALSE;
+				makePascalTag (&tag);
+				continue;
+			}
+		}
+		if (get_tagname)  /* grab name of proc or fn */
+		{
+			const unsigned char *cp;
+
+			if (*dbp == '\0')
+				continue;
+
+			/* grab block name */
+			while (isspace ((int) *dbp))
+				++dbp;
+			for (cp = dbp ; *cp != '\0' && !endtoken (*cp) ; cp++)
+				continue;
+			vStringNCopyS (name, (const char*) dbp, cp - dbp);
+			createPascalTag (&tag, name, kind);
+			dbp = cp;  /* set dbp to e-o-token */
+			get_tagname = FALSE;
+			found_tag = TRUE;
+			/* and proceed to check for "extern" */
+		}
+		else if (!incomment && !inquote && !found_tag)
+		{
+			/* `c' is the first letter of a candidate keyword; tail()
+			 * matches the remainder at dbp.
+			 */
+			switch (tolower ((int) c))
+			{
+				case 'c':
+					if (tail ("onstructor"))
+					{
+						get_tagname = TRUE;
+						kind = K_PROCEDURE;
+					}
+					break;
+				case 'd':
+					if (tail ("estructor"))
+					{
+						get_tagname = TRUE;
+						kind = K_PROCEDURE;
+					}
+					break;
+				case 'p':
+					if (tail ("rocedure"))
+					{
+						get_tagname = TRUE;
+						kind = K_PROCEDURE;
+					}
+					break;
+				case 'f':
+					if (tail ("unction"))
+					{
+						get_tagname = TRUE;
+						kind = K_FUNCTION;
+					}
+					break;
+			}
+		}
+	}  /* while not eof */
+	vStringDelete (name);
+}
+
+/* Create the parser definition for Pascal: language name "Pascal", file
+ * extensions "p" and "pas", the kinds in PascalKinds, parsed by
+ * findPascalTags().
+ */
+extern parserDefinition* PascalParser (void)
+{
+	static const char *const extensions [] = { "p", "pas", NULL };
+	parserDefinition* const parser = parserNew ("Pascal");
+
+	parser->kinds = PascalKinds;
+	parser->kindCount = KIND_COUNT (PascalKinds);
+	parser->extensions = extensions;
+	parser->parser = findPascalTags;
+	return parser;
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/perl.c b/perl.c
new file mode 100644
index 0000000..7c3e932
--- /dev/null
+++ b/perl.c
@@ -0,0 +1,382 @@
+/*
+* $Id: perl.c 601 2007-08-02 04:45:16Z perlguy0 $
+*
+* Copyright (c) 2000-2003, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* This module contains functions for generating tags for PERL language
+* files.
+*/
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#include <string.h>
+
+#include "entry.h"
+#include "options.h"
+#include "read.h"
+#include "routines.h"
+#include "vstring.h"
+
+/* Debug tracing. With TRACE_PERL_C set to a non-zero value, TRACE(fmt, ...)
+ * prints "perl.c:<line>: " followed by the printf-style message; with 0 the
+ * call is guarded by a constant-false `if'.
+ * NOTE(review): TRACE expands to an unbraced `if', so placing it directly
+ * before an `else' would capture that `else'.
+ */
+#define TRACE_PERL_C 0
+#define TRACE if (TRACE_PERL_C) printf("perl.c:%d: ", __LINE__), printf
+
+/*
+* DATA DEFINITIONS
+*/
+/* Tag kinds for Perl. K_NONE means "no tag on this line"; the remaining
+ * values start at 0 and are used as indices into PerlKinds, so their order
+ * must match the table rows.
+ */
+typedef enum {
+ K_NONE = -1,
+ K_CONSTANT,
+ K_FORMAT,
+ K_LABEL,
+ K_PACKAGE,
+ K_SUBROUTINE,
+ K_SUBROUTINE_DECLARATION
+} perlKind;
+
+/* One row per perlKind other than K_NONE. The subroutine declaration row is
+ * the only one whose first field is FALSE; findPerlTags() checks that row's
+ * `enabled' member before tagging a declaration.
+ */
+static kindOption PerlKinds [] = {
+ { TRUE, 'c', "constant", "constants" },
+ { TRUE, 'f', "format", "formats" },
+ { TRUE, 'l', "label", "labels" },
+ { TRUE, 'p', "package", "packages" },
+ { TRUE, 's', "subroutine", "subroutines" },
+ { FALSE, 'd', "subroutine declaration", "subroutine declarations" },
+};
+
+/*
+* FUNCTION DEFINITIONS
+*/
+
+/* TRUE if `c' may start an identifier: a letter or an underscore. */
+static boolean isIdentifier1 (int c)
+{
+	const int accepted = (isalpha (c) || c == '_');
+
+	return (boolean) accepted;
+}
+
+/* TRUE if `c' may appear inside an identifier: letter, digit or underscore. */
+static boolean isIdentifier (int c)
+{
+	const int accepted = (isalnum (c) || c == '_');
+
+	return (boolean) accepted;
+}
+
+/* Return TRUE if `word' starts with one of the POD command names listed
+ * below. The command is the run of characters up to the first space or tab,
+ * or up to the end of the string when there is neither, and it must equal a
+ * listed name exactly.
+ */
+static boolean isPodWord (const char *word)
+{
+	static const char *const pods [] = {
+		"head1", "head2", "head3", "head4", "over", "item", "back",
+		"pod", "begin", "end", "for"
+	};
+	const size_t count = sizeof (pods) / sizeof (pods [0]);
+	size_t len;
+	size_t i;
+
+	/* <ctype.h> functions are only defined for values representable as
+	 * unsigned char (or EOF); a plain char may be negative.
+	 */
+	if (! isalpha ((unsigned char) *word))
+		return FALSE;
+
+	/* Compare in place instead of copying the command into a heap buffer. */
+	len = strcspn (word, " \t");
+	for (i = 0 ; i < count ; ++i)
+	{
+		if (strlen (pods [i]) == len && strncmp (word, pods [i], len) == 0)
+			return TRUE;
+	}
+	return FALSE;
+}
+
+/*
+ * Perl subroutine declaration may look like one of the following:
+ *
+ * sub abc;
+ * sub abc :attr;
+ * sub abc (proto);
+ * sub abc (proto) :attr;
+ *
+ * Note that there may be more than one attribute. Attributes may
+ * have things in parentheses (they look like arguments). Anything
+ * inside of those parentheses goes. Prototypes may contain semi-colons.
+ * The match ends when we encounter (outside of any parentheses) either
+ * a semi-colon (that'd be a declaration) or a left curly brace
+ * (definition).
+ *
+ * This is pretty complicated parsing (plus we all know that only perl can
+ * parse Perl), so we are only promising best effort here.
+ *
+ * If we can't determine what this is (due to a file ending, for example),
+ * we will return FALSE.
+ *
+ * `cp' points at the text following the subroutine name. When a line is
+ * exhausted the next one is fetched with fileReadLine(), so the input may
+ * have advanced by several lines by the time this returns.
+ */
+static boolean isSubroutineDeclaration (const unsigned char *cp)
+{
+ boolean attr = FALSE; /* a ':' was seen; an attribute name must follow */
+ int nparens = 0; /* current parenthesis nesting depth */
+
+ do {
+ for ( ; *cp; ++cp) {
+SUB_DECL_SWITCH:
+ switch (*cp) {
+ case ':':
+ if (nparens)
+ break;
+ else if (TRUE == attr)
+ return FALSE; /* Invalid attribute name */
+ else
+ attr = TRUE;
+ break;
+ case '(':
+ ++nparens;
+ break;
+ case ')':
+ --nparens;
+ break;
+ case ' ':
+ case '\t':
+ break;
+ case ';':
+ if (!nparens)
+ return TRUE;
+ /* inside parentheses: falls through to the cases below */
+ case '{':
+ if (!nparens)
+ return FALSE;
+ /* inside parentheses: falls through to default */
+ default:
+ if (attr) {
+ if (isIdentifier1(*cp)) {
+ cp++;
+ while (isIdentifier (*cp))
+ cp++;
+ attr = FALSE;
+ /* cp already points at the character after the name, so
+ * re-enter the switch without the loop's ++cp.
+ * NOTE(review): if the name ends at the terminating NUL
+ * while nparens is non-zero, the switch breaks on '\0' and
+ * the loop's ++cp then steps past the terminator - confirm
+ * whether that input can occur.
+ */
+ goto SUB_DECL_SWITCH; /* Instead of --cp; */
+ } else {
+ return FALSE;
+ }
+ } else if (nparens) {
+ break;
+ } else {
+ return FALSE;
+ }
+ }
+ }
+ } while (NULL != (cp = fileReadLine ()));
+
+ return FALSE;
+}
+
+/* Algorithm adapted from GNU etags.
+ * Perl support by Bart Robinson <lomew@cs.utah.edu>
+ * Perl sub names: look for /^ [ \t\n]sub [ \t\n]+ [^ \t\n{ (]+/
+ *
+ * Reads the input line by line and makes tags for "sub", "use constant",
+ * "package" and "format" statements and for labels. POD sections and lines
+ * starting with '#' are skipped, and scanning stops at a line consisting of
+ * __DATA__ or __END__. When qualified tags are enabled, a second tag
+ * prefixed with the most recent package name is made for every kind except
+ * packages.
+ */
+static void findPerlTags (void)
+{
+	vString *name = vStringNew ();
+	vString *package = NULL;
+	boolean skipPodDoc = FALSE;
+	const unsigned char *line;
+
+	while ((line = fileReadLine ()) != NULL)
+	{
+		boolean spaceRequired = FALSE;
+		boolean qualified = FALSE;
+		const unsigned char *cp = line;
+		perlKind kind = K_NONE;
+		tagEntryInfo e;
+
+		if (skipPodDoc)
+		{
+			if (strncmp ((const char*) line, "=cut", (size_t) 4) == 0)
+				skipPodDoc = FALSE;
+			continue;
+		}
+		else if (line [0] == '=')
+		{
+			skipPodDoc = isPodWord ((const char*)line + 1);
+			continue;
+		}
+		else if (strcmp ((const char*) line, "__DATA__") == 0)
+			break;
+		else if (strcmp ((const char*) line, "__END__") == 0)
+			break;
+		else if (line [0] == '#')
+			continue;
+
+		while (isspace (*cp))
+			cp++;
+
+		if (strncmp((const char*) cp, "sub", (size_t) 3) == 0)
+		{
+			TRACE("this looks like a sub\n");
+			cp += 3;
+			kind = K_SUBROUTINE;
+			spaceRequired = TRUE;
+			qualified = TRUE;
+		}
+		else if (strncmp((const char*) cp, "use", (size_t) 3) == 0)
+		{
+			cp += 3;
+			if (!isspace(*cp))
+				continue;
+			while (*cp && isspace (*cp))
+				++cp;
+			if (strncmp((const char*) cp, "constant", (size_t) 8) != 0)
+				continue;
+			cp += 8;
+			kind = K_CONSTANT;
+			spaceRequired = TRUE;
+			qualified = TRUE;
+		}
+		else if (strncmp((const char*) cp, "package", (size_t) 7) == 0)
+		{
+			/* This will point to space after 'package' so that a tag
+			   can be made */
+			const unsigned char *space = cp += 7;
+
+			if (package == NULL)
+				package = vStringNew ();
+			else
+				vStringClear (package);
+			while (isspace (*cp))
+				cp++;
+			/* Stop at the end of the line as well: a package name with
+			 * no ';' or whitespace after it must not run the scan past
+			 * the terminating NUL.
+			 */
+			while (*cp != '\0' && (int) *cp != ';' && !isspace ((int) *cp))
+			{
+				vStringPut (package, (int) *cp);
+				cp++;
+			}
+			vStringCatS (package, "::");
+
+			cp = space;  /* Rewind */
+			kind = K_PACKAGE;
+			spaceRequired = TRUE;
+			qualified = TRUE;
+		}
+		else if (strncmp((const char*) cp, "format", (size_t) 6) == 0)
+		{
+			cp += 6;
+			kind = K_FORMAT;
+			spaceRequired = TRUE;
+			qualified = TRUE;
+		}
+		else
+		{
+			/* a label is an identifier followed by a single ':' */
+			if (isIdentifier1 (*cp))
+			{
+				const unsigned char *p = cp;
+				while (isIdentifier (*p))
+					++p;
+				while (isspace (*p))
+					++p;
+				if ((int) *p == ':' && (int) *(p + 1) != ':')
+					kind = K_LABEL;
+			}
+		}
+		if (kind != K_NONE)
+		{
+			TRACE("cp0: %s\n", (const char *) cp);
+			/* the keyword must be a whole word, e.g. reject "subtract" */
+			if (spaceRequired && *cp && !isspace (*cp))
+				continue;
+
+			TRACE("cp1: %s\n", (const char *) cp);
+			while (isspace (*cp))
+				cp++;
+
+			while (!*cp || '#' == *cp) {  /* Gobble up empty lines
+			                                 and comments */
+				cp = fileReadLine ();
+				if (!cp)
+					goto END_MAIN_WHILE;
+				while (isspace (*cp))
+					cp++;
+			}
+
+			while (isIdentifier (*cp) || (K_PACKAGE == kind && ':' == *cp))
+			{
+				vStringPut (name, (int) *cp);
+				cp++;
+			}
+
+			if (K_FORMAT == kind &&
+				vStringLength (name) == 0 &&  /* cp did not advance */
+				'=' == *cp)
+			{
+				/* format's name is optional. If it's omitted, 'STDOUT'
+				   is assumed. */
+				vStringCatS (name, "STDOUT");
+			}
+
+			vStringTerminate (name);
+			TRACE("name: %s\n", name->buffer);
+
+			if (0 == vStringLength(name)) {
+				vStringClear(name);
+				continue;
+			}
+
+			if (K_SUBROUTINE == kind)
+			{
+				/*
+				 * isSubroutineDeclaration() may consume several lines. So
+				 * we record line positions.
+				 */
+				initTagEntry(&e, vStringValue(name));
+
+				if (TRUE == isSubroutineDeclaration(cp)) {
+					if (TRUE == PerlKinds[K_SUBROUTINE_DECLARATION].enabled) {
+						kind = K_SUBROUTINE_DECLARATION;
+					} else {
+						vStringClear (name);
+						continue;
+					}
+				}
+
+				e.kind = PerlKinds[kind].letter;
+				e.kindName = PerlKinds[kind].name;
+
+				makeTagEntry(&e);
+
+				if (Option.include.qualifiedTags && qualified &&
+					package != NULL && vStringLength (package) > 0)
+				{
+					vString *const qualifiedName = vStringNew ();
+					vStringCopy (qualifiedName, package);
+					vStringCat (qualifiedName, name);
+					e.name = vStringValue(qualifiedName);
+					makeTagEntry(&e);
+					vStringDelete (qualifiedName);
+				}
+			} else if (vStringLength (name) > 0)
+			{
+				makeSimpleTag (name, PerlKinds, kind);
+				if (Option.include.qualifiedTags && qualified &&
+					K_PACKAGE != kind &&
+					package != NULL && vStringLength (package) > 0)
+				{
+					vString *const qualifiedName = vStringNew ();
+					vStringCopy (qualifiedName, package);
+					vStringCat (qualifiedName, name);
+					makeSimpleTag (qualifiedName, PerlKinds, kind);
+					vStringDelete (qualifiedName);
+				}
+			}
+			vStringClear (name);
+		}
+	}
+
+END_MAIN_WHILE:
+	vStringDelete (name);
+	if (package != NULL)
+		vStringDelete (package);
+}
+
+/* Create the parser definition for Perl: language name "Perl", file
+ * extensions "pl", "pm", "plx" and "perl", the kinds in PerlKinds, parsed by
+ * findPerlTags().
+ */
+extern parserDefinition* PerlParser (void)
+{
+	static const char *const extensions [] = { "pl", "pm", "plx", "perl", NULL };
+	parserDefinition* const parser = parserNew ("Perl");
+
+	parser->extensions = extensions;
+	parser->kinds = PerlKinds;
+	parser->kindCount = KIND_COUNT (PerlKinds);
+	parser->parser = findPerlTags;
+	return parser;
+}
+
+/* vi:set tabstop=4 shiftwidth=4 noexpandtab: */
diff --git a/php.c b/php.c
new file mode 100644
index 0000000..0dd60c5
--- /dev/null
+++ b/php.c
@@ -0,0 +1,237 @@
+/*
+* $Id: php.c 624 2007-09-15 22:53:31Z jafl $
+*
+* Copyright (c) 2000, Jesus Castagnetto <jmcastagnetto@zkey.com>
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* This module contains functions for generating tags for the PHP web page
+* scripting language. Only recognizes functions and classes, not methods or
+* variables.
+*
+* Parsing PHP defines by Pavel Hlousek <pavel.hlousek@seznam.cz>, Apr 2003.
+*/
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#include <string.h>
+
+#include "parse.h"
+#include "read.h"
+#include "vstring.h"
+
+/*
+* DATA DEFINITIONS
+*/
+/* Kind indices for the hand-written PHP parser. In this file they are only
+ * referenced by code inside "#if 0" sections; the active parser registers
+ * its kinds through the regex strings in installPHPRegex().
+ */
+typedef enum {
+ K_CLASS, K_DEFINE, K_FUNCTION, K_VARIABLE
+} phpKind;
+
+/* Kind table matching phpKind; compiled out together with findPhpTags(). */
+#if 0
+static kindOption PhpKinds [] = {
+ { TRUE, 'c', "class", "classes" },
+ { TRUE, 'd', "define", "constant definitions" },
+ { TRUE, 'f', "function", "functions" },
+ { TRUE, 'v', "variable", "variables" }
+};
+#endif
+
+/*
+* FUNCTION DEFINITIONS
+*/
+
+/* JavaScript patterns are duplicated in jscript.c */
+
+/*
+ * Cygwin doesn't support non-ASCII characters in character classes.
+ * This isn't a good solution to the underlying problem, because we're still
+ * making assumptions about the character encoding.
+ * Really, these regular expressions need to concentrate on what marks the
+ * end of an identifier, and we need something like iconv to take into
+ * account the user's locale (or an override on the command-line.)
+ */
+#ifdef __CYGWIN__
+#define ALPHA "[:alpha:]"
+#define ALNUM "[:alnum:]"
+#else
+#define ALPHA "A-Za-z\x7f-\xff"
+#define ALNUM "0-9A-Za-z\x7f-\xff"
+#endif
+
+/* Register the tag patterns for PHP with the regex tagging engine.
+ * Each addTagRegex() call passes: the language, a pattern, the back-reference
+ * expression that forms the tag name, a "letter,name,description" kind
+ * specification, and NULL for the flags argument.
+ * Every pattern requires the construct to start at the beginning of the line
+ * or after a space or tab.
+ */
+static void installPHPRegex (const langType language)
+{
+ /* "class NAME" */
+ addTagRegex(language, "(^|[ \t])class[ \t]+([" ALPHA "_][" ALNUM "_]*)",
+ "\\2", "c,class,classes", NULL);
+ /* "interface NAME" */
+ addTagRegex(language, "(^|[ \t])interface[ \t]+([" ALPHA "_][" ALNUM "_]*)",
+ "\\2", "i,interface,interfaces", NULL);
+ /* "define(NAME", with an optional quote before the name */
+ addTagRegex(language, "(^|[ \t])define[ \t]*\\([ \t]*['\"]?([" ALPHA "_][" ALNUM "_]*)",
+ "\\2", "d,define,constant definitions", NULL);
+ /* "function NAME" or "function &NAME" */
+ addTagRegex(language, "(^|[ \t])function[ \t]+&?[ \t]*([" ALPHA "_][" ALNUM "_]*)",
+ "\\2", "f,function,functions", NULL);
+ /* assignment to "$NAME", "::$NAME" or "$this->NAME" */
+ addTagRegex(language, "(^|[ \t])(\\$|::\\$|\\$this->)([" ALPHA "_][" ALNUM "_]*)[ \t]*=",
+ "\\3", "v,variable,variables", NULL);
+ /* "$NAME" declared with var/public/protected/private/static */
+ addTagRegex(language, "(^|[ \t])(var|public|protected|private|static)[ \t]+\\$([" ALPHA "_][" ALNUM "_]*)[ \t]*[=;]",
+ "\\3", "v,variable,variables", NULL);
+
+ /* function regex is covered by PHP regex */
+ /* JavaScript: "NAME = function(" or "NAME : function(" */
+ addTagRegex (language, "(^|[ \t])([A-Za-z0-9_]+)[ \t]*[=:][ \t]*function[ \t]*\\(",
+ "\\2", "j,jsfunction,javascript functions", NULL);
+ /* JavaScript: "OBJECT.NAME = function(" is registered twice with the same
+ * pattern, once tagged as "OBJECT.NAME" and once as "NAME" alone */
+ addTagRegex (language, "(^|[ \t])([A-Za-z0-9_.]+)\\.([A-Za-z0-9_]+)[ \t]*=[ \t]*function[ \t]*\\(",
+ "\\2.\\3", "j,jsfunction,javascript functions", NULL);
+ addTagRegex (language, "(^|[ \t])([A-Za-z0-9_.]+)\\.([A-Za-z0-9_]+)[ \t]*=[ \t]*function[ \t]*\\(",
+ "\\3", "j,jsfunction,javascript functions", NULL);
+}
+
+/* Create the parser definition for PHP: language name "PHP", file extensions
+ * "php", "php3" and "phtml". Tagging is regex based; installPHPRegex() is
+ * stored as the initialisation callback.
+ */
+extern parserDefinition* PhpParser (void)
+{
+	static const char *const extensions [] = { "php", "php3", "phtml", NULL };
+	parserDefinition* const parser = parserNew ("PHP");
+
+	parser->regex = TRUE;
+	parser->initialize = installPHPRegex;
+	parser->extensions = extensions;
+	return parser;
+}
+
+/* Everything up to the matching #endif is compiled out. It is a hand-written
+ * line scanner for PHP together with an alternative PhpParser() that would
+ * install it in place of the regex-based definition above.
+ */
+#if 0
+
+/* TRUE for ASCII letters and for any value in the range 127..255. */
+static boolean isLetter(const int c)
+{
+ return (boolean)(isalpha(c) || (c >= 127 && c <= 255));
+}
+
+/* TRUE if `c' may start a variable name: a letter (see above) or '_'. */
+static boolean isVarChar1(const int c)
+{
+ return (boolean)(isLetter (c) || c == '_');
+}
+
+/* TRUE if `c' may continue a variable name: as isVarChar1, plus digits. */
+static boolean isVarChar(const int c)
+{
+ return (boolean)(isVarChar1 (c) || isdigit (c));
+}
+
+/* Scan the input line by line and tag, per line, the first of: a "$name ="
+ * assignment at the start of the line, a "function" keyword anywhere in the
+ * line, a leading "class" keyword, or a leading "define(" call.
+ */
+static void findPhpTags (void)
+{
+ vString *name = vStringNew ();
+ const unsigned char *line;
+
+ while ((line = fileReadLine ()) != NULL)
+ {
+ const unsigned char *cp = line;
+ const char* f;
+
+ while (isspace (*cp))
+ cp++;
+
+ /* "$name =" : variable */
+ if (*(const char*)cp == '$' && isVarChar1 (*(const char*)(cp+1)))
+ {
+ cp += 1;
+ vStringClear (name);
+ while (isVarChar ((int) *cp))
+ {
+ vStringPut (name, (int) *cp);
+ ++cp;
+ }
+ while (isspace ((int) *cp))
+ ++cp;
+ if (*(const char*) cp == '=')
+ {
+ vStringTerminate (name);
+ makeSimpleTag (name, PhpKinds, K_VARIABLE);
+ vStringClear (name);
+ }
+ }
+ /* "function" as a whole word, anywhere after the leading whitespace */
+ else if ((f = strstr ((const char*) cp, "function")) != NULL &&
+ (f == (const char*) cp || isspace ((int) f [-1])) &&
+ isspace ((int) f [8]))
+ {
+ cp = ((const unsigned char *) f) + 8;
+
+ while (isspace ((int) *cp))
+ ++cp;
+
+ if (*cp == '&') /* skip reference character and following whitespace */
+ {
+ cp++;
+
+ while (isspace ((int) *cp))
+ ++cp;
+ }
+
+ vStringClear (name);
+ while (isalnum ((int) *cp) || *cp == '_')
+ {
+ vStringPut (name, (int) *cp);
+ ++cp;
+ }
+ vStringTerminate (name);
+ makeSimpleTag (name, PhpKinds, K_FUNCTION);
+ vStringClear (name);
+ }
+ /* "class NAME" at the start of the line */
+ else if (strncmp ((const char*) cp, "class", (size_t) 5) == 0 &&
+ isspace ((int) cp [5]))
+ {
+ cp += 5;
+
+ while (isspace ((int) *cp))
+ ++cp;
+ vStringClear (name);
+ while (isalnum ((int) *cp) || *cp == '_')
+ {
+ vStringPut (name, (int) *cp);
+ ++cp;
+ }
+ vStringTerminate (name);
+ makeSimpleTag (name, PhpKinds, K_CLASS);
+ vStringClear (name);
+ }
+ /* "define(NAME" at the start of the line, name optionally quoted */
+ else if (strncmp ((const char*) cp, "define", (size_t) 6) == 0 &&
+ ! isalnum ((int) cp [6]))
+ {
+ cp += 6;
+
+ while (isspace ((int) *cp))
+ ++cp;
+ if (*cp != '(')
+ continue;
+ ++cp;
+
+ while (isspace ((int) *cp))
+ ++cp;
+ if ((*cp == '\'') || (*cp == '"'))
+ ++cp;
+ else if (! ((*cp == '_') || isalnum ((int) *cp)))
+ continue;
+
+ vStringClear (name);
+ while (isalnum ((int) *cp) || *cp == '_')
+ {
+ vStringPut (name, (int) *cp);
+ ++cp;
+ }
+ vStringTerminate (name);
+ makeSimpleTag (name, PhpKinds, K_DEFINE);
+ vStringClear (name);
+ }
+ }
+ vStringDelete (name);
+}
+
+/* Parser definition that would install findPhpTags() and PhpKinds. */
+extern parserDefinition* PhpParser (void)
+{
+ static const char *const extensions [] = { "php", "php3", "phtml", NULL };
+ parserDefinition* def = parserNew ("PHP");
+ def->kinds = PhpKinds;
+ def->kindCount = KIND_COUNT (PhpKinds);
+ def->extensions = extensions;
+ def->parser = findPhpTags;
+ return def;
+}
+
+#endif
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/python.c b/python.c
new file mode 100644
index 0000000..5fdf31b
--- /dev/null
+++ b/python.c
@@ -0,0 +1,771 @@
+/*
+* $Id: python.c 720 2009-07-07 03:55:23Z dhiebert $
+*
+* Copyright (c) 2000-2003, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* This module contains functions for generating tags for Python language
+* files.
+*/
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#include <string.h>
+
+#include "entry.h"
+#include "options.h"
+#include "read.h"
+#include "main.h"
+#include "vstring.h"
+#include "routines.h"
+#include "debug.h"
+
+/*
+* DATA DECLARATIONS
+*/
+typedef struct NestingLevel NestingLevel;
+typedef struct NestingLevels NestingLevels;
+
+/* One enclosing class or function. */
+struct NestingLevel
+{
+ int indentation; /* compared against a line's indent to decide nesting */
+ vString *name; /* name of the class or function */
+ int type; /* K_CLASS for a class; addNestingLevel() stores !K_CLASS otherwise */
+};
+
+/* Growable array of nesting levels, outermost first. */
+struct NestingLevels
+{
+ NestingLevel *levels;
+ int n; /* number of levels in use */
+ int allocated; /* number of slots in `levels'; each allocated slot owns its `name' */
+};
+
+/* Tag kinds for Python; the order matches the rows of PythonKinds. */
+typedef enum {
+ K_CLASS, K_FUNCTION, K_MEMBER, K_VARIABLE, K_IMPORT
+} pythonKind;
+
+/*
+* DATA DEFINITIONS
+*/
+/* One row per pythonKind, in the same order. Note that the import kind is
+ * named "namespace". makeFunctionTag(), makeClassTag() and makeVariableTag()
+ * spell out their kind letter and name themselves rather than reading them
+ * from this table.
+ */
+static kindOption PythonKinds[] = {
+ {TRUE, 'c', "class", "classes"},
+ {TRUE, 'f', "function", "functions"},
+ {TRUE, 'm', "member", "class members"},
+ {TRUE, 'v', "variable", "variables"},
+ {TRUE, 'i', "namespace", "imports"}
+};
+
+/* The two triple-quote delimiters. find_triple_start() stores one of these
+ * pointers in its `which' argument to record which delimiter was found.
+ */
+static char const * const singletriple = "'''";
+static char const * const doubletriple = "\"\"\"";
+
+/*
+* FUNCTION DEFINITIONS
+*/
+
+/* Allocate a new, empty set of nesting levels with xCalloc(). */
+static NestingLevels *nestingLevelsNew (void)
+{
+	return xCalloc (1, NestingLevels);
+}
+
+/* Release `nls': the name of every allocated slot (in use or not), then the
+ * slot array if there is one, then the container itself.
+ */
+static void nestingLevelsFree (NestingLevels *nls)
+{
+	int slot = 0;
+
+	while (slot < nls->allocated)
+	{
+		vStringDelete (nls->levels[slot].name);
+		slot++;
+	}
+	if (nls->levels != NULL)
+		eFree (nls->levels);
+	eFree (nls);
+}
+
+/* Append a level called `name' of the given type. The array grows by one
+ * slot when every allocated slot is in use; otherwise a previously allocated
+ * slot and its vString are reused. The new level's indentation is left for
+ * the caller to set.
+ */
+static void nestingLevelsPush (NestingLevels *nls,
+	const vString *name, int type)
+{
+	NestingLevel *slot;
+
+	if (nls->n >= nls->allocated)
+	{
+		nls->allocated++;
+		nls->levels = xRealloc(nls->levels,
+			nls->allocated, NestingLevel);
+		nls->levels[nls->n].name = vStringNew();
+	}
+	slot = &nls->levels[nls->n];
+	nls->n++;
+
+	vStringCopy(slot->name, name);
+	slot->type = type;
+}
+
+/* Compiled out: accessors for the innermost level. */
+#if 0
+/* Return the innermost level in use, or NULL when there is none. */
+static NestingLevel *nestingLevelsGetCurrent (NestingLevels *nls)
+{
+ Assert (nls != NULL);
+
+ if (nls->n < 1)
+ return NULL;
+
+ return &nls->levels[nls->n - 1];
+}
+
+/* Drop the innermost level: its name is cleared and the slot stays
+ * allocated for reuse.
+ */
+static void nestingLevelsPop (NestingLevels *nls)
+{
+ const NestingLevel *nl = nestingLevelsGetCurrent(nls);
+
+ Assert (nl != NULL);
+ vStringClear(nl->name);
+ nls->n--;
+}
+#endif
+
+/* TRUE if `c' may start an identifier: a letter or an underscore. */
+static boolean isIdentifierFirstCharacter (int c)
+{
+	const int accepted = (isalpha (c) || c == '_');
+
+	return (boolean) accepted;
+}
+
+/* TRUE if `c' may appear inside an identifier: letter, digit or underscore. */
+static boolean isIdentifierCharacter (int c)
+{
+	const int accepted = (isalnum (c) || c == '_');
+
+	return (boolean) accepted;
+}
+
+/* Make a tag for the function named `function'.
+ * With an empty `parent' the tag is a plain function without scope. With a
+ * class parent it becomes a "member" scoped to that class; with any other
+ * parent it stays a "function" scoped to the enclosing function.
+ * `arglist' is currently unused.
+ */
+static void makeFunctionTag (vString *const function,
+	vString *const parent, int is_class_parent, const char *arglist __unused__)
+{
+	const boolean hasParent = (boolean) (vStringLength (parent) > 0);
+	const boolean isMember = (boolean) (hasParent && is_class_parent);
+	tagEntryInfo tag;
+
+	initTagEntry (&tag, vStringValue (function));
+	tag.kindName = isMember ? "member" : "function";
+	tag.kind = isMember ? 'm' : 'f';
+	/* tag.extensionFields.arglist = arglist; */
+
+	if (hasParent)
+	{
+		tag.extensionFields.scope [0] = isMember ? "class" : "function";
+		tag.extensionFields.scope [1] = vStringValue (parent);
+	}
+
+	/* If a function starts with __, we mark it as file scope.
+	 * FIXME: What is the proper way to signal such attributes?
+	 * TODO: What does functions/classes starting with _ and __ mean in python?
+	 */
+	if (strncmp (vStringValue (function), "__", 2) != 0 ||
+		strcmp (vStringValue (function), "__init__") == 0)
+	{
+		tag.extensionFields.access = "public";
+	}
+	else
+	{
+		tag.extensionFields.access = "private";
+		tag.isFileScope = TRUE;
+	}
+	makeTagEntry (&tag);
+}
+
+/* Make a "class" tag for `class', recording `inheritance' as its
+ * inheritance field. A non-empty `parent' becomes the tag's scope, labelled
+ * "class" or "function" according to `is_class_parent'.
+ */
+static void makeClassTag (vString *const class, vString *const inheritance,
+	vString *const parent, int is_class_parent)
+{
+	tagEntryInfo tag;
+
+	initTagEntry (&tag, vStringValue (class));
+	tag.kindName = "class";
+	tag.kind = 'c';
+	if (vStringLength (parent) > 0)
+	{
+		tag.extensionFields.scope [0] = is_class_parent ? "class" : "function";
+		tag.extensionFields.scope [1] = vStringValue (parent);
+	}
+	tag.extensionFields.inheritance = vStringValue (inheritance);
+	makeTagEntry (&tag);
+}
+
+/* Make a "variable" tag for `var'; a non-empty `parent' is recorded as the
+ * enclosing class.
+ */
+static void makeVariableTag (vString *const var, vString *const parent)
+{
+	const boolean scoped = (boolean) (vStringLength (parent) > 0);
+	tagEntryInfo tag;
+
+	initTagEntry (&tag, vStringValue (var));
+	tag.kind = 'v';
+	tag.kindName = "variable";
+	if (scoped)
+	{
+		tag.extensionFields.scope [0] = "class";
+		tag.extensionFields.scope [1] = vStringValue (parent);
+	}
+	makeTagEntry (&tag);
+}
+
+/* Skip a single or double quoted string.
+ * `cp' points at the opening quote. Returns a pointer just past the matching
+ * closing quote, or to the terminating NUL when the string is not closed on
+ * this line. A backslash makes the following character literal.
+ */
+static const char *skipString (const char *cp)
+{
+	const char quote = *cp;
+	int skipNext = 0;
+
+	for (++cp; *cp != '\0'; ++cp)
+	{
+		if (skipNext)
+			skipNext = 0;
+		else if (*cp == '\\')
+			skipNext = 1;
+		else if (*cp == quote)
+			return cp + 1;
+	}
+	return cp;
+}
+
+/* Skip everything up to an identifier start.
+ * Quoted strings are stepped over with skipString(), so identifiers inside
+ * them are not reported. Returns a pointer to the first character that can
+ * start an identifier, or to the terminating NUL if there is none.
+ */
+static const char *skipEverything (const char *cp)
+{
+	for (; *cp; cp++)
+	{
+		if (*cp == '"' || *cp == '\'')
+		{
+			cp = skipString(cp);
+			if (!*cp) break;
+		}
+		/* Go through unsigned char: plain char may be negative for bytes
+		 * >= 0x80, which the <ctype.h> functions do not accept.
+		 */
+		if (isIdentifierFirstCharacter ((int) (unsigned char) *cp))
+			return cp;
+	}
+	return cp;
+}
+
+/* Skip an identifier.
+ * Returns a pointer to the first character at or after `cp' that cannot be
+ * part of an identifier.
+ */
+static const char *skipIdentifier (const char *cp)
+{
+	/* unsigned char first: <ctype.h> is undefined for negative values */
+	while (isIdentifierCharacter ((int) (unsigned char) *cp))
+		cp++;
+	return cp;
+}
+
+/* Search `cp' for an identifier that begins with "def", "class", "cdef" or
+ * "cpdef", ignoring quoted strings. Returns a pointer to its first character,
+ * or NULL if the line has none. Only the prefix is compared, so a longer
+ * word beginning with one of these keywords is returned as well.
+ */
+static const char *findDefinitionOrClass (const char *cp)
+{
+	static const char *const keywords [] = { "def", "class", "cdef", "cpdef" };
+	const size_t count = sizeof (keywords) / sizeof (keywords [0]);
+
+	for ( ; *cp != '\0'; cp = skipIdentifier (cp))
+	{
+		size_t k;
+
+		cp = skipEverything (cp);
+		for (k = 0; k < count; k++)
+		{
+			if (strncmp (cp, keywords [k], strlen (keywords [k])) == 0)
+				return cp;
+		}
+	}
+	return NULL;
+}
+
+/* Return a pointer to the first non-whitespace character at or after `cp'
+ * (possibly the terminating NUL).
+ */
+static const char *skipSpace (const char *cp)
+{
+	/* isspace() is only defined for values representable as unsigned char
+	 * (or EOF); a plain char can be negative for bytes >= 0x80.
+	 */
+	while (isspace ((unsigned char) *cp))
+		++cp;
+	return cp;
+}
+
+/* Starting at ''cp'', parse an identifier into ''identifier''.
+ * The previous contents of ''identifier'' are discarded. Returns a pointer
+ * to the first character after the identifier; if ''cp'' does not point at
+ * an identifier character, ''identifier'' ends up empty and ''cp'' is
+ * returned unchanged.
+ */
+static const char *parseIdentifier (const char *cp, vString *const identifier)
+{
+	vStringClear (identifier);
+	/* unsigned char first: <ctype.h> is undefined for negative values */
+	while (isIdentifierCharacter ((int) (unsigned char) *cp))
+	{
+		vStringPut (identifier, (int) *cp);
+		++cp;
+	}
+	vStringTerminate (identifier);
+	return cp;
+}
+
+/* Parse the text following the "class" keyword: the class name goes into
+ * `class' and, if a parenthesised list follows, its contents are collected
+ * as the inheritance string. The list may continue on following lines, which
+ * are read with fileReadLine() and joined with a single space. A class tag
+ * is then made.
+ */
+static void parseClass (const char *cp, vString *const class,
+	vString *const parent, int is_class_parent)
+{
+	vString *const inheritance = vStringNew ();
+
+	vStringClear (inheritance);
+	cp = skipSpace (parseIdentifier (cp, class));
+	if (*cp == '(')
+	{
+		++cp;
+		while (*cp != ')')
+		{
+			if (*cp != '\0')
+			{
+				vStringPut (inheritance, *cp);
+				++cp;
+				continue;
+			}
+			/* Closing parenthesis can be in follow up line. */
+			cp = (const char *) fileReadLine ();
+			if (cp == NULL)
+				break;
+			vStringPut (inheritance, ' ');
+		}
+		vStringTerminate (inheritance);
+	}
+	makeClassTag (class, inheritance, parent, is_class_parent);
+	vStringDelete (inheritance);
+}
+
+/* Make an import tag for each name that follows the "import" keyword on the
+ * line `cp'. In "import foo as bar" only "bar" is tagged: a name is skipped
+ * when it is "as" itself or when the next identifier is "as".
+ * Nothing is tagged if "import" is absent or is not followed by whitespace.
+ */
+static void parseImports (const char *cp)
+{
+	const char *pos;
+	vString *name, *name_next;
+
+	cp = skipEverything (cp);
+
+	if ((pos = strstr (cp, "import")) == NULL)
+		return;
+
+	cp = pos + 6;
+
+	/* continue only if there is some space between the keyword and the identifier */
+	/* (cast: isspace() is undefined for a negative plain char) */
+	if (! isspace ((unsigned char) *cp))
+		return;
+
+	cp++;
+	cp = skipSpace (cp);
+
+	name = vStringNew ();
+	name_next = vStringNew ();
+
+	cp = skipEverything (cp);
+	while (*cp)
+	{
+		cp = parseIdentifier (cp, name);
+
+		cp = skipEverything (cp);
+		/* we parse the next possible import statement as well to be able to ignore 'foo' in
+		 * 'import foo as bar' */
+		parseIdentifier (cp, name_next);
+
+		/* take the current tag only if the next one is not "as" */
+		if (strcmp (vStringValue (name_next), "as") != 0 &&
+			strcmp (vStringValue (name), "as") != 0)
+		{
+			makeSimpleTag (name, PythonKinds, K_IMPORT);
+		}
+	}
+	vStringDelete (name);
+	vStringDelete (name_next);
+}
+
+/* modified from get.c getArglistFromStr().
+ * Returns a strdup()ed copy of the first parenthesised group in `buf',
+ * parentheses included, or NULL if `buf' is NULL or contains no '('. An
+ * unbalanced group runs to the end of the string.
+ * warning: terminates rest of string past arglist! (`buf' is written to
+ * despite its const qualifier, so it must point to writable storage)
+ * note: does not ignore brackets inside strings!
+ */
+static char *parseArglist(const char *buf)
+{
+	char *first, *scan;
+	int depth = 1;
+
+	if (buf == NULL)
+		return NULL;
+	first = strchr (buf, '(');
+	if (first == NULL)
+		return NULL;
+
+	scan = first + 1;
+	while (depth > 0 && *scan != '\0')
+	{
+		if (*scan == '(')
+			++depth;
+		else if (*scan == ')')
+			--depth;
+		++scan;
+	}
+	*scan = '\0';
+	return strdup (first);
+}
+
+/* Parse the text following the "def" keyword: the function name goes into
+ * `def', the argument list (if any) is extracted, and a function tag is made.
+ */
+static void parseFunction (const char *cp, vString *const def,
+	vString *const parent, int is_class_parent)
+{
+	char *arglist;
+
+	cp = parseIdentifier (cp, def);
+	arglist = parseArglist (cp);
+	makeFunctionTag (def, parent, is_class_parent, arglist);
+	/* parseArglist() returns NULL when the line has no '('; guard the
+	 * release the same way nestingLevelsFree() guards eFree().
+	 */
+	if (arglist != NULL)
+		eFree (arglist);
+}
+
+/* Get the combined name of a nested symbol. The names of all levels that are
+ * indented less than `indent' are joined with ".". For example this code:
+ * class MyClass:
+ * def myFunction:
+ * def SubFunction:
+ * class SubClass:
+ * def Method:
+ * pass
+ * Would produce this string for Method:
+ * MyClass.myFunction.SubFunction.SubClass
+ * (An earlier variant used "." after a class and "/" after a function; the
+ * single separator makes Geany symbol list grouping work properly.)
+ *
+ * Returns whether the innermost level included is a class; FALSE when no
+ * level was included.
+ */
+static boolean constructParentString(NestingLevels *nls, int indent,
+	vString *result)
+{
+	int depth;
+	int parentIsClass = FALSE;
+
+	vStringClear (result);
+	for (depth = 0; depth < nls->n; depth++)
+	{
+		const NestingLevel *const level = &nls->levels[depth];
+
+		if (indent <= level->indentation)
+			break;
+		if (depth > 0)
+			vStringCatS(result, ".");
+		vStringCat(result, level->name);
+		parentIsClass = (level->type == K_CLASS);
+	}
+	return parentIsClass;
+}
+
+/* Find the first nesting level whose name equals "parent". If that level is
+ * indented at least as deep as "indent", the current line lies outside of
+ * it, so the level is removed by clearing its name.
+ */
+static void checkParent(NestingLevels *nls, int indent, vString *parent)
+{
+    const char *const wanted = vStringValue(parent);
+    int idx = 0;
+
+    /* plain strcmp on the buffers; is there a better way to compare two
+     * vStrings? */
+    while (idx < nls->n &&
+           strcmp(wanted, vStringValue(nls->levels [idx].name)) != 0)
+        ++idx;
+
+    if (idx < nls->n && indent <= nls->levels [idx].indentation)
+        vStringClear(nls->levels [idx].name);
+}
+
+/* Record the scope "name" opened by a definition at column "indentation".
+ * The first recorded level that is indented at least as deep is overwritten
+ * and becomes the last level; if there is no such level a new one is pushed.
+ */
+static void addNestingLevel(NestingLevels *nls, int indentation,
+ const vString *name, boolean is_class)
+{
+ int i;
+ NestingLevel *nl = NULL;
+
+ /* look for the first level that is not shallower than the new scope */
+ for (i = 0; i < nls->n; i++)
+ {
+ nl = nls->levels + i;
+ if (indentation <= nl->indentation) break;
+ }
+ if (i == nls->n)
+ {
+ nestingLevelsPush(nls, name, 0);
+ /* fetch the pointer again; presumably index i is the slot just pushed */
+ nl = nls->levels + i;
+ }
+ else
+ { /* reuse existing slot */
+ nls->n = i + 1; /* drops every level recorded after this one */
+ vStringCopy(nl->name, name);
+ }
+ nl->indentation = indentation;
+ /* NOTE(review): !K_CLASS is merely "a value different from K_CLASS"
+ * (0 or 1), not a named kind; the code visible here only ever tests
+ * type == K_CLASS. */
+ nl->type = is_class ? K_CLASS : !K_CLASS;
+}
+
+/* Return a pointer to the start of the next triple string, or NULL. Store
+ * the kind of triple string in "which" if the return is not NULL.
+ * "which" is left untouched when NULL is returned. Quoted text that is not
+ * a triple string is stepped over with skipString.
+ */
+static char const *find_triple_start(char const *string, char const **which)
+{
+ char const *cp = string;
+
+ for (; *cp; cp++)
+ {
+ if (*cp == '"' || *cp == '\'')
+ {
+ /* doubletriple and singletriple are defined elsewhere in this file;
+ * presumably the three-character delimiters of the two kinds of
+ * triple string */
+ if (strncmp(cp, doubletriple, 3) == 0)
+ {
+ *which = doubletriple;
+ return cp;
+ }
+ if (strncmp(cp, singletriple, 3) == 0)
+ {
+ *which = singletriple;
+ return cp;
+ }
+ /* an ordinary quote: let skipString move past the literal */
+ cp = skipString(cp);
+ if (!*cp) break; /* hit the end of the line while skipping */
+ }
+ }
+ return NULL;
+}
+
+/* Scan "string" for the delimiter "*which" that closes the triple string
+ * currently open. If it is not found, "*which" is left alone (the string
+ * continues on the next line). If it is found, "*which" is reset to NULL and
+ * the remainder is searched for further triple strings; "*which" ends up
+ * naming the last one that is still open at the end of "string", if any.
+ */
+static void find_triple_end(char const *string, char const **which)
+{
+    char const *pos = string;
+
+    for (;;)
+    {
+        /* does the open string end on this line? */
+        pos = strstr (pos, *which);
+        if (pos == NULL)
+            return;
+        *which = NULL;
+
+        /* it does; does another one start behind it? */
+        pos = find_triple_start(pos + 3, which);
+        if (pos == NULL)
+            return;
+        pos += 3;
+    }
+}
+
+/* Return a pointer to the variable name inside "line" if the line is a
+ * simple assignment of the form "name = ...", or NULL otherwise. Only white
+ * space may precede the name.
+ */
+static const char *findVariable(const char *line)
+{
+ /* Parse global and class variable names (C.x) from assignment statements.
+ * Object attributes (obj.x) are ignored.
+ * Assignment to a tuple 'x, y = 2, 3' not supported.
+ * TODO: ignore duplicate tags from reassignment statements. */
+ const char *cp, *sp, *eq, *start;
+
+ /* cp: the first '=' on the line */
+ cp = strstr(line, "=");
+ if (!cp)
+ return NULL;
+ /* eq: scans what follows that '=' for a second '=' */
+ eq = cp + 1;
+ while (*eq)
+ {
+ if (*eq == '=')
+ return NULL; /* ignore '==' operator and 'x=5,y=6)' function lines */
+ if (*eq == '(' || *eq == '#')
+ break; /* allow 'x = func(b=2,y=2,' lines and comments at the end of line */
+ eq++;
+ }
+
+ /* go backwards to the start of the line, checking we have valid chars */
+ /* NOTE(review): start (and sp below) can be decremented to line - 1, i.e.
+ * one element in front of the buffer; it is only compared, not read, there */
+ start = cp - 1;
+ while (start >= line && isspace ((int) *start))
+ --start;
+ while (start >= line && isIdentifierCharacter ((int) *start))
+ --start;
+ /* start now sits just in front of the identifier that precedes the '=' */
+ if (!isIdentifierFirstCharacter(*(start + 1)))
+ return NULL;
+ sp = start;
+ while (sp >= line && isspace ((int) *sp))
+ --sp;
+ if ((sp + 1) != line) /* the line isn't a simple variable assignment */
+ return NULL;
+ /* the line is valid, parse the variable name */
+ ++start;
+ return start;
+}
+
+/* Skip type declaration that optionally follows a cdef/cpdef */
+/* "cp" points behind the cdef/cpdef keyword.
+ * Returns a pointer to the name being declared, or NULL if the statement is
+ * not recognised as a class or function declaration:
+ *  - "extern from ..." yields NULL;
+ *  - "class ..." sets *is_class to TRUE and returns the text after "class";
+ *  - otherwise at most two space-separated words are examined and the start
+ *    of the word that contains a '(' is returned (leading '*' skipped).
+ * *is_class is only written in the "class" case.
+ */
+static const char *skipTypeDecl (const char *cp, boolean *is_class)
+{
+ const char *lastStart = cp, *ptr = cp;
+ int loopCount = 0;
+ ptr = skipSpace(cp);
+ /* NOTE(review): the keyword tests below are prefix matches, so a word that
+ * merely begins with "extern", "from" or "class" is accepted as well */
+ if (!strncmp("extern", ptr, 6)) {
+ ptr += 6;
+ ptr = skipSpace(ptr);
+ if (!strncmp("from", ptr, 4)) { return NULL; }
+ }
+ if (!strncmp("class", ptr, 5)) {
+ ptr += 5 ;
+ *is_class = TRUE;
+ ptr = skipSpace(ptr);
+ return ptr;
+ }
+ /* limit so that we don't pick off "int item=obj()" */
+ while (*ptr && loopCount++ < 2) {
+ /* advance to the end of the current word */
+ while (*ptr && *ptr != '=' && *ptr != '(' && !isspace(*ptr)) ptr++;
+ if (!*ptr || *ptr == '=') return NULL;
+ if (*ptr == '(') {
+ return lastStart; /* if we stopped on a '(' we are done */
+ }
+ /* the word was followed by white space: remember where the next begins */
+ ptr = skipSpace(ptr);
+ lastStart = ptr;
+ while (*lastStart == '*') lastStart++; /* cdef int *identifier */
+ }
+ return NULL;
+}
+
+/* Parser entry point: read the input line by line with fileReadLine and emit
+ * tags for function and class definitions (def, class, cdef, cpdef), for
+ * simple variable assignments outside of functions, and for imports.
+ * Scopes are tracked through the indentation of the definitions seen so far.
+ */
+static void findPythonTags (void)
+{
+ vString *const continuation = vStringNew ();
+ vString *const name = vStringNew ();
+ vString *const parent = vStringNew();
+
+ NestingLevels *const nesting_levels = nestingLevelsNew();
+
+ const char *line;
+ int line_skip = 0; /* non-zero while backslash-continued lines are being joined */
+ char const *longStringLiteral = NULL; /* delimiter of the triple string still open, if any */
+
+ while ((line = (const char *) fileReadLine ()) != NULL)
+ {
+ const char *cp = line, *candidate;
+ char const *longstring;
+ char const *keyword, *variable;
+ int indent;
+
+ cp = skipSpace (cp);
+
+ /* note: blank and comment lines are dropped before the continuation
+ * handling, so they neither end nor reset a pending continuation */
+ if (*cp == '\0') /* skip blank line */
+ continue;
+
+ /* Skip comment if we are not inside a multi-line string. */
+ if (*cp == '#' && !longStringLiteral)
+ continue;
+
+ /* Deal with line continuation. */
+ if (!line_skip) vStringClear(continuation);
+ vStringCatS(continuation, line);
+ vStringStripTrailing(continuation);
+ if (vStringLast(continuation) == '\\')
+ {
+ /* replace the backslash by a blank and wait for the next line */
+ vStringChop(continuation);
+ vStringCatS(continuation, " ");
+ line_skip = 1;
+ continue;
+ }
+ /* from here on "line" is the joined logical line, not the raw input */
+ cp = line = vStringValue(continuation);
+ cp = skipSpace (cp);
+ indent = cp - line;
+ line_skip = 0;
+
+ checkParent(nesting_levels, indent, parent);
+
+ /* Deal with multiline string ending. */
+ if (longStringLiteral)
+ {
+ find_triple_end(cp, &longStringLiteral);
+ continue;
+ }
+
+ /* Deal with multiline string start. */
+ longstring = find_triple_start(cp, &longStringLiteral);
+ if (longstring)
+ {
+ longstring += 3;
+ find_triple_end(longstring, &longStringLiteral);
+ /* We don't parse for any tags in the rest of the line. */
+ continue;
+ }
+
+ /* Deal with def and class keywords. */
+ keyword = findDefinitionOrClass (cp);
+ if (keyword)
+ {
+ boolean found = FALSE;
+ boolean is_class = FALSE;
+ if (!strncmp (keyword, "def ", 4))
+ {
+ cp = skipSpace (keyword + 3);
+ found = TRUE;
+ }
+ else if (!strncmp (keyword, "class ", 6))
+ {
+ cp = skipSpace (keyword + 5);
+ found = TRUE;
+ is_class = TRUE;
+ }
+ else if (!strncmp (keyword, "cdef ", 5))
+ {
+ /* skipTypeDecl decides whether this is a function or class
+ * declaration and where its name starts */
+ cp = skipSpace(keyword + 4);
+ candidate = skipTypeDecl (cp, &is_class);
+ if (candidate)
+ {
+ found = TRUE;
+ cp = candidate;
+ }
+
+ }
+ else if (!strncmp (keyword, "cpdef ", 6))
+ {
+ cp = skipSpace(keyword + 5);
+ candidate = skipTypeDecl (cp, &is_class);
+ if (candidate)
+ {
+ found = TRUE;
+ cp = candidate;
+ }
+ }
+
+ if (found)
+ {
+ boolean is_parent_class;
+
+ is_parent_class =
+ constructParentString(nesting_levels, indent, parent);
+
+ if (is_class)
+ parseClass (cp, name, parent, is_parent_class);
+ else
+ parseFunction(cp, name, parent, is_parent_class);
+
+ /* the definition just tagged becomes a scope for the lines below it */
+ addNestingLevel(nesting_levels, indent, name, is_class);
+ }
+ }
+ /* Find global and class variables */
+ variable = findVariable(line);
+ if (variable)
+ {
+ const char *start = variable;
+ boolean parent_is_class;
+
+ /* copy the identifier that findVariable pointed at into "name" */
+ vStringClear (name);
+ while (isIdentifierCharacter ((int) *start))
+ {
+ vStringPut (name, (int) *start);
+ ++start;
+ }
+ vStringTerminate (name);
+
+ parent_is_class = constructParentString(nesting_levels, indent, parent);
+ /* skip variables in methods */
+ /* (this continue also bypasses the import parsing below) */
+ if (! parent_is_class && vStringLength(parent) > 0)
+ continue;
+
+ makeVariableTag (name, parent);
+ }
+ /* Find and parse imports */
+ parseImports(line);
+ }
+ /* Clean up all memory we allocated. */
+ vStringDelete (parent);
+ vStringDelete (name);
+ vStringDelete (continuation);
+ nestingLevelsFree (nesting_levels);
+}
+
+/* Create the parser definition for "Python": its tag kinds, the file name
+ * extensions it is selected for and findPythonTags as the parsing routine.
+ */
+extern parserDefinition *PythonParser (void)
+{
+    static const char *const extensions[] = { "py", "pyx", "pxd", "pxi" ,"scons", NULL };
+    parserDefinition *const python = parserNew ("Python");
+
+    python->parser = findPythonTags;
+    python->extensions = extensions;
+    python->kindCount = KIND_COUNT (PythonKinds);
+    python->kinds = PythonKinds;
+    return python;
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/qdos.c b/qdos.c
new file mode 100644
index 0000000..2adb8c3
--- /dev/null
+++ b/qdos.c
@@ -0,0 +1,106 @@
+/*
+* $Id: qdos.c 443 2006-05-30 04:37:13Z darren $
+*
+* Copyright (c) 1999, Thierry Godefroy <godefroy@imaginet.fr>
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* This module contains functions to handle wildcard expansion and file name
+* conversion under QDOS.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <qdos.h>
+#include <string.h>
+#include <errno.h>
+#include "ctags.h"
+
+/* Translate the filenames from UNIX to QDOS conventions on open calls */
+int (*_Open) (const char *, int, ...) = qopen;
+
+/* NOTE(review): none of the underscore-prefixed objects below is referenced
+ * in this file; presumably they are configuration hooks picked up by the
+ * QDOS C runtime start-up code - confirm against the compiler's library
+ * documentation. */
+long _stack = 24576; /* Plenty of stack space */
+long _memincr = 10240; /* Big increments to cut fragmentation */
+char _prog_name [] = "ctags";
+char _version [] = PROGRAM_VERSION;
+char _copyright [32] = __DATE__; /* holds the build date, not a copyright text */
+char *_endmsg = "\nPress a key to exit.";
+int custom_expand (char * param, char ***argvptr, int *argcptr);
+int (*_cmdwildcard) () = custom_expand; /* points at the wildcard expander defined below */
+
+
+struct WINDOWDEF _condetails = { 208, 1, 0, 7, 512, 256, 0, 0};
+void (*_consetup) () = consetup_title;
+
+/* custom cmdexpand: also expands directory names */
+
+#define FILEBUF_INIT 1024 /* Initial allocation size for buffer */
+#define FILEBUF_INCR 1024 /* Increment size for buffer */
+
+/* Expand the wildcard argument "param" into the matching names and append
+ * them to the argument vector *argvptr, incrementing *argcptr for each one.
+ *
+ * Returns 0 if "param" was not expanded (it is an option, contains no wild
+ * card character or matched nothing), -1 if memory ran out, and the new
+ * value of *argcptr otherwise. On an out-of-memory return *argvptr is still
+ * valid and keeps the names appended so far.
+ */
+int custom_expand (char * param, char ***argvptr, int *argcptr)
+{
+    int count,sl;
+    size_t bufsize;
+    char *filenamebuf, *bigger;
+    char *ptr,*safeptr;
+    char **grown;
+
+    /*
+     * Check to see if we should do wild card expansion.
+     * We only perform wildcard expansion if the parameter
+     * contains one of the wild card characters.
+     *
+     * We also do not expand any option that starts with '-'
+     * as we then assume that it is a unix style option.
+     */
+    if ((*param == '-') || (strpbrk (param,"*?") == NULL) ) {
+        return 0;
+    }
+
+    if ((filenamebuf = malloc (bufsize = FILEBUF_INIT)) == NULL) {
+        return -1;
+    }
+    for (;;) {
+        count = getfnl (param, filenamebuf, bufsize, QDR_ALL);
+        if (count != -1 || errno != ENOMEM) {
+            break;
+        }
+        /*
+         * We have overflowed the buffer, so we try
+         * to get a bigger buffer and try again.
+         * The old buffer is released if it cannot be enlarged.
+         */
+        bufsize += FILEBUF_INCR;
+        if ((bigger = realloc (filenamebuf, bufsize)) == NULL) {
+            free (filenamebuf);
+            return -1;
+        }
+        filenamebuf = bigger;
+    }
+    /*
+     * If no files were found, then return unexpanded.
+     */
+    if (count == 0) {
+        free (filenamebuf);
+        return 0;
+    }
+    /*
+     * Files were found, so add these to the list instead
+     * of the original parameter typed by the user.
+     * The buffer holds the names back to back, each one NUL-terminated.
+     * (Any other getfnl failure leaves count negative, so nothing is added.)
+     */
+    for ( ptr=filenamebuf ; count > 0 ; count -- ) {
+        /* keep the old vector reachable in case it cannot be enlarged */
+        grown = (char **) realloc (*argvptr, (size_t) (((*argcptr) + 2) * sizeof (char *)));
+        if (grown == NULL) {
+            free (filenamebuf);
+            return -1;
+        }
+        *argvptr = grown;
+        safeptr= (char *) malloc ((size_t) (sl=strlen (ptr) + 1));
+        if (safeptr == NULL) {
+            free (filenamebuf);
+            return -1;
+        }
+        (void) memcpy (safeptr,ptr, (size_t) sl);
+        (*argvptr) [*argcptr] = safeptr;
+        *argcptr += 1;
+        ptr += sl;
+    }
+    free (filenamebuf);
+    return *argcptr;
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/read.c b/read.c
new file mode 100644
index 0000000..7940c86
--- /dev/null
+++ b/read.c
@@ -0,0 +1,564 @@
+/*
+* $Id: read.c 708 2009-07-04 05:29:02Z dhiebert $
+*
+* Copyright (c) 1996-2002, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* This module contains low level source and tag file read functions (newline
+* conversion for source files are performed at this level).
+*/
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#include <string.h>
+#include <ctype.h>
+
+#define FILE_WRITE
+#include "read.h"
+#include "debug.h"
+#include "entry.h"
+#include "main.h"
+#include "routines.h"
+#include "options.h"
+
+/*
+* DATA DEFINITIONS
+*/
+inputFile File; /* globally read through macros */
+/* Recorded with fgetpos after each newline is read and copied into
+ * File.filePosition when the following line is started. */
+static fpos_t StartOfLine; /* holds deferred position of start of line */
+
+/*
+* FUNCTION DEFINITIONS
+*/
+
+/* Release the dynamically allocated members of the global File state: the
+ * line buffer, the source tag path and name, and the input path and name.
+ * Members that are NULL are left alone; the pointers are not reset.
+ */
+extern void freeSourceFileResources (void)
+{
+    if (File.line != NULL)
+        vStringDelete (File.line);
+
+    if (File.source.tagPath != NULL)
+        eFree (File.source.tagPath);
+    if (File.source.name != NULL)
+        vStringDelete (File.source.name);
+
+    if (File.path != NULL)
+        vStringDelete (File.path);
+    if (File.name != NULL)
+        vStringDelete (File.name);
+}
+
+/*
+ * Source file access functions
+ */
+
+/* Store "fileName" as the name of the input file (File.name) and derive
+ * File.path from it: everything in front of the base name except the last
+ * character (presumably the path separator), or NULL if the name has no
+ * directory part. Previous values are released first.
+ */
+static void setInputFileName (const char *const fileName)
+{
+    const char *const base = baseFilename (fileName);
+
+    if (File.name != NULL)
+        vStringDelete (File.name);
+    File.name = vStringNewInit (fileName);
+
+    if (File.path != NULL)
+        vStringDelete (File.path);
+    if (base != fileName)
+    {
+        const size_t dirLength = base - fileName - 1;
+        File.path = vStringNew ();
+        vStringNCopyS (File.path, fileName, dirLength);
+    }
+    else
+        File.path = NULL;
+}
+
+/* Make "fileName" the current source file: update File.source.name, the tag
+ * path, the header flag and the language, and track the longest file name
+ * seen in TagFile.max.file.
+ * The vString is stored, not copied, so the caller hands over ownership.
+ */
+static void setSourceFileParameters (vString *const fileName)
+{
+ if (File.source.name != NULL)
+ vStringDelete (File.source.name);
+ File.source.name = fileName;
+
+ if (File.source.tagPath != NULL)
+ eFree (File.source.tagPath);
+ /* the tag path is the name as given, unless relative tag paths were
+ * requested and the name is not absolute */
+ if (! Option.tagRelative || isAbsolutePath (vStringValue (fileName)))
+ File.source.tagPath = eStrdup (vStringValue (fileName));
+ else
+ File.source.tagPath =
+ relativeFilename (vStringValue (fileName), TagFile.directory);
+
+ if (vStringLength (fileName) > TagFile.max.file)
+ TagFile.max.file = vStringLength (fileName);
+
+ File.source.isHeader = isIncludeFile (vStringValue (fileName));
+ File.source.language = getFileLanguage (vStringValue (fileName));
+}
+
+/* Switch the current source file to "fileName" as named by a line directive.
+ * Names whose language is LANG_IGNORE are rejected (FALSE is returned and
+ * nothing changes). A relative name is combined with the path of the input
+ * file, if there is one. "fileName" itself is only read.
+ */
+static boolean setSourceFileName (vString *const fileName)
+{
+    const char *const given = vStringValue (fileName);
+    vString *fullName;
+
+    if (getFileLanguage (given) == LANG_IGNORE)
+        return FALSE;
+
+    if (isAbsolutePath (given) || File.path == NULL)
+        fullName = vStringNewCopy (fileName);
+    else
+        fullName = combinePathAndFile (vStringValue (File.path), given);
+    setSourceFileParameters (fullName);
+    return TRUE;
+}
+
+/*
+ * Line directive parsing
+ */
+
+/* Read from the input stream past any blanks and tabs and return the first
+ * other character (possibly EOF).
+ */
+static int skipWhite (void)
+{
+    int ch = getc (File.fp);
+
+    while (ch == ' ' || ch == '\t')
+        ch = getc (File.fp);
+    return ch;
+}
+
+/* Read a decimal number from the input stream, skipping leading blanks.
+ * The character that ends the number is pushed back. Returns the number, or
+ * 0 unless it is followed by a blank or a tab.
+ */
+static unsigned long readLineNumber (void)
+{
+    unsigned long value = 0;
+    int ch;
+
+    for (ch = skipWhite (); ch != EOF && isdigit (ch); ch = getc (File.fp))
+        value = (value * 10) + (ch - '0');
+    ungetc (ch, File.fp);
+
+    return (ch == ' ' || ch == '\t') ? value : 0;
+}
+
+/* While ANSI only permits lines of the form:
+ * # line n "filename"
+ * Earlier compilers generated lines of the form
+ * # n filename
+ * GNU C will output lines of the form:
+ * # n "filename"
+ * So we need to be fairly flexible in what we accept.
+ */
+/* Read the file name of a line directive from the input stream and return it
+ * in a new vString, which the caller must delete. The result is empty if no
+ * name follows.
+ */
+static vString *readFileName (void)
+{
+ vString *const fileName = vStringNew ();
+ boolean quoteDelimited = FALSE;
+ int c = skipWhite ();
+
+ if (c == '"')
+ {
+ c = getc (File.fp); /* skip double-quote */
+ quoteDelimited = TRUE;
+ }
+ /* a quoted name runs up to the closing quote, an unquoted one up to the
+ * next blank or tab; either way it stops at the end of the line */
+ while (c != EOF && c != '\n' &&
+ (quoteDelimited ? (c != '"') : (c != ' ' && c != '\t')))
+ {
+ vStringPut (fileName, c);
+ c = getc (File.fp);
+ }
+ /* leave the newline in the stream for the caller's line handling */
+ if (c == '\n')
+ ungetc (c, File.fp);
+ vStringPut (fileName, '\0');
+
+ return fileName;
+}
+
+/* Try to interpret the text following a '#' at the start of a line as a line
+ * directive ("# n file" or "# line n file"). On success the source line
+ * number and, if a name is given and accepted, the source file are updated
+ * and TRUE is returned. On FALSE the characters examined have already been
+ * consumed from the stream; the caller has to reposition it.
+ */
+static boolean parseLineDirective (void)
+{
+ boolean result = FALSE;
+ int c = skipWhite ();
+ DebugStatement ( const char* lineStr = ""; )
+
+ if (isdigit (c))
+ {
+ /* short form: give the digit back for readLineNumber */
+ ungetc (c, File.fp);
+ result = TRUE;
+ }
+ else if (c == 'l' && getc (File.fp) == 'i' &&
+ getc (File.fp) == 'n' && getc (File.fp) == 'e')
+ {
+ /* "line" must be followed by a blank to count as the keyword */
+ c = getc (File.fp);
+ if (c == ' ' || c == '\t')
+ {
+ DebugStatement ( lineStr = "line"; )
+ result = TRUE;
+ }
+ }
+ if (result)
+ {
+ const unsigned long lNum = readLineNumber ();
+ if (lNum == 0)
+ result = FALSE;
+ else
+ {
+ vString *const fileName = readFileName ();
+ if (vStringLength (fileName) == 0)
+ {
+ File.source.lineNumber = lNum - 1; /* applies to NEXT line */
+ DebugStatement ( debugPrintf (DEBUG_RAW, "#%s %ld", lineStr, lNum); )
+ }
+ else if (setSourceFileName (fileName))
+ {
+ File.source.lineNumber = lNum - 1; /* applies to NEXT line */
+ DebugStatement ( debugPrintf (DEBUG_RAW, "#%s %ld \"%s\"",
+ lineStr, lNum, vStringValue (fileName)); )
+ }
+ /* note: if setSourceFileName rejects the name, the line number is left
+ * unchanged, yet the directive still counts as parsed (see below) */
+
+ /* optionally emit a file tag when a directive announces line 1 of a file */
+ if (Option.include.fileNames && vStringLength (fileName) > 0 &&
+ lNum == 1)
+ {
+ tagEntryInfo tag;
+ initTagEntry (&tag, baseFilename (vStringValue (fileName)));
+
+ tag.isFileEntry = TRUE;
+ tag.lineNumberEntry = TRUE;
+ tag.lineNumber = 1;
+ tag.kindName = "file";
+ tag.kind = 'F';
+
+ makeTagEntry (&tag);
+ }
+ vStringDelete (fileName);
+ result = TRUE;
+ }
+ }
+ return result;
+}
+
+/*
+ * Source file I/O operations
+ */
+
+/* This function opens a source file, and resets the line counter. If it
+ * fails, it will display an error message and leave the File.fp set to NULL.
+ * A file that is still open is closed first. Returns TRUE if the file was
+ * opened.
+ */
+extern boolean fileOpen (const char *const fileName, const langType language)
+{
+#ifdef VMS
+ const char *const openMode = "r";
+#else
+ const char *const openMode = "rb";
+#endif
+ boolean opened = FALSE;
+
+ /* If another file was already open, then close it.
+ */
+ if (File.fp != NULL)
+ {
+ fclose (File.fp); /* close any open source file */
+ File.fp = NULL;
+ }
+
+ File.fp = fopen (fileName, openMode);
+ if (File.fp == NULL)
+ error (WARNING | PERROR, "cannot open \"%s\"", fileName);
+ else
+ {
+ opened = TRUE;
+
+ /* reset the reader state for the new file */
+ setInputFileName (fileName);
+ fgetpos (File.fp, &StartOfLine);
+ fgetpos (File.fp, &File.filePosition);
+ File.currentLine = NULL;
+ File.language = language;
+ File.lineNumber = 0L;
+ File.eof = FALSE;
+ File.newLine = TRUE;
+
+ if (File.line != NULL)
+ vStringClear (File.line);
+
+ /* the source file initially is the input file itself; the new vString
+ * is stored by setSourceFileParameters */
+ setSourceFileParameters (vStringNewInit (fileName));
+ File.source.lineNumber = 0L;
+
+ verbose ("OPENING %s as %s language %sfile\n", fileName,
+ getLanguageName (language),
+ File.source.isHeader ? "include " : "");
+ }
+ return opened;
+}
+
+/* Close the current input file, if one is open, after adding its line count
+ * and size to the totals when Option.printTotals is set.
+ */
+extern void fileClose (void)
+{
+    if (File.fp == NULL)
+        return;
+
+    if (Option.printTotals)
+    {
+        /* The line count of the file is 1 too big, since it is one-based
+         * and is incremented upon each newline.
+         */
+        fileStatus *const info = eStat (vStringValue (File.name));
+        addTotals (0, File.lineNumber - 1L, info->size);
+    }
+    fclose (File.fp);
+    File.fp = NULL;
+}
+
+/* Report whether the end of the current input file has been reached, i.e.
+ * whether the File.eof flag is set.
+ */
+extern boolean fileEOF (void)
+{
+ return File.eof;
+}
+
+/* Action to take for each encountered source newline.
+ * Called when the first character of a new line has been read: publishes the
+ * recorded start-of-line position and advances both line counters.
+ */
+static void fileNewline (void)
+{
+ File.filePosition = StartOfLine;
+ File.newLine = FALSE;
+ File.lineNumber++;
+ File.source.lineNumber++;
+ DebugStatement ( if (Option.breakLine == File.lineNumber) lineBreak (); )
+ DebugStatement ( debugPrintf (DEBUG_RAW, "%6ld: ", File.lineNumber); )
+}
+
+/* This function reads a single character from the stream, performing newline
+ * canonicalization.
+ * It also does the per-line bookkeeping (fileNewline) and, if enabled,
+ * consumes line directives at the start of a line. Sets File.eof and returns
+ * EOF at the end of the file.
+ */
+static int iFileGetc (void)
+{
+ int c;
+readnext:
+ c = getc (File.fp);
+
+ /* If previous character was a newline, then we're starting a line.
+ */
+ if (File.newLine && c != EOF)
+ {
+ fileNewline ();
+ if (c == '#' && Option.lineDirectives)
+ {
+ if (parseLineDirective ())
+ goto readnext;
+ else
+ {
+ /* not a directive: rewind to the start of the line and read
+ * the '#' again as an ordinary character */
+ fsetpos (File.fp, &StartOfLine);
+ c = getc (File.fp);
+ }
+ }
+ }
+
+ if (c == EOF)
+ File.eof = TRUE;
+ else if (c == NEWLINE)
+ {
+ File.newLine = TRUE;
+ fgetpos (File.fp, &StartOfLine);
+ }
+ else if (c == CRETURN)
+ {
+ /* Turn line breaks into a canonical form. The three commonly
+ * used forms of line breaks: LF (UNIX/Mac OS X), CR (Mac OS 9),
+ * and CR-LF (MS-DOS) are converted into a generic newline.
+ * Note: unless "macintosh" is defined, only a CR that is followed by LF
+ * is converted; a lone CR is returned unchanged. With "macintosh"
+ * defined every CR is converted and no look-ahead takes place.
+ */
+#ifndef macintosh
+ const int next = getc (File.fp); /* is CR followed by LF? */
+ if (next != NEWLINE)
+ ungetc (next, File.fp);
+ else
+#endif
+ {
+ c = NEWLINE; /* convert CR into newline */
+ File.newLine = TRUE;
+ fgetpos (File.fp, &StartOfLine);
+ }
+ }
+ DebugStatement ( debugPutc (DEBUG_RAW, c); )
+ return c;
+}
+
+/* Push back the character "c" so that the next call of fileGetc returns it.
+ * Only a single character is remembered; a later call overwrites it.
+ */
+extern void fileUngetc (int c)
+{
+ File.ungetch = c;
+}
+
+/* Read the next line of the input file, including its newline if it has
+ * one, into File.line and return that buffer. Returns NULL if the end of the
+ * file is reached before any character could be read.
+ */
+static vString *iFileGetLine (void)
+{
+ vString *result = NULL;
+ int c;
+ if (File.line == NULL)
+ File.line = vStringNew ();
+ vStringClear (File.line);
+ do
+ {
+ c = iFileGetc ();
+ if (c != EOF)
+ vStringPut (File.line, c);
+ /* a line is complete at a newline, or at EOF if the last line lacks one */
+ if (c == '\n' || (c == EOF && vStringLength (File.line) > 0))
+ {
+ vStringTerminate (File.line);
+#ifdef HAVE_REGEX
+ /* every complete line is also handed to the regex matching */
+ if (vStringLength (File.line) > 0)
+ matchRegex (File.line, File.source.language);
+#endif
+ result = File.line;
+ break;
+ }
+ } while (c != EOF);
+ Assert (result != NULL || File.eof);
+ return result;
+}
+
+/* Do not mix use of fileReadLine () and fileGetc () for the same file.
+ */
+/* Return the next character of the input file, or EOF at its end. Characters
+ * are served from a line buffered by iFileGetLine; a character pushed back
+ * with fileUngetc is returned first.
+ */
+extern int fileGetc (void)
+{
+ int c;
+
+ /* If there is an ungotten character, then return it. Don't do any
+ * other processing on it, though, because we already did that the
+ * first time it was read through fileGetc ().
+ */
+ /* ('\0' doubles as the "nothing pushed back" marker) */
+ if (File.ungetch != '\0')
+ {
+ c = File.ungetch;
+ File.ungetch = '\0';
+ return c; /* return here to avoid re-calling debugPutc () */
+ }
+ /* c == '\0' means "no character yet": either the buffered line is used up
+ * or a new line has just been fetched, so go round again */
+ do
+ {
+ if (File.currentLine != NULL)
+ {
+ c = *File.currentLine++;
+ if (c == '\0')
+ File.currentLine = NULL;
+ }
+ else
+ {
+ vString* const line = iFileGetLine ();
+ if (line != NULL)
+ File.currentLine = (unsigned char*) vStringValue (line);
+ if (File.currentLine == NULL)
+ c = EOF;
+ else
+ c = '\0';
+ }
+ } while (c == '\0');
+ DebugStatement ( debugPutc (DEBUG_READ, c); )
+ return c;
+}
+
+/* Read characters with fileGetc until "c" or the end of the file is met.
+ * At least one character is read. Returns the character that stopped the
+ * scan: "c" itself or EOF.
+ */
+extern int fileSkipToCharacter (int c)
+{
+    int next = fileGetc ();
+
+    while (next != EOF && next != c)
+        next = fileGetc ();
+    return next;
+}
+
+/* Line-oriented alternative to fileGetc (); the two must not be mixed for
+ * the same file. Returns the next line of the input file with its newline
+ * stripped, or NULL once every line has been read. The text lives in the
+ * shared line buffer and is overwritten by the next read.
+ */
+extern const unsigned char *fileReadLine (void)
+{
+    vString* const line = iFileGetLine ();
+    const unsigned char* text;
+
+    if (line == NULL)
+        return NULL;
+
+    text = (const unsigned char*) vStringValue (line);
+    vStringStripNewline (line);
+    DebugStatement ( debugPrintf (DEBUG_READ, "%s\n", text); )
+    return text;
+}
+
+/*
+ * Source file line reading with automatic buffer sizing
+ */
+
+/* Read the next line of "fp" into "vLine", enlarging the buffer until the
+ * whole line fits. A trailing CR, or a CR-LF pair, is turned into a single
+ * newline. Returns the buffer of "vLine", or NULL if nothing could be read
+ * because the end of the file was reached. A NULL "fp", a read error and an
+ * exhausted memory are reported through error (FATAL, ...).
+ */
+extern char *readLine (vString *const vLine, FILE *const fp)
+{
+    char *result = NULL;
+
+    vStringClear (vLine);
+    if (fp == NULL)
+        error (FATAL, "NULL file pointer");
+    else
+    {
+        boolean reReadLine;
+
+        /* If reading the line places any character other than a null or a
+         * newline at the last character position in the buffer (one less
+         * than the buffer size), then we must resize the buffer and
+         * reattempt to read the line.
+         */
+        do
+        {
+            char *const pLastChar = vStringValue (vLine) + vStringSize (vLine) -2;
+            fpos_t startOfLine;
+
+            fgetpos (fp, &startOfLine);
+            reReadLine = FALSE;
+            *pLastChar = '\0';
+            result = fgets (vStringValue (vLine), (int) vStringSize (vLine), fp);
+            if (result == NULL)
+            {
+                if (! feof (fp))
+                    error (FATAL | PERROR, "Failure on attempt to read file");
+            }
+            else if (*pLastChar != '\0' &&
+                     *pLastChar != '\n' && *pLastChar != '\r')
+            {
+                /* buffer overflow */
+                reReadLine = vStringAutoResize (vLine);
+                if (reReadLine)
+                    fsetpos (fp, &startOfLine);
+                else
+                    error (FATAL | PERROR, "input line too big; out of memory");
+            }
+            else
+            {
+                size_t length;
+                vStringSetLength (vLine);
+                length = vStringLength (vLine);
+                /* canonicalize new line; the length checks keep the look at
+                 * the last two characters inside the buffer (a line that is
+                 * just "\n" has no character in front of the newline) */
+                if (length > 0)
+                {
+                    char *const eol = vStringValue (vLine) + length - 1;
+                    if (*eol == '\r')
+                        *eol = '\n';
+                    else if (length > 1 && *(eol - 1) == '\r' && *eol == '\n')
+                    {
+                        *(eol - 1) = '\n';
+                        *eol = '\0';
+                        --vLine->length;
+                    }
+                }
+            }
+        } while (reReadLine);
+    }
+    return result;
+}
+
+/* Read into "vLine" the line of the current input file that starts at the
+ * file position "location". If "pSeekValue" is not NULL it receives the
+ * ftell offset of that position. The read position of the file is restored
+ * afterwards. Hitting the end of the file is reported as a fatal error.
+ */
+extern char *readSourceLine (
+    vString *const vLine, fpos_t location, long *const pSeekValue)
+{
+    fpos_t savedPosition;
+    char *line;
+
+    fgetpos (File.fp, &savedPosition);
+    fsetpos (File.fp, &location);
+    if (pSeekValue != NULL)
+        *pSeekValue = ftell (File.fp);
+
+    line = readLine (vLine, File.fp);
+    if (line == NULL)
+        error (FATAL, "Unexpected end of file: %s", vStringValue (File.name));
+
+    fsetpos (File.fp, &savedPosition);
+    return line;
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/read.h b/read.h
new file mode 100644
index 0000000..ad49a82
--- /dev/null
+++ b/read.h
@@ -0,0 +1,116 @@
+/*
+* $Id: read.h 659 2008-04-20 23:27:48Z elliotth $
+*
+* Copyright (c) 1998-2002, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* External interface to read.c
+*/
+#ifndef _READ_H
+#define _READ_H
+
+/* CONST_FILE qualifies the declaration of the global File below: it expands
+ * to "const" unless FILE_WRITE (or VAXC) is defined, so only a module that
+ * defines FILE_WRITE before including this header sees File as writable. */
+#if defined(FILE_WRITE) || defined(VAXC)
+# define CONST_FILE
+#else
+# define CONST_FILE const
+#endif
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#include <stdio.h>
+#include <ctype.h>
+
+#include "parse.h"
+#include "vstring.h"
+
+/*
+* MACROS
+*/
+/* Accessors for the global File state. "Input" refers to the file actually
+ * being read, "source" to the file a tag is attributed to (they differ once
+ * #line directives have been processed).
+ * NOTE(review): getInputFileName() expands to File.source.name, exactly like
+ * getSourceFileName(), and not to File.name - confirm that this is intended.
+ */
+#define getInputLineNumber() File.lineNumber
+#define getInputFileName() vStringValue (File.source.name)
+#define getInputFilePosition() File.filePosition
+#define getSourceFileName() vStringValue (File.source.name)
+#define getSourceFileTagPath() File.source.tagPath
+#define getSourceLanguage() File.source.language
+#define getSourceLanguageName() getLanguageName (File.source.language)
+#define getSourceLineNumber() File.source.lineNumber
+#define isLanguage(lang) (boolean)((lang) == File.source.language)
+#define isHeaderFile() File.source.isHeader
+
+/*
+* DATA DECLARATIONS
+*/
+
+/* Symbolic names for characters the readers test for. */
+enum eCharacters {
+ /* white space characters */
+ SPACE = ' ',
+ NEWLINE = '\n',
+ CRETURN = '\r',
+ FORMFEED = '\f',
+ TAB = '\t',
+ VTAB = '\v',
+
+ /* some hard to read characters */
+ DOUBLE_QUOTE = '"',
+ SINGLE_QUOTE = '\'',
+ BACKSLASH = '\\',
+
+ /* values with the high bit set ('S' + 0x80, 'C' + 0x80), hence outside of
+ * 7-bit ASCII. NOTE(review): presumably markers that stand for a whole
+ * string or character literal - they are not used in this header. */
+ STRING_SYMBOL = ('S' + 0x80),
+ CHAR_SYMBOL = ('C' + 0x80)
+};
+
+/* Maintains the state of the current source file.
+ * There is a single instance, the global "File" declared below.
+ */
+typedef struct sInputFile {
+ vString *name; /* name of input file */
+ vString *path; /* path of input file (if any) */
+ vString *line; /* last line read from file */
+ const unsigned char* currentLine; /* current line being worked on */
+ FILE *fp; /* stream used for reading the file */
+ unsigned long lineNumber; /* line number in the input file */
+ fpos_t filePosition; /* file position of current line */
+ int ungetch; /* a single character that was ungotten */
+ boolean eof; /* have we reached the end of file? */
+ boolean newLine; /* will the next character begin a new line? */
+ langType language; /* language of input file */
+
+ /* Contains data pertaining to the original source file in which the tag
+ * was defined. This may be different from the input file when #line
+ * directives are processed (i.e. the input file is preprocessor output).
+ */
+ struct sSource {
+ vString *name; /* name to report for source file */
+ char *tagPath; /* path of source file relative to tag file */
+ unsigned long lineNumber;/* line number in the source file */
+ boolean isHeader; /* is source file a header file? */
+ langType language; /* language of source file */
+ } source;
+} inputFile;
+
+/*
+* GLOBAL VARIABLES
+*/
+/* state of the file being read; const unless FILE_WRITE is defined (see
+ * CONST_FILE above) */
+extern CONST_FILE inputFile File;
+
+/*
+* FUNCTION PROTOTYPES
+*/
+/* release the strings held by File */
+extern void freeSourceFileResources (void);
+/* open/close the input file; fileOpen returns whether the file was opened */
+extern boolean fileOpen (const char *const fileName, const langType language);
+extern boolean fileEOF (void);
+extern void fileClose (void);
+/* character interface; do not mix with fileReadLine () on the same file */
+extern int fileGetc (void);
+extern int fileSkipToCharacter (int c);
+extern void fileUngetc (int c);
+/* line interface; returns NULL once all lines have been read */
+extern const unsigned char *fileReadLine (void);
+/* read a line from an arbitrary stream / from a given position of the input file */
+extern char *readLine (vString *const vLine, FILE *const fp);
+extern char *readSourceLine (vString *const vLine, fpos_t location, long *const pSeekValue);
+
+#endif /* _READ_H */
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/readtags.c b/readtags.c
new file mode 100644
index 0000000..86442d1
--- /dev/null
+++ b/readtags.c
@@ -0,0 +1,959 @@
+/*
+* $Id: readtags.c 592 2007-07-31 03:30:41Z dhiebert $
+*
+* Copyright (c) 1996-2003, Darren Hiebert
+*
+* This source code is released into the public domain.
+*
+* This module contains functions for reading tag files.
+*/
+
+/*
+* INCLUDE FILES
+*/
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <errno.h>
+#include <sys/types.h> /* to declare off_t */
+
+#include "readtags.h"
+
+/*
+* MACROS
+*/
+#define TAB '\t'
+
+
+/*
+* DATA DECLARATIONS
+*/
+/* Growable character buffer used for the current line and tag name.
+ * A zeroed vstring (size 0) has no storage yet; growString () allocates the
+ * first 128 bytes and doubles the allocation on each later call.
+ */
+typedef struct {
+ /* number of bytes allocated for `buffer' (0 before first allocation) */
+ size_t size;
+ /* storage for the string */
+ char *buffer;
+} vstring;
+
+/* Information about current tag file.
+ * This is the definition behind the opaque `tagFile' handle declared in
+ * readtags.h; it is allocated by initialize () and released by terminate ().
+ */
+struct sTagFile {
+ /* has the file been opened and this structure initialized? */
+ short initialized;
+ /* format of tag file */
+ short format;
+ /* how is the tag file sorted? */
+ sortType sortMethod;
+ /* pointer to file structure */
+ FILE* fp;
+ /* file position of first character of `line' */
+ off_t pos;
+ /* size of tag file in seekable positions */
+ off_t size;
+ /* last line read */
+ vstring line;
+ /* name of tag in last line read */
+ vstring name;
+ /* defines tag search state */
+ struct {
+ /* file position of last match for tag */
+ off_t pos;
+ /* name of tag last searched for */
+ char *name;
+ /* length of name for partial matches */
+ size_t nameLength;
+ /* performing partial match */
+ short partial;
+ /* ignoring case */
+ short ignorecase;
+ } search;
+ /* miscellaneous extension fields */
+ struct {
+ /* number of entries allocated in `list' (its capacity) */
+ unsigned short max;
+ /* list of key value pairs */
+ tagExtensionField *list;
+ } fields;
+ /* buffers to be freed at close */
+ struct {
+ /* name of program author */
+ char *author;
+ /* name of program */
+ char *name;
+ /* URL of distribution */
+ char *url;
+ /* program version */
+ char *version;
+ } program;
+};
+
+/*
+* DATA DEFINITIONS
+*/
+/* Value returned by readFieldValue () for the value-less "file" key. */
+const char *const EmptyString = "";
+/* Leading characters that mark a pseudo-tag (tag file metadata) line. */
+const char *const PseudoTagPrefix = "!_";
+
+/*
+* FUNCTION DEFINITIONS
+*/
+
+/*
+ * Compare two strings, ignoring case.
+ * Return 0 for match, < 0 for smaller, > 0 for bigger
+ * Make sure case is folded to uppercase in comparison (like for 'sort -f')
+ * This makes a difference when one of the chars lies between upper and lower
+ * ie. one of the chars [ \ ] ^ _ ` for ascii. (The '_' in particular !)
+ *
+ * Characters are compared as unsigned char, exactly as strcmp () does: the
+ * <ctype.h> functions are only defined for values representable as unsigned
+ * char (or EOF), so a plain `char' holding a byte >= 0x80 must not be passed
+ * to toupper () directly on platforms where `char' is signed.
+ */
+static int struppercmp (const char *s1, const char *s2)
+{
+    int result;
+    do
+    {
+        result = toupper ((unsigned char) *s1) - toupper ((unsigned char) *s2);
+    } while (result == 0 && *s1++ != '\0' && *s2++ != '\0');
+    return result;
+}
+
+/*
+ * Compare at most `n' characters of two strings, ignoring case (folding to
+ * uppercase, see struppercmp ()).
+ * Return 0 for match, < 0 for smaller, > 0 for bigger.
+ *
+ * As with strncmp (), n == 0 compares nothing and yields 0. Characters are
+ * compared as unsigned char because toupper () is undefined for negative
+ * arguments other than EOF.
+ */
+static int strnuppercmp (const char *s1, const char *s2, size_t n)
+{
+    int result = 0;
+    while (n-- > 0)
+    {
+        result = toupper ((unsigned char) *s1) - toupper ((unsigned char) *s2);
+        /* a zero difference at a NUL means both strings ended together */
+        if (result != 0 || *s1 == '\0')
+            break;
+        ++s1;
+        ++s2;
+    }
+    return result;
+}
+
+/* Enlarge the storage of `s': allocate 128 bytes (holding an empty string)
+ * if it has none yet, otherwise double the allocation while preserving the
+ * contents.
+ * Returns 1 on success. On failure reports the error with perror (),
+ * leaves `s' untouched (the old buffer stays valid) and returns 0.
+ */
+static int growString (vstring *s)
+{
+    int result = 0;
+    size_t newLength;
+    char *newLine;
+    if (s->size == 0)
+    {
+        newLength = 128;
+        newLine = (char*) malloc (newLength);
+        /* only terminate the buffer once we know we actually have one */
+        if (newLine != NULL)
+            *newLine = '\0';
+    }
+    else if (s->size > (size_t) -1 / 2)
+    {
+        /* doubling would wrap around size_t */
+        newLength = s->size;
+        newLine = NULL;
+        errno = ENOMEM;
+    }
+    else
+    {
+        newLength = 2 * s->size;
+        newLine = (char*) realloc (s->buffer, newLength);
+    }
+    if (newLine == NULL)
+        perror ("string too large");
+    else
+    {
+        s->buffer = newLine;
+        s->size = newLength;
+        result = 1;
+    }
+    return result;
+}
+
+/* Copy name of tag out of tag line.
+ * The name is everything in file->line up to the first tab or, when the line
+ * has no tab, up to the first newline or carriage return (else the whole
+ * line). file->name is grown as needed; if it cannot be grown the name is
+ * truncated to the space available rather than retrying forever.
+ */
+static void copyName (tagFile *const file)
+{
+    size_t length;
+    const char *end = strchr (file->line.buffer, '\t');
+    if (end == NULL)
+    {
+        end = strchr (file->line.buffer, '\n');
+        if (end == NULL)
+            end = strchr (file->line.buffer, '\r');
+    }
+    if (end != NULL)
+        length = end - file->line.buffer;
+    else
+        length = strlen (file->line.buffer);
+    while (length >= file->name.size)
+    {
+        if (! growString (&file->name))
+        {
+            if (file->name.size == 0)
+                return;  /* no buffer at all: nothing can be stored */
+            length = file->name.size - 1;  /* keep what fits */
+        }
+    }
+    /* length never exceeds the line's length, so a plain copy is enough */
+    memcpy (file->name.buffer, file->line.buffer, length);
+    file->name.buffer [length] = '\0';
+}
+
+/* Read the next physical line of the tag file into file->line, growing the
+ * buffer until the whole line fits, strip its trailing newline/carriage
+ * return characters, record the line's starting offset in file->pos and
+ * copy the tag name into file->name.
+ * Returns 1 if a line was read; 0 at end of file, on a read error, or when
+ * the line buffer is missing or cannot be enlarged.
+ */
+static int readTagLineRaw (tagFile *const file)
+{
+    int result = 1;
+    int reReadLine;
+
+    /* the sentinel below needs at least two bytes of storage */
+    if (file->line.buffer == NULL || file->line.size < 2)
+        return 0;
+
+    /* If reading the line places any character other than a null or a
+     * newline at the last character position in the buffer (one less than
+     * the buffer size), then we must resize the buffer and reattempt to read
+     * the line.
+     */
+    do
+    {
+        char *const pLastChar = file->line.buffer + file->line.size - 2;
+        char *line;
+
+        file->pos = ftell (file->fp);
+        reReadLine = 0;
+        *pLastChar = '\0';
+        line = fgets (file->line.buffer, (int) file->line.size, file->fp);
+        if (line == NULL)
+        {
+            /* read error */
+            if (! feof (file->fp))
+                perror ("readTagLine");
+            result = 0;
+        }
+        else if (*pLastChar != '\0' &&
+                 *pLastChar != '\n' && *pLastChar != '\r')
+        {
+            /* buffer overflow: retry with a bigger buffer, but give up if
+             * the buffer cannot grow or the file cannot be repositioned;
+             * retrying with the same buffer would never terminate.
+             */
+            if (growString (&file->line) &&
+                fseek (file->fp, file->pos, SEEK_SET) == 0)
+                reReadLine = 1;
+            else
+                result = 0;
+        }
+        else
+        {
+            size_t i = strlen (file->line.buffer);
+            while (i > 0 &&
+                   (file->line.buffer [i - 1] == '\n' || file->line.buffer [i - 1] == '\r'))
+            {
+                file->line.buffer [i - 1] = '\0';
+                --i;
+            }
+        }
+    } while (reReadLine && result);
+    if (result)
+        copyName (file);
+    return result;
+}
+
+/* Read lines with readTagLineRaw () until one yields a non-empty tag name.
+ * Returns the result of the last raw read: non-zero when such a line was
+ * found, 0 when reading stopped first.
+ */
+static int readTagLine (tagFile *const file)
+{
+    for (;;)
+    {
+        const int gotLine = readTagLineRaw (file);
+        if (! gotLine || *file->name.buffer != '\0')
+            return gotLine;
+    }
+}
+
+/* Double the capacity of file->fields.list (clamped to what an unsigned
+ * short can count).
+ * Returns TagSuccess when the list was enlarged. If the list is already at
+ * its maximum size or memory is exhausted, reports the problem with
+ * perror (), leaves the list unchanged and returns TagFailure.
+ */
+static tagResult growFields (tagFile *const file)
+{
+    const unsigned short limit = (unsigned short) -1;
+    const unsigned short oldCount = file->fields.max;
+    unsigned short newCount;
+    tagExtensionField *newFields;
+
+    if (oldCount == 0)
+        newCount = 20;
+    else if (oldCount > limit / 2)
+        newCount = limit;  /* plain doubling would wrap around */
+    else
+        newCount = (unsigned short) (2 * oldCount);
+
+    if (newCount == oldCount)
+    {
+        /* already at the largest representable capacity */
+        errno = ENOMEM;
+        perror ("too many extension fields");
+        return TagFailure;
+    }
+    newFields = (tagExtensionField*)
+        realloc (file->fields.list, newCount * sizeof (tagExtensionField));
+    if (newFields == NULL)
+    {
+        perror ("too many extension fields");
+        return TagFailure;
+    }
+    file->fields.list = newFields;
+    file->fields.max = newCount;
+    return TagSuccess;
+}
+
+/* Split the tab-separated extension fields in `string' (the text following
+ * the ;" marker of a tag line) in place and record them in `entry'.
+ * A field without a colon is taken as the kind. The keys "kind", "file" and
+ * "line" set entry->kind, entry->fileScope and entry->address.lineNumber;
+ * any other key:value pair is appended to file->fields.list and counted in
+ * entry->fields.count. A pair is dropped when the list is full and cannot
+ * be enlarged.
+ */
+static void parseExtensionFields (tagFile *const file, tagEntry *const entry,
+                                  char *const string)
+{
+    char *p = string;
+    while (p != NULL && *p != '\0')
+    {
+        while (*p == TAB)
+            *p++ = '\0';
+        if (*p != '\0')
+        {
+            char *colon;
+            char *field = p;
+            p = strchr (p, TAB);
+            if (p != NULL)
+                *p++ = '\0';
+            colon = strchr (field, ':');
+            if (colon == NULL)
+                entry->kind = field;
+            else
+            {
+                const char *key = field;
+                const char *value = colon + 1;
+                *colon = '\0';
+                if (strcmp (key, "kind") == 0)
+                    entry->kind = value;
+                else if (strcmp (key, "file") == 0)
+                    entry->fileScope = 1;
+                else if (strcmp (key, "line") == 0)
+                    entry->address.lineNumber = atol (value);
+                else if (entry->fields.count < file->fields.max ||
+                         growFields (file) == TagSuccess)
+                {
+                    /* only store when a slot is known to exist; writing
+                     * after a failed growFields () would overrun the list
+                     */
+                    file->fields.list [entry->fields.count].key = key;
+                    file->fields.list [entry->fields.count].value = value;
+                    ++entry->fields.count;
+                }
+            }
+        }
+    }
+}
+
+/* Break the line held in file->line into the members of `entry'.
+ * The line buffer is modified in place (separators are overwritten with
+ * NULs) and the entry's strings point into it, so they remain valid only
+ * until the next line is read.
+ *
+ * Columns missing from the line get defined values: entry->file is an empty
+ * string, entry->address.pattern is NULL and entry->address.lineNumber is 0.
+ * If a search pattern is not terminated by its delimiter, the rest of the
+ * line is kept as the pattern and no extension fields are parsed.
+ */
+static void parseTagLine (tagFile *file, tagEntry *const entry)
+{
+    int i;
+    char *p = file->line.buffer;
+    char *tab = strchr (p, TAB);
+
+    entry->fields.list = NULL;
+    entry->fields.count = 0;
+    entry->kind = NULL;
+    entry->fileScope = 0;
+    entry->file = EmptyString;
+    entry->address.pattern = NULL;
+    entry->address.lineNumber = 0;
+
+    entry->name = p;
+    if (tab != NULL)
+    {
+        *tab = '\0';
+        p = tab + 1;
+        entry->file = p;
+        tab = strchr (p, TAB);
+        if (tab != NULL)
+        {
+            *tab = '\0';
+            p = tab + 1;
+            if (*p == '/' || *p == '?')
+            {
+                /* parse pattern */
+                int delimiter = *(unsigned char*) p;
+                entry->address.lineNumber = 0;
+                entry->address.pattern = p;
+                do
+                {
+                    p = strchr (p + 1, delimiter);
+                } while (p != NULL && *(p - 1) == '\\');
+                /* p stays NULL for an unterminated (invalid) pattern */
+                if (p != NULL)
+                    ++p;
+            }
+            else if (isdigit ((int) *(unsigned char*) p))
+            {
+                /* parse line number */
+                entry->address.pattern = p;
+                entry->address.lineNumber = atol (p);
+                while (isdigit ((int) *(unsigned char*) p))
+                    ++p;
+            }
+            else
+            {
+                /* invalid pattern */
+            }
+            if (p != NULL)
+            {
+                const int fieldsPresent = (strncmp (p, ";\"", 2) == 0);
+                *p = '\0';
+                if (fieldsPresent)
+                    parseExtensionFields (file, entry, p + 2);
+            }
+        }
+    }
+    if (entry->fields.count > 0)
+        entry->fields.list = file->fields.list;
+    /* clear the unused tail of the shared list */
+    for (i = entry->fields.count ; i < file->fields.max ; ++i)
+    {
+        file->fields.list [i].key = NULL;
+        file->fields.list [i].value = NULL;
+    }
+}
+
+/* Return a heap-allocated copy of `str' made with strdup (), or NULL when
+ * `str' is NULL. A failed allocation is reported with perror () and also
+ * yields NULL.
+ */
+static char *duplicate (const char *str)
+{
+    char *copy;
+    if (str == NULL)
+        return NULL;
+    copy = strdup (str);
+    if (copy == NULL)
+        perror (NULL);
+    return copy;
+}
+
+/* Consume the pseudo-tag lines (those starting with PseudoTagPrefix) from
+ * the current file position, storing the values of the recognised keys in
+ * `file' and, when `info' is not NULL, mirroring them into `info'.
+ * On return the stream is positioned at the start of the last line that was
+ * attempted, i.e. at the first line that is not a pseudo-tag.
+ */
+static void readPseudoTags (tagFile *const file, tagFileInfo *const info)
+{
+    const size_t prefixLen = strlen (PseudoTagPrefix);
+    fpos_t lineStart;
+
+    /* defaults reported when the file carries no pseudo-tags */
+    if (info != NULL)
+    {
+        info->file.format = 1;
+        info->file.sort = TAG_UNSORTED;
+        info->program.author = NULL;
+        info->program.name = NULL;
+        info->program.url = NULL;
+        info->program.version = NULL;
+    }
+    for (;;)
+    {
+        tagEntry entry;
+        const char *key;
+        const char *value;
+
+        fgetpos (file->fp, &lineStart);
+        if (! readTagLine (file))
+            break;
+        if (strncmp (file->line.buffer, PseudoTagPrefix, prefixLen) != 0)
+            break;
+
+        /* a pseudo-tag keeps its key in the name column (after the prefix)
+         * and its value in the file column
+         */
+        parseTagLine (file, &entry);
+        key = entry.name + prefixLen;
+        value = entry.file;
+        if (strcmp (key, "TAG_FILE_SORTED") == 0)
+            file->sortMethod = (sortType) atoi (value);
+        else if (strcmp (key, "TAG_FILE_FORMAT") == 0)
+            file->format = (short) atoi (value);
+        else if (strcmp (key, "TAG_PROGRAM_AUTHOR") == 0)
+            file->program.author = duplicate (value);
+        else if (strcmp (key, "TAG_PROGRAM_NAME") == 0)
+            file->program.name = duplicate (value);
+        else if (strcmp (key, "TAG_PROGRAM_URL") == 0)
+            file->program.url = duplicate (value);
+        else if (strcmp (key, "TAG_PROGRAM_VERSION") == 0)
+            file->program.version = duplicate (value);
+
+        if (info != NULL)
+        {
+            info->file.format = file->format;
+            info->file.sort = file->sortMethod;
+            info->program.author = file->program.author;
+            info->program.name = file->program.name;
+            info->program.url = file->program.url;
+            info->program.version = file->program.version;
+        }
+    }
+    fsetpos (file->fp, &lineStart);
+}
+
+/* Rewind the tag file and skip over the leading pseudo-tag lines, leaving
+ * the stream positioned at the start of the first line that does not begin
+ * with PseudoTagPrefix (or of the last line attempted if reading stops).
+ */
+static void gotoFirstLogicalTag (tagFile *const file)
+{
+    const size_t prefixLen = strlen (PseudoTagPrefix);
+    fpos_t lineStart;
+
+    rewind (file->fp);
+    do
+    {
+        fgetpos (file->fp, &lineStart);
+    } while (readTagLine (file) &&
+             strncmp (file->line.buffer, PseudoTagPrefix, prefixLen) == 0);
+    fsetpos (file->fp, &lineStart);
+}
+
+/* Allocate a tag file handle, open `filePath' for reading and process its
+ * pseudo-tags.
+ *
+ * `info' may be NULL, as documented for tagsOpen (). When it is not NULL:
+ * on success it is filled in by readPseudoTags () and info->status.opened
+ * is set to 1; on failure info->status.opened is set to 0 and
+ * info->status.error_number receives the errno value of the failed fopen ()
+ * or ENOMEM if memory could not be allocated.
+ *
+ * Returns the new handle, or NULL on failure (nothing is leaked then).
+ */
+static tagFile *initialize (const char *const filePath, tagFileInfo *const info)
+{
+    int failed = 0;
+    int error = 0;
+    tagFile *result = (tagFile*) calloc ((size_t) 1, sizeof (tagFile));
+    if (result == NULL)
+    {
+        failed = 1;
+        error = ENOMEM;
+    }
+    else
+    {
+        const int haveLine = growString (&result->line);
+        const int haveName = growString (&result->name);
+        result->fields.max = 20;
+        result->fields.list = (tagExtensionField*) calloc (
+            result->fields.max, sizeof (tagExtensionField));
+        if (! haveLine || ! haveName || result->fields.list == NULL)
+        {
+            failed = 1;
+            error = ENOMEM;
+        }
+        else
+        {
+            result->fp = fopen (filePath, "r");
+            if (result->fp == NULL)
+            {
+                failed = 1;
+                error = errno;  /* capture before anything can clobber it */
+            }
+        }
+    }
+
+    if (failed)
+    {
+        if (result != NULL)
+        {
+            free (result->line.buffer);
+            free (result->name.buffer);
+            free (result->fields.list);
+            free (result);
+            result = NULL;
+        }
+        if (info != NULL)
+        {
+            info->status.opened = 0;
+            info->status.error_number = error;
+        }
+    }
+    else
+    {
+        fseek (result->fp, 0, SEEK_END);
+        result->size = ftell (result->fp);
+        rewind (result->fp);
+        readPseudoTags (result, info);
+        if (info != NULL)
+            info->status.opened = 1;
+        result->initialized = 1;
+    }
+    return result;
+}
+
+/* Close the tag file and release everything owned by the handle, including
+ * the handle itself. `file' must not be used afterwards.
+ */
+static void terminate (tagFile *const file)
+{
+    fclose (file->fp);
+
+    /* free (NULL) is a no-op, so members that were never set need no guard */
+    free (file->line.buffer);
+    free (file->name.buffer);
+    free (file->fields.list);
+    free (file->program.author);
+    free (file->program.name);
+    free (file->program.url);
+    free (file->program.version);
+    free (file->search.name);
+
+    memset (file, 0, sizeof (tagFile));
+    free (file);
+}
+
+/* Read the next tag line and, when `entry' is not NULL, parse it into
+ * `entry'. Returns TagFailure if `file' is NULL or uninitialized, or if no
+ * further line could be read; TagSuccess otherwise.
+ */
+static tagResult readNext (tagFile *const file, tagEntry *const entry)
+{
+    if (file == NULL || ! file->initialized || ! readTagLine (file))
+        return TagFailure;
+    if (entry != NULL)
+        parseTagLine (file, entry);
+    return TagSuccess;
+}
+
+/* Look up the value stored in `entry' for the extension field `key'.
+ * "kind" yields entry->kind and "file" yields the empty string; any other
+ * key is searched for in entry->fields.list. Returns NULL when nothing is
+ * found.
+ */
+static const char *readFieldValue (
+    const tagEntry *const entry, const char *const key)
+{
+    int i;
+    if (strcmp (key, "kind") == 0)
+        return entry->kind;
+    if (strcmp (key, "file") == 0)
+        return EmptyString;
+    for (i = 0 ; i < entry->fields.count ; ++i)
+    {
+        const tagExtensionField *const field = &entry->fields.list [i];
+        /* a matching key whose value is NULL does not end the search */
+        if (strcmp (field->key, key) == 0 && field->value != NULL)
+            return field->value;
+    }
+    return NULL;
+}
+
+/* Seek to byte offset `pos' and read the first complete tag line starting
+ * at or after it. Unless pos is 0 the seek probably lands in the middle of
+ * a line, so one line is read and discarded first.
+ * Returns non-zero if a line was read, 0 if seeking or reading failed.
+ */
+static int readTagLineSeek (tagFile *const file, const off_t pos)
+{
+    int gotLine;
+    if (fseek (file->fp, pos, SEEK_SET) != 0)
+        return 0;
+    gotLine = readTagLine (file);      /* read probable partial line */
+    if (gotLine && pos > 0)
+        gotLine = readTagLine (file);  /* read complete line */
+    return gotLine;
+}
+
+/* Compare the name being searched for against the name of the line last
+ * read, honouring the partial-match and ignore-case settings of the current
+ * search. Returns 0 on a match, < 0 if the searched name orders before the
+ * line's name and > 0 if it orders after it.
+ */
+static int nameComparison (tagFile *const file)
+{
+    const char *const wanted = file->search.name;
+    const char *const candidate = file->name.buffer;
+
+    if (file->search.partial)
+    {
+        const size_t length = file->search.nameLength;
+        return file->search.ignorecase
+            ? strnuppercmp (wanted, candidate, length)
+            : strncmp (wanted, candidate, length);
+    }
+    return file->search.ignorecase
+        ? struppercmp (wanted, candidate)
+        : strcmp (wanted, candidate);
+}
+
+/* Starting from the position of the current line (file->pos), step backwards
+ * through the file JUMP_BACK bytes at a time, reading the tag line found at
+ * each step, until a line is reached that does not match the searched name,
+ * the beginning of the file is reached, or no line could be read.
+ * On return file->line/file->name hold the last line that was read.
+ */
+static void findFirstNonMatchBefore (tagFile *const file)
+{
+#define JUMP_BACK 512
+ int more_lines;
+ int comp;
+ off_t start = file->pos;
+ off_t pos = start;
+ do
+ {
+ /* clamp at the start of the file instead of going negative */
+ if (pos < (off_t) JUMP_BACK)
+ pos = 0;
+ else
+ pos = pos - JUMP_BACK;
+ more_lines = readTagLineSeek (file, pos);
+ comp = nameComparison (file);
+ } while (more_lines && comp == 0 && pos > 0 && pos < start);
+}
+
+/* Locate the first line matching the searched name at or before the current
+ * line: back up with findFirstNonMatchBefore (), then read forward one line
+ * at a time until a line matches, no more lines can be read, or the original
+ * position is reached.
+ * Returns TagSuccess if a matching line was read (it is then the current
+ * line), TagFailure otherwise.
+ */
+static tagResult findFirstMatchBefore (tagFile *const file)
+{
+ tagResult result = TagFailure;
+ int more_lines;
+ /* remember where we started so the forward scan knows when to stop */
+ off_t start = file->pos;
+ findFirstNonMatchBefore (file);
+ do
+ {
+ more_lines = readTagLine (file);
+ if (nameComparison (file) == 0)
+ result = TagSuccess;
+ } while (more_lines && result != TagSuccess && file->pos < start);
+ return result;
+}
+
+/* Search the tag file for the current search name by bisecting byte offsets
+ * between 0 and file->size. At each probe the first complete line after the
+ * offset is read and compared with nameComparison (); once a matching line
+ * is hit, findFirstMatchBefore () backs up to the first of the matching
+ * lines.
+ * Returns TagSuccess with the first matching line as the current line, or
+ * TagFailure if no line matches.
+ */
+static tagResult findBinary (tagFile *const file)
+{
+ tagResult result = TagFailure;
+ off_t lower_limit = 0;
+ off_t upper_limit = file->size;
+ off_t last_pos = 0;
+ off_t pos = upper_limit / 2;
+ while (result != TagSuccess)
+ {
+ if (! readTagLineSeek (file, pos))
+ {
+ /* in case we fell off end of file */
+ result = findFirstMatchBefore (file);
+ break;
+ }
+ else if (pos == last_pos)
+ {
+ /* prevent infinite loop if we backed up to beginning of file */
+ break;
+ }
+ else
+ {
+ const int comp = nameComparison (file);
+ last_pos = pos;
+ if (comp < 0)
+ {
+ /* searched name orders before this line: continue in lower half */
+ upper_limit = pos;
+ pos = lower_limit + ((upper_limit - lower_limit) / 2);
+ }
+ else if (comp > 0)
+ {
+ /* searched name orders after this line: continue in upper half */
+ lower_limit = pos;
+ pos = lower_limit + ((upper_limit - lower_limit) / 2);
+ }
+ else if (pos == 0)
+ /* matched the very first line; nothing can precede it */
+ result = TagSuccess;
+ else
+ result = findFirstMatchBefore (file);
+ }
+ }
+ return result;
+}
+
+/* Read forward from the current position, one tag line at a time, until a
+ * line matching the current search is found.
+ * Returns TagSuccess with the matching line as the current line, or
+ * TagFailure if the handle is uninitialized or no further line matches.
+ */
+static tagResult findSequential (tagFile *const file)
+{
+    if (! file->initialized)
+        return TagFailure;
+    while (readTagLine (file))
+    {
+        if (nameComparison (file) == 0)
+            return TagSuccess;
+    }
+    return TagFailure;
+}
+
+/* Start a new search for `name' using the TAG_* `options' and position the
+ * file at the first matching tag. A binary search is used when the file's
+ * sort method suits the requested case sensitivity, a sequential scan from
+ * the start of the file otherwise.
+ * On success the match is parsed into `entry' (if not NULL) and its position
+ * is remembered in file->search.pos.
+ * Returns TagSuccess if a match was found. Returns TagFailure if there is
+ * no match, if `name' is NULL, or if the name could not be copied.
+ */
+static tagResult find (tagFile *const file, tagEntry *const entry,
+                       const char *const name, const int options)
+{
+    tagResult result;
+    free (file->search.name);
+    file->search.name = duplicate (name);
+    if (file->search.name == NULL)
+    {
+        /* NULL name or out of memory: there is nothing to compare against */
+        file->search.nameLength = 0;
+        file->search.pos = file->size;
+        return TagFailure;
+    }
+    file->search.nameLength = strlen (name);
+    file->search.partial = (options & TAG_PARTIALMATCH) != 0;
+    file->search.ignorecase = (options & TAG_IGNORECASE) != 0;
+    fseek (file->fp, 0, SEEK_END);
+    file->size = ftell (file->fp);
+    rewind (file->fp);
+    if ((file->sortMethod == TAG_SORTED && !file->search.ignorecase) ||
+        (file->sortMethod == TAG_FOLDSORTED && file->search.ignorecase))
+    {
+#ifdef DEBUG
+        printf ("<performing binary search>\n");
+#endif
+        result = findBinary (file);
+    }
+    else
+    {
+#ifdef DEBUG
+        printf ("<performing sequential search>\n");
+#endif
+        result = findSequential (file);
+    }
+
+    if (result != TagSuccess)
+        file->search.pos = file->size;
+    else
+    {
+        file->search.pos = file->pos;
+        if (entry != NULL)
+            parseTagLine (file, entry);
+    }
+    return result;
+}
+
+/* Continue the search started by find (). When the file's sort method
+ * suits the search, matches are adjacent, so only the next line is read and
+ * it must match; otherwise the scan continues sequentially.
+ * Returns TagSuccess if another matching tag was found, else TagFailure.
+ */
+static tagResult findNext (tagFile *const file, tagEntry *const entry)
+{
+    const int matchesAdjacent =
+        (file->sortMethod == TAG_SORTED && !file->search.ignorecase) ||
+        (file->sortMethod == TAG_FOLDSORTED && file->search.ignorecase);
+    tagResult result;
+
+    if (matchesAdjacent)
+    {
+        result = tagsNext (file, entry);
+        if (result == TagSuccess && nameComparison (file) != 0)
+            result = TagFailure;
+    }
+    else
+    {
+        result = findSequential (file);
+        if (result == TagSuccess && entry != NULL)
+            parseTagLine (file, entry);
+    }
+    return result;
+}
+
+/*
+* EXTERNAL INTERFACE
+*/
+
+/* Open the tag file at `filePath' by delegating to initialize (); returns
+ * the handle it produces, or NULL if it failed.
+ */
+extern tagFile *tagsOpen (const char *const filePath, tagFileInfo *const info)
+{
+ return initialize (filePath, info);
+}
+
+/* Override the sort method recorded for an open tag file.
+ * Returns TagFailure if `file' is NULL or uninitialized, else TagSuccess.
+ */
+extern tagResult tagsSetSortType (tagFile *const file, const sortType type)
+{
+    if (file == NULL || ! file->initialized)
+        return TagFailure;
+    file->sortMethod = type;
+    return TagSuccess;
+}
+
+/* Position the file at its first non-pseudo tag and read it into `entry'.
+ * Returns TagFailure if `file' is NULL or uninitialized, or if there is no
+ * tag to read.
+ */
+extern tagResult tagsFirst (tagFile *const file, tagEntry *const entry)
+{
+    if (file == NULL || ! file->initialized)
+        return TagFailure;
+    gotoFirstLogicalTag (file);
+    return readNext (file, entry);
+}
+
+/* Read the tag following the current position into `entry'.
+ * Returns TagFailure if `file' is NULL or uninitialized, or if no further
+ * tag could be read.
+ */
+extern tagResult tagsNext (tagFile *const file, tagEntry *const entry)
+{
+    if (file == NULL || ! file->initialized)
+        return TagFailure;
+    return readNext (file, entry);
+}
+
+/* Return the value of extension field `key' in `entry', or NULL when
+ * `entry' is NULL or has no such field.
+ */
+extern const char *tagsField (const tagEntry *const entry, const char *const key)
+{
+    if (entry == NULL)
+        return NULL;
+    return readFieldValue (entry, key);
+}
+
+/* Find the first tag matching `name' according to `options'.
+ * Returns TagFailure if `file' is NULL or uninitialized; otherwise the
+ * result of the search.
+ */
+extern tagResult tagsFind (tagFile *const file, tagEntry *const entry,
+                           const char *const name, const int options)
+{
+    if (file == NULL || ! file->initialized)
+        return TagFailure;
+    return find (file, entry, name, options);
+}
+
+/* Find the next tag matching the most recent tagsFind () search.
+ * Returns TagFailure if `file' is NULL or uninitialized; otherwise the
+ * result of continuing the search.
+ */
+extern tagResult tagsFindNext (tagFile *const file, tagEntry *const entry)
+{
+    if (file == NULL || ! file->initialized)
+        return TagFailure;
+    return findNext (file, entry);
+}
+
+/* Close the tag file and free the handle.
+ * Returns TagFailure if `file' is NULL or uninitialized, else TagSuccess.
+ */
+extern tagResult tagsClose (tagFile *const file)
+{
+    if (file == NULL || ! file->initialized)
+        return TagFailure;
+    terminate (file);
+    return TagSuccess;
+}
+
+/*
+* TEST FRAMEWORK
+*/
+
+#ifdef READTAGS_MAIN
+
+static const char *TagFileName = "tags";
+static const char *ProgramName;
+static int extensionFields;
+static int SortOverride;
+static sortType SortMethod;
+
+/* Print `entry' to standard output as one tag line: name, file and address
+ * pattern separated by tabs, followed -- only when the global
+ * `extensionFields' flag is set -- by the kind, the file-scope marker and
+ * the remaining extension fields.
+ */
+static void printTag (const tagEntry *entry)
+{
+ int i;
+ int first = 1;
+ const char* separator = ";\"";
+ const char* const empty = "";
+/* "sep" returns a value only the first time it is evaluated */
+/* (it clears `first' as a side effect, so the ;" marker is emitted once,
+ * in front of whichever extension field happens to be printed first) */
+#define sep (first ? (first = 0, separator) : empty)
+ printf ("%s\t%s\t%s",
+ entry->name, entry->file, entry->address.pattern);
+ if (extensionFields)
+ {
+ if (entry->kind != NULL && entry->kind [0] != '\0')
+ printf ("%s\tkind:%s", sep, entry->kind);
+ if (entry->fileScope)
+ printf ("%s\tfile:", sep);
+#if 0
+ if (entry->address.lineNumber > 0)
+ printf ("%s\tline:%lu", sep, entry->address.lineNumber);
+#endif
+ for (i = 0 ; i < entry->fields.count ; ++i)
+ printf ("%s\t%s:%s", sep, entry->fields.list [i].key,
+ entry->fields.list [i].value);
+ }
+ putchar ('\n');
+#undef sep
+}
+
+/* Open the tag file named by TagFileName, look up `name' with the given
+ * TAG_* `options' and print every matching tag. Applies the sort override
+ * from the command line first, if one was given.
+ * Exits with status 1 if the tag file cannot be opened.
+ */
+static void findTag (const char *const name, const int options)
+{
+    tagFileInfo info;
+    tagEntry entry;
+    tagFile *const file = tagsOpen (TagFileName, &info);
+    if (file == NULL)
+    {
+        /* report the file that failed to open, not the tag being sought */
+        fprintf (stderr, "%s: cannot open tag file: %s: %s\n",
+                 ProgramName, strerror (info.status.error_number), TagFileName);
+        exit (1);
+    }
+    else
+    {
+        if (SortOverride)
+            tagsSetSortType (file, SortMethod);
+        if (tagsFind (file, &entry, name, options) == TagSuccess)
+        {
+            do
+            {
+                printTag (&entry);
+            } while (tagsFindNext (file, &entry) == TagSuccess);
+        }
+        tagsClose (file);
+    }
+}
+
+/* Open the tag file named by TagFileName and print every tag that
+ * tagsNext () returns. Exits with status 1 if the file cannot be opened.
+ */
+static void listTags (void)
+{
+    tagFileInfo info;
+    tagEntry entry;
+    tagFile *const file = tagsOpen (TagFileName, &info);
+
+    if (file == NULL)
+    {
+        fprintf (stderr, "%s: cannot open tag file: %s: %s\n",
+                 ProgramName, strerror (info.status.error_number), TagFileName);
+        exit (1);
+    }
+    while (tagsNext (file, &entry) == TagSuccess)
+        printTag (&entry);
+    tagsClose (file);
+}
+
+/* Help text; used as a printf format whose single %s is the program name.
+ * The synopsis lists every option described below it, including -e and the
+ * sort value 2.
+ */
+const char *const Usage =
+    "Find tag file entries matching specified names.\n\n"
+    "Usage: %s [-eilp] [-s[0|1|2]] [-t file] [name(s)]\n\n"
+    "Options:\n"
+    " -e Include extension fields in output.\n"
+    " -i Perform case-insensitive matching.\n"
+    " -l List all tags.\n"
+    " -p Perform partial matching.\n"
+    " -s[0|1|2] Override sort detection of tag file.\n"
+    " -t file Use specified tag file (default: \"tags\").\n"
+    "Note that options are acted upon as encountered, so order is significant.\n";
+
+/* Entry point of the readtags test program.
+ * Arguments are processed left to right: an argument not starting with '-'
+ * is a tag name that is looked up immediately with the options seen so far;
+ * an argument starting with '-' is a cluster of option letters (see Usage).
+ * Exits with status 1 on a usage error or when no action (a tag name or -l)
+ * was given; returns 0 otherwise.
+ */
+extern int main (int argc, char **argv)
+{
+    int options = 0;
+    int actionSupplied = 0;
+    int i;
+    ProgramName = argv [0];
+    if (argc == 1)
+    {
+        fprintf (stderr, Usage, ProgramName);
+        exit (1);
+    }
+    for (i = 1 ; i < argc ; ++i)
+    {
+        const char *const arg = argv [i];
+        if (arg [0] != '-')
+        {
+            findTag (arg, options);
+            actionSupplied = 1;
+        }
+        else
+        {
+            size_t j;
+            for (j = 1 ; arg [j] != '\0' ; ++j)
+            {
+                switch (arg [j])
+                {
+                    case 'e': extensionFields = 1; break;
+                    case 'i': options |= TAG_IGNORECASE; break;
+                    case 'p': options |= TAG_PARTIALMATCH; break;
+                    case 'l': listTags (); actionSupplied = 1; break;
+
+                    case 't':
+                        if (arg [j+1] != '\0')
+                        {
+                            /* file name attached: it uses up the rest of arg */
+                            TagFileName = arg + j + 1;
+                            j += strlen (TagFileName);
+                        }
+                        else if (i + 1 < argc)
+                            TagFileName = argv [++i];
+                        else
+                        {
+                            fprintf (stderr, Usage, ProgramName);
+                            exit (1);
+                        }
+                        break;
+                    case 's':
+                        SortOverride = 1;
+                        /* Look at the next character without consuming it:
+                         * when -s ends the argument, advancing j here would
+                         * let the loop's own ++j step past the terminating
+                         * NUL and read beyond the string.
+                         */
+                        if (arg [j+1] == '\0')
+                            SortMethod = TAG_SORTED;
+                        else if (strchr ("012", arg [j+1]) != NULL)
+                        {
+                            ++j;
+                            SortMethod = (sortType) (arg [j] - '0');
+                        }
+                        else
+                        {
+                            fprintf (stderr, Usage, ProgramName);
+                            exit (1);
+                        }
+                        break;
+                    default:
+                        fprintf (stderr, "%s: unknown option: %c\n",
+                                 ProgramName, arg[j]);
+                        exit (1);
+                        break;
+                }
+            }
+        }
+    }
+    if (! actionSupplied)
+    {
+        fprintf (stderr,
+                 "%s: no action specified: specify tag name(s) or -l option\n",
+                 ProgramName);
+        exit (1);
+    }
+    return 0;
+}
+
+#endif
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/readtags.h b/readtags.h
new file mode 100644
index 0000000..724f250
--- /dev/null
+++ b/readtags.h
@@ -0,0 +1,252 @@
+/*
+* $Id: readtags.h 443 2006-05-30 04:37:13Z darren $
+*
+* Copyright (c) 1996-2003, Darren Hiebert
+*
+* This source code is released for the public domain.
+*
+* This file defines the public interface for looking up tag entries in tag
+* files.
+*
+* The functions defined in this interface are intended to provide tag file
+* support to a software tool. The tag lookups provided are fast enough to
+* permit opening a sorted tag file, searching for a matching tag,
+* then closing the tag file each time a tag is looked up (search times are
+* on the order of hundredths of a second, even for huge tag files). This is
+* the recommended use of this library for most tool applications. Adhering
+* to this approach permits a user to regenerate a tag file at will without
+* the tool needing to detect and resynchronize with changes to the tag file.
+* Even for an unsorted 24MB tag file, tag searches take about one second.
+*/
+#ifndef READTAGS_H
+#define READTAGS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+* MACROS
+*/
+
+/* Options for tagsSetSortType(): how the lines of the tag file are ordered.
+ * TAG_SORTED permits binary searches for case-sensitive lookups and
+ * TAG_FOLDSORTED for case-insensitive ones; any other combination is
+ * searched sequentially.
+ */
+typedef enum {
+ TAG_UNSORTED, TAG_SORTED, TAG_FOLDSORTED
+} sortType ;
+
+/* Options for tagsFind() */
+#define TAG_FULLMATCH 0x0
+#define TAG_PARTIALMATCH 0x1
+
+#define TAG_OBSERVECASE 0x0
+#define TAG_IGNORECASE 0x2
+
+/*
+* DATA DECLARATIONS
+*/
+
+typedef enum { TagFailure = 0, TagSuccess = 1 } tagResult;
+
+struct sTagFile;
+
+typedef struct sTagFile tagFile;
+
+/* This structure contains information about the tag file.
+ * It is filled in by tagsOpen(). The strings in `program' belong to the tag
+ * file handle and are freed by tagsClose(); do not use them after the file
+ * has been closed.
+ */
+typedef struct {
+
+ struct {
+ /* was the tag file successfully opened? */
+ int opened;
+
+ /* errno value when 'opened' is false */
+ int error_number;
+ } status;
+
+ /* information about the structure of the tag file */
+ struct {
+ /* format of tag file (1 = original, 2 = extended) */
+ short format;
+
+ /* how is the tag file sorted? */
+ sortType sort;
+ } file;
+
+
+ /* information about the program which created this tag file */
+ struct {
+ /* name of author of generating program (may be null) */
+ const char *author;
+
+ /* name of program (may be null) */
+ const char *name;
+
+ /* URL of distribution (may be null) */
+ const char *url;
+
+ /* program version (may be null) */
+ const char *version;
+ } program;
+
+} tagFileInfo;
+
+/* This structure contains information about an extension field for a tag.
+ * These exist at the end of the tag in the form "key:value".
+ */
+typedef struct {
+
+ /* the key of the extension field */
+ const char *key;
+
+ /* the value of the extension field (may be an empty string) */
+ const char *value;
+
+} tagExtensionField;
+
+/* This structure contains information about a specific tag.
+ * The strings it refers to point into the line buffer of the tag file handle
+ * that produced the entry, so they are only valid until the next tag is read
+ * from that handle or the handle is closed.
+ */
+typedef struct {
+
+ /* name of tag */
+ const char *name;
+
+ /* path of source file containing definition of tag */
+ const char *file;
+
+ /* address for locating tag in source file */
+ struct {
+ /* pattern for locating source line
+ * (may be NULL if not present) */
+ const char *pattern;
+
+ /* line number in source file of tag definition
+ * (may be zero if not known) */
+ unsigned long lineNumber;
+ } address;
+
+ /* kind of tag (may be name, character, or NULL if not known) */
+ const char *kind;
+
+ /* is tag of file-limited scope? */
+ short fileScope;
+
+ /* miscellaneous extension fields */
+ struct {
+ /* number of entries in `list' */
+ unsigned short count;
+
+ /* list of key value pairs */
+ tagExtensionField *list;
+ } fields;
+
+} tagEntry;
+
+
+/*
+* FUNCTION PROTOTYPES
+*/
+
+/*
+* This function must be called before calling other functions in this
+* library. It is passed the path to the tag file to read and a (possibly
+* null) pointer to a structure which, if not null, will be populated with
+* information about the tag file. If successful, the function will return a
+* handle which must be supplied to other calls to read information from the
+* tag file, and info.status.opened will be set to true. If unsuccessful,
+* info.status.opened will be set to false and info.status.error_number will
+* be set to the errno value representing the system error preventing the tag
+* file from being successfully opened.
+*/
+extern tagFile *tagsOpen (const char *const filePath, tagFileInfo *const info);
+
+/*
+* This function allows the client to override the normal automatic detection
+* of how a tag file is sorted. Permissible values for `type' are
+* TAG_UNSORTED, TAG_SORTED, TAG_FOLDSORTED. Tag files in the new extended
+* format contain a key indicating whether or not they are sorted. However,
+* tag files in the original format do not contain such a key even when
+* sorted, preventing this library from taking advantage of fast binary
+* lookups. If the client knows that such an unmarked tag file is indeed
+* sorted (or not), it can override the automatic detection. Note that
+* incorrect lookup results will result if a tag file is marked as sorted when
+* it actually is not. The function will return TagSuccess if called on an
+* open tag file or TagFailure if not.
+*/
+extern tagResult tagsSetSortType (tagFile *const file, const sortType type);
+
+/*
+* Reads the first tag in the file, if any. It is passed the handle to an
+* opened tag file and a (possibly null) pointer to a structure which, if not
+* null, will be populated with information about the first tag file entry.
+* The function will return TagSuccess if a tag entry is found, or
+* TagFailure if not (i.e. it reached end of file).
+*/
+extern tagResult tagsFirst (tagFile *const file, tagEntry *const entry);
+
+/*
+* Step to the next tag in the file, if any. It is passed the handle to an
+* opened tag file and a (possibly null) pointer to a structure which, if not
+* null, will be populated with information about the next tag file entry. The
+* function will return TagSuccess if another tag entry is found, or TagFailure
+* if not (i.e. it reached end of file). It will always read the first tag in
+* the file immediately after calling tagsOpen().
+*/
+extern tagResult tagsNext (tagFile *const file, tagEntry *const entry);
+
+/*
+* Retrieve the value associated with the extension field for a specified key.
+* It is passed a pointer to a structure already populated with values by a
+* previous call to tagsNext(), tagsFind(), or tagsFindNext(), and a string
+* containing the key of the desired extension field. If no such field of the
+* specified key exists, the function will return null.
+*/
+extern const char *tagsField (const tagEntry *const entry, const char *const key);
+
+/*
+* Find the first tag matching `name'. The structure pointed to by `entry'
+* will be populated with information about the tag file entry. If a tag file
+* is sorted using the C locale, a binary search algorithm is used to search
+* the tag file, resulting in very fast tag lookups, even in huge tag files.
+* Various options controlling the matches can be combined by bit-wise or-ing
+* certain values together. The available values are:
+*
+* TAG_PARTIALMATCH
+* Tags whose leading characters match `name' will qualify.
+*
+* TAG_FULLMATCH
+* Only tags whose full lengths match `name' will qualify.
+*
+* TAG_IGNORECASE
+* Matching will be performed in a case-insensitive manner. Note that
+* a binary search of the tag file is then used only if the file is
+* sorted with case folded (TAG_FOLDSORTED); otherwise the file is
+* searched sequentially.
+*
+* TAG_OBSERVECASE
+* Matching will be performed in a case-sensitive manner. Note that
+* a binary search of the tag file is then used only if the file is
+* sorted case-sensitively (TAG_SORTED); otherwise the file is
+* searched sequentially.
+*
+* The function will return TagSuccess if a tag matching the name is found, or
+* TagFailure if not.
+*/
+extern tagResult tagsFind (tagFile *const file, tagEntry *const entry, const char *const name, const int options);
+
+/*
+* Find the next tag matching the name and options supplied to the most recent
+* call to tagsFind() for the same tag file. The structure pointed to by
+* `entry' will be populated with information about the tag file entry. The
+* function will return TagSuccess if another tag matching the name is found,
+* or TagFailure if not.
+*/
+extern tagResult tagsFindNext (tagFile *const file, tagEntry *const entry);
+
+/*
+* Call tagsClose() at completion of reading the tag file, which will
+* close the file and free any internal memory allocated. The function will
+* return TagFailure if no file is currently open, TagSuccess otherwise.
+*/
+extern tagResult tagsClose (tagFile *const file);
+
+#ifdef __cplusplus
+};
+#endif
+
+#endif
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/rexx.c b/rexx.c
new file mode 100644
index 0000000..cb90f56
--- /dev/null
+++ b/rexx.c
@@ -0,0 +1,39 @@
+/*
+* $Id: rexx.c 443 2006-05-30 04:37:13Z darren $
+*
+* Copyright (c) 2001-2003, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* This module contains functions for generating tags for the REXX language
+* (http://www.rexxla.org, http://www2.hursley.ibm.com/rexx).
+*/
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* always include first */
+#include "parse.h" /* always include */
+
+/*
+* FUNCTION DEFINITIONS
+*/
+
+/* Registers the single regex rule for REXX with addTagRegex (): the
+ * pattern matches a line beginning with a run of name characters, optional
+ * blanks or tabs, and a colon; "\\1" refers to the captured name and the
+ * kind is given as "s,subroutine,subroutines".
+ * NOTE(review): how addTagRegex () interprets these arguments is defined
+ * elsewhere -- confirm against its declaration.
+ */
+static void installRexxRegex (const langType language)
+{
+ addTagRegex (language, "^([A-Za-z0-9@#$\\.!?_]+)[ \t]*:",
+ "\\1", "s,subroutine,subroutines", NULL);
+}
+
+/* Create the parser definition for REXX: a regex-based parser for files
+ * with the extensions "cmd", "rexx" and "rx", whose rules are installed by
+ * installRexxRegex ().
+ */
+extern parserDefinition* RexxParser (void)
+{
+    static const char *const extensions [] = { "cmd", "rexx", "rx", NULL };
+    parserDefinition* const parser = parserNew ("REXX");
+
+    parser->regex = TRUE;
+    parser->initialize = installRexxRegex;
+    parser->extensions = extensions;
+    return parser;
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/routines.c b/routines.c
new file mode 100644
index 0000000..83bcdcc
--- /dev/null
+++ b/routines.c
@@ -0,0 +1,891 @@
+/*
+* $Id: routines.c 536 2007-06-02 06:09:00Z elliotth $
+*
+* Copyright (c) 2002-2003, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* This module contains a loose assortment of shared functions.
+*/
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#ifdef HAVE_STDLIB_H
+# include <stdlib.h> /* to declare malloc (), realloc () */
+#endif
+#include <ctype.h>
+#include <string.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <stdio.h> /* to declare tempnam(), and SEEK_SET (hopefully) */
+
+#ifdef HAVE_FCNTL_H
+# include <fcntl.h> /* to declare O_RDWR, O_CREAT, O_EXCL */
+#endif
+#ifdef HAVE_UNISTD_H
+# include <unistd.h> /* to declare mkstemp () */
+#endif
+
+/* To declare "struct stat" and stat ().
+ */
+#if defined (HAVE_SYS_TYPES_H)
+# include <sys/types.h>
+#else
+# if defined (HAVE_TYPES_H)
+# include <types.h>
+# endif
+#endif
+#ifdef HAVE_SYS_STAT_H
+# include <sys/stat.h>
+#else
+# ifdef HAVE_STAT_H
+# include <stat.h>
+# endif
+#endif
+
+#ifdef HAVE_DOS_H
+# include <dos.h> /* to declare MAXPATH */
+#endif
+#ifdef HAVE_DIRECT_H
+# include <direct.h> /* to _getcwd */
+#endif
+#ifdef HAVE_DIR_H
+# include <dir.h> /* to declare findfirst() and findnext() */
+#endif
+#ifdef HAVE_IO_H
+# include <io.h> /* to declare open() */
+#endif
+#include "debug.h"
+#include "routines.h"
+
+/*
+* MACROS
+*/
+#ifndef TMPDIR
+# define TMPDIR "/tmp"
+#endif
+
+/* File type tests.
+ */
+#ifndef S_ISREG
+# if defined (S_IFREG) && ! defined (AMIGA)
+# define S_ISREG(mode) ((mode) & S_IFREG)
+# else
+# define S_ISREG(mode) TRUE /* assume regular file */
+# endif
+#endif
+
+#ifndef S_ISLNK
+# ifdef S_IFLNK
+# define S_ISLNK(mode) (((mode) & S_IFMT) == S_IFLNK)
+# else
+# define S_ISLNK(mode) FALSE /* assume no soft links */
+# endif
+#endif
+
+#ifndef S_ISDIR
+# ifdef S_IFDIR
+# define S_ISDIR(mode) (((mode) & S_IFMT) == S_IFDIR)
+# else
+# define S_ISDIR(mode) FALSE /* assume no directories */
+# endif
+#endif
+
+#ifndef S_IFMT
+# define S_IFMT 0
+#endif
+
+#ifndef S_IXUSR
+# define S_IXUSR 0
+#endif
+#ifndef S_IXGRP
+# define S_IXGRP 0
+#endif
+#ifndef S_IXOTH
+# define S_IXOTH 0
+#endif
+
+#ifndef S_IRUSR
+# define S_IRUSR 0400
+#endif
+#ifndef S_IWUSR
+# define S_IWUSR 0200
+#endif
+
+#ifndef S_ISUID
+# define S_ISUID 0
+#endif
+
+/* Hack for ridiculous practice of Microsoft Visual C++.
+ */
+#if defined (WIN32)
+# if defined (_MSC_VER)
+# define stat _stat
+# define getcwd _getcwd
+# define currentdrive() (_getdrive() + 'A' - 1)
+# define PATH_MAX _MAX_PATH
+# elif defined (__BORLANDC__)
+# define PATH_MAX MAXPATH
+# define currentdrive() (getdisk() + 'A')
+# elif defined (DJGPP)
+# define currentdrive() (getdisk() + 'A')
+# else
+# define currentdrive() 'C'
+# endif
+#endif
+
+#ifndef PATH_MAX
+# define PATH_MAX 256
+#endif
+
+/*
+ * Miscellaneous macros
+ */
+#define selected(var,feature) (((int)(var) & (int)(feature)) == (int)feature)
+
+/*
+* DATA DEFINITIONS
+*/
+#if defined (MSDOS_STYLE_PATH)
+const char *const PathDelimiters = ":/\\";
+#elif defined (VMS)
+const char *const PathDelimiters = ":]>";
+#endif
+
+char *CurrentDirectory;
+
+static const char *ExecutableProgram;
+static const char *ExecutableName;
+
+/*
+* FUNCTION PROTOTYPES
+*/
+#ifdef NEED_PROTO_STAT
+extern int stat (const char *, struct stat *);
+#endif
+#ifdef NEED_PROTO_LSTAT
+extern int lstat (const char *, struct stat *);
+#endif
+#if defined (MSDOS) || defined (WIN32) || defined (VMS) || defined (__EMX__) || defined (AMIGA)
+# define lstat(fn,buf) stat(fn,buf)
+#endif
+
+/*
+* FUNCTION DEFINITIONS
+*/
+
+/* Releases the buffer holding the current directory, if one was allocated.
+ * The pointer is reset to NULL afterwards so that a later call to
+ * setCurrentDirectory() allocates a fresh buffer instead of writing into
+ * freed memory, and so that calling this function twice is harmless.
+ */
+extern void freeRoutineResources (void)
+{
+    if (CurrentDirectory != NULL)
+    {
+        eFree (CurrentDirectory);
+        CurrentDirectory = NULL;
+    }
+}
+
+/* Remembers `path' as the program's path and the result of baseFilename()
+ * on it as the program's name.  Neither string is copied: both stored
+ * pointers refer into the caller's `path'.
+ */
+extern void setExecutableName (const char *const path)
+{
+    ExecutableName = baseFilename (path);
+    ExecutableProgram = path;
+#ifdef VAXC
+    {
+        /* remove filetype from executable name */
+        char *const dot = strrchr (ExecutableName, '.');
+        if (dot != NULL)
+            *dot = '\0';
+    }
+#endif
+}
+
+/* Returns the program name stored by setExecutableName().  The backing
+ * variable is a file-scope static with no initializer, so the result is NULL
+ * until setExecutableName() has been called.
+ */
+extern const char *getExecutableName (void)
+{
+ return ExecutableName;
+}
+
+/* Returns the program path exactly as it was handed to setExecutableName()
+ * (NULL if that function has not been called yet).
+ */
+extern const char *getExecutablePath (void)
+{
+ return ExecutableProgram;
+}
+
+/* Prints a diagnostic to `errout' in the form
+ *     <program>: [Warning: ]<formatted message>[ : <system error>]
+ * followed by a newline.
+ *
+ * `selection' is a bit mask of FATAL, WARNING and PERROR:
+ *   WARNING - prefix the message with "Warning: ";
+ *   PERROR  - append the description of the errno value that was current
+ *             when error() was entered;
+ *   FATAL   - terminate the program with exit status 1 after printing.
+ * `format' and the variable arguments are interpreted as by vfprintf().
+ */
+extern void error (
+        const errorSelection selection, const char *const format, ...)
+{
+    /* The stdio calls below may modify errno, so capture the value that
+     * describes the failure being reported before doing any output.
+     */
+    const int savedErrno = errno;
+    va_list ap;
+
+    va_start (ap, format);
+    fprintf (errout, "%s: %s", getExecutableName (),
+            selected (selection, WARNING) ? "Warning: " : "");
+    vfprintf (errout, format, ap);
+    if (selected (selection, PERROR))
+    {
+#ifdef HAVE_STRERROR
+        fprintf (errout, " : %s", strerror (savedErrno));
+#else
+        errno = savedErrno;
+        perror (" ");
+#endif
+    }
+    fputs ("\n", errout);
+    va_end (ap);
+    if (selected (selection, FATAL))
+        exit (1);
+}
+
+/*
+ * Memory allocation functions
+ */
+
+/* malloc() wrapper: returns `size' bytes of storage, or reports a fatal
+ * "out of memory" error through error() when malloc() yields NULL.
+ */
+extern void *eMalloc (const size_t size)
+{
+    void *const block = malloc (size);
+
+    if (! block)
+        error (FATAL, "out of memory");
+    return block;
+}
+
+/* calloc() wrapper: returns zeroed storage for `count' objects of `size'
+ * bytes each, or reports a fatal "out of memory" error through error() when
+ * calloc() yields NULL.
+ */
+extern void *eCalloc (const size_t count, const size_t size)
+{
+    void *const block = calloc (count, size);
+
+    if (! block)
+        error (FATAL, "out of memory");
+    return block;
+}
+
+/* realloc() wrapper.  A NULL `ptr' is routed to eMalloc(); otherwise the
+ * block is resized with realloc().  A NULL result from either path is
+ * reported as a fatal "out of memory" error.
+ */
+extern void *eRealloc (void *const ptr, const size_t size)
+{
+    void *block;
+
+    if (ptr == NULL)
+        return eMalloc (size);
+
+    block = realloc (ptr, size);
+    if (! block)
+        error (FATAL, "out of memory");
+    return block;
+}
+
+/* free() wrapper.  Unlike free(), passing NULL is treated as a programming
+ * error: it trips the Assert() below (when assertions are compiled in).
+ */
+extern void eFree (void *const ptr)
+{
+ Assert (ptr != NULL);
+ free (ptr);
+}
+
+/*
+ * String manipulation functions
+ */
+
+/*
+ * Compare two strings, ignoring case.
+ * Return 0 for match, < 0 for smaller, > 0 for bigger
+ * Make sure case is folded to uppercase in comparison (like for 'sort -f')
+ * This makes a difference when one of the chars lies between upper and lower
+ * ie. one of the chars [ \ ] ^ _ ` for ascii. (The '_' in particular !)
+ *
+ * Characters are converted to unsigned char before being handed to
+ * toupper(): passing a negative plain char is undefined behaviour, and the
+ * conversion makes bytes >= 0x80 order after the ASCII range.
+ */
+extern int struppercmp (const char *s1, const char *s2)
+{
+    int result;
+    do
+    {
+        result = toupper ((unsigned char) *s1) - toupper ((unsigned char) *s2);
+    } while (result == 0 && *s1++ != '\0' && *s2++ != '\0');
+    return result;
+}
+
+/* Case-insensitive comparison of at most `n' characters of `s1' and `s2',
+ * folding to upper case exactly as struppercmp() does.  Returns 0 when the
+ * compared prefixes match, < 0 when `s1' sorts first, > 0 otherwise.
+ *
+ * A count of zero compares nothing and returns 0 (previously the first
+ * characters were still compared and the size_t counter wrapped around).
+ * Characters are converted to unsigned char before toupper() because a
+ * negative plain char is not a valid argument to the <ctype.h> functions.
+ */
+extern int strnuppercmp (const char *s1, const char *s2, size_t n)
+{
+    int result = 0;
+    while (n-- > 0)
+    {
+        result = toupper ((unsigned char) *s1) - toupper ((unsigned char) *s2);
+        /* Stop at the first difference or at the common terminator. */
+        if (result != 0 || *s1 == '\0')
+            break;
+        ++s1;
+        ++s2;
+    }
+    return result;
+}
+
+#ifndef HAVE_STRSTR
+/* Replacement for the standard strstr() on systems lacking it: returns a
+ * pointer to the first occurrence of `substr' within `str', or NULL if there
+ * is none.  As required of the standard function, an empty `substr' matches
+ * at the start of `str' -- including when `str' itself is empty, a case the
+ * scanning loop alone would miss.
+ */
+extern char* strstr (const char *str, const char *substr)
+{
+    const size_t length = strlen (substr);
+    const char *p;
+
+    if (length == 0)
+        return (char*) str;
+    for (p = str ; *p != '\0' ; ++p)
+        if (strncmp (p, substr, length) == 0)
+            return (char*) p;
+    return NULL;
+}
+#endif
+
+/* Returns a copy of `str' (terminator included) in storage obtained through
+ * xMalloc(); the caller owns the result.
+ */
+extern char* eStrdup (const char* str)
+{
+    const size_t size = strlen (str) + 1;
+    char* const copy = xMalloc (size, char);
+
+    memcpy (copy, str, size);
+    return copy;
+}
+
+/* Converts `str' to lower case in place using tolower().  Each character is
+ * converted to unsigned char first, since a negative plain char is not a
+ * valid argument to the <ctype.h> functions.
+ */
+extern void toLowerString (char* str)
+{
+    for ( ; *str != '\0' ; ++str)
+        *str = (char) tolower ((unsigned char) *str);
+}
+
+/* Converts `str' to upper case in place using toupper().  Each character is
+ * converted to unsigned char first, since a negative plain char is not a
+ * valid argument to the <ctype.h> functions.
+ */
+extern void toUpperString (char* str)
+{
+    for ( ; *str != '\0' ; ++str)
+        *str = (char) toupper ((unsigned char) *str);
+}
+
+/* Newly allocated string containing lower case conversion of a string.
+ * The caller owns the result.  The index is a size_t so that it cannot
+ * overflow on long inputs, and characters are converted to unsigned char
+ * before tolower() because a negative plain char is not a valid argument.
+ */
+extern char* newLowerString (const char* str)
+{
+    const size_t length = strlen (str);
+    char* const result = xMalloc (length + 1, char);
+    size_t i;
+
+    for (i = 0 ; i < length ; ++i)
+        result [i] = (char) tolower ((unsigned char) str [i]);
+    result [length] = '\0';
+    return result;
+}
+
+/* Newly allocated string containing upper case conversion of a string.
+ * The caller owns the result.  The index is a size_t so that it cannot
+ * overflow on long inputs, and characters are converted to unsigned char
+ * before toupper() because a negative plain char is not a valid argument.
+ */
+extern char* newUpperString (const char* str)
+{
+    const size_t length = strlen (str);
+    char* const result = xMalloc (length + 1, char);
+    size_t i;
+
+    for (i = 0 ; i < length ; ++i)
+        result [i] = (char) toupper ((unsigned char) str [i]);
+    result [length] = '\0';
+    return result;
+}
+
+/*
+ * File system functions
+ */
+
+/* Stores the current working directory, terminated by a path separator, in
+ * the global `CurrentDirectory' buffer (allocated on first use with room for
+ * PATH_MAX + 1 characters).
+ *
+ * If getcwd() fails the error is reported with perror() and "." is used
+ * instead, so the buffer always holds a valid string before it is examined;
+ * previously the uninitialised buffer was passed to strlen().  An empty
+ * string is also handled without indexing before the start of the buffer.
+ */
+extern void setCurrentDirectory (void)
+{
+    size_t length;
+
+    if (CurrentDirectory == NULL)
+        CurrentDirectory = xMalloc ((size_t) (PATH_MAX + 1), char);
+#ifdef AMIGA
+    strcpy (CurrentDirectory, ".");
+#else
+    if (getcwd (CurrentDirectory, PATH_MAX) == NULL)
+    {
+        perror ("");
+        strcpy (CurrentDirectory, ".");
+    }
+#endif
+    /* getcwd() was limited to PATH_MAX bytes including the terminator, so
+     * one more character plus the terminator still fits in the buffer.
+     */
+    length = strlen (CurrentDirectory);
+    if (length == 0 || CurrentDirectory [length - 1] != PATH_SEPARATOR)
+    {
+        CurrentDirectory [length] = OUTPUT_PATH_SEPARATOR;
+        CurrentDirectory [length + 1] = '\0';
+    }
+}
+
+#ifdef AMIGA
+/* Amiga-only helper: reports whether `name' refers to a directory.
+ * It obtains a read lock on the name, examines it into a FileInfoBlock and
+ * treats a non-negative fib_DirEntryType as "directory".  Returns FALSE when
+ * the lock cannot be obtained or Examine() fails.
+ */
+static boolean isAmigaDirectory (const char *const name)
+{
+ boolean result = FALSE;
+ struct FileInfoBlock *const fib = xMalloc (1, struct FileInfoBlock);
+ /* NOTE(review): xMalloc() goes through eMalloc(), which reports a fatal
+ * error on allocation failure, so this check looks purely defensive. */
+ if (fib != NULL)
+ {
+ const BPTR flock = Lock ((UBYTE *) name, (long) ACCESS_READ);
+
+ if (flock != (BPTR) NULL)
+ {
+ if (Examine (flock, fib))
+ result = ((fib->fib_DirEntryType >= 0) ? TRUE : FALSE);
+ /* the lock is released whether or not Examine() succeeded */
+ UnLock (flock);
+ }
+ eFree (fib);
+ }
+ return result;
+}
+#endif
+
+/* For caching of stat() calls.
+ *
+ * Returns a pointer to a single function-static fileStatus describing
+ * `fileName'.  The file system is only queried when `fileName' differs from
+ * the name of the previous query; otherwise the cached record is returned
+ * unchanged.  Because the record is static, the result is overwritten by the
+ * next call with a different name, and callers can drop the cache with
+ * eStatFree().
+ *
+ * When the file cannot be examined only `exists' (set to FALSE) is updated;
+ * the remaining members keep whatever an earlier query left in them.
+ */
+extern fileStatus *eStat (const char *const fileName)
+{
+ struct stat status;
+ static fileStatus file;
+ if (file.name == NULL || strcmp (fileName, file.name) != 0)
+ {
+ /* different file than last time: discard the cached name and re-query */
+ eStatFree (&file);
+ file.name = eStrdup (fileName);
+ if (lstat (file.name, &status) != 0)
+ file.exists = FALSE;
+ else
+ {
+ /* lstat() describes the link itself; for a link, stat() is then used so
+ * that the remaining members describe the file it points to. */
+ file.isSymbolicLink = (boolean) S_ISLNK (status.st_mode);
+ if (file.isSymbolicLink && stat (file.name, &status) != 0)
+ file.exists = FALSE;
+ else
+ {
+ file.exists = TRUE;
+#ifdef AMIGA
+ file.isDirectory = isAmigaDirectory (file.name);
+#else
+ file.isDirectory = (boolean) S_ISDIR (status.st_mode);
+#endif
+ file.isNormalFile = (boolean) (S_ISREG (status.st_mode));
+ /* executable by anyone: user, group or other */
+ file.isExecutable = (boolean) ((status.st_mode &
+ (S_IXUSR | S_IXGRP | S_IXOTH)) != 0);
+ file.isSetuid = (boolean) ((status.st_mode & S_ISUID) != 0);
+ file.size = status.st_size;
+ }
+ }
+ }
+ return &file;
+}
+
+/* Releases the file name held by `status' (if any) and clears the pointer.
+ * Since eStat() keys its cache on that name, this also forces the next
+ * eStat() call to query the file system again.
+ */
+extern void eStatFree (fileStatus *status)
+{
+    if (status->name == NULL)
+        return;
+
+    eFree (status->name);
+    status->name = NULL;
+}
+
+/* Returns the `exists' flag that eStat() reports for `fileName'. */
+extern boolean doesFileExist (const char *const fileName)
+{
+    return eStat (fileName)->exists;
+}
+
+/* Returns TRUE if `dirName' is a symbolic link that refers to one of its own
+ * ancestor directories (following it during a recursive walk would loop).
+ *
+ * The absolute form of `dirName' is shortened one component at a time, up to
+ * and including the root directory, and each ancestor is compared against
+ * `dirName' with isSameFile().  Names that are not symbolic links always
+ * yield FALSE.
+ */
+extern boolean isRecursiveLink (const char* const dirName)
+{
+    boolean result = FALSE;
+    fileStatus *status = eStat (dirName);
+    if (status->isSymbolicLink)
+    {
+        char* const path = absoluteFilename (dirName);
+        size_t length = strlen (path);
+
+        /* Drop trailing separators, but never shrink the path below one
+         * character: reducing "/" to "" would make the next test index
+         * before the start of the buffer.
+         */
+        while (length > 1 && path [length - 1] == PATH_SEPARATOR)
+            path [--length] = '\0';
+        while (! result && strlen (path) > (size_t) 1)
+        {
+            char *const separator = strrchr (path, PATH_SEPARATOR);
+            if (separator == NULL)
+                break;
+            else if (separator == path) /* backed up to root directory */
+                *(separator + 1) = '\0';
+            else
+                *separator = '\0';
+            result = isSameFile (path, dirName);
+        }
+        eFree (path);
+    }
+    return result;
+}
+
+#ifndef HAVE_FGETPOS
+
+/* Fallback for systems without fgetpos(): stores the ftell() offset of
+ * `stream' in `*pos'.  Returns 0 on success and -1 when ftell() reports -1.
+ */
+extern int fgetpos (FILE *stream, fpos_t *pos)
+{
+    *pos = ftell (stream);
+    return (*pos == -1L) ? -1 : 0;
+}
+
+/* Fallback for systems without fsetpos(): seeks `stream' to the offset
+ * stored in `*pos', measured from the start of the file, and returns the
+ * result of fseek().
+ */
+extern int fsetpos (FILE *stream, fpos_t const *pos)
+{
+ return fseek (stream, *pos, SEEK_SET);
+}
+
+#endif
+
+/*
+ * Pathname manipulation (O/S dependent!!!)
+ */
+
+/* Tests whether character `c' separates path components.  With MS-DOS style
+ * paths or on VMS any character found by strchr() in `PathDelimiters'
+ * qualifies; elsewhere only PATH_SEPARATOR does.
+ */
+static boolean isPathSeparator (const int c)
+{
+#if defined (MSDOS_STYLE_PATH) || defined (VMS)
+    return (boolean) (strchr (PathDelimiters, c) != NULL);
+#else
+    return (boolean) (c == PATH_SEPARATOR);
+#endif
+}
+
+#if ! defined (HAVE_STAT_ST_INO)
+
+/* With MS-DOS style paths, rewrites every path separator in `path' other
+ * than ':' to PATH_SEPARATOR, in place.  Does nothing on other systems.
+ */
+static void canonicalizePath (char *const path __unused__)
+{
+#if defined (MSDOS_STYLE_PATH)
+    char *cursor = path;
+    while (*cursor != '\0')
+    {
+        if (*cursor != ':' && isPathSeparator (*cursor))
+            *cursor = PATH_SEPARATOR;
+        ++cursor;
+    }
+#endif
+}
+
+#endif
+
+/* Returns TRUE if `name1' and `name2' refer to the same file.
+ *
+ * Where struct stat provides inode numbers, both names are stat()ed and
+ * considered identical when device AND inode match; an inode number is only
+ * unique within one file system, so comparing st_ino alone can report two
+ * unrelated files on different devices as the same.  If either stat() fails
+ * the result is FALSE.
+ *
+ * Elsewhere the absolute, canonicalized spellings of the two names are
+ * compared as strings (ignoring case if CASE_INSENSITIVE_FILENAMES is set).
+ */
+extern boolean isSameFile (const char *const name1, const char *const name2)
+{
+    boolean result = FALSE;
+#if defined (HAVE_STAT_ST_INO)
+    struct stat stat1, stat2;
+
+    if (stat (name1, &stat1) == 0 && stat (name2, &stat2) == 0)
+        result = (boolean) (stat1.st_dev == stat2.st_dev &&
+                stat1.st_ino == stat2.st_ino);
+#else
+    {
+        char *const n1 = absoluteFilename (name1);
+        char *const n2 = absoluteFilename (name2);
+        canonicalizePath (n1);
+        canonicalizePath (n2);
+# if defined (CASE_INSENSITIVE_FILENAMES)
+        result = (boolean) (strcasecmp (n1, n2) == 0);
+#else
+        result = (boolean) (strcmp (n1, n2) == 0);
+#endif
+        free (n1);
+        free (n2);
+    }
+#endif
+    return result;
+}
+
+/* Returns a pointer to the last component of `filePath', i.e. the text
+ * following the final path delimiter, or `filePath' itself when it contains
+ * no delimiter.  The result points into `filePath'; nothing is allocated.
+ * Under VAXC a trailing ";version" is cut off in place.
+ */
+extern const char *baseFilename (const char *const filePath)
+{
+#if defined (MSDOS_STYLE_PATH) || defined (VMS)
+    const char *tail = NULL;
+    const char *delimiter;
+
+    /* Find whichever of the path delimiters is last.  Pointers are only
+     * compared relationally once both are known to point into `filePath';
+     * ordering a pointer against NULL is not defined by the C standard.
+     */
+    for (delimiter = PathDelimiters ; *delimiter != '\0' ; ++delimiter)
+    {
+        const char *sep = strrchr (filePath, *delimiter);
+
+        if (sep != NULL && (tail == NULL || sep > tail))
+            tail = sep;
+    }
+#else
+    const char *tail = strrchr (filePath, PATH_SEPARATOR);
+#endif
+    if (tail == NULL)
+        tail = filePath;
+    else
+        ++tail; /* step past last delimiter */
+#ifdef VAXC
+    {
+        /* remove version number from filename */
+        char *p = strrchr ((char *) tail, ';');
+        if (p != NULL)
+            *p = '\0';
+    }
+#endif
+
+    return tail;
+}
+
+/* Returns the extension of `fileName': the text after the last '.' of its
+ * base name (under QDOS, after the last '_' if there is one), or "" when the
+ * base name has no such delimiter.  A non-empty result points into
+ * `fileName'.
+ */
+extern const char *fileExtension (const char *const fileName)
+{
+    const char *const base = baseFilename (fileName);
+    const char *mark = NULL;
+
+#ifdef QDOS
+    mark = strrchr (base, '_');
+#endif
+    if (mark == NULL)
+        mark = strrchr (base, '.');
+
+    /* skip to first char of extension */
+    return (mark == NULL) ? "" : mark + 1;
+}
+
+/* Tells whether `path' is absolute.
+ *   MS-DOS style: it starts with a separator, or with "<letter>:" followed
+ *                 by a separator.  A drive letter followed by anything else
+ *                 is reported as a fatal error.
+ *   VMS:          it contains a ':'.
+ *   otherwise:    it starts with a path separator.
+ */
+extern boolean isAbsolutePath (const char *const path)
+{
+#if defined (MSDOS_STYLE_PATH)
+    if (isPathSeparator (path [0]))
+        return TRUE;
+    if (isalpha (path [0]) && path [1] == ':')
+    {
+        if (isPathSeparator (path [2]))
+            return TRUE;
+        /* We don't support non-absolute file names with a drive
+         * letter, like `d:NAME' (it's too much hassle).
+         */
+        error (FATAL,
+                "%s: relative file names with drive letters not supported",
+                path);
+    }
+    return FALSE;
+#elif defined (VMS)
+    return (boolean) (strchr (path, ':') != NULL);
+#else
+    return isPathSeparator (path [0]);
+#endif
+}
+
+/* Returns a new vString holding `file' appended to the directory `path'.
+ * The caller owns the result.
+ *
+ * Non-VMS: a separator (OUTPUT_PATH_SEPARATOR) is inserted unless `path'
+ * already ends in one.  An empty `path' contributes nothing, so the result
+ * is just `file'; previously that case read the byte before the string.
+ *
+ * VMS: a ";version" suffix of `file' is dropped and the result lower-cased;
+ * if `file' names a directory (".DIR;1") it is merged into the directory
+ * specification of `path' instead.
+ */
+extern vString *combinePathAndFile (
+        const char *const path, const char *const file)
+{
+    vString *const filePath = vStringNew ();
+#ifdef VMS
+    const char *const directoryId = strstr (file, ".DIR;1");
+
+    if (directoryId == NULL)
+    {
+        const char *const versionId = strchr (file, ';');
+
+        vStringCopyS (filePath, path);
+        if (versionId == NULL)
+            vStringCatS (filePath, file);
+        else
+            vStringNCatS (filePath, file, versionId - file);
+        vStringCopyToLower (filePath, filePath);
+    }
+    else
+    {
+        /* File really is a directory; append it to the path.
+         * Gotcha: doesn't work with logical names.
+         */
+        vStringNCopyS (filePath, path, strlen (path) - 1);
+        vStringPut (filePath, '.');
+        vStringNCatS (filePath, file, directoryId - file);
+        if (strchr (path, '[') != NULL)
+            vStringPut (filePath, ']');
+        else
+            vStringPut (filePath, '>');
+        vStringTerminate (filePath);
+    }
+#else
+    const size_t pathLength = strlen (path);
+    /* Only look at the last character when there is one. */
+    const boolean terminated = (boolean) (pathLength == 0 ||
+            isPathSeparator (path [pathLength - 1]));
+
+    vStringCopyS (filePath, path);
+    if (! terminated)
+    {
+        vStringPut (filePath, OUTPUT_PATH_SEPARATOR);
+        vStringTerminate (filePath);
+    }
+    vStringCatS (filePath, file);
+#endif
+
+    return filePath;
+}
+
+/* Return a newly-allocated string whose contents concatenate those of
+ * s1, s2, s3.  The caller owns the result.
+ * Routine adapted from Gnu etags.
+ *
+ * Lengths are kept in size_t (strlen()'s own type) rather than int, and each
+ * piece is copied once with memcpy(); the last copy includes the terminator.
+ */
+static char* concat (const char *s1, const char *s2, const char *s3)
+{
+    const size_t len1 = strlen (s1);
+    const size_t len2 = strlen (s2);
+    const size_t len3 = strlen (s3);
+    char *const result = xMalloc (len1 + len2 + len3 + 1, char);
+
+    memcpy (result, s1, len1);
+    memcpy (result + len1, s2, len2);
+    memcpy (result + len1 + len2, s3, len3 + 1);
+
+    return result;
+}
+
+/* Return a newly allocated string containing the absolute file name of FILE
+ * given CWD (which should end with a slash).  The caller owns the result.
+ * Routine adapted from Gnu etags.
+ *
+ * A relative FILE is prefixed with `CurrentDirectory'; with MS-DOS style
+ * paths an absolute FILE lacking a drive gets the current drive prepended
+ * and the drive letter is upper-cased.  "/dirname/.." and "/." sequences are
+ * then squeezed out of the string in place.
+ *
+ * The in-place squeezing copies within one buffer, so it uses memmove():
+ * strcpy() has undefined behaviour when source and destination overlap.
+ */
+extern char* absoluteFilename (const char *file)
+{
+    char *slashp, *cp;
+    char *res = NULL;
+    if (isAbsolutePath (file))
+    {
+#ifdef MSDOS_STYLE_PATH
+        if (file [1] == ':')
+            res = eStrdup (file);
+        else
+        {
+            char drive [3];
+            sprintf (drive, "%c:", currentdrive ());
+            res = concat (drive, file, "");
+        }
+#else
+        res = eStrdup (file);
+#endif
+    }
+    else
+        res = concat (CurrentDirectory, file, "");
+
+    /* Delete the "/dirname/.." and "/." substrings. */
+    slashp = strchr (res, PATH_SEPARATOR);
+    while (slashp != NULL && slashp [0] != '\0')
+    {
+        if (slashp[1] == '.')
+        {
+            if (slashp [2] == '.' &&
+                (slashp [3] == PATH_SEPARATOR || slashp [3] == '\0'))
+            {
+                /* Back up to the start of the preceding component. */
+                cp = slashp;
+                do
+                    cp--;
+                while (cp >= res && ! isAbsolutePath (cp));
+                if (cp < res)
+                    cp = slashp;/* the absolute name begins with "/.." */
+#ifdef MSDOS_STYLE_PATH
+                /* Under MSDOS and NT we get `d:/NAME' as absolute file name,
+                 * so the luser could say `d:/../NAME'. We silently treat this
+                 * as `d:/NAME'.
+                 */
+                else if (cp [0] != PATH_SEPARATOR)
+                    cp = slashp;
+#endif
+                /* overlapping move, terminator included */
+                memmove (cp, slashp + 3, strlen (slashp + 3) + 1);
+                slashp = cp;
+                continue;
+            }
+            else if (slashp [2] == PATH_SEPARATOR || slashp [2] == '\0')
+            {
+                /* overlapping move, terminator included */
+                memmove (slashp, slashp + 2, strlen (slashp + 2) + 1);
+                continue;
+            }
+        }
+        slashp = strchr (slashp + 1, PATH_SEPARATOR);
+    }
+
+    if (res [0] == '\0')
+    {
+        /* Everything was squeezed away: the answer is the root directory.
+         * Release the now-empty buffer instead of leaking it.
+         */
+        eFree (res);
+        return eStrdup ("/");
+    }
+    else
+    {
+#ifdef MSDOS_STYLE_PATH
+        /* Canonicalize drive letter case. */
+        if (res [1] == ':' && islower (res [0]))
+            res [0] = toupper (res [0]);
+#endif
+
+        return res;
+    }
+}
+
+/* Return a newly allocated string naming, in absolute form, the directory
+ * in which `file' resides, relative to `CurrentDirectory'.
+ * Routine adapted from Gnu etags.
+ *
+ * `file' is cut temporarily just after its last path separator while the
+ * directory part is resolved, then restored; when it has no separator a
+ * copy of `CurrentDirectory' is returned.  The caller owns the result.
+ */
+extern char* absoluteDirname (char *file)
+{
+    char *const lastSeparator = strrchr (file, PATH_SEPARATOR);
+    char *directory;
+    char saved;
+
+    if (lastSeparator == NULL)
+        return eStrdup (CurrentDirectory);
+
+    saved = lastSeparator [1];
+    lastSeparator [1] = '\0';
+    directory = absoluteFilename (file);
+    lastSeparator [1] = saved;
+
+    return directory;
+}
+
+/* Return a newly allocated string containing the file name of FILE relative
+ * to the absolute directory DIR (which should end with a slash).
+ * Routine adapted from Gnu etags.
+ *
+ * FILE is first made absolute.  The common leading directories of it and DIR
+ * are dropped, and one "../" is emitted for every directory of DIR below
+ * that common root.  If the two names differ in their very first character
+ * the absolute name of FILE is returned unchanged.  The caller owns the
+ * result in either case.
+ */
+extern char* relativeFilename (const char *file, const char *dir)
+{
+    const char *fp, *dp;
+    char *absdir, *res;
+    int i;
+
+    /* Find the common root of file and dir (with a trailing slash).
+     * The scan stops at the terminator as well as at a difference, so two
+     * identical strings no longer run it past the end of both buffers.
+     */
+    absdir = absoluteFilename (file);
+    fp = absdir;
+    dp = dir;
+    while (*fp != '\0' && *fp == *dp)
+    {
+        fp++;
+        dp++;
+    }
+    /* fp and dp now point at the first differing char */
+    do
+    { /* look at the equal chars until path sep */
+        if (fp == absdir)
+            return absdir; /* first char differs, give up */
+        fp--;
+        dp--;
+    } while (*fp != PATH_SEPARATOR);
+
+    /* Build a sequence of "../" strings for the resulting relative file name.
+     */
+    i = 0;
+    while ((dp = strchr (dp + 1, PATH_SEPARATOR)) != NULL)
+        i += 1;
+    res = xMalloc (3 * i + strlen (fp + 1) + 1, char);
+    res [0] = '\0';
+    while (i-- > 0)
+        strcat (res, "../");
+
+    /* Add the file name relative to the common root of file and dir. */
+    strcat (res, fp + 1);
+    free (absdir);
+
+    return res;
+}
+
+/* Creates and opens a new temporary file.
+ *
+ * `mode' is the fdopen() mode for the returned stream.  `*pName' must be
+ * NULL on entry (asserted) and receives the allocated name of the file.
+ * Any failure to name or open the file is reported as a fatal error, so a
+ * normal return always yields an open stream.
+ *
+ * The file is created by the first available of mkstemp(), tempnam() +
+ * open(), or tmpnam() + open(); the open() variants use O_CREAT | O_EXCL and
+ * owner read/write permission.
+ */
+extern FILE *tempFile (const char *const mode, char **const pName)
+{
+ char *name;
+ FILE *fp;
+ int fd;
+#if defined(HAVE_MKSTEMP)
+ const char *const pattern = "tags.XXXXXX";
+ const char *tmpdir = NULL;
+ fileStatus *file = eStat (ExecutableProgram);
+ /* the TMPDIR environment variable is only consulted when the executable
+ * is not setuid; otherwise the compiled-in TMPDIR is used */
+ if (! file->isSetuid)
+ tmpdir = getenv ("TMPDIR");
+ if (tmpdir == NULL)
+ tmpdir = TMPDIR;
+ /* room for: directory + separator + pattern + terminator */
+ name = xMalloc (strlen (tmpdir) + 1 + strlen (pattern) + 1, char);
+ sprintf (name, "%s%c%s", tmpdir, OUTPUT_PATH_SEPARATOR, pattern);
+ fd = mkstemp (name);
+ eStatFree (file);
+#elif defined(HAVE_TEMPNAM)
+ name = tempnam (TMPDIR, "tags");
+ if (name == NULL)
+ error (FATAL | PERROR, "cannot allocate temporary file name");
+ fd = open (name, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
+#else
+ name = xMalloc (L_tmpnam, char);
+ if (tmpnam (name) != name)
+ error (FATAL | PERROR, "cannot assign temporary file name");
+ fd = open (name, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
+#endif
+ if (fd == -1)
+ error (FATAL | PERROR, "cannot open temporary file");
+ fp = fdopen (fd, mode);
+ if (fp == NULL)
+ error (FATAL | PERROR, "cannot open temporary file");
+ DebugStatement (
+ debugPrintf (DEBUG_STATUS, "opened temporary file %s\n", name); )
+ Assert (*pName == NULL);
+ *pName = name;
+ return fp;
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/routines.h b/routines.h
new file mode 100644
index 0000000..c623e17
--- /dev/null
+++ b/routines.h
@@ -0,0 +1,134 @@
+/*
+* $Id: routines.h 536 2007-06-02 06:09:00Z elliotth $
+*
+* Copyright (c) 2002, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* External interface to routines.c
+*/
+#ifndef _ROUTINES_H
+#define _ROUTINES_H
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+/*
+* MACROS
+*/
+/* Typed allocation helpers: allocate (or resize to) room for `n' objects of
+ * `Type' through the error-checking eMalloc/eCalloc/eRealloc wrappers and
+ * yield a `Type *'.  Each expansion is fully parenthesized so that it can be
+ * used inside a larger expression, and `n' is converted to size_t in all
+ * three for consistency.
+ */
+#define xMalloc(n,Type) ((Type *)eMalloc((size_t)(n) * sizeof (Type)))
+#define xCalloc(n,Type) ((Type *)eCalloc((size_t)(n), sizeof (Type)))
+#define xRealloc(p,n,Type) ((Type *)eRealloc((p), (size_t)(n) * sizeof (Type)))
+
+/*
+ * Portability macros
+ */
+#ifndef PATH_SEPARATOR
+# if defined (MSDOS_STYLE_PATH)
+# define PATH_SEPARATOR '\\'
+# elif defined (QDOS)
+# define PATH_SEPARATOR '_'
+# else
+# define PATH_SEPARATOR '/'
+# endif
+#endif
+
+#if defined (MSDOS_STYLE_PATH) && defined (UNIX_PATH_SEPARATOR)
+# define OUTPUT_PATH_SEPARATOR '/'
+#else
+# define OUTPUT_PATH_SEPARATOR PATH_SEPARATOR
+#endif
+
+/*
+* DATA DECLARATIONS
+*/
+#if defined (MSDOS_STYLE_PATH) || defined (VMS)
+extern const char *const PathDelimiters;
+#endif
+extern char *CurrentDirectory;
+typedef int errorSelection;
+enum eErrorTypes { FATAL = 1, WARNING = 2, PERROR = 4 };
+
+/* Result record for a file status query: the name the record describes,
+ * whether that file exists, and a handful of properties that are only
+ * meaningful when it does.
+ */
+typedef struct {
+ /* Name of file for which status is valid */
+ char* name;
+
+ /* Does file exist? If not, members below do not contain valid data. */
+ boolean exists;
+
+ /* is file path a symbolic link to another file? */
+ boolean isSymbolicLink;
+
+ /* Is file (pointed to) a directory? */
+ boolean isDirectory;
+
+ /* Is file (pointed to) a normal file? */
+ boolean isNormalFile;
+
+ /* Is file (pointed to) executable? */
+ boolean isExecutable;
+
+ /* Is file (pointed to) setuid? */
+ boolean isSetuid;
+
+ /* Size of file (pointed to) */
+ unsigned long size;
+} fileStatus;
+
+/*
+* FUNCTION PROTOTYPES
+*/
+extern void freeRoutineResources (void);
+extern void setExecutableName (const char *const path);
+extern const char *getExecutableName (void);
+extern const char *getExecutablePath (void);
+extern void error (const errorSelection selection, const char *const format, ...) __printf__ (2, 3);
+
+/* Memory allocation functions */
+#ifdef NEED_PROTO_MALLOC
+extern void *malloc (size_t);
+extern void *realloc (void *ptr, size_t);
+#endif
+extern void *eMalloc (const size_t size);
+extern void *eCalloc (const size_t count, const size_t size);
+extern void *eRealloc (void *const ptr, const size_t size);
+extern void eFree (void *const ptr);
+
+/* String manipulation functions */
+extern int struppercmp (const char *s1, const char *s2);
+extern int strnuppercmp (const char *s1, const char *s2, size_t n);
+#ifndef HAVE_STRSTR
+extern char* strstr (const char *str, const char *substr);
+#endif
+extern char* eStrdup (const char* str);
+extern void toLowerString (char* str);
+extern void toUpperString (char* str);
+extern char* newLowerString (const char* str);
+extern char* newUpperString (const char* str);
+
+/* File system functions */
+extern void setCurrentDirectory (void);
+extern fileStatus *eStat (const char *const fileName);
+extern void eStatFree (fileStatus *status);
+extern boolean doesFileExist (const char *const fileName);
+extern boolean isRecursiveLink (const char* const dirName);
+extern boolean isSameFile (const char *const name1, const char *const name2);
+#if defined(NEED_PROTO_FGETPOS)
+extern int fgetpos (FILE *stream, fpos_t *pos);
+extern int fsetpos (FILE *stream, fpos_t *pos);
+#endif
+extern const char *baseFilename (const char *const filePath);
+extern const char *fileExtension (const char *const fileName);
+extern boolean isAbsolutePath (const char *const path);
+extern vString *combinePathAndFile (const char *const path, const char *const file);
+extern char* absoluteFilename (const char *file);
+extern char* absoluteDirname (char *file);
+extern char* relativeFilename (const char *file, const char *dir);
+extern FILE *tempFile (const char *const mode, char **const pName);
+
+#endif /* _ROUTINES_H */
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/ruby.c b/ruby.c
new file mode 100644
index 0000000..8001ec7
--- /dev/null
+++ b/ruby.c
@@ -0,0 +1,408 @@
+/*
+* $Id: ruby.c 571 2007-06-24 23:32:14Z elliotth $
+*
+* Copyright (c) 2000-2001, Thaddeus Covert <sahuagin@mediaone.net>
+* Copyright (c) 2002 Matthias Veit <matthias_veit@yahoo.de>
+* Copyright (c) 2004 Elliott Hughes <enh@acm.org>
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* This module contains functions for generating tags for Ruby language
+* files.
+*/
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#include <string.h>
+
+#include "entry.h"
+#include "parse.h"
+#include "read.h"
+#include "vstring.h"
+
+/*
+* DATA DECLARATIONS
+*/
+typedef enum {
+ K_UNDEFINED = -1, K_CLASS, K_METHOD, K_MODULE, K_SINGLETON
+} rubyKind;
+
+/*
+* DATA DEFINITIONS
+*/
+static kindOption RubyKinds [] = {
+ { TRUE, 'c', "class", "classes" },
+ { TRUE, 'f', "method", "methods" },
+ { TRUE, 'm', "module", "modules" },
+ { TRUE, 'F', "singleton method", "singleton methods" }
+};
+
+static stringList* nesting = 0;
+
+/*
+* FUNCTION DEFINITIONS
+*/
+
+/*
+* Builds the scope string for the scopes recorded in 'list'.
+* The list holds one entry per scope entered so far: the name of the
+* module or class for named scopes, and an empty string for scopes
+* opened by 'if' statements and the like. The non-empty entries are
+* joined with '.' in list order; empty entries are skipped.
+* The caller owns the returned string.
+*/
+static vString* stringListToScope (const stringList* list)
+{
+    vString* const scope = vStringNew ();
+    const unsigned int count = stringListCount (list);
+    boolean needSeparator = FALSE;
+    unsigned int index;
+
+    for (index = 0; index < count; ++index)
+    {
+        vString* const part = stringListItem (list, index);
+
+        if (vStringLength (part) == 0)
+            continue;
+        if (needSeparator)
+            vStringCatS (scope, ".");
+        vStringCatS (scope, vStringValue (part));
+        needSeparator = TRUE;
+    }
+    return scope;
+}
+
+/*
+* Attempts to advance 's' past 'literal'.
+* Returns TRUE if it did, FALSE (and leaves 's' where
+* it was) otherwise.
+*
+* The literal must be followed by the end of the string, white space or
+* an opening parenthesis, so that e.g. "end" does not match "ending".
+* The following character is only inspected after the literal itself has
+* matched: on a line shorter than the literal that position lies beyond
+* the terminator of the string.
+*/
+static boolean canMatch (const unsigned char** s, const char* literal)
+{
+    const size_t literal_length = strlen (literal);
+    unsigned char next_char;
+
+    if (strncmp ((const char*) *s, literal, literal_length) != 0)
+    {
+        return FALSE;
+    }
+    /* Additionally check that we're at the end of a token. */
+    next_char = *(*s + literal_length);
+    if ( ! (next_char == 0 || isspace (next_char) || next_char == '('))
+    {
+        return FALSE;
+    }
+    *s += literal_length;
+    return TRUE;
+}
+
+/*
+* Tries each Ruby operator method name in turn at '*cp'. On the first
+* one that canMatch() accepts, 'cp' has been advanced past it, the
+* operator is appended to 'name' and TRUE is returned. Returns FALSE,
+* leaving both untouched, when none matches.
+*/
+static boolean parseRubyOperator (vString* name, const unsigned char** cp)
+{
+    static const char* RUBY_OPERATORS[] = {
+        "[]", "[]=",
+        "**",
+        "!", "~", "+@", "-@",
+        "*", "/", "%",
+        "+", "-",
+        ">>", "<<",
+        "&",
+        "^", "|",
+        "<=", "<", ">", ">=",
+        "<=>", "==", "===", "!=", "=~", "!~",
+        "`",
+        0
+    };
+    const char* const* op;
+
+    for (op = RUBY_OPERATORS; *op != 0; ++op)
+    {
+        if (! canMatch (cp, *op))
+            continue;
+        vStringCatS (name, *op);
+        return TRUE;
+    }
+    return FALSE;
+}
+
+/*
+* Creates a tag entry called 'name' of kind 'kind'. If the current
+* nesting yields a non-empty scope string, it is attached to the tag as
+* a "class" scope. Afterwards a copy of 'name' is pushed onto the
+* nesting list as a new scope, and 'name' is cleared for reuse.
+*/
+static void emitRubyTag (vString* name, rubyKind kind)
+{
+    tagEntryInfo entry;
+    vString* enclosing;
+
+    vStringTerminate (name);
+    /* The scope must be computed before 'name' itself is pushed below. */
+    enclosing = stringListToScope (nesting);
+
+    initTagEntry (&entry, vStringValue (name));
+    entry.kind = RubyKinds [kind].letter;
+    entry.kindName = RubyKinds [kind].name;
+    if (vStringLength (enclosing) > 0)
+    {
+        entry.extensionFields.scope [0] = "class";
+        entry.extensionFields.scope [1] = vStringValue (enclosing);
+    }
+    makeTagEntry (&entry);
+
+    stringListAdd (nesting, vStringNewCopy (name));
+
+    vStringDelete (enclosing);
+    vStringClear (name);
+}
+
+/* Reports whether strchr() finds 'ch' in the string 'list'. */
+static boolean charIsIn (char ch, const char* list)
+{
+    const char* const hit = strchr (list, ch);
+    return (hit != 0);
+}
+
+/* Moves '*cp' forward to the first character that isspace() rejects
+ * (possibly the string terminator). */
+static void skipWhitespace (const unsigned char** cp)
+{
+    const unsigned char* cursor = *cp;
+
+    while (isspace (*cursor))
+        ++cursor;
+    *cp = cursor;
+}
+
+/*
+* Copies the characters forming an identifier from *cp into
+* name, leaving *cp pointing to the character after the identifier.
+*
+* Leading white space is skipped first. 'kind' is the kind the caller
+* expects; the return value is the kind actually recognized:
+*   - K_UNDEFINED for an anonymous class ("class << X"); nothing is copied;
+*   - K_SINGLETON when a method name contains a '.': the text up to and
+*     including the '.' is discarded and parsing restarts after it;
+*   - otherwise 'kind' itself.
+* 'name' may be left empty if no identifier characters are found.
+*/
+static rubyKind parseIdentifier (
+ const unsigned char** cp, vString* name, rubyKind kind)
+{
+ /* Method names are slightly different to class and variable names.
+ * A method name may optionally end with a question mark, exclamation
+ * point or equals sign. These are all part of the name.
+ * A method name may also contain a period if it's a singleton method.
+ */
+ const char* also_ok = (kind == K_METHOD) ? "_.?!=" : "_";
+
+ skipWhitespace (cp);
+
+ /* Check for an anonymous (singleton) class such as "class << HTTP". */
+ if (kind == K_CLASS && **cp == '<' && *(*cp + 1) == '<')
+ {
+ return K_UNDEFINED;
+ }
+
+ /* Check for operators such as "def []=(key, val)". */
+ if (kind == K_METHOD || kind == K_SINGLETON)
+ {
+ if (parseRubyOperator (name, cp))
+ {
+ return kind;
+ }
+ }
+
+ /* Copy the identifier into 'name'. */
+ while (**cp != 0 && (isalnum (**cp) || charIsIn (**cp, also_ok)))
+ {
+ char last_char = **cp;
+
+ vStringPut (name, last_char);
+ ++*cp;
+
+ if (kind == K_METHOD)
+ {
+ /* Recognize singleton methods. */
+ if (last_char == '.')
+ {
+ /* Drop the receiver collected so far and parse what follows the
+ * '.' as the singleton method's own name. */
+ vStringTerminate (name);
+ vStringClear (name);
+ return parseIdentifier (cp, name, K_SINGLETON);
+ }
+
+ /* Recognize characters which mark the end of a method name. */
+ if (charIsIn (last_char, "?!="))
+ {
+ break;
+ }
+ }
+ }
+ return kind;
+}
+
+/*
+* Called with '*cp' just past a "module", "class" or "def" keyword.
+* If white space follows, the identifier after it is parsed and, when
+* one was recognized, emitted as a tag of the kind that was found.
+*/
+static void readAndEmitTag (const unsigned char** cp, rubyKind expected_kind)
+{
+    vString *name;
+    rubyKind actual_kind;
+
+    if (! isspace (**cp))
+        return;
+
+    name = vStringNew ();
+    actual_kind = parseIdentifier (cp, name, expected_kind);
+
+    /*
+     * No tag is created when the kind is undefined or the name is empty.
+     * What kind of tags should we create for code like this?
+     *
+     * %w(self.clfloor clfloor).each do |name|
+     * module_eval <<-"end;"
+     * def #{name}(x, y=1)
+     * q, r = x.divmod(y)
+     * q = q.to_i
+     * return q, r
+     * end
+     * end;
+     * end
+     *
+     * Or this?
+     *
+     * class << HTTP
+     *
+     * For now, we don't create any.
+     */
+    if (actual_kind != K_UNDEFINED && vStringLength (name) != 0)
+        emitRubyTag (name, actual_kind);
+
+    vStringDelete (name);
+}
+
+/* Records entry into a scope that has no name by pushing an empty string
+ * onto the nesting list.
+ */
+static void enterUnnamedScope (void)
+{
+ stringListAdd (nesting, vStringNewInit (""));
+}
+
+/*
+* Parser entry point: reads the current input file line by line and emits
+* tags for the modules, classes and methods it finds. The file-level
+* 'nesting' list tracks the scopes that are open while scanning; it is
+* created on entry and destroyed before returning.
+*
+* Lines between "=begin" and "=end" form a multi-line comment and are
+* skipped entirely, so that keywords appearing in such a comment neither
+* open scopes nor produce tags.
+*/
+static void findRubyTags (void)
+{
+    const unsigned char *line;
+    boolean inMultiLineComment = FALSE;
+
+    nesting = stringListNew ();
+
+    /* FIXME: this whole scheme is wrong, because Ruby isn't line-based.
+     * You could perfectly well write:
+     *
+     * def
+     * method
+     * puts("hello")
+     * end
+     *
+     * if you wished, and this function would fail to recognize anything.
+     */
+    while ((line = fileReadLine ()) != NULL)
+    {
+        const unsigned char *cp = line;
+
+        if (canMatch (&cp, "=begin"))
+        {
+            inMultiLineComment = TRUE;
+            continue;
+        }
+        if (canMatch (&cp, "=end"))
+        {
+            inMultiLineComment = FALSE;
+            continue;
+        }
+        /* Comment text is not code: don't look for keywords in it. */
+        if (inMultiLineComment)
+            continue;
+
+        skipWhitespace (&cp);
+
+        /* Avoid mistakenly starting a scope for modifiers such as
+         *
+         * return if <exp>
+         *
+         * FIXME: this is fooled by code such as
+         *
+         * result = if <exp>
+         * <a>
+         * else
+         * <b>
+         * end
+         *
+         * FIXME: we're also fooled if someone does something heinous such as
+         *
+         * puts("hello") \
+         * unless <exp>
+         */
+        if (canMatch (&cp, "case") || canMatch (&cp, "for") ||
+            canMatch (&cp, "if") || canMatch (&cp, "unless") ||
+            canMatch (&cp, "while"))
+        {
+            enterUnnamedScope ();
+        }
+
+        /*
+         * "module M", "class C" and "def m" should only be at the beginning
+         * of a line.
+         */
+        if (canMatch (&cp, "module"))
+        {
+            readAndEmitTag (&cp, K_MODULE);
+        }
+        else if (canMatch (&cp, "class"))
+        {
+            readAndEmitTag (&cp, K_CLASS);
+        }
+        else if (canMatch (&cp, "def"))
+        {
+            readAndEmitTag (&cp, K_METHOD);
+        }
+
+        while (*cp != '\0')
+        {
+            /* FIXME: we don't cope with here documents,
+             * or regular expression literals, or ... you get the idea.
+             * Hopefully, the restriction above that insists on seeing
+             * definitions at the starts of lines should keep us out of
+             * mischief.
+             */
+            if (isspace (*cp))
+            {
+                ++cp;
+            }
+            else if (*cp == '#')
+            {
+                /* FIXME: this is wrong, but there *probably* won't be a
+                 * definition after an interpolated string (where # doesn't
+                 * mean 'comment').
+                 */
+                break;
+            }
+            else if (canMatch (&cp, "begin") || canMatch (&cp, "do"))
+            {
+                enterUnnamedScope ();
+            }
+            else if (canMatch (&cp, "end") && stringListCount (nesting) > 0)
+            {
+                /* Leave the most recent scope. */
+                vStringDelete (stringListLast (nesting));
+                stringListRemoveLast (nesting);
+            }
+            else if (*cp == '"')
+            {
+                /* Skip string literals.
+                 * FIXME: should cope with escapes and interpolation.
+                 */
+                do {
+                    ++cp;
+                } while (*cp != 0 && *cp != '"');
+            }
+            else if (*cp != '\0')
+            {
+                /* Skip one word (or a single other character). */
+                do
+                    ++cp;
+                while (isalnum (*cp) || *cp == '_');
+            }
+        }
+    }
+    stringListDelete (nesting);
+}
+
+/* Creates the parser definition for Ruby: files with the extensions "rb"
+ * and "ruby" are parsed by findRubyTags() using the kinds in RubyKinds.
+ */
+extern parserDefinition* RubyParser (void)
+{
+    static const char *const extensions [] = { "rb", "ruby", NULL };
+    parserDefinition* const parser = parserNew ("Ruby");
+
+    parser->parser     = findRubyTags;
+    parser->extensions = extensions;
+    parser->kinds      = RubyKinds;
+    parser->kindCount  = KIND_COUNT (RubyKinds);
+
+    return parser;
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/scheme.c b/scheme.c
new file mode 100644
index 0000000..e7f61f4
--- /dev/null
+++ b/scheme.c
@@ -0,0 +1,111 @@
+/*
+* $Id: scheme.c 443 2006-05-30 04:37:13Z darren $
+*
+* Copyright (c) 2000-2002, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* This module contains functions for generating tags for Scheme language
+* files.
+*/
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#include <string.h>
+
+#include "parse.h"
+#include "read.h"
+#include "vstring.h"
+
+/*
+* DATA DEFINITIONS
+*/
+typedef enum {
+ K_FUNCTION, K_SET
+} schemeKind;
+
+static kindOption SchemeKinds [] = {
+ { TRUE, 'f', "function", "functions" },
+ { TRUE, 's', "set", "sets" }
+};
+
+/*
+* FUNCTION DEFINITIONS
+*/
+
+/* Algorithm adapted from GNU etags.
+ * Scheme tag functions
+ * look for (def... xyzzy
+ * look for (def... (xyzzy
+ * look for (def ... ((... (xyzzy ....
+ * look for (set! xyzzy
+ */
+static void readIdentifier (vString *const name, const unsigned char *cp)
+{
+    /* Start from an empty name, then copy characters until white space, a
+     * parenthesis, or the end of the line is reached.
+     */
+    vStringClear (name);
+    while (*cp != '\0' && !isspace (*cp) && *cp != '(' && *cp != ')')
+    {
+        vStringPut (name, (int) *cp);
+        ++cp;
+    }
+    vStringTerminate (name);
+}
+
+/* Read the input one line at a time and emit a tag for each line that
+ * starts with "(def..." (kind K_FUNCTION) or "(set! " (kind K_SET).
+ * Both keywords are matched case-insensitively and only at column 0.
+ */
+static void findSchemeTags (void)
+{
+    vString *name = vStringNew ();
+    const unsigned char *line;
+
+    while ((line = fileReadLine ()) != NULL)
+    {
+        const unsigned char *cp = line;
+
+        if (cp [0] == '(' &&
+            (cp [1] == 'D' || cp [1] == 'd') &&
+            (cp [2] == 'E' || cp [2] == 'e') &&
+            (cp [3] == 'F' || cp [3] == 'f'))
+        {
+            /* Skip the remainder of the "(def..." word. The end-of-line
+             * test matters: a line consisting only of "(define" has no
+             * white space to stop on, and scanning must not run past the
+             * terminating NUL.
+             */
+            while (*cp != '\0' && !isspace (*cp))
+                cp++;
+            /* Skip over open parens and white space */
+            while (*cp != '\0' && (isspace (*cp) || *cp == '('))
+                cp++;
+            readIdentifier (name, cp);
+            makeSimpleTag (name, SchemeKinds, K_FUNCTION);
+        }
+        /* After a "(def" match cp no longer points at '(', so this test
+         * can only succeed on lines that did not match above.
+         */
+        if (cp [0] == '(' &&
+            (cp [1] == 'S' || cp [1] == 's') &&
+            (cp [2] == 'E' || cp [2] == 'e') &&
+            (cp [3] == 'T' || cp [3] == 't') &&
+            cp [4] == '!' &&
+            isspace (cp [5]))
+        {
+            /* Skip the "(set!" word itself */
+            while (*cp != '\0' && !isspace (*cp))
+                cp++;
+            /* Skip over white space */
+            while (isspace (*cp))
+                cp++;
+            readIdentifier (name, cp);
+            makeSimpleTag (name, SchemeKinds, K_SET);
+        }
+    }
+    vStringDelete (name);
+}
+
+/* Create the parser definition for Scheme and fill in its tag kinds, the
+ * file extensions it handles, and its tag-finding routine.
+ */
+extern parserDefinition* SchemeParser (void)
+{
+    static const char *const extensions [] = {
+        "SCM", "SM", "sch", "scheme", "scm", "sm", NULL
+    };
+    parserDefinition* const parser = parserNew ("Scheme");
+
+    parser->parser     = findSchemeTags;
+    parser->extensions = extensions;
+    parser->kindCount  = KIND_COUNT (SchemeKinds);
+    parser->kinds      = SchemeKinds;
+    return parser;
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/sh.c b/sh.c
new file mode 100644
index 0000000..440ed85
--- /dev/null
+++ b/sh.c
@@ -0,0 +1,115 @@
+/*
+* $Id: sh.c 443 2006-05-30 04:37:13Z darren $
+*
+* Copyright (c) 2000-2002, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* This module contains functions for generating tags for scripts for the
+* Bourne shell (and its derivatives, the Korn and Z shells).
+*/
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#include <string.h>
+
+#include "parse.h"
+#include "read.h"
+#include "routines.h"
+#include "vstring.h"
+
+/*
+* DATA DEFINITIONS
+*/
+typedef enum {
+ K_FUNCTION
+} shKind;
+
+static kindOption ShKinds [] = {
+ { TRUE, 'f', "function", "functions"}
+};
+
+/*
+* FUNCTION DEFINITIONS
+*/
+
+/* Reject any tag "main" from a file named "configure". These appear in
+ * here-documents in GNU autoconf scripts and will add a haystack to the
+ * needle.
+ */
+static boolean hackReject (const vString* const tagName)
+{
+    const char *const scriptName = baseFilename (vStringValue (File.name));
+
+    /* Only the file whose base name is exactly "configure" is affected. */
+    if (strcmp (scriptName, "configure") != 0)
+        return FALSE;
+    /* Within that file, only the tag "main" is rejected. */
+    return (boolean) (strcmp (vStringValue (tagName), "main") == 0);
+}
+
+/* Read the input one line at a time and emit a K_FUNCTION tag for each shell
+ * function definition found. Two forms are recognized:
+ *     function NAME ...
+ *     NAME ( )
+ * NAME is a run of alphanumeric characters and underscores.
+ */
+static void findShTags (void)
+{
+ vString *name = vStringNew ();
+ const unsigned char *line;
+
+ while ((line = fileReadLine ()) != NULL)
+ {
+ const unsigned char* cp = line;
+ boolean functionFound = FALSE;
+
+ /* Only a '#' in the very first column marks the line as a comment. */
+ if (line [0] == '#')
+ continue;
+
+ while (isspace (*cp))
+ cp++;
+ /* "function" keyword followed by white space: the next word is the name. */
+ if (strncmp ((const char*) cp, "function", (size_t) 8) == 0 &&
+ isspace ((int) cp [8]))
+ {
+ functionFound = TRUE;
+ cp += 8;
+ /* cp [8] was already tested above, so this test cannot fail here. */
+ if (! isspace ((int) *cp))
+ continue;
+ while (isspace ((int) *cp))
+ ++cp;
+ }
+ /* No identifier at this position: nothing to tag. "name" is still empty
+ * here, so skipping the vStringClear at the bottom of the loop is harmless.
+ */
+ if (! (isalnum ((int) *cp) || *cp == '_'))
+ continue;
+ while (isalnum ((int) *cp) || *cp == '_')
+ {
+ vStringPut (name, (int) *cp);
+ ++cp;
+ }
+ vStringTerminate (name);
+ while (isspace ((int) *cp))
+ ++cp;
+ /* "NAME ( )" form; white space is allowed between the parentheses.
+ * hackReject is consulted only for this form, not for the "function"
+ * keyword form handled above.
+ */
+ if (*cp++ == '(')
+ {
+ while (isspace ((int) *cp))
+ ++cp;
+ if (*cp == ')' && ! hackReject (name))
+ functionFound = TRUE;
+ }
+ if (functionFound)
+ makeSimpleTag (name, ShKinds, K_FUNCTION);
+ /* Reset the buffer so the next line starts with an empty name. */
+ vStringClear (name);
+ }
+ vStringDelete (name);
+}
+
+/* Create the parser definition for shell scripts and fill in its tag kinds,
+ * the file extensions it handles, and its tag-finding routine.
+ */
+extern parserDefinition* ShParser (void)
+{
+    static const char *const extensions [] = {
+        "sh", "SH", "bsh", "bash", "ksh", "zsh", NULL
+    };
+    parserDefinition* const parser = parserNew ("Sh");
+
+    parser->parser     = findShTags;
+    parser->extensions = extensions;
+    parser->kindCount  = KIND_COUNT (ShKinds);
+    parser->kinds      = ShKinds;
+    return parser;
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/slang.c b/slang.c
new file mode 100644
index 0000000..74c50c3
--- /dev/null
+++ b/slang.c
@@ -0,0 +1,41 @@
+/*
+ * $Id: slang.c 443 2006-05-30 04:37:13Z darren $
+ *
+ * Copyright (c) 2000-2001, Francesc Rocher
+ *
+ * Author: Francesc Rocher <f.rocher@computer.org>.
+ *
+ * This source code is released for free distribution under the terms of the
+ * GNU General Public License.
+ *
+ * This module contains functions for generating tags for S-Lang files.
+ */
+
+/*
+ * INCLUDE FILES
+ */
+#include "general.h" /* must always come first */
+#include "parse.h"
+
+/*
+ * FUNCTION DEFINITIONS
+ */
+/* Register the two tag regular expressions used for S-Lang with the given
+ * language. Each addTagRegex call passes, in order: the pattern, the tag name
+ * taken from capture group 1, a "letter,name,description" kind spec, and a
+ * flags string (presumably "i" requests case-insensitive matching -- confirm
+ * against addTagRegex).
+ */
+static void installSlangRegex (const langType language)
+{
+ /* Functions: "define NAME" anywhere on a line that contains no ';' after
+ * the name.
+ */
+ addTagRegex (language,
+ "^.*define[ \t]+([A-Z_][A-Z0-9_]*)[^;]*$",
+ "\\1", "f,function,functions", "i");
+ /* Namespaces: implements ("NAME"); */
+ addTagRegex (language,
+ "^[ \t]*implements[ \t]+\\([ \t]*\"([^\"]*)\"[ \t]*\\)[ \t]*;",
+ "\\1", "n,namespace,namespaces", NULL);
+}
+
+/* Create the parser definition for S-Lang. This parser is regex based: it
+ * supplies an initialization routine that installs the patterns rather than
+ * a line-scanning routine.
+ */
+extern parserDefinition* SlangParser (void)
+{
+    static const char *const extensions [] = { "sl", NULL };
+    parserDefinition* const parser = parserNew ("SLang");
+
+    parser->regex      = TRUE;
+    parser->initialize = installSlangRegex;
+    parser->extensions = extensions;
+    return parser;
+}
diff --git a/sml.c b/sml.c
new file mode 100644
index 0000000..9fbb21b
--- /dev/null
+++ b/sml.c
@@ -0,0 +1,212 @@
+/*
+* $Id: sml.c 536 2007-06-02 06:09:00Z elliotth $
+*
+* Copyright (c) 2002, Venkatesh Prasad Ranganath and Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* This module contains functions for generating tags for SML language files.
+*/
+
+/*
+ * INCLUDE FILES
+ */
+#include "general.h" /* must always come first */
+
+#include <string.h>
+
+#include "entry.h"
+#include "parse.h"
+#include "read.h"
+#include "vstring.h"
+
+/*
+ * DATA DECLARATIONS
+ */
+typedef enum {
+ K_AND = -2,
+ K_NONE = -1,
+ K_EXCEPTION,
+ K_FUNCTION,
+ K_FUNCTOR,
+ K_SIGNATURE,
+ K_STRUCTURE,
+ K_TYPE,
+ K_VAL
+} smlKind;
+
+/*
+ * DATA DEFINITIONS
+ */
+static kindOption SmlKinds[] = {
+ { TRUE, 'e', "exception", "exception declarations" },
+ { TRUE, 'f', "function", "function definitions" },
+ { TRUE, 'c', "functor", "functor definitions" },
+ { TRUE, 's', "signature", "signature declarations" },
+ { TRUE, 'r', "structure", "structure declarations" },
+ { TRUE, 't', "type", "type definitions" },
+ { TRUE, 'v', "value", "value bindings" }
+};
+
+static struct {
+ const char *keyword;
+ smlKind kind;
+} SmlKeywordTypes [] = {
+ { "abstype", K_TYPE },
+ { "and", K_AND },
+ { "datatype", K_TYPE },
+ { "exception", K_EXCEPTION },
+ { "functor", K_FUNCTOR },
+ { "fun", K_FUNCTION },
+ { "signature", K_SIGNATURE },
+ { "structure", K_STRUCTURE },
+ { "type", K_TYPE },
+ { "val", K_VAL }
+};
+
+static unsigned int CommentLevel = 0;
+
+/*
+ * FUNCTION DEFINITIONS
+ */
+
+/* Emit a tag called "name" of the given SML kind.
+ *
+ * K_AND and K_NONE are negative pseudo-kinds that have no entry in
+ * SmlKinds; a request with such a kind (or any value outside the table) is
+ * ignored rather than indexing outside the array.
+ */
+static void makeSmlTag (smlKind type, vString *name)
+{
+    tagEntryInfo tag;
+
+    if (type < K_EXCEPTION || type > K_VAL)
+        return;
+
+    initTagEntry (&tag, vStringValue (name));
+    tag.kindName = SmlKinds [type].name;
+    tag.kind = SmlKinds [type].letter;
+    makeTagEntry (&tag);
+}
+
+/* Return a pointer to the first character at or after "cp" that is not
+ * white space (this may be the terminating NUL).
+ */
+static const unsigned char *skipSpace (const unsigned char *cp)
+{
+    for ( ; isspace ((int) *cp) ; ++cp)
+        ;
+    return cp;
+}
+
+/* Report whether "c" may be part of an SML identifier: a letter, a digit, or
+ * one of the symbolic characters listed below. NUL is never an identifier
+ * character.
+ */
+static boolean isIdentifier (int c)
+{
+    /* Symbolic identifier characters; note that '_' is included here. */
+    static const char symbolic [] = "!%&$#+-<>=/?@\\~'^|*_";
+
+    if (isalnum (c))
+        return TRUE;
+    return (boolean) (c != '\0' && strchr (symbolic, c) != NULL);
+}
+
+/* Skip forward from "cp" to the next identifier and copy it into
+ * "identifier" (which is cleared first). Returns the position just past the
+ * identifier, or:
+ *   - the position just past a "(*" comment opener, after incrementing
+ *     CommentLevel, with "identifier" left empty; or
+ *   - the end of the line if no identifier was found.
+ */
+static const unsigned char *parseIdentifier (
+ const unsigned char *cp, vString *const identifier)
+{
+ boolean stringLit = FALSE;
+ vStringClear (identifier);
+ /* Consume non-identifier characters; inside a string literal consume
+ * everything.
+ */
+ while (*cp != '\0' && (!isIdentifier ((int) *cp) || stringLit))
+ {
+ /* "oneback" is the character just consumed; *cp is the one after it. */
+ int oneback = *cp;
+ cp++;
+ if (oneback == '(' && *cp == '*' && stringLit == FALSE)
+ {
+ CommentLevel++;
+ return ++cp;
+ }
+ /* An unescaped double quote following the consumed character starts a
+ * string literal.
+ */
+ if (*cp == '"' && oneback != '\\')
+ {
+ stringLit = TRUE;
+ continue;
+ }
+ /* NOTE(review): this branch looks unreachable -- whenever its condition
+ * holds, the test above has already matched and continued, so stringLit
+ * is never cleared and the rest of the line is consumed once a string
+ * starts. Confirm whether a closing quote was meant to end the literal.
+ */
+ if (stringLit && *cp == '"' && oneback != '\\')
+ stringLit = FALSE;
+ }
+ /* Reached the end of the line without finding an identifier.
+ * NOTE(review): the NULL test comes after cp has already been dereferenced,
+ * so it cannot take effect.
+ */
+ if (strcmp ((const char *) cp, "") == 0 || cp == NULL)
+ return cp;
+
+ while (isIdentifier ((int) *cp))
+ {
+ vStringPut (identifier, (int) *cp);
+ cp++;
+ }
+ vStringTerminate (identifier);
+ return cp;
+}
+
+/* Read the next identifier starting at "*cp", advance "*cp" past it, and
+ * return the kind associated with it in SmlKeywordTypes, or K_NONE when the
+ * identifier is not one of the listed keywords.
+ */
+static smlKind findNextIdentifier (const unsigned char **cp)
+{
+    const unsigned int keywordCount =
+        sizeof (SmlKeywordTypes) / sizeof (SmlKeywordTypes [0]);
+    vString *const word = vStringNew ();
+    smlKind kind = K_NONE;
+    unsigned int k;
+
+    *cp = parseIdentifier (*cp, word);
+    for (k = 0 ; k < keywordCount ; ++k)
+    {
+        /* Keywords must match the whole identifier exactly. */
+        if (strcmp (vStringValue (word), SmlKeywordTypes [k].keyword) == 0)
+        {
+            kind = SmlKeywordTypes [k].kind;
+            break;
+        }
+    }
+    vStringDelete (word);
+    return kind;
+}
+
+/* Read the input one line at a time and emit a tag for the identifier that
+ * follows each declaration keyword listed in SmlKeywordTypes. The keyword
+ * "and" continues the most recent declaration kind.
+ */
+static void findSmlTags (void)
+{
+    vString *const identifier = vStringNew ();
+    const unsigned char *line;
+    smlKind lastTag = K_NONE;
+
+    /* CommentLevel is file-scope state. Reset it so that a comment left
+     * open at the end of one input file does not make the start of the
+     * next file look like comment text.
+     */
+    CommentLevel = 0;
+
+    while ((line = fileReadLine ()) != NULL)
+    {
+        const unsigned char *cp = skipSpace (line);
+        do
+        {
+            smlKind foundTag;
+            if (CommentLevel != 0)
+            {
+                /* Inside a comment: look for its end on this line. */
+                cp = (const unsigned char *) strstr ((const char *) cp, "*)");
+                if (cp == NULL)
+                    continue;   /* jumps to the loop test, ending this line */
+                else
+                {
+                    --CommentLevel;
+                    cp += 2;
+                }
+            }
+            foundTag = findNextIdentifier (&cp);
+            if (foundTag != K_NONE)
+            {
+                cp = skipSpace (cp);
+                cp = parseIdentifier (cp, identifier);
+                if (foundTag == K_AND)
+                {
+                    /* An "and" seen before any declaration keyword has no
+                     * kind to inherit; K_NONE is not a valid SmlKinds
+                     * index, so emit nothing in that case.
+                     */
+                    if (lastTag != K_NONE)
+                        makeSmlTag (lastTag, identifier);
+                }
+                else
+                {
+                    makeSmlTag (foundTag, identifier);
+                    lastTag = foundTag;
+                }
+            }
+            if (strstr ((const char *) cp, "(*") != NULL)
+            {
+                cp += 2;
+                cp = (const unsigned char *) strstr ((const char *) cp, "*)");
+                if (cp == NULL)
+                    ++CommentLevel;
+            }
+        } while (cp != NULL && strcmp ((const char *) cp, "") != 0);
+    }
+    vStringDelete (identifier);
+}
+
+/* Create the parser definition for SML and fill in its tag kinds, the file
+ * extensions it handles, and its tag-finding routine.
+ */
+extern parserDefinition *SmlParser (void)
+{
+    static const char *const extensions[] = { "sml", "sig", NULL };
+    parserDefinition *const parser = parserNew ("SML");
+
+    parser->parser     = findSmlTags;
+    parser->extensions = extensions;
+    parser->kindCount  = KIND_COUNT (SmlKinds);
+    parser->kinds      = SmlKinds;
+    return parser;
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/sort.c b/sort.c
new file mode 100644
index 0000000..09ba87a
--- /dev/null
+++ b/sort.c
@@ -0,0 +1,230 @@
+/*
+* $Id: sort.c 498 2007-02-17 22:43:15Z dhiebert $
+*
+* Copyright (c) 1996-2002, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* This module contains functions to sort the tag entries.
+*/
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#if defined (HAVE_STDLIB_H)
+# include <stdlib.h> /* to declare malloc () */
+#endif
+#include <string.h>
+#include <stdio.h>
+
+#include "debug.h"
+#include "entry.h"
+#include "options.h"
+#include "read.h"
+#include "routines.h"
+#include "sort.h"
+
+/*
+* FUNCTION DEFINITIONS
+*/
+
+/* Copy the contents of the named file to standard output, one character at
+ * a time, then flush standard output. A file that cannot be opened is
+ * silently ignored.
+ */
+extern void catFile (const char *const name)
+{
+    FILE *const in = fopen (name, "r");
+    int ch;
+
+    if (in == NULL)
+        return;
+    for (ch = getc (in) ; ch != EOF ; ch = getc (in))
+        putchar (ch);
+    fflush (stdout);
+    fclose (in);
+}
+
+#ifdef EXTERNAL_SORT
+
+#ifdef NON_CONST_PUTENV_PROTOTYPE
+# define PE_CONST
+#else
+# define PE_CONST const
+#endif
+
+/* Sort the tag file in place by running the external "sort" command through
+ * system (), with LC_COLLATE and LC_ALL set to "C". Duplicate lines are
+ * removed (-u); case is folded (-f) when Option.sorted is SO_FOLDSORTED.
+ * When "toStdout" is true the sorted file is then copied to standard output.
+ * Any failure, including failure to allocate the command buffer, is reported
+ * as a fatal error.
+ */
+extern void externalSortTags (const boolean toStdout)
+{
+ const char *const sortNormalCommand = "sort -u -o";
+ const char *const sortFoldedCommand = "sort -u -f -o";
+ const char *sortCommand =
+ Option.sorted == SO_FOLDSORTED ? sortFoldedCommand : sortNormalCommand;
+ PE_CONST char *const sortOrder1 = "LC_COLLATE=C";
+ PE_CONST char *const sortOrder2 = "LC_ALL=C";
+ /* Sized for the longest variant below: two environment assignments, the
+ * command, the file name twice, and four separating spaces.
+ */
+ const size_t length = 4 + strlen (sortOrder1) + strlen (sortOrder2) +
+ strlen (sortCommand) + (2 * strlen (tagFileName ()));
+ char *const cmd = (char *) malloc (length + 1);
+ /* Stays -1 if the allocation fails, which selects the fatal error below. */
+ int ret = -1;
+
+ if (cmd != NULL)
+ {
+ /* Ensure ASCII value sort order.
+ */
+ /* NOTE(review): the tag file name is inserted into the command line
+ * unquoted; a name containing spaces or shell metacharacters would
+ * presumably not survive the shell -- confirm and quote if needed.
+ */
+#ifdef HAVE_SETENV
+ setenv ("LC_COLLATE", "C", 1);
+ setenv ("LC_ALL", "C", 1);
+ sprintf (cmd, "%s %s %s", sortCommand, tagFileName (), tagFileName ());
+#else
+# ifdef HAVE_PUTENV
+ putenv (sortOrder1);
+ putenv (sortOrder2);
+ sprintf (cmd, "%s %s %s", sortCommand, tagFileName (), tagFileName ());
+# else
+ /* No way to modify the environment: prefix the assignments to the
+ * command line instead.
+ */
+ sprintf (cmd, "%s %s %s %s %s", sortOrder1, sortOrder2, sortCommand,
+ tagFileName (), tagFileName ());
+# endif
+#endif
+ verbose ("system (\"%s\")\n", cmd);
+ ret = system (cmd);
+ free (cmd);
+
+ }
+ if (ret != 0)
+ error (FATAL | PERROR, "cannot sort tag file");
+ else if (toStdout)
+ catFile (tagFileName ());
+}
+
+#else
+
+/*
+ * These functions provide a basic internal sort. No great memory
+ * optimization is performed (e.g. recursive subdivided sorts),
+ * so have lots of memory if you have large tag files.
+ */
+
+/* Report a fatal sorting failure. "fp" is closed first when it is not NULL.
+ * With a message, the error reads "<msg>: cannot sort tag file"; without
+ * one, the error is raised with the PERROR flag set.
+ */
+static void failedSort (FILE *const fp, const char* msg)
+{
+    const char* const cannotSort = "cannot sort tag file";
+
+    if (fp != NULL)
+        fclose (fp);
+    if (msg != NULL)
+        error (FATAL, "%s: %s", msg, cannotSort);
+    else
+        error (FATAL | PERROR, cannotSort);
+}
+
+/* qsort () comparison callback for case-folded sorting. Each argument points
+ * to an element of the line table, i.e. to a pointer to a line.
+ */
+static int compareTagsFolded(const void *const one, const void *const two)
+{
+    const char *const *const lhs = (const char* const*) one;
+    const char *const *const rhs = (const char* const*) two;
+
+    return struppercmp (*lhs, *rhs);
+}
+
+/* qsort () comparison callback for case-sensitive sorting. Each argument
+ * points to an element of the line table, i.e. to a pointer to a line.
+ */
+static int compareTags (const void *const one, const void *const two)
+{
+    const char *const *const lhs = (const char* const*) one;
+    const char *const *const rhs = (const char* const*) two;
+
+    return strcmp (*lhs, *rhs);
+}
+
+/* Write the "numTags" sorted lines in "table" either to standard output or
+ * back into the tag file. A line identical to the one before it is written
+ * only when Option.xref is set.
+ */
+static void writeSortedTags (
+        char **const table, const size_t numTags, const boolean toStdout)
+{
+    FILE *out = stdout;
+    size_t n;
+
+    if (! toStdout)
+    {
+        out = fopen (tagFileName (), "w");
+        if (out == NULL)
+            failedSort (out, NULL);
+    }
+    for (n = 0 ; n < numTags ; ++n)
+    {
+        /* Identical tag *lines* (including the search pattern) are
+         * filtered out unless this is an xref file.
+         */
+        const boolean duplicate = (boolean)
+            (n > 0 && ! Option.xref && strcmp (table [n], table [n-1]) == 0);
+
+        if (! duplicate && fputs (table [n], out) == EOF)
+            failedSort (out, NULL);
+    }
+    if (toStdout)
+        fflush (out);
+    else
+        fclose (out);
+}
+
+/* Sort the tag file in memory: read every non-blank line into its own
+ * allocated buffer, sort the table of line pointers with qsort (), and hand
+ * the result to writeSortedTags (). Failures are reported through
+ * failedSort ().
+ */
+extern void internalSortTags (const boolean toStdout)
+{
+ vString *vLine = vStringNew ();
+ FILE *fp = NULL;
+ const char *line;
+ size_t i;
+ int (*cmpFunc)(const void *, const void *);
+
+ /* Allocate a table of line pointers to be sorted.
+ */
+ /* NOTE(review): if numTags were 0, malloc (0) may legitimately return NULL
+ * and would be reported below as "out of memory" -- confirm that callers
+ * never sort an empty tag file.
+ */
+ size_t numTags = TagFile.numTags.added + TagFile.numTags.prev;
+ const size_t tableSize = numTags * sizeof (char *);
+ char **const table = (char **) malloc (tableSize); /* line pointers */
+ DebugStatement ( size_t mallocSize = tableSize; ) /* cumulative total */
+
+
+ cmpFunc = Option.sorted == SO_FOLDSORTED ? compareTagsFolded : compareTags;
+ if (table == NULL)
+ failedSort (fp, "out of memory");
+
+ /* Open the tag file and place its lines into allocated buffers.
+ */
+ fp = fopen (tagFileName (), "r");
+ if (fp == NULL)
+ failedSort (fp, NULL);
+ /* "i" advances only when a line is stored, so blank lines do not use up
+ * table slots.
+ */
+ for (i = 0 ; i < numTags && ! feof (fp) ; )
+ {
+ line = readLine (vLine, fp);
+ if (line == NULL)
+ {
+ /* A NULL line without end-of-file is treated as a read error. */
+ if (! feof (fp))
+ failedSort (fp, NULL);
+ break;
+ }
+ else if (*line == '\0' || strcmp (line, "\n") == 0)
+ ; /* ignore blank lines */
+ else
+ {
+ const size_t stringSize = strlen (line) + 1;
+
+ table [i] = (char *) malloc (stringSize);
+ if (table [i] == NULL)
+ failedSort (fp, "out of memory");
+ DebugStatement ( mallocSize += stringSize; )
+ strcpy (table [i], line);
+ ++i;
+ }
+ }
+ /* From here on numTags is the number of lines actually stored. */
+ numTags = i;
+ fclose (fp);
+ vStringDelete (vLine);
+
+ /* Sort the lines.
+ */
+ qsort (table, numTags, sizeof (*table), cmpFunc);
+
+ writeSortedTags (table, numTags, toStdout);
+
+ PrintStatus (("sort memory: %ld bytes\n", (long) mallocSize));
+ for (i = 0 ; i < numTags ; ++i)
+ free (table [i]);
+ free (table);
+}
+
+#endif
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/sort.h b/sort.h
new file mode 100644
index 0000000..83d3273
--- /dev/null
+++ b/sort.h
@@ -0,0 +1,32 @@
+/*
+* $Id: sort.h 443 2006-05-30 04:37:13Z darren $
+*
+* Copyright (c) 1998-2002, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* External interface to sort.c
+*/
+#ifndef _SORT_H
+#define _SORT_H
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+/*
+* FUNCTION PROTOTYPES
+*/
+extern void catFile (const char *const name);
+
+#ifdef EXTERNAL_SORT
+extern void externalSortTags (const boolean toStdout);
+#else
+extern void internalSortTags (const boolean toStdout);
+#endif
+
+#endif /* _SORT_H */
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/source.mak b/source.mak
new file mode 100644
index 0000000..3e5f740
--- /dev/null
+++ b/source.mak
@@ -0,0 +1,122 @@
+# $Id: source.mak 720 2009-07-07 03:55:23Z dhiebert $
+#
+# Shared macros
+
+HEADERS = \
+ args.h ctags.h debug.h entry.h general.h get.h keyword.h \
+ main.h options.h parse.h parsers.h read.h routines.h sort.h \
+ strlist.h vstring.h
+
+SOURCES = \
+ args.c \
+ ant.c \
+ asm.c \
+ asp.c \
+ awk.c \
+ basic.c \
+ beta.c \
+ c.c \
+ cobol.c \
+ dosbatch.c \
+ eiffel.c \
+ entry.c \
+ erlang.c \
+ flex.c \
+ fortran.c \
+ get.c \
+ html.c \
+ jscript.c \
+ keyword.c \
+ lisp.c \
+ lregex.c \
+ lua.c \
+ main.c \
+ make.c \
+ matlab.c \
+ ocaml.c \
+ options.c \
+ parse.c \
+ pascal.c \
+ perl.c \
+ php.c \
+ python.c \
+ read.c \
+ rexx.c \
+ routines.c \
+ ruby.c \
+ scheme.c \
+ sh.c \
+ slang.c \
+ sml.c \
+ sort.c \
+ sql.c \
+ strlist.c \
+ tcl.c \
+ tex.c \
+ verilog.c \
+ vhdl.c \
+ vim.c \
+ yacc.c \
+ vstring.c
+
+ENVIRONMENT_HEADERS = \
+ e_amiga.h e_djgpp.h e_mac.h e_msoft.h e_os2.h e_qdos.h e_riscos.h e_vms.h
+
+ENVIRONMENT_SOURCES = \
+ argproc.c mac.c qdos.c
+
+REGEX_SOURCES = gnu_regex/regex.c
+
+REGEX_HEADERS = gnu_regex/regex.h
+
+OBJECTS = \
+ args.$(OBJEXT) \
+ ant.$(OBJEXT) \
+ asm.$(OBJEXT) \
+ asp.$(OBJEXT) \
+ awk.$(OBJEXT) \
+ basic.$(OBJEXT) \
+ beta.$(OBJEXT) \
+ c.$(OBJEXT) \
+ cobol.$(OBJEXT) \
+ dosbatch.$(OBJEXT) \
+ eiffel.$(OBJEXT) \
+ entry.$(OBJEXT) \
+ erlang.$(OBJEXT) \
+ flex.$(OBJEXT) \
+ fortran.$(OBJEXT) \
+ get.$(OBJEXT) \
+ html.$(OBJEXT) \
+ jscript.$(OBJEXT) \
+ keyword.$(OBJEXT) \
+ lisp.$(OBJEXT) \
+ lregex.$(OBJEXT) \
+ lua.$(OBJEXT) \
+ main.$(OBJEXT) \
+ make.$(OBJEXT) \
+ matlab.$(OBJEXT) \
+ ocaml.$(OBJEXT) \
+ options.$(OBJEXT) \
+ parse.$(OBJEXT) \
+ pascal.$(OBJEXT) \
+ perl.$(OBJEXT) \
+ php.$(OBJEXT) \
+ python.$(OBJEXT) \
+ read.$(OBJEXT) \
+ rexx.$(OBJEXT) \
+ routines.$(OBJEXT) \
+ ruby.$(OBJEXT) \
+ scheme.$(OBJEXT) \
+ sh.$(OBJEXT) \
+ slang.$(OBJEXT) \
+ sml.$(OBJEXT) \
+ sort.$(OBJEXT) \
+ sql.$(OBJEXT) \
+ strlist.$(OBJEXT) \
+ tcl.$(OBJEXT) \
+ tex.$(OBJEXT) \
+ verilog.$(OBJEXT) \
+ vhdl.$(OBJEXT) \
+ vim.$(OBJEXT) \
+ yacc.$(OBJEXT) \
+ vstring.$(OBJEXT)
diff --git a/sql.c b/sql.c
new file mode 100644
index 0000000..efe7e5d
--- /dev/null
+++ b/sql.c
@@ -0,0 +1,2112 @@
+/*
+ * $Id: sql.c 703 2009-03-14 22:06:12Z dfishburn $
+ *
+ * Copyright (c) 2002-2003, Darren Hiebert
+ *
+ * This source code is released for free distribution under the terms of the
+ * GNU General Public License.
+ *
+ * This module contains functions for generating tags for PL/SQL language
+ * files.
+ */
+
+/*
+ * INCLUDE FILES
+ */
+#include "general.h" /* must always come first */
+
+#include <ctype.h> /* to define isalpha () */
+#include <setjmp.h>
+#ifdef DEBUG
+#include <stdio.h>
+#endif
+
+#include "debug.h"
+#include "entry.h"
+#include "keyword.h"
+#include "parse.h"
+#include "read.h"
+#include "routines.h"
+#include "vstring.h"
+
+/*
+ * On-line "Oracle Database PL/SQL Language Reference":
+ * http://download.oracle.com/docs/cd/B28359_01/appdev.111/b28370/toc.htm
+ *
+ * Sample PL/SQL code is available from:
+ * http://www.orafaq.com/faqscrpt.htm#GENPLSQL
+ *
+ * On-line SQL Anywhere Documentation
+ * http://www.ianywhere.com/developer/product_manuals/sqlanywhere/index.html
+ */
+
+/*
+ * MACROS
+ */
+#define isType(token,t) (boolean) ((token)->type == (t))
+#define isKeyword(token,k) (boolean) ((token)->keyword == (k))
+
+/*
+ * DATA DECLARATIONS
+ */
+
+typedef enum eException { ExceptionNone, ExceptionEOF } exception_t;
+
+/*
+ * Used to specify type of keyword.
+ */
+typedef enum eKeywordId {
+ KEYWORD_NONE = -1,
+ KEYWORD_is,
+ KEYWORD_begin,
+ KEYWORD_body,
+ KEYWORD_cursor,
+ KEYWORD_declare,
+ KEYWORD_end,
+ KEYWORD_function,
+ KEYWORD_if,
+ KEYWORD_loop,
+ KEYWORD_case,
+ KEYWORD_for,
+ KEYWORD_call,
+ KEYWORD_package,
+ KEYWORD_pragma,
+ KEYWORD_procedure,
+ KEYWORD_record,
+ KEYWORD_object,
+ KEYWORD_ref,
+ KEYWORD_rem,
+ KEYWORD_return,
+ KEYWORD_returns,
+ KEYWORD_subtype,
+ KEYWORD_table,
+ KEYWORD_trigger,
+ KEYWORD_type,
+ KEYWORD_index,
+ KEYWORD_event,
+ KEYWORD_publication,
+ KEYWORD_service,
+ KEYWORD_domain,
+ KEYWORD_datatype,
+ KEYWORD_result,
+ KEYWORD_url,
+ KEYWORD_internal,
+ KEYWORD_external,
+ KEYWORD_when,
+ KEYWORD_then,
+ KEYWORD_variable,
+ KEYWORD_exception,
+ KEYWORD_at,
+ KEYWORD_on,
+ KEYWORD_primary,
+ KEYWORD_references,
+ KEYWORD_unique,
+ KEYWORD_check,
+ KEYWORD_constraint,
+ KEYWORD_foreign,
+ KEYWORD_ml_table,
+ KEYWORD_ml_table_lang,
+ KEYWORD_ml_table_dnet,
+ KEYWORD_ml_table_java,
+ KEYWORD_ml_table_chk,
+ KEYWORD_ml_conn,
+ KEYWORD_ml_conn_lang,
+ KEYWORD_ml_conn_dnet,
+ KEYWORD_ml_conn_java,
+ KEYWORD_ml_conn_chk,
+ KEYWORD_local,
+ KEYWORD_temporary,
+ KEYWORD_drop,
+ KEYWORD_view,
+ KEYWORD_synonym,
+ KEYWORD_handler,
+ KEYWORD_comment,
+ KEYWORD_create,
+ KEYWORD_go
+} keywordId;
+
+/*
+ * Used to determine whether keyword is valid for the token language and
+ * what its ID is.
+ */
+typedef struct sKeywordDesc {
+ const char *name;
+ keywordId id;
+} keywordDesc;
+
+typedef enum eTokenType {
+ TOKEN_UNDEFINED,
+ TOKEN_BLOCK_LABEL_BEGIN,
+ TOKEN_BLOCK_LABEL_END,
+ TOKEN_CHARACTER,
+ TOKEN_CLOSE_PAREN,
+ TOKEN_SEMICOLON,
+ TOKEN_COMMA,
+ TOKEN_IDENTIFIER,
+ TOKEN_KEYWORD,
+ TOKEN_OPEN_PAREN,
+ TOKEN_OPERATOR,
+ TOKEN_OTHER,
+ TOKEN_STRING,
+ TOKEN_PERIOD,
+ TOKEN_OPEN_CURLY,
+ TOKEN_CLOSE_CURLY,
+ TOKEN_OPEN_SQUARE,
+ TOKEN_CLOSE_SQUARE,
+ TOKEN_TILDE,
+ TOKEN_FORWARD_SLASH
+} tokenType;
+
+typedef struct sTokenInfoSQL {
+ tokenType type;
+ keywordId keyword;
+ vString * string;
+ vString * scope;
+ int begin_end_nest_lvl;
+ unsigned long lineNumber;
+ fpos_t filePosition;
+} tokenInfo;
+
+/*
+ * DATA DEFINITIONS
+ */
+
+static langType Lang_sql;
+
+static jmp_buf Exception;
+
+typedef enum {
+ SQLTAG_CURSOR,
+ SQLTAG_PROTOTYPE,
+ SQLTAG_FUNCTION,
+ SQLTAG_FIELD,
+ SQLTAG_LOCAL_VARIABLE,
+ SQLTAG_BLOCK_LABEL,
+ SQLTAG_PACKAGE,
+ SQLTAG_PROCEDURE,
+ SQLTAG_RECORD,
+ SQLTAG_SUBTYPE,
+ SQLTAG_TABLE,
+ SQLTAG_TRIGGER,
+ SQLTAG_VARIABLE,
+ SQLTAG_INDEX,
+ SQLTAG_EVENT,
+ SQLTAG_PUBLICATION,
+ SQLTAG_SERVICE,
+ SQLTAG_DOMAIN,
+ SQLTAG_VIEW,
+ SQLTAG_SYNONYM,
+ SQLTAG_MLTABLE,
+ SQLTAG_MLCONN,
+ SQLTAG_COUNT
+} sqlKind;
+
+static kindOption SqlKinds [] = {
+ { TRUE, 'c', "cursor", "cursors" },
+ { FALSE, 'd', "prototype", "prototypes" },
+ { TRUE, 'f', "function", "functions" },
+ { TRUE, 'F', "field", "record fields" },
+ { FALSE, 'l', "local", "local variables" },
+ { TRUE, 'L', "label", "block label" },
+ { TRUE, 'P', "package", "packages" },
+ { TRUE, 'p', "procedure", "procedures" },
+ { FALSE, 'r', "record", "records" },
+ { TRUE, 's', "subtype", "subtypes" },
+ { TRUE, 't', "table", "tables" },
+ { TRUE, 'T', "trigger", "triggers" },
+ { TRUE, 'v', "variable", "variables" },
+ { TRUE, 'i', "index", "indexes" },
+ { TRUE, 'e', "event", "events" },
+ { TRUE, 'U', "publication", "publications" },
+ { TRUE, 'R', "service", "services" },
+ { TRUE, 'D', "domain", "domains" },
+ { TRUE, 'V', "view", "views" },
+ { TRUE, 'n', "synonym", "synonyms" },
+ { TRUE, 'x', "mltable", "MobiLink Table Scripts" },
+ { TRUE, 'y', "mlconn", "MobiLink Conn Scripts" }
+};
+
+static const keywordDesc SqlKeywordTable [] = {
+ /* keyword keyword ID */
+ { "as", KEYWORD_is },
+ { "is", KEYWORD_is },
+ { "begin", KEYWORD_begin },
+ { "body", KEYWORD_body },
+ { "cursor", KEYWORD_cursor },
+ { "declare", KEYWORD_declare },
+ { "end", KEYWORD_end },
+ { "function", KEYWORD_function },
+ { "if", KEYWORD_if },
+ { "loop", KEYWORD_loop },
+ { "case", KEYWORD_case },
+ { "for", KEYWORD_for },
+ { "call", KEYWORD_call },
+ { "package", KEYWORD_package },
+ { "pragma", KEYWORD_pragma },
+ { "procedure", KEYWORD_procedure },
+ { "record", KEYWORD_record },
+ { "object", KEYWORD_object },
+ { "ref", KEYWORD_ref },
+ { "rem", KEYWORD_rem },
+ { "return", KEYWORD_return },
+ { "returns", KEYWORD_returns },
+ { "subtype", KEYWORD_subtype },
+ { "table", KEYWORD_table },
+ { "trigger", KEYWORD_trigger },
+ { "type", KEYWORD_type },
+ { "index", KEYWORD_index },
+ { "event", KEYWORD_event },
+ { "publication", KEYWORD_publication },
+ { "service", KEYWORD_service },
+ { "domain", KEYWORD_domain },
+ { "datatype", KEYWORD_datatype },
+ { "result", KEYWORD_result },
+ { "url", KEYWORD_url },
+ { "internal", KEYWORD_internal },
+ { "external", KEYWORD_external },
+ { "when", KEYWORD_when },
+ { "then", KEYWORD_then },
+ { "variable", KEYWORD_variable },
+ { "exception", KEYWORD_exception },
+ { "at", KEYWORD_at },
+ { "on", KEYWORD_on },
+ { "primary", KEYWORD_primary },
+ { "references", KEYWORD_references },
+ { "unique", KEYWORD_unique },
+ { "check", KEYWORD_check },
+ { "constraint", KEYWORD_constraint },
+ { "foreign", KEYWORD_foreign },
+ { "ml_add_table_script", KEYWORD_ml_table },
+ { "ml_add_lang_table_script", KEYWORD_ml_table_lang },
+ { "ml_add_dnet_table_script", KEYWORD_ml_table_dnet },
+ { "ml_add_java_table_script", KEYWORD_ml_table_java },
+ { "ml_add_lang_table_script_chk", KEYWORD_ml_table_chk },
+ { "ml_add_connection_script", KEYWORD_ml_conn },
+ { "ml_add_lang_connection_script", KEYWORD_ml_conn_lang },
+ { "ml_add_dnet_connection_script", KEYWORD_ml_conn_dnet },
+ { "ml_add_java_connection_script", KEYWORD_ml_conn_java },
+ { "ml_add_lang_conn_script_chk", KEYWORD_ml_conn_chk },
+ { "local", KEYWORD_local },
+ { "temporary", KEYWORD_temporary },
+ { "drop", KEYWORD_drop },
+ { "view", KEYWORD_view },
+ { "synonym", KEYWORD_synonym },
+ { "handler", KEYWORD_handler },
+ { "comment", KEYWORD_comment },
+ { "create", KEYWORD_create },
+ { "go", KEYWORD_go }
+};
+
+/*
+ * FUNCTION DECLARATIONS
+ */
+
+/* Recursive calls */
+static void parseBlock (tokenInfo *const token, const boolean local);
+static void parseKeywords (tokenInfo *const token);
+static void parseSqlFile (tokenInfo *const token);
+
+/*
+ * FUNCTION DEFINITIONS
+ */
+
+/* Report whether "c" may start an identifier: a letter, '@', or '_'.
+ * The set is deliberately permissive because some databases place few
+ * restrictions on the first character of an identifier.
+ */
+static boolean isIdentChar1 (const int c)
+{
+    if (c == '@' || c == '_')
+        return TRUE;
+    return (boolean) (isalpha (c) != 0);
+}
+
+/* Report whether "c" may appear inside an identifier: a letter, a digit, or
+ * one of '$', '@', '_', '#'.
+ */
+static boolean isIdentChar (const int c)
+{
+    switch (c)
+    {
+        case '$':
+        case '@':
+        case '_':
+        case '#':
+            return TRUE;
+        default:
+            return (boolean) (isalpha (c) || isdigit (c));
+    }
+}
+
+/* Report whether "token" terminates a command. The recognized terminators
+ * are the ';', '~' and '/' tokens and the keyword "go". More can be added by
+ * extending readToken together with the token or keyword enumerations.
+ */
+static boolean isCmdTerm (tokenInfo *const token)
+{
+    DebugStatement (
+        debugPrintf (DEBUG_PARSE
+            , "\n isCmdTerm: token same tt:%d tk:%d\n"
+            , token->type
+            , token->keyword
+            );
+        );
+
+    if (isKeyword (token, KEYWORD_go))
+        return TRUE;
+
+    switch (token->type)
+    {
+        case TOKEN_SEMICOLON:
+        case TOKEN_TILDE:
+        case TOKEN_FORWARD_SLASH:
+            return TRUE;
+        default:
+            return FALSE;
+    }
+}
+
+/* Report whether "token" is the END keyword that closes the BEGIN ... END
+ * block at nesting level "nest_lvl".
+ *
+ * Some SQL dialects let a statement end without one of the usual command
+ * delimiters (';', '~', '/', "go") when it is the last statement of such a
+ * block. Reaching the END of the matching nesting level therefore counts as
+ * the statement having been terminated. A nesting level of zero or less
+ * never matches.
+ */
+static boolean isMatchedEnd(tokenInfo *const token, int nest_lvl)
+{
+    return (boolean) (nest_lvl > 0 &&
+                      isKeyword (token, KEYWORD_end) &&
+                      token->begin_end_nest_lvl == nest_lvl);
+}
+
+/* Register every entry of SqlKeywordTable as a keyword of the SQL language. */
+static void buildSqlKeywordHash (void)
+{
+    const keywordDesc *entry = SqlKeywordTable;
+    const keywordDesc *const end = SqlKeywordTable +
+        sizeof (SqlKeywordTable) / sizeof (SqlKeywordTable [0]);
+
+    for ( ; entry != end ; ++entry)
+        addKeyword (entry->name, Lang_sql, (int) entry->id);
+}
+
+static tokenInfo *newToken (void)
+{
+ tokenInfo *const token = xMalloc (1, tokenInfo);
+
+ token->type = TOKEN_UNDEFINED;
+ token->keyword = KEYWORD_NONE;
+ token->string = vStringNew ();
+ token->scope = vStringNew ();
+ token->begin_end_nest_lvl = 0;
+ token->lineNumber = getSourceLineNumber ();
+ token->filePosition = getInputFilePosition ();
+
+ return token;
+}
+
+/* Release a token: first the two strings it owns, then the token itself. */
+static void deleteToken (tokenInfo *const token)
+{
+    vStringDelete (token->scope);
+    vStringDelete (token->string);
+    eFree (token);
+}
+
+/*
+ * Tag generation functions
+ */
+
+/* Emit a tag named after the token's string, using the line number and file
+ * position recorded in the token. Nothing is emitted when "kind" is
+ * disabled.
+ */
+static void makeConstTag (tokenInfo *const token, const sqlKind kind)
+{
+    tagEntryInfo entry;
+
+    if (! SqlKinds [kind].enabled)
+        return;
+
+    initTagEntry (&entry, vStringValue (token->string));
+    entry.kind         = SqlKinds [kind].letter;
+    entry.kindName     = SqlKinds [kind].name;
+    entry.filePosition = token->filePosition;
+    entry.lineNumber   = token->lineNumber;
+
+    makeTagEntry (&entry);
+}
+
+/* Emit a tag for the token, qualified by its scope when it has one. Nothing
+ * happens when "kind" is disabled.
+ *
+ * Note that when the token carries a scope, the token's own string is
+ * rewritten to "<scope>.<name>" before the tag is made.
+ */
+static void makeSqlTag (tokenInfo *const token, const sqlKind kind)
+{
+    if (! SqlKinds [kind].enabled)
+        return;
+
+    if (vStringLength (token->scope) > 0)
+    {
+        vString *const qualified = vStringNew ();
+
+        vStringCopy (qualified, token->scope);
+        vStringCatS (qualified, ".");
+        vStringCatS (qualified, vStringValue (token->string));
+        vStringTerminate (qualified);
+        vStringCopy (token->string, qualified);
+        vStringDelete (qualified);
+    }
+    makeConstTag (token, kind);
+}
+
+/*
+ * Parsing functions
+ */
+
+/* Append input characters to "string" up to, but not including, the next
+ * occurrence of "delimiter" or the end of the input. The delimiter itself is
+ * consumed. Backslash is not treated as an escape character.
+ */
+static void parseString (vString *const string, const int delimiter)
+{
+    int c;
+
+    for (c = fileGetc () ; c != EOF && c != delimiter ; c = fileGetc ())
+        vStringPut (string, c);
+    vStringTerminate (string);
+}
+
+/* Read an identifier that begins with "firstChar" and append it to "string".
+ * The character that ends the identifier is pushed back onto the input
+ * unless it is white space.
+ */
+static void parseIdentifier (vString *const string, const int firstChar)
+{
+    int c;
+
+    Assert (isIdentChar1 (firstChar));
+    /* The first character is stored unconditionally; later ones must pass
+     * isIdentChar.
+     */
+    vStringPut (string, firstChar);
+    for (c = fileGetc () ; isIdentChar (c) ; c = fileGetc ())
+        vStringPut (string, c);
+    vStringTerminate (string);
+    if (!isspace (c))
+        fileUngetc (c); /* unget non-identifier character */
+}
+
+/*
+ * Reads the next lexical token from the input into "token".
+ *
+ * The token's type, keyword and string are reset first.  Tabs, spaces and
+ * newlines are skipped, and the token's line number / file position are
+ * refreshed for every character examined.  Comments ("--", "//", and
+ * slash-star blocks) and lines starting with the REM keyword are skipped
+ * and scanning restarts at getNextChar.
+ *
+ * On end of file this function does not return: it performs
+ * longjmp (Exception, ExceptionEOF).
+ */
+static void readToken (tokenInfo *const token)
+{
+ int c;
+
+ token->type = TOKEN_UNDEFINED;
+ token->keyword = KEYWORD_NONE;
+ vStringClear (token->string);
+
+getNextChar:
+ do
+ {
+ c = fileGetc ();
+ token->lineNumber = getSourceLineNumber ();
+ token->filePosition = getInputFilePosition ();
+ /*
+ * Added " to the list of ignores, not sure what this
+ * might break but it gets by this issue:
+ * create table "t1" (...)
+ *
+ * Darren, the code passes all my tests for both
+ * Oracle and SQL Anywhere, but maybe you can tell me
+ * what this may effect.
+ */
+ }
+ while (c == '\t' || c == ' ' || c == '\n');
+
+ switch (c)
+ {
+ case EOF: longjmp (Exception, (int)ExceptionEOF); break;
+ case '(': token->type = TOKEN_OPEN_PAREN; break;
+ case ')': token->type = TOKEN_CLOSE_PAREN; break;
+ case ';': token->type = TOKEN_SEMICOLON; break;
+ case '.': token->type = TOKEN_PERIOD; break;
+ case ',': token->type = TOKEN_COMMA; break;
+ case '{': token->type = TOKEN_OPEN_CURLY; break;
+ case '}': token->type = TOKEN_CLOSE_CURLY; break;
+ case '~': token->type = TOKEN_TILDE; break;
+ case '[': token->type = TOKEN_OPEN_SQUARE; break;
+ case ']': token->type = TOKEN_CLOSE_SQUARE; break;
+
+ /* Single- or double-quoted text: the contents (without quotes) become the token string */
+ case '\'':
+ case '"':
+ token->type = TOKEN_STRING;
+ parseString (token->string, c);
+ token->lineNumber = getSourceLineNumber ();
+ token->filePosition = getInputFilePosition ();
+ break;
+
+ case '-':
+ c = fileGetc ();
+ if (c == '-') /* -- is this the start of a comment? */
+ {
+ fileSkipToCharacter ('\n');
+ goto getNextChar;
+ }
+ else
+ {
+ /* A lone '-' is an operator; a following whitespace character is dropped */
+ if (!isspace (c))
+ fileUngetc (c);
+ token->type = TOKEN_OPERATOR;
+ }
+ break;
+
+ /* "<<" and ">>" delimit block labels; a single '<' or '>' is left undefined */
+ case '<':
+ case '>':
+ {
+ const int initial = c;
+ int d = fileGetc ();
+ if (d == initial)
+ {
+ if (initial == '<')
+ token->type = TOKEN_BLOCK_LABEL_BEGIN;
+ else
+ token->type = TOKEN_BLOCK_LABEL_END;
+ }
+ else
+ {
+ fileUngetc (d);
+ token->type = TOKEN_UNDEFINED;
+ }
+ break;
+ }
+
+ /* Backslash: a following backslash, quote or whitespace character is consumed with it */
+ case '\\':
+ c = fileGetc ();
+ if (c != '\\' && c != '"' && c != '\'' && !isspace (c))
+ fileUngetc (c);
+ token->type = TOKEN_CHARACTER;
+ token->lineNumber = getSourceLineNumber ();
+ token->filePosition = getInputFilePosition ();
+ break;
+
+ case '/':
+ {
+ int d = fileGetc ();
+ if ( (d != '*') && /* is this the start of a comment? */
+ (d != '/') ) /* is a one line comment? */
+ {
+ token->type = TOKEN_FORWARD_SLASH;
+ fileUngetc (d);
+ }
+ else
+ {
+ if (d == '*')
+ {
+ /* Block comment: hop from '*' to '*' until one is followed by '/' */
+ do
+ {
+ fileSkipToCharacter ('*');
+ c = fileGetc ();
+ if (c == '/')
+ break;
+ else
+ fileUngetc (c);
+ } while (c != EOF && c != '\0');
+ goto getNextChar;
+ }
+ else if (d == '/') /* is this the start of a comment? */
+ {
+ fileSkipToCharacter ('\n');
+ goto getNextChar;
+ }
+ }
+ break;
+ }
+
+ default:
+ if (! isIdentChar1 (c))
+ token->type = TOKEN_UNDEFINED;
+ else
+ {
+ parseIdentifier (token->string, c);
+ token->lineNumber = getSourceLineNumber ();
+ token->filePosition = getInputFilePosition ();
+ token->keyword = analyzeToken (token->string, Lang_sql);
+ /* REM starts a comment that runs to the end of the line */
+ if (isKeyword (token, KEYWORD_rem))
+ {
+ vStringClear (token->string);
+ fileSkipToCharacter ('\n');
+ goto getNextChar;
+ }
+ else if (isKeyword (token, KEYWORD_NONE))
+ token->type = TOKEN_IDENTIFIER;
+ else
+ token->type = TOKEN_KEYWORD;
+ }
+ break;
+ }
+}
+
+/*
+ * Token parsing functions
+ */
+
+/*
+ * static void addContext (tokenInfo* const parent, const tokenInfo* const child)
+ * {
+ * if (vStringLength (parent->string) > 0)
+ * {
+ * vStringCatS (parent->string, ".");
+ * }
+ * vStringCatS (parent->string, vStringValue(child->string));
+ * vStringTerminate(parent->string);
+ * }
+ */
+
+/*
+ * Appends the text of "extra" to the scope of "token", inserting a "."
+ * separator first when the scope is not empty.
+ */
+static void addToScope (tokenInfo* const token, vString* const extra)
+{
+    vString *const scope = token->scope;
+
+    if (vStringLength (scope) > 0)
+        vStringCatS (scope, ".");
+
+    vStringCatS (scope, vStringValue (extra));
+    vStringTerminate (scope);
+}
+
+/*
+ * Scanning functions
+ */
+
+/*
+ * Advances "token" until it has the requested type.  Returns immediately,
+ * without reading, when the current token already has that type.
+ */
+static void findToken (tokenInfo *const token, const tokenType type)
+{
+    for (;;)
+    {
+        if (isType (token, type))
+            break;
+        readToken (token);
+    }
+}
+
+/*
+ * Advances "token" to the next command terminator, or to an END matched
+ * against the BEGIN/END nesting level the token had on entry.
+ *
+ * When "check_first" is set and the current token is already a command
+ * terminator, nothing is read.  Otherwise at least one token is read.
+ */
+static void findCmdTerm (tokenInfo *const token, const boolean check_first)
+{
+    const int entry_nest_lvl = token->begin_end_nest_lvl;
+
+    if (check_first && isCmdTerm (token))
+        return;
+
+    for (;;)
+    {
+        readToken (token);
+        if (isCmdTerm (token))
+            break;
+        if (isMatchedEnd (token, entry_nest_lvl))
+            break;
+    }
+}
+
+/*
+ * If "token" is an opening parenthesis, curly brace or square bracket,
+ * skips to the matching closing token, honouring nesting as in
+ *     ( name varchar(30), text binary(10) )
+ * and then reads one more token, so that on return "token" is the token
+ * following the closing one.  Any other token type is left untouched.
+ */
+static void skipToMatched(tokenInfo *const token)
+{
+    tokenType opener;
+    tokenType closer;
+    int depth = 1;    /* accounts for the opening token we start on */
+
+    switch (token->type)
+    {
+        case TOKEN_OPEN_PAREN:
+            opener = TOKEN_OPEN_PAREN;
+            closer = TOKEN_CLOSE_PAREN;
+            break;
+        case TOKEN_OPEN_CURLY:
+            opener = TOKEN_OPEN_CURLY;
+            closer = TOKEN_CLOSE_CURLY;
+            break;
+        case TOKEN_OPEN_SQUARE:
+            opener = TOKEN_OPEN_SQUARE;
+            closer = TOKEN_CLOSE_SQUARE;
+            break;
+        default:
+            return;
+    }
+
+    /* Consume tokens until the opener we started on has been balanced */
+    do
+    {
+        readToken (token);
+        if (isType (token, opener))
+            depth++;
+        else if (isType (token, closer))
+            depth--;
+    } while (depth > 0);
+
+    /* Step past the matching closing token */
+    readToken (token);
+}
+
+/*
+ * Skips a parenthesised argument list, if "token" is positioned on its
+ * opening parenthesis.  Argument lists may carry fully declared datatypes
+ * such as
+ *     ( name varchar(30), text binary(10) )
+ * so nested parentheses are handled by skipToMatched.
+ */
+static void skipArgumentList (tokenInfo *const token)
+{
+    if (! isType (token, TOKEN_OPEN_PAREN))
+        return;    /* no argument list here */
+
+    skipToMatched (token);
+}
+
+/*
+ * Parses a FUNCTION or PROCEDURE declaration or definition.  On entry
+ * "token" must be the FUNCTION or PROCEDURE keyword.  A declaration that
+ * ends in a command terminator is tagged as a prototype; otherwise the
+ * sub-program is tagged with its own kind and, for IS/BEGIN bodies, the
+ * body is parsed with the sub-program name added to the scope.
+ */
+static void parseSubProgram (tokenInfo *const token)
+{
+    tokenInfo *const name = newToken ();
+
+    /*
+     * This must handle both prototypes and the body of
+     * the procedures.
+     *
+     * Prototype:
+     *     FUNCTION func_name RETURN integer;
+     *     PROCEDURE proc_name( parameters );
+     * Procedure
+     *     FUNCTION GET_ML_USERNAME RETURN VARCHAR2
+     *     IS
+     *     BEGIN
+     *         RETURN v_sync_user_id;
+     *     END GET_ML_USERNAME;
+     *
+     *     PROCEDURE proc_name( parameters )
+     *     IS
+     *     BEGIN
+     *     END;
+     *     CREATE PROCEDURE proc_name( parameters )
+     *         EXTERNAL NAME ... ;
+     *     CREATE PROCEDURE proc_name( parameters )
+     *     BEGIN
+     *     END;
+     *
+     *     CREATE FUNCTION f_GetClassName(
+     *         IN @object VARCHAR(128)
+     *        ,IN @code   VARCHAR(128)
+     *     )
+     *     RETURNS VARCHAR(200)
+     *     DETERMINISTIC
+     *     BEGIN
+     *
+     *         IF( @object = 'user_state' ) THEN
+     *             SET something = something;
+     *         END IF;
+     *
+     *         RETURN @name;
+     *     END;
+     */
+    const sqlKind kind = isKeyword (token, KEYWORD_function) ?
+        SQLTAG_FUNCTION : SQLTAG_PROCEDURE;
+    Assert (isKeyword (token, KEYWORD_function) ||
+            isKeyword (token, KEYWORD_procedure));
+    readToken (name);
+    readToken (token);
+    if (isType (token, TOKEN_PERIOD))
+    {
+        /* owner.name: keep only the part after the period */
+        readToken (name);
+        readToken (token);
+    }
+    if (isType (token, TOKEN_OPEN_PAREN))
+    {
+        /* Reads to the next token after the TOKEN_CLOSE_PAREN */
+        skipArgumentList(token);
+    }
+
+    if (kind == SQLTAG_FUNCTION)
+    {
+        if (isKeyword (token, KEYWORD_return) || isKeyword (token, KEYWORD_returns))
+        {
+            /* Read datatype */
+            readToken (token);
+            /*
+             * Read token after which could be the
+             * command terminator if a prototype
+             * or an open parenthesis
+             */
+            readToken (token);
+            if (isType (token, TOKEN_OPEN_PAREN))
+            {
+                /* Reads to the next token after the TOKEN_CLOSE_PAREN */
+                skipArgumentList(token);
+            }
+        }
+    }
+    if( isCmdTerm (token) )
+    {
+        makeSqlTag (name, SQLTAG_PROTOTYPE);
+    }
+    else
+    {
+        /* Skip any remaining clauses until the body or a terminator */
+        while (!(isKeyword (token, KEYWORD_is) ||
+                 isKeyword (token, KEYWORD_begin) ||
+                 isKeyword (token, KEYWORD_at) ||
+                 isKeyword (token, KEYWORD_internal) ||
+                 isKeyword (token, KEYWORD_external) ||
+                 isKeyword (token, KEYWORD_url) ||
+                 isCmdTerm (token)
+                )
+              )
+        {
+            if ( isKeyword (token, KEYWORD_result) )
+            {
+                readToken (token);
+                if (isType (token, TOKEN_OPEN_PAREN))
+                {
+                    /* Reads to the next token after the TOKEN_CLOSE_PAREN */
+                    skipArgumentList(token);
+                }
+            } else {
+                readToken (token);
+            }
+        }
+        if (isKeyword (token, KEYWORD_at) ||
+            isKeyword (token, KEYWORD_url) ||
+            isKeyword (token, KEYWORD_internal) ||
+            isKeyword (token, KEYWORD_external) )
+        {
+            addToScope(token, name->string);
+            /*
+             * Tag only when the name is an identifier, a string or a
+             * keyword used as a name.  The keyword test is made on
+             * "name": "token" is always a keyword in this branch, so
+             * testing it would make the whole guard vacuous.
+             */
+            if (isType (name, TOKEN_IDENTIFIER) ||
+                isType (name, TOKEN_STRING) ||
+                !isKeyword (name, KEYWORD_NONE)
+               )
+                makeSqlTag (name, kind);
+
+            vStringClear (token->scope);
+        }
+        if (isKeyword (token, KEYWORD_is) ||
+            isKeyword (token, KEYWORD_begin) )
+        {
+            addToScope(token, name->string);
+            /* Same guard as above: the keyword test applies to "name" */
+            if (isType (name, TOKEN_IDENTIFIER) ||
+                isType (name, TOKEN_STRING) ||
+                !isKeyword (name, KEYWORD_NONE)
+               )
+                makeSqlTag (name, kind);
+
+            parseBlock (token, TRUE);
+            vStringClear (token->scope);
+        }
+    }
+    deleteToken (name);
+}
+
+/*
+ * Parses a parenthesised, comma separated list of fields and emits an
+ * SQLTAG_FIELD tag for the first identifier or string of each entry,
+ * except entries that begin with a constraint keyword (PRIMARY,
+ * REFERENCES, UNIQUE, CHECK, CONSTRAINT, FOREIGN).  If "token" is not
+ * already the opening parenthesis, one token is read first.  Returns with
+ * "token" on a closing parenthesis.
+ */
+static void parseRecord (tokenInfo *const token)
+{
+ /*
+ * Make it a bit forgiving, this is called from
+ * multiple functions, parseTable, parseType
+ */
+ if (!isType (token, TOKEN_OPEN_PAREN))
+ readToken (token);
+
+ Assert (isType (token, TOKEN_OPEN_PAREN));
+ do
+ {
+ if ( isType (token, TOKEN_COMMA) || isType (token, TOKEN_OPEN_PAREN) )
+ readToken (token);
+
+ /*
+ * Create table statements can end with various constraints
+ * which must be excluded from the SQLTAG_FIELD.
+ * create table t1 (
+ * c1 integer,
+ * c2 char(30),
+ * c3 numeric(10,5),
+ * c4 integer,
+ * constraint whatever,
+ * primary key(c1),
+ * foreign key (),
+ * check ()
+ * )
+ */
+ if (! (isKeyword(token, KEYWORD_primary) ||
+ isKeyword(token, KEYWORD_references) ||
+ isKeyword(token, KEYWORD_unique) ||
+ isKeyword(token, KEYWORD_check) ||
+ isKeyword(token, KEYWORD_constraint) ||
+ isKeyword(token, KEYWORD_foreign) ) )
+ {
+ if (isType (token, TOKEN_IDENTIFIER) ||
+ isType (token, TOKEN_STRING))
+ makeSqlTag (token, SQLTAG_FIELD);
+ }
+
+ /* Skip the rest of this entry (datatype, defaults, ...) */
+ while (!(isType (token, TOKEN_COMMA) ||
+ isType (token, TOKEN_CLOSE_PAREN) ||
+ isType (token, TOKEN_OPEN_PAREN)
+ ))
+ {
+ readToken (token);
+ /*
+ * A table structure can look like this:
+ * create table t1 (
+ * c1 integer,
+ * c2 char(30),
+ * c3 numeric(10,5),
+ * c4 integer
+ * )
+ * We can't just look for a COMMA or CLOSE_PAREN
+ * since that will not deal with the numeric(10,5)
+ * case. So we need to skip the argument list
+ * when we find an open paren.
+ */
+ if (isType (token, TOKEN_OPEN_PAREN))
+ {
+ /* Reads to the next token after the TOKEN_CLOSE_PAREN */
+ skipArgumentList(token);
+ }
+ }
+ } while (! isType (token, TOKEN_CLOSE_PAREN));
+}
+
+/*
+ * Parses "TYPE name IS ..." and tags the name as a record (RECORD or
+ * OBJECT, whose fields are then parsed), a table (TABLE) or a cursor
+ * (REF CURSOR).  The scope of "token" on entry is restored before
+ * returning.
+ */
+static void parseType (tokenInfo *const token)
+{
+    tokenInfo *const name = newToken ();
+    vString *const savedScope = vStringNew ();
+
+    vStringCopy (savedScope, token->scope);
+    /* The type name inherits whatever scope is currently set */
+    addToScope (name, token->scope);
+    readToken (name);
+    if (isType (name, TOKEN_IDENTIFIER))
+    {
+        readToken (token);
+        if (isKeyword (token, KEYWORD_is))
+        {
+            readToken (token);
+            addToScope (token, name->string);
+
+            if (isKeyword (token, KEYWORD_record) ||
+                isKeyword (token, KEYWORD_object))
+            {
+                makeSqlTag (name, SQLTAG_RECORD);
+                parseRecord (token);
+            }
+            else if (isKeyword (token, KEYWORD_table))
+            {
+                makeSqlTag (name, SQLTAG_TABLE);
+            }
+            else if (isKeyword (token, KEYWORD_ref))
+            {
+                readToken (token);
+                if (isKeyword (token, KEYWORD_cursor))
+                    makeSqlTag (name, SQLTAG_CURSOR);
+            }
+
+            vStringClear (token->scope);
+        }
+    }
+    vStringCopy (token->scope, savedScope);
+    vStringDelete (savedScope);
+    deleteToken (name);
+}
+
+/*
+ * Reads the next token and, when it is an identifier or a string, tags it
+ * with the given kind.
+ */
+static void parseSimple (tokenInfo *const token, const sqlKind kind)
+{
+    readToken (token);
+
+    if (! isType (token, TOKEN_IDENTIFIER) &&
+        ! isType (token, TOKEN_STRING))
+        return;
+
+    makeSqlTag (token, kind);
+}
+
+/*
+ * Parses a PL/SQL style declaration section:
+ *     IS|AS
+ *     [declare]
+ *         CURSOR curname ...
+ *         varname1 datatype;
+ *         varname2 datatype;
+ *     begin
+ * Each declaration is dispatched on its leading keyword; a plain
+ * identifier is tagged as a local variable when "local" is set and as a
+ * variable otherwise.  Parsing stops on BEGIN or END.
+ */
+static void parseDeclare (tokenInfo *const token, const boolean local)
+{
+    if (isKeyword (token, KEYWORD_declare))
+        readToken (token);
+
+    while (! (isKeyword (token, KEYWORD_begin) ||
+              isKeyword (token, KEYWORD_end)))
+    {
+        switch (token->keyword)
+        {
+            case KEYWORD_function:
+            case KEYWORD_procedure:
+                parseSubProgram (token);
+                break;
+
+            case KEYWORD_cursor:
+                parseSimple (token, SQLTAG_CURSOR);
+                break;
+
+            case KEYWORD_subtype:
+                parseSimple (token, SQLTAG_SUBTYPE);
+                break;
+
+            case KEYWORD_trigger:
+                parseSimple (token, SQLTAG_TRIGGER);
+                break;
+
+            case KEYWORD_type:
+                parseType (token);
+                break;
+
+            default:
+                if (isType (token, TOKEN_IDENTIFIER))
+                    makeSqlTag (token, local ? SQLTAG_LOCAL_VARIABLE
+                                             : SQLTAG_VARIABLE);
+                break;
+        }
+        /* Every declaration ends with a semicolon; step past it */
+        findToken (token, TOKEN_SEMICOLON);
+        readToken (token);
+    }
+}
+
+/*
+ * Parses ANSI style declarations, which follow the BEGIN and repeat the
+ * DECLARE keyword for every item:
+ *     BEGIN
+ *         DECLARE varname1 datatype;
+ *         DECLARE varname2 datatype;
+ *         ...
+ * This differs from PL/SQL, where DECLARE precedes the BEGIN block and
+ * the keyword is not repeated.  The token is not advanced when the
+ * current token is not DECLARE.
+ */
+static void parseDeclareANSI (tokenInfo *const token, const boolean local)
+{
+    tokenInfo *const type = newToken ();
+
+    while (isKeyword (token, KEYWORD_declare))
+    {
+        readToken (token);
+        readToken (type);
+
+        if (isKeyword (type, KEYWORD_cursor))
+        {
+            makeSqlTag (token, SQLTAG_CURSOR);
+        }
+        else if (isKeyword (token, KEYWORD_local) &&
+                 isKeyword (type, KEYWORD_temporary))
+        {
+            /*
+             * DECLARE LOCAL TEMPORARY TABLE table_name (
+             *     c1 int,
+             *     c2 int
+             * );
+             */
+            readToken (token);
+            if (isKeyword (token, KEYWORD_table))
+            {
+                readToken (token);
+                if (isType (token, TOKEN_IDENTIFIER) ||
+                    isType (token, TOKEN_STRING))
+                    makeSqlTag (token, SQLTAG_TABLE);
+            }
+        }
+        else if (isType (token, TOKEN_IDENTIFIER) ||
+                 isType (token, TOKEN_STRING))
+        {
+            makeSqlTag (token, local ? SQLTAG_LOCAL_VARIABLE
+                                     : SQLTAG_VARIABLE);
+        }
+
+        /* Step past the semicolon ending this declaration */
+        findToken (token, TOKEN_SEMICOLON);
+        readToken (token);
+    }
+    deleteToken (type);
+}
+
+/*
+ * Parses a block label of the form
+ *     <<label_name>>
+ * which may precede a DECLARE/BEGIN block.  On entry "token" must be the
+ * "<<" token.  When an identifier follows, it is tagged as a block label
+ * and one more token (the end of the label) is read.
+ */
+static void parseLabel (tokenInfo *const token)
+{
+    Assert (isType (token, TOKEN_BLOCK_LABEL_BEGIN));
+
+    readToken (token);
+    if (! isType (token, TOKEN_IDENTIFIER))
+        return;
+
+    makeSqlTag (token, SQLTAG_BLOCK_LABEL);
+    readToken (token); /* read end of label */
+}
+
+/*
+ * Parses a sequence of statements, starting with the current token, until
+ * an END keyword is the current token (or until a non-ANSI IF ... BEGIN
+ * block is followed directly by a command terminator, which sets
+ * stmtTerm).  Block labels, nested blocks, CREATE statements and the
+ * IF / LOOP / CASE / FOR constructs are handled recursively; any other
+ * statement is skipped up to its command terminator or the next END.
+ *
+ * NOTE(review): isAnsi is assigned but never read in this function.
+ */
+static void parseStatements (tokenInfo *const token)
+{
+ boolean isAnsi = TRUE;
+ boolean stmtTerm = FALSE;
+ do
+ {
+ if (isType (token, TOKEN_BLOCK_LABEL_BEGIN))
+ parseLabel (token);
+ else
+ {
+ switch (token->keyword)
+ {
+ case KEYWORD_exception:
+ /*
+ * EXCEPTION
+ * <exception handler>;
+ *
+ * Where an exception handler could be:
+ * BEGIN
+ * WHEN OTHERS THEN
+ * x := x + 3;
+ * END;
+ * In this case we need to skip this keyword and
+ * move on to the next token without reading until
+ * TOKEN_SEMICOLON;
+ */
+ readToken (token);
+ continue;
+
+ case KEYWORD_when:
+ /*
+ * WHEN statements can be used in exception clauses
+ * and CASE statements. The CASE statement should skip
+ * these given below we skip over to an END statement.
+ * But for an exception clause, we can have:
+ * EXCEPTION
+ * WHEN OTHERS THEN
+ * BEGIN
+ * x := x + 3;
+ * END;
+ * If we skip to the TOKEN_SEMICOLON, we miss the begin
+ * of a nested BEGIN END block. So read the next token
+ * after the THEN and restart the LOOP.
+ */
+ while (! isKeyword (token, KEYWORD_then))
+ readToken (token);
+ readToken (token);
+ continue;
+
+ case KEYWORD_if:
+ /*
+ * We do not want to look for a ; since for an empty
+ * IF block, it would skip over the END.
+ * IF...THEN
+ * END IF;
+ *
+ * or non-ANSI
+ * IF ...
+ * BEGIN
+ * END
+ */
+ while ( ! isKeyword (token, KEYWORD_then) &&
+ ! isKeyword (token, KEYWORD_begin) )
+ {
+ readToken (token);
+ }
+
+ if( isKeyword (token, KEYWORD_begin ) )
+ {
+ isAnsi = FALSE;
+ parseBlock(token, FALSE);
+
+ /*
+ * Handle the non-Ansi IF blocks.
+ * parseBlock consumes the END, so if the next
+ * token in a command terminator (like GO)
+ * we know we are done with this statement.
+ */
+ if ( isCmdTerm (token) )
+ stmtTerm = TRUE;
+ }
+ else
+ {
+ readToken (token);
+ parseStatements (token);
+ /*
+ * parseStatements returns when it finds an END, an IF
+ * should follow the END for ANSI anyway.
+ * IF...THEN
+ * END IF;
+ */
+ if( isKeyword (token, KEYWORD_end ) )
+ readToken (token);
+
+ if( ! isKeyword (token, KEYWORD_if ) )
+ {
+ /*
+ * Well we need to do something here.
+ * There are lots of different END statements
+ * END;
+ * END CASE;
+ * ENDIF;
+ * ENDCASE;
+ */
+ }
+ }
+ break;
+
+ case KEYWORD_loop:
+ case KEYWORD_case:
+ case KEYWORD_for:
+ /*
+ * LOOP...
+ * END LOOP;
+ *
+ * CASE
+ * WHEN '1' THEN
+ * END CASE;
+ *
+ * FOR loop_name AS cursor_name CURSOR FOR ...
+ * END FOR;
+ */
+ readToken (token);
+ parseStatements (token);
+
+ if( isKeyword (token, KEYWORD_end ) )
+ readToken (token);
+
+ break;
+
+ case KEYWORD_create:
+ readToken (token);
+ parseKeywords(token);
+ break;
+
+ case KEYWORD_declare:
+ case KEYWORD_begin:
+ parseBlock (token, TRUE);
+ break;
+
+ case KEYWORD_end:
+ break;
+
+ default:
+ readToken (token);
+ break;
+ }
+ /*
+ * Not all statements must end in a semi-colon
+ * begin
+ * if current publisher <> 'publish' then
+ * signal UE_FailStatement
+ * end if
+ * end;
+ * The last statement prior to an end ("signal" above) does
+ * not need a semi-colon, nor does the end if, since it is
+ * also the last statement prior to the end of the block.
+ *
+ * So we must read to the first semi-colon or an END block
+ */
+ while ( ! stmtTerm &&
+ ! ( isKeyword (token, KEYWORD_end) ||
+ (isCmdTerm(token)) )
+ )
+ {
+ readToken (token);
+
+ /* Bracketed groups are skipped as a unit */
+ if (isType (token, TOKEN_OPEN_PAREN) ||
+ isType (token, TOKEN_OPEN_CURLY) ||
+ isType (token, TOKEN_OPEN_SQUARE) )
+ skipToMatched (token);
+
+ }
+ }
+ /*
+ * We assumed earlier all statements ended with a command terminator.
+ * See comment above, now, only read if the current token
+ * is not a command terminator.
+ */
+ if ( isCmdTerm(token) )
+ {
+ readToken (token);
+ }
+ } while (! isKeyword (token, KEYWORD_end) && ! stmtTerm );
+}
+
+/*
+ * Parses a block: an optional <<label>>, an optional declaration section
+ * (anything before BEGIN is handed to parseDeclare), and then
+ * BEGIN ... END.  Variables declared in the block are tagged as local
+ * variables when "local" is set.  The token's begin_end_nest_lvl is
+ * incremented while the statements of the block are parsed.  After the
+ * END, one further token is read, so callers receive the token that
+ * follows END (which may or may not be a command terminator).
+ */
+static void parseBlock (tokenInfo *const token, const boolean local)
+{
+ if (isType (token, TOKEN_BLOCK_LABEL_BEGIN))
+ {
+ parseLabel (token);
+ readToken (token);
+ }
+ if (! isKeyword (token, KEYWORD_begin))
+ {
+ readToken (token);
+ /*
+ * These are Oracle style declares which generally come
+ * between an IS/AS and BEGIN block.
+ */
+ parseDeclare (token, local);
+ }
+ if (isKeyword (token, KEYWORD_begin))
+ {
+ readToken (token);
+ /*
+ * Check for ANSI declarations which always follow
+ * a BEGIN statement. This routine will not advance
+ * the token if none are found.
+ */
+ parseDeclareANSI (token, local);
+ token->begin_end_nest_lvl++;
+ while (! isKeyword (token, KEYWORD_end))
+ {
+ parseStatements (token);
+ }
+ token->begin_end_nest_lvl--;
+
+ /*
+ * Read the next token (we will assume
+ * it is the command delimiter)
+ */
+ readToken (token);
+
+ /*
+ * Check if the END block is terminated
+ */
+ if ( !isCmdTerm (token) )
+ {
+ /*
+ * Not sure what to do here at the moment.
+ * I think the routine that calls parseBlock
+ * must expect the next token has already
+ * been read since it is possible this
+ * token is not a command delimiter.
+ */
+ /* findCmdTerm (token, FALSE); */
+ }
+ }
+}
+
+/*
+ * Parses a package specification or body.  Packages can be written as:
+ *     CREATE OR REPLACE PACKAGE pkg_name AS
+ *     CREATE OR REPLACE PACKAGE owner.pkg_name AS
+ *     CREATE OR REPLACE PACKAGE BODY pkg_name AS
+ *     CREATE OR REPLACE PACKAGE BODY owner.pkg_name AS
+ * The package name is tagged and its contents are parsed as a block.
+ */
+static void parsePackage (tokenInfo *const token)
+{
+    tokenInfo *const name = newToken ();
+
+    readToken (name);
+    if (isKeyword (name, KEYWORD_body))
+    {
+        /* BODY is ignored: bodies and specifications are handled alike */
+        readToken (name);
+    }
+
+    /* Scan up to the IS keyword; after "owner." the real name follows */
+    for (;;)
+    {
+        if (isKeyword (token, KEYWORD_is))
+            break;
+        readToken (token);
+        if (isType (token, TOKEN_PERIOD))
+            readToken (name);
+    }
+
+    /* The loop above only ends with the IS keyword as the current token */
+    if (isType (name, TOKEN_IDENTIFIER) ||
+        isType (name, TOKEN_STRING))
+        makeSqlTag (name, SQLTAG_PACKAGE);
+    parseBlock (token, FALSE);
+
+    findCmdTerm (token, FALSE);
+    deleteToken (name);
+}
+
+/*
+ * Parses a CREATE TABLE statement.  The table name may be qualified by up
+ * to two prefixes (database and/or owner), and each part may be wrapped in
+ * square brackets; only the last part is kept as the name.  When a column
+ * list follows, the table is tagged and its columns are parsed with the
+ * table name as scope.  When the AT keyword follows instead, only the
+ * table is tagged.  Finally the rest of the statement is skipped.
+ */
+static void parseTable (tokenInfo *const token)
+{
+ tokenInfo *const name = newToken ();
+
+ /*
+ * This deals with these formats:
+ * create table t1 (c1 int);
+ * create global temporary table t2 (c1 int);
+ * create table "t3" (c1 int);
+ * create table bob.t4 (c1 int);
+ * create table bob."t5" (c1 int);
+ * create table "bob"."t6" (c1 int);
+ * create table bob."t7" (c1 int);
+ * Proxy tables use this format:
+ * create existing table bob."t7" AT '...';
+ * SQL Server and Sybase formats
+ * create table OnlyTable (
+ * create table dbo.HasOwner (
+ * create table [dbo].[HasOwnerSquare] (
+ * create table master.dbo.HasDb (
+ * create table master..HasDbNoOwner (
+ * create table [master].dbo.[HasDbAndOwnerSquare] (
+ * create table [master]..[HasDbNoOwnerSquare] (
+ */
+
+ /* This could be a database, owner or table name */
+ readToken (name);
+ if (isType (name, TOKEN_OPEN_SQUARE))
+ {
+ readToken (name);
+ /* Read close square */
+ readToken (token);
+ }
+ readToken (token);
+ if (isType (token, TOKEN_PERIOD))
+ {
+ /*
+ * This could be a owner or table name.
+ * But this is also a special case since the table can be
+ * referenced with a blank owner:
+ * dbname..tablename
+ */
+ readToken (name);
+ if (isType (name, TOKEN_OPEN_SQUARE))
+ {
+ readToken (name);
+ /* Read close square */
+ readToken (token);
+ }
+ /* Check if a blank name was provided */
+ if (isType (name, TOKEN_PERIOD))
+ {
+ readToken (name);
+ if (isType (name, TOKEN_OPEN_SQUARE))
+ {
+ readToken (name);
+ /* Read close square */
+ readToken (token);
+ }
+ }
+ readToken (token);
+ if (isType (token, TOKEN_PERIOD))
+ {
+ /* This can only be the table name */
+ readToken (name);
+ if (isType (name, TOKEN_OPEN_SQUARE))
+ {
+ readToken (name);
+ /* Read close square */
+ readToken (token);
+ }
+ readToken (token);
+ }
+ }
+ if (isType (token, TOKEN_OPEN_PAREN))
+ {
+ if (isType (name, TOKEN_IDENTIFIER) ||
+ isType (name, TOKEN_STRING))
+ {
+ makeSqlTag (name, SQLTAG_TABLE);
+ /* Columns are tagged with the table name as their scope */
+ vStringCopy(token->scope, name->string);
+ parseRecord (token);
+ vStringClear (token->scope);
+ }
+ }
+ else if (isKeyword (token, KEYWORD_at))
+ {
+ if (isType (name, TOKEN_IDENTIFIER))
+ {
+ makeSqlTag (name, SQLTAG_TABLE);
+ }
+ }
+ findCmdTerm (token, FALSE);
+ deleteToken (name);
+}
+
+/*
+ * Parses a CREATE INDEX statement, for example:
+ *     create index i1 on t1(c1)
+ *     create index "i2" on t1(c1)
+ *     create virtual unique clustered index "i3" on t1(c1)
+ *     create unique clustered index "i4" on t1(c1)
+ *     create clustered index "i5" on t1(c1)
+ *     create bitmap index "i6" on t1(c1)
+ * The index is tagged with the name that follows ON (after an optional
+ * "qualifier.") as its scope.
+ */
+static void parseIndex (tokenInfo *const token)
+{
+    tokenInfo *const name = newToken ();
+    tokenInfo *const owner = newToken ();
+
+    readToken (name);
+    readToken (token);
+    if (isType (token, TOKEN_PERIOD))
+    {
+        /* qualifier.index_name: keep the part after the period */
+        readToken (name);
+        readToken (token);
+    }
+
+    if ((isType (name, TOKEN_IDENTIFIER) || isType (name, TOKEN_STRING)) &&
+        isKeyword (token, KEYWORD_on))
+    {
+        readToken (owner);
+        readToken (token);
+        if (isType (token, TOKEN_PERIOD))
+        {
+            readToken (owner);
+            readToken (token);
+        }
+        addToScope (name, owner->string);
+        makeSqlTag (name, SQLTAG_INDEX);
+    }
+
+    findCmdTerm (token, FALSE);
+    deleteToken (name);
+    deleteToken (owner);
+}
+
+/*
+ * Parses a CREATE EVENT statement, for example:
+ *     create event e1 handler begin end;
+ *     create event "e2" handler begin end;
+ *     create event dba."e3" handler begin end;
+ *     create event "dba"."e4" handler begin end;
+ * The event name is tagged and, when a HANDLER with a BEGIN block
+ * follows, the block is parsed.
+ */
+static void parseEvent (tokenInfo *const token)
+{
+    tokenInfo *const name = newToken ();
+
+    readToken (name);
+    readToken (token);
+    if (isType (token, TOKEN_PERIOD))
+        readToken (name);
+
+    while (! isKeyword (token, KEYWORD_handler) &&
+           ! isType (token, TOKEN_SEMICOLON))
+        readToken (token);
+
+    /*
+     * The loop above only ends on HANDLER or a semicolon, so the event
+     * is tagged in either case.
+     */
+    makeSqlTag (name, SQLTAG_EVENT);
+
+    if (isKeyword (token, KEYWORD_handler))
+    {
+        readToken (token);
+        if (isKeyword (token, KEYWORD_begin))
+            parseBlock (token, TRUE);
+        findCmdTerm (token, TRUE);
+    }
+    deleteToken (name);
+}
+
+/*
+ * Parses a CREATE TRIGGER statement.  The trigger name (after an optional
+ * "owner." prefix) is read, then tokens are skipped up to the ON keyword,
+ * after which the table name is read.  When a BEGIN or CALL follows, the
+ * trigger is tagged with the table name as its scope and a BEGIN block is
+ * parsed.  A statement that reaches a command terminator before ON
+ * produces no tag.
+ */
+static void parseTrigger (tokenInfo *const token)
+{
+ tokenInfo *const name = newToken ();
+ tokenInfo *const table = newToken ();
+
+ /*
+ * This deals with these formats
+ * create or replace trigger tr1 begin end;
+ * create trigger "tr2" begin end;
+ * drop trigger "droptr1";
+ * create trigger "tr3" CALL sp_something();
+ * create trigger "owner"."tr4" begin end;
+ * create trigger "tr5" not valid;
+ * create trigger "tr6" begin end;
+ */
+
+ readToken (name);
+ readToken (token);
+ if (isType (token, TOKEN_PERIOD))
+ {
+ readToken (name);
+ readToken (token);
+ }
+
+ while ( !isKeyword (token, KEYWORD_on) &&
+ !isCmdTerm (token) )
+ {
+ readToken (token);
+ }
+
+ /*if (! isType (token, TOKEN_SEMICOLON) ) */
+ if (! isCmdTerm (token) )
+ {
+ /* Current token is ON: the table name (optionally qualified) follows */
+ readToken (table);
+ readToken (token);
+ if (isType (token, TOKEN_PERIOD))
+ {
+ readToken (table);
+ readToken (token);
+ }
+
+ while (! (isKeyword (token, KEYWORD_begin) ||
+ (isKeyword (token, KEYWORD_call)) ||
+ ( isCmdTerm (token))) )
+ {
+ if ( isKeyword (token, KEYWORD_declare) )
+ {
+ /* Declarations before the body are scoped by the trigger name */
+ addToScope(token, name->string);
+ parseDeclare(token, TRUE);
+ vStringClear(token->scope);
+ }
+ else
+ readToken (token);
+ }
+
+ if ( isKeyword (token, KEYWORD_begin) ||
+ isKeyword (token, KEYWORD_call) )
+ {
+ addToScope(name, table->string);
+ makeSqlTag (name, SQLTAG_TRIGGER);
+ addToScope(token, table->string);
+ if ( isKeyword (token, KEYWORD_begin) )
+ {
+ parseBlock (token, TRUE);
+ }
+ vStringClear(token->scope);
+ }
+ }
+
+ findCmdTerm (token, TRUE);
+ deleteToken (name);
+ deleteToken (table);
+}
+
+/*
+ * Parses a CREATE PUBLICATION statement, for example:
+ *     create or replace publication pu1 ()
+ *     create publication "pu2" ()
+ *     create publication dba."pu3" ()
+ *     create publication "dba"."pu4" ()
+ * The publication is tagged only when its name is directly followed by
+ * an opening parenthesis.
+ */
+static void parsePublication (tokenInfo *const token)
+{
+    tokenInfo *const name = newToken ();
+
+    readToken (name);
+    readToken (token);
+    if (isType (token, TOKEN_PERIOD))
+    {
+        /* owner.name: keep the part after the period */
+        readToken (name);
+        readToken (token);
+    }
+
+    if (isType (token, TOKEN_OPEN_PAREN) &&
+        (isType (name, TOKEN_IDENTIFIER) || isType (name, TOKEN_STRING)))
+        makeSqlTag (name, SQLTAG_PUBLICATION);
+
+    findCmdTerm (token, FALSE);
+    deleteToken (name);
+}
+
+/*
+ * Parses a CREATE SERVICE statement, for example:
+ *     CREATE SERVICE s1 TYPE 'HTML'
+ *         AUTHORIZATION OFF USER DBA AS
+ *         SELECT *
+ *           FROM SYS.SYSTABLE;
+ *     CREATE SERVICE "s2" TYPE 'HTML'
+ *         AUTHORIZATION OFF USER DBA AS
+ *         CALL sp_Something();
+ * The service is tagged only when its name is directly followed by the
+ * TYPE keyword.
+ */
+static void parseService (tokenInfo *const token)
+{
+    tokenInfo *const name = newToken ();
+
+    readToken (name);
+    readToken (token);
+
+    if (isKeyword (token, KEYWORD_type) &&
+        (isType (name, TOKEN_IDENTIFIER) || isType (name, TOKEN_STRING)))
+        makeSqlTag (name, SQLTAG_SERVICE);
+
+    findCmdTerm (token, FALSE);
+    deleteToken (name);
+}
+
+/*
+ * Parses a domain / datatype definition:
+ *     CREATE DOMAIN|DATATYPE [AS] your_name ...;
+ * A leading IS keyword token is skipped before the name is taken.
+ */
+static void parseDomain (tokenInfo *const token)
+{
+    tokenInfo *const name = newToken ();
+
+    readToken (name);
+    if (isKeyword (name, KEYWORD_is))
+        readToken (name);    /* step over the optional keyword */
+
+    readToken (token);
+
+    if (isType (name, TOKEN_STRING) ||
+        isType (name, TOKEN_IDENTIFIER))
+        makeSqlTag (name, SQLTAG_DOMAIN);
+
+    findCmdTerm (token, FALSE);
+    deleteToken (name);
+}
+
+/*
+ * Handles statements of the form
+ *     DROP TABLE|PROCEDURE|DOMAIN|DATATYPE name;
+ * by skipping them entirely.  Their syntax resembles PROCEDURE
+ * prototypes, so skipping the whole statement avoids generating bogus
+ * tags for any object type.
+ */
+static void parseDrop (tokenInfo *const token)
+{
+    findCmdTerm (token, FALSE);
+}
+
+/*
+ * Parses a variable statement, for example:
+ *     create variable varname1 integer;
+ *     create variable @varname2 integer;
+ *     create variable "varname3" integer;
+ *     drop variable @varname3;
+ * The variable is tagged unless its name is directly followed by a
+ * semicolon (as in the DROP form).
+ */
+static void parseVariable (tokenInfo *const token)
+{
+    tokenInfo *const name = newToken ();
+
+    readToken (name);
+    readToken (token);
+
+    if (! isType (token, TOKEN_SEMICOLON) &&
+        (isType (name, TOKEN_IDENTIFIER) || isType (name, TOKEN_STRING)))
+        makeSqlTag (name, SQLTAG_VARIABLE);
+
+    findCmdTerm (token, TRUE);
+
+    deleteToken (name);
+}
+
+/*
+ * Parses a synonym definition.  The next token is taken as the synonym
+ * name; it is tagged when it is an identifier or string and is directly
+ * followed by the FOR keyword.  The rest of the statement is skipped.
+ */
+static void parseSynonym (tokenInfo *const token)
+{
+    tokenInfo *const name = newToken ();
+
+    readToken (name);
+    readToken (token);
+
+    if (isKeyword (token, KEYWORD_for) &&
+        (isType (name, TOKEN_IDENTIFIER) || isType (name, TOKEN_STRING)))
+        makeSqlTag (name, SQLTAG_SYNONYM);
+
+    findCmdTerm (token, TRUE);
+
+    deleteToken (name);
+}
+
+/*
+ * Parses a view definition.  The view name may carry a "qualifier."
+ * prefix and may be followed by a parenthesised column list, which is
+ * skipped.  Tokens are then skipped up to the IS keyword or a semicolon;
+ * the view is tagged only when the IS keyword was reached and the name is
+ * an identifier or string.
+ */
+static void parseView (tokenInfo *const token)
+{
+    tokenInfo *const name = newToken ();
+
+    readToken (name);
+    readToken (token);
+    if (isType (token, TOKEN_PERIOD))
+    {
+        /* qualifier.view_name: keep the part after the period */
+        readToken (name);
+        readToken (token);
+    }
+
+    if (isType (token, TOKEN_OPEN_PAREN))
+        skipArgumentList (token);
+
+    while (! isKeyword (token, KEYWORD_is) &&
+           ! isType (token, TOKEN_SEMICOLON))
+        readToken (token);
+
+    if (isKeyword (token, KEYWORD_is) &&
+        (isType (name, TOKEN_IDENTIFIER) || isType (name, TOKEN_STRING)))
+        makeSqlTag (name, SQLTAG_VIEW);
+
+    findCmdTerm (token, TRUE);
+
+    deleteToken (name);
+}
+
+/*
+ * Parses a call of the form
+ *     call dbo.ml_add_table_script( 'version', 'table_name', 'event',
+ *         'some SQL statement'
+ *         );
+ * When the first three arguments are all strings, a tag named
+ * "version.table_name.event" is generated.
+ */
+static void parseMLTable (tokenInfo *const token)
+{
+    tokenInfo *const version = newToken ();
+    tokenInfo *const table = newToken ();
+    tokenInfo *const event = newToken ();
+
+    readToken (token);
+    if (isType (token, TOKEN_OPEN_PAREN))
+    {
+        /* first argument, then skip to its trailing comma (or the ')') */
+        readToken (version);
+        readToken (token);
+        while (! isType (token, TOKEN_COMMA) &&
+               ! isType (token, TOKEN_CLOSE_PAREN))
+            readToken (token);
+
+        if (isType (token, TOKEN_COMMA))
+        {
+            /* second argument, handled the same way */
+            readToken (table);
+            readToken (token);
+            while (! isType (token, TOKEN_COMMA) &&
+                   ! isType (token, TOKEN_CLOSE_PAREN))
+                readToken (token);
+
+            if (isType (token, TOKEN_COMMA))
+            {
+                readToken (event);
+
+                if (isType (version, TOKEN_STRING) &&
+                    isType (table, TOKEN_STRING) &&
+                    isType (event, TOKEN_STRING))
+                {
+                    addToScope (version, table->string);
+                    addToScope (version, event->string);
+                    makeSqlTag (version, SQLTAG_MLTABLE);
+                }
+            }
+            if (! isType (token, TOKEN_CLOSE_PAREN))
+                findToken (token, TOKEN_CLOSE_PAREN);
+        }
+    }
+
+    findCmdTerm (token, TRUE);
+
+    deleteToken (version);
+    deleteToken (table);
+    deleteToken (event);
+}
+
+/*
+ * Parses a call of the form
+ *     call ml_add_connection_script( 'version', 'event',
+ *         'some SQL statement'
+ *         );
+ * When the first two arguments are both strings, a tag named
+ * "version.event" is generated.
+ */
+static void parseMLConn (tokenInfo *const token)
+{
+    tokenInfo *const version = newToken ();
+    tokenInfo *const event = newToken ();
+
+    readToken (token);
+    if (isType (token, TOKEN_OPEN_PAREN))
+    {
+        /* first argument, then skip to its trailing comma (or the ')') */
+        readToken (version);
+        readToken (token);
+        while (! isType (token, TOKEN_COMMA) &&
+               ! isType (token, TOKEN_CLOSE_PAREN))
+            readToken (token);
+
+        if (isType (token, TOKEN_COMMA))
+        {
+            readToken (event);
+
+            if (isType (version, TOKEN_STRING) &&
+                isType (event, TOKEN_STRING))
+            {
+                addToScope (version, event->string);
+                makeSqlTag (version, SQLTAG_MLCONN);
+            }
+        }
+        if (! isType (token, TOKEN_CLOSE_PAREN))
+            findToken (token, TOKEN_CLOSE_PAREN);
+    }
+
+    findCmdTerm (token, TRUE);
+
+    deleteToken (version);
+    deleteToken (event);
+}
+
+/*
+ *	Skips over a COMMENT statement.  Two shapes are expected:
+ *		COMMENT TO PRESERVE FORMAT ON PROCEDURE "DBA"."test" IS
+ *		{create PROCEDURE DBA."test"()
+ *		BEGIN
+ *		 signal dave;
+ *		END
+ *		}
+ *		;
+ *	where anything may appear between the curly braces, and
+ *		COMMENT ON USER "admin" IS
+ *		'Administration Group'
+ *		;
+ *	where the comment is a plain string.
+ */
+static void parseComment (tokenInfo *const token)
+{
+	/* advance to the IS keyword */
+	for (;;)
+	{
+		if (isKeyword (token, KEYWORD_is))
+			break;
+		readToken (token);
+	}
+
+	/* a brace-enclosed body is skipped as a unit */
+	readToken (token);
+	if (isType (token, TOKEN_OPEN_CURLY))
+		findToken (token, TOKEN_CLOSE_CURLY);
+
+	findCmdTerm (token, TRUE);
+}
+
+
+/*
+ *	Dispatches on the keyword of the current token to the routine that
+ *	parses the corresponding construct.  Tokens whose keyword is not listed
+ *	are ignored.
+ */
+static void parseKeywords (tokenInfo *const token)
+{
+	switch (token->keyword)
+	{
+		case KEYWORD_begin:
+		case KEYWORD_declare:
+			parseBlock (token, FALSE);
+			break;
+
+		case KEYWORD_comment:
+			parseComment (token);
+			break;
+
+		case KEYWORD_cursor:
+			parseSimple (token, SQLTAG_CURSOR);
+			break;
+
+		case KEYWORD_datatype:
+		case KEYWORD_domain:
+			parseDomain (token);
+			break;
+
+		case KEYWORD_drop:
+			parseDrop (token);
+			break;
+
+		case KEYWORD_event:
+			parseEvent (token);
+			break;
+
+		case KEYWORD_function:
+		case KEYWORD_procedure:
+			parseSubProgram (token);
+			break;
+
+		case KEYWORD_if:
+			parseStatements (token);
+			break;
+
+		case KEYWORD_index:
+			parseIndex (token);
+			break;
+
+		case KEYWORD_ml_table:
+		case KEYWORD_ml_table_lang:
+		case KEYWORD_ml_table_dnet:
+		case KEYWORD_ml_table_java:
+		case KEYWORD_ml_table_chk:
+			parseMLTable (token);
+			break;
+
+		case KEYWORD_ml_conn:
+		case KEYWORD_ml_conn_lang:
+		case KEYWORD_ml_conn_dnet:
+		case KEYWORD_ml_conn_java:
+		case KEYWORD_ml_conn_chk:
+			parseMLConn (token);
+			break;
+
+		case KEYWORD_package:
+			parsePackage (token);
+			break;
+
+		case KEYWORD_publication:
+			parsePublication (token);
+			break;
+
+		case KEYWORD_service:
+			parseService (token);
+			break;
+
+		case KEYWORD_subtype:
+			parseSimple (token, SQLTAG_SUBTYPE);
+			break;
+
+		case KEYWORD_synonym:
+			parseSynonym (token);
+			break;
+
+		case KEYWORD_table:
+			parseTable (token);
+			break;
+
+		case KEYWORD_trigger:
+			parseTrigger (token);
+			break;
+
+		case KEYWORD_type:
+			parseType (token);
+			break;
+
+		case KEYWORD_variable:
+			parseVariable (token);
+			break;
+
+		case KEYWORD_view:
+			parseView (token);
+			break;
+
+		default:
+			break;
+	}
+}
+
+/*
+ *	Reads tokens one at a time, handing block labels to parseLabel() and
+ *	everything else to parseKeywords().  The loop ends once the token that
+ *	is current after handling is the END keyword.
+ */
+static void parseSqlFile (tokenInfo *const token)
+{
+	for (;;)
+	{
+		readToken (token);
+
+		if (! isType (token, TOKEN_BLOCK_LABEL_BEGIN))
+			parseKeywords (token);
+		else
+			parseLabel (token);
+
+		if (isKeyword (token, KEYWORD_end))
+			break;
+	}
+}
+
+/*
+ *	Parser initialization hook: records the language id assigned to this
+ *	parser and registers the SQL keywords.  The kind table must have one
+ *	entry per SQLTAG_* value.
+ */
+static void initialize (const langType language)
+{
+	Assert (SQLTAG_COUNT == sizeof (SqlKinds) / sizeof (*SqlKinds));
+	Lang_sql = language;
+	buildSqlKeywordHash ();
+}
+
+/*
+ *	Parser entry point.  Parsing is repeated until a longjmp() back to the
+ *	setjmp() below delivers a value other than ExceptionNone.
+ */
+static void findSqlTags (void)
+{
+	tokenInfo *const token = newToken ();
+	exception_t status;
+
+	status = (exception_t) (setjmp (Exception));
+	while (status == ExceptionNone)
+		parseSqlFile (token);
+
+	deleteToken (token);
+}
+
+/* Builds the parser definition for SQL (files with the "sql" extension) */
+extern parserDefinition* SqlParser (void)
+{
+	static const char *const extensions [] = { "sql", NULL };
+	parserDefinition* parser = parserNew ("SQL");
+
+	parser->extensions = extensions;
+	parser->kinds = SqlKinds;
+	parser->kindCount = KIND_COUNT (SqlKinds);
+	parser->initialize = initialize;
+	parser->parser = findSqlTags;
+	return parser;
+}
+
+/* vi:set tabstop=4 shiftwidth=4 noexpandtab: */
diff --git a/strlist.c b/strlist.c
new file mode 100644
index 0000000..8797795
--- /dev/null
+++ b/strlist.c
@@ -0,0 +1,281 @@
+/*
+* $Id: strlist.c 443 2006-05-30 04:37:13Z darren $
+*
+* Copyright (c) 1999-2002, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* This module contains functions managing resizable string lists.
+*/
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#include <string.h>
+#ifdef HAVE_FNMATCH_H
+# include <fnmatch.h>
+#endif
+
+#include "debug.h"
+#include "read.h"
+#include "routines.h"
+#include "strlist.h"
+
+/*
+* FUNCTION DEFINITIONS
+*/
+
+/* Allocates a new, empty string list; no item storage is reserved yet */
+extern stringList *stringListNew (void)
+{
+	stringList* const fresh = xMalloc (1, stringList);
+	fresh->list = NULL;
+	fresh->count = 0;
+	fresh->max = 0;
+	return fresh;
+}
+
+/*
+ *	Appends `string' to the end of `current', enlarging the item array in
+ *	fixed-size steps when it is full.  The pointer itself is stored; no copy
+ *	of the string is made.
+ */
+extern void stringListAdd (stringList *const current, vString *string)
+{
+	enum { growBy = 10 };
+	Assert (current != NULL);
+	if (current->list == NULL)
+	{
+		/* first insertion: allocate the initial run of slots */
+		Assert (current->max == 0);
+		current->max = growBy;
+		current->count = 0;
+		current->list = xMalloc (current->max, vString*);
+	}
+	else if (current->max == current->count)
+	{
+		current->max += growBy;
+		current->list = xRealloc (current->list, current->max, vString*);
+	}
+	current->list [current->count] = string;
+	++current->count;
+}
+
+/*
+ *	Drops the last entry from the list.  The slot is cleared but the string
+ *	it referred to is not deleted here.
+ */
+extern void stringListRemoveLast (stringList *const current)
+{
+	Assert (current != NULL);
+	Assert (current->count > 0);
+	current->count -= 1;
+	current->list [current->count] = NULL;
+}
+
+/*
+ *	Moves every entry of `from' onto the end of `current', then deletes the
+ *	emptied `from' list.
+ */
+extern void stringListCombine (
+		stringList *const current, stringList *const from)
+{
+	unsigned int i = 0;
+	Assert (current != NULL);
+	Assert (from != NULL);
+	while (i < from->count)
+	{
+		stringListAdd (current, from->list [i]);
+		/* clear the slot so deleting `from' leaves the moved string alone */
+		from->list [i] = NULL;
+		++i;
+	}
+	stringListDelete (from);
+}
+
+/*
+ *	Creates a list holding a new vString for each entry of the
+ *	NULL-terminated array `argv'.
+ */
+extern stringList* stringListNewFromArgv (const char* const* const argv)
+{
+	stringList* const result = stringListNew ();
+	unsigned int i;
+	Assert (argv != NULL);
+	for (i = 0 ; argv [i] != NULL ; ++i)
+		stringListAdd (result, vStringNewInit (argv [i]));
+	return result;
+}
+
+/*
+ *	Creates a list from the lines of the file `fileName'.  Each line has its
+ *	trailing characters stripped by vStringStripTrailing(); lines that are
+ *	empty afterwards are not added.
+ *
+ *	Returns NULL if the file cannot be opened, otherwise the new list (which
+ *	may be empty).  The file is closed before returning.
+ */
+extern stringList* stringListNewFromFile (const char* const fileName)
+{
+	stringList* result = NULL;
+	FILE* const fp = fopen (fileName, "r");
+	if (fp != NULL)
+	{
+		result = stringListNew ();
+		while (! feof (fp))
+		{
+			vString* const str = vStringNew ();
+			readLine (str, fp);
+			vStringStripTrailing (str);
+			if (vStringLength (str) > 0)
+				stringListAdd (result, str);
+			else
+				vStringDelete (str);
+		}
+		/* release the stream; it was previously left open on every call */
+		fclose (fp);
+	}
+	return result;
+}
+
+/* Returns the number of entries currently held by the list */
+extern unsigned int stringListCount (const stringList *const current)
+{
+	unsigned int total;
+	Assert (current != NULL);
+	total = current->count;
+	return total;
+}
+
+/*
+ *	Returns the entry at position `indx'.  The index is not range-checked
+ *	here.
+ */
+extern vString* stringListItem (
+		const stringList *const current, const unsigned int indx)
+{
+	vString *item;
+	Assert (current != NULL);
+	item = current->list [indx];
+	return item;
+}
+
+/* Returns the final entry of a non-empty list */
+extern vString* stringListLast (const stringList *const current)
+{
+	unsigned int lastIndex;
+	Assert (current != NULL);
+	Assert (current->count > 0);
+	lastIndex = current->count - 1;
+	return current->list [lastIndex];
+}
+
+/*
+ *	Deletes every string held by the list and resets the count to zero.
+ *	The item array itself is kept.
+ */
+extern void stringListClear (stringList *const current)
+{
+	unsigned int i = 0;
+	Assert (current != NULL);
+	while (i < current->count)
+	{
+		vStringDelete (current->list [i]);
+		current->list [i] = NULL;
+		++i;
+	}
+	current->count = 0;
+}
+
+/*
+ *	Deletes the list together with every string it holds.  A NULL list is
+ *	accepted and ignored.
+ */
+extern void stringListDelete (stringList *const current)
+{
+	if (current == NULL)
+		return;
+
+	if (current->list != NULL)
+	{
+		stringListClear (current);
+		eFree (current->list);
+		current->list = NULL;
+	}
+	current->count = 0;
+	current->max = 0;
+	eFree (current);
+}
+
+/* Case-sensitive test of `string' against the text of `itm' */
+static boolean compareString (
+		const char *const string, vString *const itm)
+{
+	const int difference = strcmp (string, vStringValue (itm));
+	return (boolean) (difference == 0);
+}
+
+/* Case-insensitive test of `string' against the text of `itm' */
+static boolean compareStringInsensitive (
+		const char *const string, vString *const itm)
+{
+	const int difference = strcasecmp (string, vStringValue (itm));
+	return (boolean) (difference == 0);
+}
+
+/*
+ *	Returns the index of the first entry for which `test' reports a match
+ *	with `string', or -1 when no entry matches.
+ */
+static int stringListIndex (
+		const stringList *const current,
+		const char *const string,
+		boolean (*test)(const char *s, vString *const vs))
+{
+	int found = -1;
+	unsigned int i;
+	Assert (current != NULL);
+	Assert (string != NULL);
+	Assert (test != NULL);
+	for (i = 0 ; i < current->count ; ++i)
+	{
+		if ((*test)(string, current->list [i]))
+		{
+			found = i;
+			break;
+		}
+	}
+	return found;
+}
+
+/* Reports whether the list holds an entry equal to `string' (case matters) */
+extern boolean stringListHas (
+		const stringList *const current, const char *const string)
+{
+	int where;
+	Assert (current != NULL);
+	where = stringListIndex (current, string, compareString);
+	return where != -1;
+}
+
+/* Reports whether the list holds an entry equal to `string', ignoring case */
+extern boolean stringListHasInsensitive (
+		const stringList *const current, const char *const string)
+{
+	int where;
+	Assert (current != NULL);
+	Assert (string != NULL);
+	where = stringListIndex (current, string, compareStringInsensitive);
+	return where != -1;
+}
+
+/*
+ *	Applies `test' to the text of each entry in turn, stopping at the first
+ *	entry for which it returns a true value.  Returns the result of the last
+ *	test made, or FALSE for an empty list.
+ */
+extern boolean stringListHasTest (
+		const stringList *const current, boolean (*test)(const char *s))
+{
+	boolean passed = FALSE;
+	unsigned int i = 0;
+	Assert (current != NULL);
+	while (i < current->count && ! passed)
+	{
+		passed = (*test)(vStringValue (current->list [i]));
+		++i;
+	}
+	return passed;
+}
+
+/*
+ *	Removes the first entry matching `extension' from the list, closing the
+ *	gap by shifting the following entries down one slot.  The comparison
+ *	ignores case when CASE_INSENSITIVE_FILENAMES is defined.
+ *
+ *	The removed string is deleted, as the list deletes its entries elsewhere
+ *	(see stringListClear).  Returns TRUE if an entry was removed, FALSE if
+ *	none matched.
+ */
+extern boolean stringListRemoveExtension (
+		stringList* const current, const char* const extension)
+{
+	boolean result = FALSE;
+	int where;
+#ifdef CASE_INSENSITIVE_FILENAMES
+	where = stringListIndex (current, extension, compareStringInsensitive);
+#else
+	where = stringListIndex (current, extension, compareString);
+#endif
+	if (where != -1)
+	{
+		const unsigned int last = current->count - 1;
+
+		vStringDelete (current->list [where]);
+		/* Only the entries after `where' move: that is (last - where) of
+		 * them.  Moving one more would read the slot past the last entry,
+		 * which lies outside the allocation when the list is full.
+		 */
+		memmove (current->list + where, current->list + where + 1,
+				(last - (unsigned int) where) * sizeof (*current->list));
+		current->list [last] = NULL;
+		current->count = last;
+		result = TRUE;
+	}
+	return result;
+}
+
+/*
+ *	Reports whether `extension' is an entry of the list.  Case is ignored
+ *	when CASE_INSENSITIVE_FILENAMES is defined.
+ */
+extern boolean stringListExtensionMatched (
+		const stringList* const current, const char* const extension)
+{
+	boolean matched;
+#ifdef CASE_INSENSITIVE_FILENAMES
+	matched = stringListHasInsensitive (current, extension);
+#else
+	matched = stringListHas (current, extension);
+#endif
+	return matched;
+}
+
+/*
+ *	Tests `fileName' against one pattern.  fnmatch() is used when available;
+ *	otherwise the pattern must equal the name exactly, ignoring case when
+ *	CASE_INSENSITIVE_FILENAMES is defined.
+ */
+static boolean fileNameMatched (
+		const vString* const vpattern, const char* const fileName)
+{
+	const char* const pattern = vStringValue (vpattern);
+	int mismatch;
+#if defined (HAVE_FNMATCH)
+	mismatch = fnmatch (pattern, fileName, 0);
+#elif defined (CASE_INSENSITIVE_FILENAMES)
+	mismatch = strcasecmp (pattern, fileName);
+#else
+	mismatch = strcmp (pattern, fileName);
+#endif
+	return (boolean) (mismatch == 0);
+}
+
+/* Reports whether `fileName' matches any pattern held by the list */
+extern boolean stringListFileMatched (
+		const stringList* const current, const char* const fileName)
+{
+	boolean matched = FALSE;
+	unsigned int i = 0;
+	while (! matched && i < stringListCount (current))
+	{
+		matched = fileNameMatched (stringListItem (current, i), fileName);
+		++i;
+	}
+	return matched;
+}
+
+/* Writes the entries to standard output, separated by ", " */
+extern void stringListPrint (const stringList *const current)
+{
+	const char *separator = "";
+	unsigned int i;
+	Assert (current != NULL);
+	for (i = 0 ; i < current->count ; ++i)
+	{
+		printf ("%s%s", separator, vStringValue (current->list [i]));
+		separator = ", ";
+	}
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/strlist.h b/strlist.h
new file mode 100644
index 0000000..c0d2909
--- /dev/null
+++ b/strlist.h
@@ -0,0 +1,54 @@
+/*
+* $Id: strlist.h 443 2006-05-30 04:37:13Z darren $
+*
+* Copyright (c) 1999-2002, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* Defines external interface to resizable string lists.
+*/
+#ifndef _STRLIST_H
+#define _STRLIST_H
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#include "vstring.h"
+
+/*
+* DATA DECLARATIONS
+*/
+/* A growable array of vString pointers */
+typedef struct sStringList {
+ unsigned int max; /* number of slots allocated in `list' */
+ unsigned int count; /* number of slots currently in use */
+ vString **list; /* the slots; NULL until the first item is added */
+} stringList;
+
+/*
+* FUNCTION PROTOTYPES
+*/
+/* creation, growth and merging */
+extern stringList *stringListNew (void);
+extern void stringListAdd (stringList *const current, vString *string);
+extern void stringListRemoveLast (stringList *const current);
+extern void stringListCombine (stringList *const current, stringList *const from);
+extern stringList* stringListNewFromArgv (const char* const* const list);
+extern stringList* stringListNewFromFile (const char* const fileName);
+/* access and disposal */
+extern void stringListClear (stringList *const current);
+extern unsigned int stringListCount (const stringList *const current);
+extern vString* stringListItem (const stringList *const current, const unsigned int indx);
+extern vString* stringListLast (const stringList *const current);
+extern void stringListDelete (stringList *const current);
+/* searching and matching */
+extern boolean stringListHasInsensitive (const stringList *const current, const char *const string);
+extern boolean stringListHas (const stringList *const current, const char *const string);
+extern boolean stringListHasTest (const stringList *const current, boolean (*test)(const char *s));
+extern boolean stringListRemoveExtension (stringList* const current, const char* const extension);
+extern boolean stringListExtensionMatched (const stringList* const list, const char* const extension);
+extern boolean stringListFileMatched (const stringList* const list, const char* const str);
+/* diagnostics */
+extern void stringListPrint (const stringList *const current);
+
+#endif /* _STRLIST_H */
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/tcl.c b/tcl.c
new file mode 100644
index 0000000..b3a3a5b
--- /dev/null
+++ b/tcl.c
@@ -0,0 +1,116 @@
+/*
+* $Id: tcl.c 443 2006-05-30 04:37:13Z darren $
+*
+* Copyright (c) 2000-2003, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* This module contains functions for generating tags for TCL scripts.
+*/
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#include <string.h>
+
+#include "parse.h"
+#include "read.h"
+#include "vstring.h"
+
+/*
+* DATA DEFINITIONS
+*/
+/* Tag kinds; each value is used as an index into TclKinds */
+typedef enum {
+ K_CLASS, K_METHOD, K_PROCEDURE
+} tclKind;
+
+/* Kind table, listed in the same order as the tclKind values */
+static kindOption TclKinds [] = {
+ { TRUE, 'c', "class", "classes" },
+ { TRUE, 'm', "method", "methods" },
+ { TRUE, 'p', "procedure", "procedures" }
+};
+
+/*
+* FUNCTION DEFINITIONS
+*/
+
+/*
+ *	Copies the word starting at `cp' (up to white space or end of line)
+ *	into `name' and makes a tag of the given kind from it.  Returns a
+ *	pointer to the character following the word.
+ */
+static const unsigned char *makeTclTag (
+		const unsigned char *cp,
+		vString *const name,
+		const tclKind kind)
+{
+	vStringClear (name);
+	for ( ; (int) *cp != '\0' && ! isspace ((int) *cp) ; ++cp)
+		vStringPut (name, (int) *cp);
+	vStringTerminate (name);
+	makeSimpleTag (name, TclKinds, kind);
+	return cp;
+}
+
+/*
+ *	Reports whether the text at `line' begins with the complete word `word',
+ *	i.e. `word' followed by white space or the end of the line.
+ *
+ *	A plain prefix comparison is not enough: it would accept "process" as
+ *	"proc" or "classify" as "class" and produce tags for ordinary commands.
+ */
+static boolean match (const unsigned char *line, const char *word)
+{
+	const size_t length = strlen (word);
+	boolean result = FALSE;
+
+	if (strncmp ((const char*) line, word, length) == 0)
+	{
+		/* the prefix matched, so line [length] is within the line's text
+		 * (at worst it is the terminating null character)
+		 */
+		const int following = (int) line [length];
+		result = (boolean) (following == '\0' || isspace (following));
+	}
+	return result;
+}
+
+/*
+ *	Parser entry point.  Each line is examined on its own: the first word
+ *	selects the kind of tag ("proc", "class"/"itcl::class", or a visibility
+ *	word followed by "method") and the tag name is the word that follows.
+ */
+static void findTclTags (void)
+{
+	vString *name = vStringNew ();
+	const unsigned char *line;
+
+	while ((line = fileReadLine ()) != NULL)
+	{
+		const unsigned char *cp;
+
+		/* ignore leading white space, blank lines and comment lines */
+		for ( ; isspace (*line) ; ++line)
+			;
+		if (*line == '\0' || *line == '#')
+			continue;
+
+		/* step over the first word; a lone word leaves nothing to tag */
+		cp = line;
+		while (*cp != '\0' && ! isspace ((int) *cp))
+			++cp;
+		if (*cp == '\0')
+			continue;
+		do
+			++cp;
+		while (isspace ((int) *cp));
+		/* `line' is now at the first word and `cp' at the second */
+
+		if (match (line, "proc"))
+			cp = makeTclTag (cp, name, K_PROCEDURE);
+		else if (match (line, "class") || match (line, "itcl::class"))
+			cp = makeTclTag (cp, name, K_CLASS);
+		else if ((match (line, "public") ||
+				  match (line, "protected") ||
+				  match (line, "private")) &&
+				 match (cp, "method"))
+		{
+			cp += 6;
+			while (isspace ((int) *cp))
+				++cp;
+			cp = makeTclTag (cp, name, K_METHOD);
+		}
+	}
+	vStringDelete (name);
+}
+
+/* Builds the parser definition for Tcl and its listed file extensions */
+extern parserDefinition* TclParser (void)
+{
+	static const char *const extensions [] = { "tcl", "tk", "wish", "itcl", NULL };
+	parserDefinition* parser = parserNew ("Tcl");
+
+	parser->extensions = extensions;
+	parser->kinds = TclKinds;
+	parser->kindCount = KIND_COUNT (TclKinds);
+	parser->parser = findTclTags;
+	return parser;
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/tex.c b/tex.c
new file mode 100644
index 0000000..a285797
--- /dev/null
+++ b/tex.c
@@ -0,0 +1,524 @@
+/*
+ * $Id: tex.c 666 2008-05-15 17:47:31Z dfishburn $
+ *
+ * Copyright (c) 2008, David Fishburn
+ *
+ * This source code is released for free distribution under the terms of the
+ * GNU General Public License.
+ *
+ * This module contains functions for generating tags for TeX language files.
+ *
+ * Tex language reference:
+ * http://en.wikibooks.org/wiki/TeX#The_Structure_of_TeX
+ */
+
+/*
+ * INCLUDE FILES
+ */
+#include "general.h" /* must always come first */
+#include <ctype.h> /* to define isalpha () */
+#include <setjmp.h>
+#ifdef DEBUG
+#include <stdio.h>
+#endif
+
+#include "debug.h"
+#include "entry.h"
+#include "keyword.h"
+#include "parse.h"
+#include "read.h"
+#include "routines.h"
+#include "vstring.h"
+
+/*
+ * MACROS
+ */
+/* Convenience tests on the `type' and `keyword' fields of a tokenInfo pointer */
+#define isType(token,t) (boolean) ((token)->type == (t))
+#define isKeyword(token,k) (boolean) ((token)->keyword == (k))
+
+/*
+ * DATA DECLARATIONS
+ */
+
+typedef enum eException { ExceptionNone, ExceptionEOF } exception_t;
+
+/*
+ * Used to specify type of keyword.
+ * KEYWORD_NONE marks a token that is not one of the recognized keywords;
+ * the remaining values correspond to the entries of TexKeywordTable.
+ */
+typedef enum eKeywordId {
+ KEYWORD_NONE = -1,
+ KEYWORD_chapter,
+ KEYWORD_section,
+ KEYWORD_subsection,
+ KEYWORD_subsubsection,
+ KEYWORD_part,
+ KEYWORD_paragraph,
+ KEYWORD_subparagraph
+} keywordId;
+
+/* Used to determine whether keyword is valid for the token language and
+ * what its ID is.
+ */
+typedef struct sKeywordDesc {
+ const char *name; /* keyword text, without the leading backslash */
+ keywordId id; /* value registered for the keyword via addKeyword() */
+} keywordDesc;
+
+/*
+ * Classification of a token.  readToken() in this file assigns only a
+ * subset of these values; the others are never produced by it.
+ */
+typedef enum eTokenType {
+ TOKEN_UNDEFINED,
+ TOKEN_CHARACTER,
+ TOKEN_CLOSE_PAREN,
+ TOKEN_SEMICOLON,
+ TOKEN_COLON,
+ TOKEN_COMMA,
+ TOKEN_KEYWORD,
+ TOKEN_OPEN_PAREN,
+ TOKEN_OPERATOR,
+ TOKEN_IDENTIFIER,
+ TOKEN_STRING,
+ TOKEN_PERIOD,
+ TOKEN_OPEN_CURLY,
+ TOKEN_CLOSE_CURLY,
+ TOKEN_EQUAL_SIGN,
+ TOKEN_EXCLAMATION,
+ TOKEN_FORWARD_SLASH,
+ TOKEN_OPEN_SQUARE,
+ TOKEN_CLOSE_SQUARE,
+ TOKEN_OPEN_MXML,
+ TOKEN_CLOSE_MXML,
+ TOKEN_CLOSE_SGML,
+ TOKEN_LESS_THAN,
+ TOKEN_GREATER_THAN,
+ TOKEN_QUESTION_MARK,
+ TOKEN_STAR
+} tokenType;
+
+/* One lexical token together with the place in the input where it was read */
+typedef struct sTokenInfo {
+ tokenType type; /* classification assigned by readToken() */
+ keywordId keyword; /* KEYWORD_NONE unless the token is a known keyword */
+ vString * string; /* text of the token, where one is collected */
+ vString * scope; /* when non-empty, prefixed to the tag name by makeTexTag() */
+ unsigned long lineNumber; /* from getSourceLineNumber() */
+ fpos_t filePosition; /* from getInputFilePosition() */
+} tokenInfo;
+
+/*
+ * DATA DEFINITIONS
+ */
+
+/* Language id given to this (TeX) parser in initialize(), despite the name */
+static langType Lang_js;
+
+/* Target of the longjmp() made by readToken() at end of input */
+static jmp_buf Exception;
+
+/*
+ * Tag kinds; each value is used as an index into TexKinds.  TEXTAG_COUNT is
+ * not a kind: initialize() checks it against the size of TexKinds.
+ */
+typedef enum {
+ TEXTAG_CHAPTER,
+ TEXTAG_SECTION,
+ TEXTAG_SUBSECTION,
+ TEXTAG_SUBSUBSECTION,
+ TEXTAG_PART,
+ TEXTAG_PARAGRAPH,
+ TEXTAG_SUBPARAGRAPH,
+ TEXTAG_COUNT
+} texKind;
+
+/* Kind table, listed in the same order as the texKind values */
+static kindOption TexKinds [] = {
+ { TRUE, 'c', "chapter", "chapters" },
+ { TRUE, 's', "section", "sections" },
+ { TRUE, 'u', "subsection", "subsections" },
+ { TRUE, 'b', "subsubsection", "subsubsections" },
+ { TRUE, 'p', "part", "parts" },
+ { TRUE, 'P', "paragraph", "paragraphs" },
+ { TRUE, 'G', "subparagraph", "subparagraphs" }
+};
+
+/* Keywords registered by buildTexKeywordHash(); names omit the backslash */
+static const keywordDesc TexKeywordTable [] = {
+ /* keyword keyword ID */
+ { "chapter", KEYWORD_chapter },
+ { "section", KEYWORD_section },
+ { "subsection", KEYWORD_subsection },
+ { "subsubsection", KEYWORD_subsubsection },
+ { "part", KEYWORD_part },
+ { "paragraph", KEYWORD_paragraph },
+ { "subparagraph", KEYWORD_subparagraph }
+};
+
+/*
+ * FUNCTION DEFINITIONS
+ */
+
+/*
+ * Reports whether `c' may appear in an identifier: a letter, a digit, or
+ * one of '$', '_' and '#'.
+ */
+static boolean isIdentChar (const int c)
+{
+	boolean ident;
+
+	switch (c)
+	{
+		case '$':
+		case '_':
+		case '#':
+			ident = TRUE;
+			break;
+		default:
+			ident = (boolean) (isalpha (c) || isdigit (c));
+			break;
+	}
+	return ident;
+}
+
+/* Registers every entry of TexKeywordTable for this parser's language */
+static void buildTexKeywordHash (void)
+{
+	const size_t count = sizeof (TexKeywordTable) /
+						 sizeof (TexKeywordTable [0]);
+	const keywordDesc *entry = TexKeywordTable;
+	const keywordDesc *const end = TexKeywordTable + count;
+
+	for ( ; entry != end ; ++entry)
+		addKeyword (entry->name, Lang_js, (int) entry->id);
+}
+
+/*
+ * Allocates a token with empty string and scope, no type or keyword, and
+ * the current input line number and file position.
+ */
+static tokenInfo *newToken (void)
+{
+	tokenInfo *const fresh = xMalloc (1, tokenInfo);
+
+	fresh->type = TOKEN_UNDEFINED;
+	fresh->keyword = KEYWORD_NONE;
+	fresh->string = vStringNew ();
+	fresh->scope = vStringNew ();
+	fresh->lineNumber = getSourceLineNumber ();
+	fresh->filePosition = getInputFilePosition ();
+
+	return fresh;
+}
+
+/* Releases a token created by newToken(), including both of its strings */
+static void deleteToken (tokenInfo *const token)
+{
+	vStringDelete (token->scope);
+	vStringDelete (token->string);
+	eFree (token);
+}
+
+/*
+ * Tag generation functions
+ */
+
+/*
+ * Makes a tag entry named by the token's string, at the token's recorded
+ * line and file position, provided the kind is enabled.
+ */
+static void makeConstTag (tokenInfo *const token, const texKind kind)
+{
+	const kindOption *const option = &TexKinds [kind];
+	tagEntryInfo entry;
+
+	if (! option->enabled)
+		return;
+
+	initTagEntry (&entry, vStringValue (token->string));
+
+	entry.lineNumber = token->lineNumber;
+	entry.filePosition = token->filePosition;
+	entry.kindName = option->name;
+	entry.kind = option->letter;
+
+	makeTagEntry (&entry);
+}
+
+/*
+ * Makes a tag for the token if the kind is enabled.  When the token has a
+ * non-empty scope, its string is first rewritten in place as
+ * "scope.string".
+ */
+static void makeTexTag (tokenInfo *const token, texKind kind)
+{
+	if (! TexKinds [kind].enabled)
+		return;
+
+	if (vStringLength (token->scope) > 0)
+	{
+		/* build the qualified name, then store it back in the token */
+		vString *const qualified = vStringNew ();
+
+		vStringCopy (qualified, token->scope);
+		vStringCatS (qualified, ".");
+		vStringCatS (qualified, vStringValue (token->string));
+		vStringTerminate (qualified);
+		vStringCopy (token->string, qualified);
+		vStringDelete (qualified);
+	}
+	makeConstTag (token, kind);
+}
+
+/*
+ * Parsing functions
+ */
+
+/*
+ * Appends input characters to `string' up to, but not including, the
+ * closing `delimiter' or the end of input.  A backslash is dropped and the
+ * character after it is taken literally, so an escaped quote does not end
+ * the string.
+ */
+static void parseString (vString *const string, const int delimiter)
+{
+	for (;;)
+	{
+		int c = fileGetc ();
+
+		if (c == EOF)
+			break;
+		if (c == '\\')
+			c = fileGetc ();	/* This maybe a ' or ". */
+		else if (c == delimiter)
+			break;
+		vStringPut (string, c);
+	}
+	vStringTerminate (string);
+}
+
+/*
+ * Reads an identifier beginning with "firstChar" and appends it to
+ * "string".  The character that ends the identifier is pushed back onto
+ * the input unless it is white space.
+ */
+static void parseIdentifier (vString *const string, const int firstChar)
+{
+	int next;
+
+	Assert (isIdentChar (firstChar));
+	vStringPut (string, firstChar);
+	for (next = fileGetc () ; isIdentChar (next) ; next = fileGetc ())
+		vStringPut (string, next);
+
+	vStringTerminate (string);
+	if (! isspace (next))
+		fileUngetc (next);	/* unget non-identifier character */
+}
+
+/*
+ * Reads the next token from the input into `token', resetting its type,
+ * keyword and string first (the scope is left untouched).
+ *
+ * Tabs, spaces and newlines between tokens are skipped, as are comments
+ * running from '%' to the end of the line.  At end of input control leaves
+ * through longjmp() to `Exception' with ExceptionEOF instead of returning.
+ */
+static void readToken (tokenInfo *const token)
+{
+ int c;
+
+ token->type = TOKEN_UNDEFINED;
+ token->keyword = KEYWORD_NONE;
+ vStringClear (token->string);
+
+getNextChar:
+ do
+ {
+ c = fileGetc ();
+ token->lineNumber = getSourceLineNumber ();
+ token->filePosition = getInputFilePosition ();
+ }
+ while (c == '\t' || c == ' ' || c == '\n');
+
+ switch (c)
+ {
+ case EOF: longjmp (Exception, (int)ExceptionEOF); break;
+ case '(': token->type = TOKEN_OPEN_PAREN; break;
+ case ')': token->type = TOKEN_CLOSE_PAREN; break;
+ case ';': token->type = TOKEN_SEMICOLON; break;
+ case ',': token->type = TOKEN_COMMA; break;
+ case '.': token->type = TOKEN_PERIOD; break;
+ case ':': token->type = TOKEN_COLON; break;
+ case '{': token->type = TOKEN_OPEN_CURLY; break;
+ case '}': token->type = TOKEN_CLOSE_CURLY; break;
+ case '=': token->type = TOKEN_EQUAL_SIGN; break;
+ case '[': token->type = TOKEN_OPEN_SQUARE; break;
+ case ']': token->type = TOKEN_CLOSE_SQUARE; break;
+ case '?': token->type = TOKEN_QUESTION_MARK; break;
+ case '*': token->type = TOKEN_STAR; break;
+
+ case '\'':
+ case '"':
+ token->type = TOKEN_STRING;
+ parseString (token->string, c); /* the opening quote is also the closing delimiter */
+ token->lineNumber = getSourceLineNumber ();
+ token->filePosition = getInputFilePosition ();
+ break;
+
+ case '\\':
+ /*
+ * All Tex tags start with a backslash.
+ * Check if the next character is an alpha character
+ * else it is not a potential tex tag.
+ */
+ c = fileGetc ();
+ if (! isalpha (c))
+ fileUngetc (c); /* token type remains TOKEN_UNDEFINED */
+ else
+ {
+ parseIdentifier (token->string, c);
+ token->lineNumber = getSourceLineNumber ();
+ token->filePosition = getInputFilePosition ();
+ token->keyword = analyzeToken (token->string, Lang_js);
+ if (isKeyword (token, KEYWORD_NONE))
+ token->type = TOKEN_IDENTIFIER;
+ else
+ token->type = TOKEN_KEYWORD;
+ }
+ break;
+
+ case '%':
+ fileSkipToCharacter ('\n'); /* % are single line comments */
+ goto getNextChar;
+ break;
+
+ default:
+ if (! isIdentChar (c))
+ token->type = TOKEN_UNDEFINED;
+ else
+ {
+ /* a word not introduced by a backslash: never looked up as a keyword */
+ parseIdentifier (token->string, c);
+ token->lineNumber = getSourceLineNumber ();
+ token->filePosition = getInputFilePosition ();
+ token->type = TOKEN_IDENTIFIER;
+ }
+ break;
+ }
+}
+
+/* Copies every field of `src' into `dest', including both strings */
+static void copyToken (tokenInfo *const dest, tokenInfo *const src)
+{
+	dest->type = src->type;
+	dest->keyword = src->keyword;
+	dest->lineNumber = src->lineNumber;
+	dest->filePosition = src->filePosition;
+	vStringCopy (dest->scope, src->scope);
+	vStringCopy (dest->string, src->string);
+}
+
+/*
+ * Scanning functions
+ */
+
+/*
+ * Makes a tag of the given kind for a sectioning command.  On entry
+ * `token' is the keyword token.  Tex tags are of these formats:
+ *	\keyword{any number of words}
+ *	\keyword[short desc]{any number of words}
+ *	\keyword*[short desc]{any number of words}
+ *
+ * The tag name is made of the identifier tokens found inside the square
+ * brackets when they are present, otherwise of those inside the curly
+ * braces, joined by single spaces.  The tag carries the line and file
+ * position of the keyword.
+ *
+ * Always returns TRUE.
+ */
+static boolean parseTag (tokenInfo *const token, texKind kind)
+{
+	tokenInfo *const name = newToken ();
+	vString * fullname;
+	boolean useLongName = TRUE;
+
+	fullname = vStringNew ();
+	vStringClear (fullname);
+
+	if (isType (token, TOKEN_KEYWORD))
+	{
+		copyToken (name, token);
+		readToken (token);
+	}
+
+	/*
+	 * The star of a starred command directly follows the keyword, so it
+	 * has to be skipped before looking for the optional short description;
+	 * otherwise \keyword*[short desc]{...} is never recognized.
+	 */
+	if (isType (token, TOKEN_STAR))
+	{
+		readToken (token);
+	}
+
+	if (isType (token, TOKEN_OPEN_SQUARE))
+	{
+		/* the short description supplies the tag name */
+		useLongName = FALSE;
+
+		readToken (token);
+		while (! isType (token, TOKEN_CLOSE_SQUARE) )
+		{
+			if (isType (token, TOKEN_IDENTIFIER))
+			{
+				if (fullname->length > 0)
+					vStringCatS (fullname, " ");
+				vStringCatS (fullname, vStringValue (token->string));
+			}
+			readToken (token);
+		}
+		vStringTerminate (fullname);
+		vStringCopy (name->string, fullname);
+		makeTexTag (name, kind);
+	}
+
+	if (isType (token, TOKEN_OPEN_CURLY))
+	{
+		/* no short description: the full title supplies the tag name */
+		readToken (token);
+		while (! isType (token, TOKEN_CLOSE_CURLY) )
+		{
+			if (isType (token, TOKEN_IDENTIFIER) && useLongName)
+			{
+				if (fullname->length > 0)
+					vStringCatS (fullname, " ");
+				vStringCatS (fullname, vStringValue (token->string));
+			}
+			readToken (token);
+		}
+		if (useLongName)
+		{
+			vStringTerminate (fullname);
+			vStringCopy (name->string, fullname);
+			makeTexTag (name, kind);
+		}
+	}
+
+	deleteToken (name);
+	vStringDelete (fullname);
+	return TRUE;
+}
+
+/*
+ * Reads tokens until the end of input, handing each sectioning keyword to
+ * parseTag() with the tag kind of the same name.  This loop has no exit of
+ * its own: it ends when readToken() leaves through longjmp() at end of
+ * input.
+ */
+static void parseTexFile (tokenInfo *const token)
+{
+	do
+	{
+		readToken (token);
+
+		if (isType (token, TOKEN_KEYWORD))
+		{
+			switch (token->keyword)
+			{
+				case KEYWORD_chapter:
+					parseTag (token, TEXTAG_CHAPTER);
+					break;
+				case KEYWORD_section:
+					parseTag (token, TEXTAG_SECTION);
+					break;
+				case KEYWORD_subsection:
+					/* was TEXTAG_SUBSUBSECTION, which filed every
+					 * subsection under the subsubsection kind */
+					parseTag (token, TEXTAG_SUBSECTION);
+					break;
+				case KEYWORD_subsubsection:
+					parseTag (token, TEXTAG_SUBSUBSECTION);
+					break;
+				case KEYWORD_part:
+					parseTag (token, TEXTAG_PART);
+					break;
+				case KEYWORD_paragraph:
+					parseTag (token, TEXTAG_PARAGRAPH);
+					break;
+				case KEYWORD_subparagraph:
+					parseTag (token, TEXTAG_SUBPARAGRAPH);
+					break;
+				default:
+					break;
+			}
+		}
+	} while (TRUE);
+}
+
+/*
+ * Parser initialization hook: records the language id assigned to this
+ * parser and registers the keywords.  The kind table must have one entry
+ * per TEXTAG_* value.
+ */
+static void initialize (const langType language)
+{
+	Assert (TEXTAG_COUNT == sizeof (TexKinds) / sizeof (*TexKinds));
+	Lang_js = language;
+	buildTexKeywordHash ();
+}
+
+/*
+ * Parser entry point.  Parsing is repeated until a longjmp() back to the
+ * setjmp() below delivers a value other than ExceptionNone.
+ */
+static void findTexTags (void)
+{
+	tokenInfo *const token = newToken ();
+	exception_t status;
+
+	status = (exception_t) (setjmp (Exception));
+	while (status == ExceptionNone)
+	{
+		parseTexFile (token);
+	}
+
+	deleteToken (token);
+}
+
+/* Create the parser definition structure for TeX ("tex" extension) */
+extern parserDefinition* TexParser (void)
+{
+	static const char *const extensions [] = { "tex", NULL };
+	parserDefinition *const parser = parserNew ("Tex");
+
+	parser->extensions = extensions;
+	parser->kinds = TexKinds;
+	parser->kindCount = KIND_COUNT (TexKinds);
+	parser->parser = findTexTags;
+	parser->initialize = initialize;
+
+	return parser;
+}
+/* vi:set tabstop=4 shiftwidth=4 noexpandtab: */
diff --git a/verilog.c b/verilog.c
new file mode 100644
index 0000000..814f5b0
--- /dev/null
+++ b/verilog.c
@@ -0,0 +1,340 @@
+/*
+* $Id: verilog.c 573 2007-06-26 05:41:27Z elliotth $
+*
+* Copyright (c) 2003, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* This module contains functions for generating tags for the Verilog HDL
+* (Hardware Description Language).
+*
+* Language definition documents:
+* http://www.eg.bucknell.edu/~cs320/verilog/verilog-manual.html
+* http://www.sutherland-hdl.com/on-line_ref_guide/vlog_ref_top.html
+* http://www.verilog.com/VerilogBNF.html
+* http://eesun.free.fr/DOC/VERILOG/verilog_manual1.html
+*/
+
+/*
+ * INCLUDE FILES
+ */
+#include "general.h" /* must always come first */
+
+#include <string.h>
+#include <setjmp.h>
+
+#include "debug.h"
+#include "get.h"
+#include "keyword.h"
+#include "parse.h"
+#include "read.h"
+#include "vstring.h"
+
+/*
+ * DATA DECLARATIONS
+ */
+typedef enum eException { ExceptionNone, ExceptionEOF } exception_t;
+
+/*
+ * Tag kinds.  The non-negative values index VerilogKinds and are also the
+ * values registered for keywords in initialize(); findTag() treats
+ * K_UNDEFINED as "not a keyword of interest".
+ */
+typedef enum {
+ K_UNDEFINED = -1,
+ K_CONSTANT,
+ K_EVENT,
+ K_FUNCTION,
+ K_MODULE,
+ K_NET,
+ K_PORT,
+ K_REGISTER,
+ K_TASK
+} verilogKind;
+
+/* Associates a keyword with the kind of tag its declarations produce */
+typedef struct {
+ const char *keyword;
+ verilogKind kind;
+} keywordAssoc;
+
+/*
+ * DATA DEFINITIONS
+ */
+static int Ungetc; /* one character of push-back for vGetc(); '\0' means none */
+static int Lang_verilog; /* language id given to this parser in initialize() */
+static jmp_buf Exception; /* target of the longjmp() made by vGetc() at end of input */
+
+/* Kind table, listed in the same order as the non-negative verilogKind values */
+static kindOption VerilogKinds [] = {
+ { TRUE, 'c', "constant", "constants (define, parameter, specparam)" },
+ { TRUE, 'e', "event", "events" },
+ { TRUE, 'f', "function", "functions" },
+ { TRUE, 'm', "module", "modules" },
+ { TRUE, 'n', "net", "net data types" },
+ { TRUE, 'p', "port", "ports" },
+ { TRUE, 'r', "register", "register data types" },
+ { TRUE, 't', "task", "tasks" }
+};
+
+/* Keywords that introduce a taggable declaration, registered by initialize() */
+static keywordAssoc VerilogKeywordTable [] = {
+ { "`define", K_CONSTANT },
+ { "event", K_EVENT },
+ { "function", K_FUNCTION },
+ { "inout", K_PORT },
+ { "input", K_PORT },
+ { "integer", K_REGISTER },
+ { "module", K_MODULE },
+ { "output", K_PORT },
+ { "parameter", K_CONSTANT },
+ { "real", K_REGISTER },
+ { "realtime", K_REGISTER },
+ { "reg", K_REGISTER },
+ { "specparam", K_CONSTANT },
+ { "supply0", K_NET },
+ { "supply1", K_NET },
+ { "task", K_TASK },
+ { "time", K_REGISTER },
+ { "tri0", K_NET },
+ { "tri1", K_NET },
+ { "triand", K_NET },
+ { "tri", K_NET },
+ { "trior", K_NET },
+ { "trireg", K_NET },
+ { "wand", K_NET },
+ { "wire", K_NET },
+ { "wor", K_NET }
+};
+
+/*
+ * FUNCTION DEFINITIONS
+ */
+
+/*
+ * Parser initialization hook: records the language id and registers every
+ * entry of VerilogKeywordTable, using the tag kind as the keyword value.
+ */
+static void initialize (const langType language)
+{
+	const size_t count =
+			sizeof (VerilogKeywordTable) / sizeof (VerilogKeywordTable [0]);
+	const keywordAssoc *entry = VerilogKeywordTable;
+	const keywordAssoc *const end = VerilogKeywordTable + count;
+
+	Lang_verilog = language;
+	for ( ; entry != end ; ++entry)
+		addKeyword (entry->keyword, language, (int) entry->kind);
+}
+
+/*
+ * Pushes `c' back so that the next vGetc() returns it.  Only one character
+ * can be held at a time.
+ */
+static void vUngetc (int c)
+{
+	Assert ('\0' == Ungetc);
+	Ungetc = c;
+}
+
+/*
+ * Returns the next character of interest, taking a pushed-back character
+ * (see vUngetc) in preference to the input.  Comments and string contents
+ * are removed here.  At end of input control leaves through longjmp() to
+ * `Exception' with ExceptionEOF instead of returning.
+ */
+static int vGetc (void)
+{
+ int c;
+ if (Ungetc == '\0')
+ c = fileGetc ();
+ else
+ {
+ c = Ungetc;
+ Ungetc = '\0';
+ }
+ if (c == '/')
+ {
+ int c2 = fileGetc ();
+ if (c2 == EOF)
+ longjmp (Exception, (int) ExceptionEOF);
+ else if (c2 == '/') /* strip comment until end-of-line */
+ {
+ /* the character returned is the newline that ended the comment */
+ do
+ c = fileGetc ();
+ while (c != '\n' && c != EOF);
+ }
+ else if (c2 == '*') /* strip block comment */
+ {
+ c = skipOverCComment(); /* return whatever skipOverCComment() reports */
+ }
+ else
+ {
+ fileUngetc (c2); /* a lone '/': return it and keep the next character */
+ }
+ }
+ else if (c == '"') /* strip string contents */
+ {
+ int c2;
+ do
+ c2 = fileGetc ();
+ while (c2 != '"' && c2 != EOF);
+ c = '@'; /* the whole string is reported as a single '@' */
+ }
+ if (c == EOF)
+ longjmp (Exception, (int) ExceptionEOF);
+ return c;
+}
+
+/*
+ * Reports whether `c' may appear in an identifier: a letter, a digit, an
+ * underscore or a back-quote.
+ */
+static boolean isIdentifierCharacter (const int c)
+{
+	boolean ident;
+
+	if (c == '_' || c == '`')
+		ident = TRUE;
+	else
+		ident = (boolean)(isalnum (c) != 0);
+	return ident;
+}
+
+/*
+ * Starting with `c', reads on until a character that is not white space is
+ * found, and returns that character.
+ */
+static int skipWhite (int c)
+{
+	for ( ; isspace (c) ; c = vGetc ())
+		;
+	return c;
+}
+
+/*
+ * Called after the opening character pair [0] has been read: reads up to
+ * the closing character pair [1] that balances it, allowing for nesting,
+ * and returns the character following it.
+ */
+static int skipPastMatch (const char *const pair)
+{
+	const int opener = pair [0];
+	const int closer = pair [1];
+	int depth = 1;
+
+	while (depth > 0)
+	{
+		const int c = vGetc ();
+		if (c == opener)
+			++depth;
+		else if (c == closer)
+			--depth;
+	}
+	return vGetc ();
+}
+
+/*
+ * Reads the identifier beginning with `c' into `name' and pushes back the
+ * character that ended it.  Returns TRUE if `c' started an identifier;
+ * otherwise `name' is left empty, nothing is read and FALSE is returned.
+ */
+static boolean readIdentifier (vString *const name, int c)
+{
+	vStringClear (name);
+	if (isIdentifierCharacter (c))
+	{
+		do
+		{
+			vStringPut (name, c);
+			c = vGetc ();
+		} while (isIdentifierCharacter (c));
+		vUngetc (c);
+		vStringTerminate (name);
+	}
+	return (boolean)(name->length > 0);
+}
+
+/*
+ * Makes a tag of the given kind for each name in a comma-separated list of
+ * declared names, the first of which begins with `c'.  After each name a
+ * bracketed range and an "= value" initializer are skipped if present.
+ * The character that ended the list is pushed back for the caller.
+ *
+ * Within an initializer, text enclosed in {}, () or [] is skipped as a
+ * unit, so that a comma inside a concatenation or an argument list is not
+ * mistaken for the separator between names.
+ */
+static void tagNameList (const verilogKind kind, int c)
+{
+	vString *name = vStringNew ();
+	boolean repeat;
+	Assert (isIdentifierCharacter (c));
+	do
+	{
+		repeat = FALSE;
+		if (isIdentifierCharacter (c))
+		{
+			readIdentifier (name, c);
+			makeSimpleTag (name, VerilogKinds, kind);
+		}
+		else
+			break;
+		c = skipWhite (vGetc ());
+		if (c == '[')
+			c = skipPastMatch ("[]");
+		c = skipWhite (c);
+		if (c == '=')
+		{
+			/* skip the initializer up to the ',' or ';' that ends it;
+			 * end of input is handled inside vGetc() */
+			c = skipWhite (vGetc ());
+			while (c != ',' && c != ';')
+			{
+				if (c == '{')
+					c = skipPastMatch ("{}");
+				else if (c == '(')
+					c = skipPastMatch ("()");
+				else if (c == '[')
+					c = skipPastMatch ("[]");
+				else
+					c = vGetc ();
+			}
+		}
+		if (c == ',')
+		{
+			c = skipWhite (vGetc ());
+			repeat = TRUE;
+		}
+		else
+			repeat = FALSE;
+	} while (repeat);
+	vStringDelete (name);
+	vUngetc (c);
+}
+
+/*
+ * Given the identifier that begins a statement, looks it up as a keyword
+ * and, if it is one of interest, makes tags for the name or names that
+ * follow it.  `name' is reused as scratch space for the `define case.
+ */
+static void findTag (vString *const name)
+{
+ const verilogKind kind = (verilogKind) lookupKeyword (vStringValue (name), Lang_verilog);
+ if (kind == K_CONSTANT && vStringItem (name, 0) == '`')
+ {
+ /* Bug #961001: Verilog compiler directives are line-based. */
+ int c = skipWhite (vGetc ());
+ readIdentifier (name, c); /* the name being defined */
+ makeSimpleTag (name, VerilogKinds, kind);
+ /* Skip the rest of the line. */
+ do {
+ c = vGetc();
+ } while (c != '\n');
+ vUngetc (c); /* leave the newline for the caller to see */
+ }
+ else if (kind != K_UNDEFINED)
+ {
+ int c = skipWhite (vGetc ());
+
+ /* Many keywords can have bit width.
+ * reg [3:0] net_name;
+ * inout [(`DBUSWIDTH-1):0] databus;
+ */
+ if (c == '(')
+ c = skipPastMatch ("()");
+ c = skipWhite (c);
+ if (c == '[')
+ c = skipPastMatch ("[]");
+ c = skipWhite (c);
+ if (c == '#') /* '#' optionally followed by a parenthesized group */
+ {
+ c = vGetc ();
+ if (c == '(')
+ c = skipPastMatch ("()");
+ }
+ c = skipWhite (c);
+ if (isIdentifierCharacter (c))
+ tagNameList (kind, c);
+ }
+}
+
+/*
+ * Parser entry point.  Characters are read one at a time; an identifier
+ * that begins a statement (the first thing after a ';' or a newline, or at
+ * the start of the file) is passed to findTag().  Reading ends when vGetc()
+ * leaves through longjmp() at end of input.
+ */
+static void findVerilogTags (void)
+{
+	vString *const ident = vStringNew ();
+	volatile boolean atStatementStart = TRUE;
+	volatile int ch = '\0';
+	exception_t status = (exception_t) setjmp (Exception);
+
+	if (status == ExceptionNone)
+	{
+		while (ch != EOF)
+		{
+			ch = vGetc ();
+			if (ch == ';' || ch == '\n')
+				atStatementStart = TRUE;
+			else if (ch != ' ' && ch != '\t')
+			{
+				if (atStatementStart && readIdentifier (ident, ch))
+					findTag (ident);
+				atStatementStart = FALSE;
+			}
+		}
+	}
+	vStringDelete (ident);
+}
+
+/* Builds the parser definition for Verilog (files with the "v" extension) */
+extern parserDefinition* VerilogParser (void)
+{
+	static const char *const extensions [] = { "v", NULL };
+	parserDefinition* parser = parserNew ("Verilog");
+
+	parser->extensions = extensions;
+	parser->kinds = VerilogKinds;
+	parser->kindCount = KIND_COUNT (VerilogKinds);
+	parser->initialize = initialize;
+	parser->parser = findVerilogTags;
+	return parser;
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/vhdl.c b/vhdl.c
new file mode 100644
index 0000000..994d2e1
--- /dev/null
+++ b/vhdl.c
@@ -0,0 +1,835 @@
+/*
+* $Id: vhdl.c 652 2008-04-18 03:51:47Z elliotth $
+*
+* Copyright (c) 2008, Nicolas Vincent
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* This module contains functions for generating tags for VHDL files.
+*/
+
+/*
+ * INCLUDE FILES
+ */
+#include "general.h" /* must always come first */
+
+#include <ctype.h> /* to define isalpha () */
+#include <string.h>
+#include <setjmp.h>
+
+#include "debug.h"
+#include "entry.h"
+#include "keyword.h"
+#include "parse.h"
+#include "read.h"
+#include "routines.h"
+#include "vstring.h"
+
+/*
+ * MACROS
+ */
+/* Test whether a token has the given token type / keyword id.  "token" is
+ * dereferenced with "->", so it must be a pointer to a structure that has
+ * "type" and "keyword" members; the result is cast to boolean.
+ */
+#define isType(token,t) (boolean) ((token)->type == (t))
+#define isKeyword(token,k) (boolean) ((token)->keyword == (k))
+
+/*
+ * DATA DECLARATIONS
+ */
+/* Values carried by longjmp() to the setjmp() in findVhdlTags(): readToken()
+ * raises ExceptionEOF at end of input; ExceptionNone is the value seen on
+ * the initial setjmp() return.
+ */
+typedef enum eException { ExceptionNone, ExceptionEOF } exception_t;
+
+/*
+ * Identifies a VHDL reserved word.  readToken() stores one of these in
+ * tokenInfo.keyword; KEYWORD_NONE marks a token that is not a keyword.
+ * The values are registered with the keyword table by initialize().
+ *
+ * NOTE(review): KEYWORD_SLI looks like it was meant to be KEYWORD_SLL (the
+ * VHDL shift operators are sll, srl, sla, sra, rol, ror) -- confirm.
+ */
+typedef enum eKeywordId {
+ KEYWORD_NONE = -1,
+ KEYWORD_ABS,
+ KEYWORD_ACCESS,
+ KEYWORD_AFTER,
+ KEYWORD_ALIAS,
+ KEYWORD_ALL,
+ KEYWORD_AND,
+ KEYWORD_ARCHITECTURE,
+ KEYWORD_ARRAY,
+ KEYWORD_ASSERT,
+ KEYWORD_ATTRIBUTE,
+ KEYWORD_BEGIN,
+ KEYWORD_BLOCK,
+ KEYWORD_BODY,
+ KEYWORD_BUFFER,
+ KEYWORD_BUS,
+ KEYWORD_CASE,
+ KEYWORD_COMPONENT,
+ KEYWORD_CONFIGURATION,
+ KEYWORD_CONSTANT,
+ KEYWORD_DISCONNECT,
+ KEYWORD_DOWNTO,
+ KEYWORD_ELSE,
+ KEYWORD_ELSIF,
+ KEYWORD_END,
+ KEYWORD_ENTITY,
+ KEYWORD_EXIT,
+ KEYWORD_FILE,
+ KEYWORD_FOR,
+ KEYWORD_FUNCTION,
+ KEYWORD_GENERATE,
+ KEYWORD_GENERIC,
+ KEYWORD_GROUP,
+ KEYWORD_GUARDED,
+ KEYWORD_IF,
+ KEYWORD_IMPURE,
+ KEYWORD_IN,
+ KEYWORD_INERTIAL,
+ KEYWORD_INOUT,
+ KEYWORD_IS,
+ KEYWORD_LABEL,
+ KEYWORD_LIBRARY,
+ KEYWORD_LINKAGE,
+ KEYWORD_LITERAL,
+ KEYWORD_LOOP,
+ KEYWORD_MAP,
+ KEYWORD_MOD,
+ KEYWORD_NAND,
+ KEYWORD_NEW,
+ KEYWORD_NEXT,
+ KEYWORD_NOR,
+ KEYWORD_NOT,
+ KEYWORD_NULL,
+ KEYWORD_OF,
+ KEYWORD_ON,
+ KEYWORD_OPEN,
+ KEYWORD_OR,
+ KEYWORD_OTHERS,
+ KEYWORD_OUT,
+ KEYWORD_PACKAGE,
+ KEYWORD_PORT,
+ KEYWORD_POSTPONED,
+ KEYWORD_PROCEDURE,
+ KEYWORD_PROCESS,
+ KEYWORD_PURE,
+ KEYWORD_RANGE,
+ KEYWORD_RECORD,
+ KEYWORD_REGISTER,
+ KEYWORD_REJECT,
+ KEYWORD_RETURN,
+ KEYWORD_ROL,
+ KEYWORD_ROR,
+ KEYWORD_SELECT,
+ KEYWORD_SEVERITY,
+ KEYWORD_SIGNAL,
+ KEYWORD_SHARED,
+ KEYWORD_SLA,
+ KEYWORD_SLI,
+ KEYWORD_SRA,
+ KEYWORD_SRL,
+ KEYWORD_SUBTYPE,
+ KEYWORD_THEN,
+ KEYWORD_TO,
+ KEYWORD_TRANSPORT,
+ KEYWORD_TYPE,
+ KEYWORD_UNAFFECTED,
+ KEYWORD_UNITS,
+ KEYWORD_UNTIL,
+ KEYWORD_USE,
+ KEYWORD_VARIABLE,
+ KEYWORD_WAIT,
+ KEYWORD_WHEN,
+ KEYWORD_WHILE,
+ KEYWORD_WITH,
+ KEYWORD_XNOR,
+ KEYWORD_XOR
+} keywordId;
+
+/* One row of the keyword table: the spelling of a reserved word and the
+ * keywordId that initialize() registers for it.
+ */
+typedef struct sKeywordDesc {
+ const char *name; /* keyword text as written in the table */
+ keywordId id; /* id associated with that text */
+} keywordDesc;
+
+/* Lexical class of a token, as assigned by readToken(). */
+typedef enum eTokenType {
+ TOKEN_NONE, /* none */
+ TOKEN_OPEN_PAREN, /* ( */
+ TOKEN_CLOSE_PAREN, /* ) */
+ TOKEN_COMMA, /* the comma character */
+ TOKEN_IDENTIFIER, /* a word that is not a registered keyword */
+ TOKEN_KEYWORD, /* a word found in the keyword table */
+ TOKEN_PERIOD, /* . */
+ TOKEN_OPERATOR, /* a '-' that does not start a "--" comment */
+ TOKEN_SEMICOLON, /* the semicolon character */
+ TOKEN_STRING /* text between double quotes */
+} tokenType;
+
+/* A lexical token together with the source location used when a tag is
+ * generated from it.  Created by newToken(), released by deleteToken().
+ */
+typedef struct sTokenInfo {
+ tokenType type; /* lexical class, see tokenType */
+ keywordId keyword; /* KEYWORD_NONE unless type is TOKEN_KEYWORD */
+ vString *string; /* the name of the token */
+ vString *scope; /* when non-empty, makeVhdlTag() prefixes the tag with it */
+ unsigned long lineNumber; /* line number of tag */
+ fpos_t filePosition; /* file position of line containing name */
+} tokenInfo;
+
+/*
+ * DATA DEFINITIONS
+ */
+/* Language id handed to initialize(); used for keyword lookups. */
+static int Lang_vhdl;
+/* Jump target set up in findVhdlTags(); readToken() jumps to it on EOF. */
+static jmp_buf Exception;
+
+/* Used to index into the VhdlKinds table, so the order of the enumerators
+ * (from VHDLTAG_CONSTANT on) must match the order of the rows in VhdlKinds.
+ */
+typedef enum {
+ VHDLTAG_UNDEFINED = -1,
+ VHDLTAG_CONSTANT,
+ VHDLTAG_TYPE,
+ VHDLTAG_SUBTYPE,
+ VHDLTAG_RECORD,
+ VHDLTAG_ENTITY,
+ VHDLTAG_COMPONENT,
+ VHDLTAG_PROTOTYPE,
+ VHDLTAG_FUNCTION,
+ VHDLTAG_PROCEDURE,
+ VHDLTAG_PACKAGE,
+ VHDLTAG_LOCAL
+} vhdlKind;
+
+/* Tag kinds produced by this parser, indexed by vhdlKind.  makeConstTag()
+ * reads the "enabled", "name" and "letter" members of a row; the last
+ * string is presumably the human-readable description (kindOption is
+ * declared elsewhere).  Rows starting with FALSE are disabled by default.
+ */
+static kindOption VhdlKinds[] = {
+ {TRUE, 'c', "constant", "constant declarations"},
+ {TRUE, 't', "type", "type definitions"},
+ {TRUE, 'T', "subtype", "subtype definitions"},
+ {TRUE, 'r', "record", "record names"},
+ {TRUE, 'e', "entity", "entity declarations"},
+ {FALSE, 'C', "component", "component declarations"},
+ {FALSE, 'd', "prototype", "prototypes"},
+ {TRUE, 'f', "function", "function prototypes and declarations"},
+ {TRUE, 'p', "procedure", "procedure prototypes and declarations"},
+ {TRUE, 'P', "package", "package definitions"},
+ {FALSE, 'l', "local", "local definitions"}
+};
+
+/* Spelling of every VHDL reserved word known to this parser, paired with
+ * its keywordId.  initialize() registers each row with the keyword table.
+ * The table is read-only, hence const.
+ */
+static const keywordDesc VhdlKeywordTable[] = {
+	{"abs", KEYWORD_ABS},
+	{"access", KEYWORD_ACCESS},
+	{"after", KEYWORD_AFTER},
+	{"alias", KEYWORD_ALIAS},
+	{"all", KEYWORD_ALL},
+	{"and", KEYWORD_AND},
+	{"architecture", KEYWORD_ARCHITECTURE},
+	{"array", KEYWORD_ARRAY},
+	{"assert", KEYWORD_ASSERT},
+	{"attribute", KEYWORD_ATTRIBUTE},
+	{"begin", KEYWORD_BEGIN},
+	{"block", KEYWORD_BLOCK},
+	{"body", KEYWORD_BODY},
+	{"buffer", KEYWORD_BUFFER},
+	{"bus", KEYWORD_BUS},
+	{"case", KEYWORD_CASE},
+	{"component", KEYWORD_COMPONENT},
+	{"configuration", KEYWORD_CONFIGURATION},
+	{"constant", KEYWORD_CONSTANT},
+	{"disconnect", KEYWORD_DISCONNECT},
+	{"downto", KEYWORD_DOWNTO},
+	{"else", KEYWORD_ELSE},
+	{"elsif", KEYWORD_ELSIF},
+	{"end", KEYWORD_END},
+	{"entity", KEYWORD_ENTITY},
+	{"exit", KEYWORD_EXIT},
+	{"file", KEYWORD_FILE},
+	{"for", KEYWORD_FOR},
+	{"function", KEYWORD_FUNCTION},
+	{"generate", KEYWORD_GENERATE},
+	{"generic", KEYWORD_GENERIC},
+	{"group", KEYWORD_GROUP},
+	{"guarded", KEYWORD_GUARDED},
+	{"if", KEYWORD_IF},
+	{"impure", KEYWORD_IMPURE},
+	{"in", KEYWORD_IN},
+	{"inertial", KEYWORD_INERTIAL},
+	{"inout", KEYWORD_INOUT},
+	{"is", KEYWORD_IS},
+	{"label", KEYWORD_LABEL},
+	{"library", KEYWORD_LIBRARY},
+	{"linkage", KEYWORD_LINKAGE},
+	{"literal", KEYWORD_LITERAL},
+	{"loop", KEYWORD_LOOP},
+	{"map", KEYWORD_MAP},
+	{"mod", KEYWORD_MOD},
+	{"nand", KEYWORD_NAND},
+	{"new", KEYWORD_NEW},
+	{"next", KEYWORD_NEXT},
+	{"nor", KEYWORD_NOR},
+	{"not", KEYWORD_NOT},
+	{"null", KEYWORD_NULL},
+	{"of", KEYWORD_OF},
+	{"on", KEYWORD_ON},
+	{"open", KEYWORD_OPEN},
+	{"or", KEYWORD_OR},
+	{"others", KEYWORD_OTHERS},
+	{"out", KEYWORD_OUT},
+	{"package", KEYWORD_PACKAGE},
+	{"port", KEYWORD_PORT},
+	{"postponed", KEYWORD_POSTPONED},
+	{"procedure", KEYWORD_PROCEDURE},
+	{"process", KEYWORD_PROCESS},
+	{"pure", KEYWORD_PURE},
+	{"range", KEYWORD_RANGE},
+	{"record", KEYWORD_RECORD},
+	{"register", KEYWORD_REGISTER},
+	{"reject", KEYWORD_REJECT},
+	{"return", KEYWORD_RETURN},
+	{"rol", KEYWORD_ROL},
+	{"ror", KEYWORD_ROR},
+	{"select", KEYWORD_SELECT},
+	{"severity", KEYWORD_SEVERITY},
+	{"signal", KEYWORD_SIGNAL},
+	{"shared", KEYWORD_SHARED},
+	{"sla", KEYWORD_SLA},
+	/* The reserved word is "sll" (shift left logical); "sli" is not VHDL.
+	 * The enumerator keeps its historical name.
+	 */
+	{"sll", KEYWORD_SLI},
+	{"sra", KEYWORD_SRA},
+	{"srl", KEYWORD_SRL},
+	{"subtype", KEYWORD_SUBTYPE},
+	{"then", KEYWORD_THEN},
+	{"to", KEYWORD_TO},
+	{"transport", KEYWORD_TRANSPORT},
+	{"type", KEYWORD_TYPE},
+	{"unaffected", KEYWORD_UNAFFECTED},
+	{"units", KEYWORD_UNITS},
+	{"until", KEYWORD_UNTIL},
+	{"use", KEYWORD_USE},
+	{"variable", KEYWORD_VARIABLE},
+	{"wait", KEYWORD_WAIT},
+	{"when", KEYWORD_WHEN},
+	{"while", KEYWORD_WHILE},
+	{"with", KEYWORD_WITH},
+	{"xnor", KEYWORD_XNOR},
+	{"xor", KEYWORD_XOR}
+};
+
+/*
+ * FUNCTION DECLARATIONS
+ */
+/* Forward declaration: parseSubProgram() calls parseKeywords(), which is
+ * defined after it and in turn dispatches back to parseSubProgram().
+ */
+static void parseKeywords (tokenInfo * const token, boolean local);
+
+/*
+ * FUNCTION DEFINITIONS
+ */
+
+/* TRUE when "c" may start an identifier: a letter or an underscore. */
+static boolean isIdentChar1 (const int c)
+{
+	const int startsIdentifier = (c == '_') || isalpha (c);
+
+	return (boolean) startsIdentifier;
+}
+
+/* TRUE when "c" may appear inside an identifier: a letter, a digit or an
+ * underscore.
+ */
+static boolean isIdentChar (const int c)
+{
+	/* isalnum() is true exactly when isalpha() or isdigit() is true */
+	const int partOfIdentifier = (c == '_') || isalnum (c);
+
+	return (boolean) partOfIdentifier;
+}
+
+/* TRUE when "token" is an identifier whose text equals "name", ignoring
+ * case.
+ *
+ * XXX copied from eiffel.c and slightly modified; the original author
+ * wondered whether a length-limited comparison should be used instead.
+ */
+static boolean isIdentifierMatch (const tokenInfo * const token,
+		const vString * const name)
+{
+	if (! isType (token, TOKEN_IDENTIFIER))
+		return FALSE;
+
+	return (boolean) (strcasecmp (vStringValue (token->string),
+								  vStringValue (name)) == 0);
+}
+
+/* TRUE when "token" is either the keyword "keyword" or an identifier that
+ * matches "name" (case-insensitively, see isIdentifierMatch()).
+ */
+static boolean isKeywordOrIdent (const tokenInfo * const token,
+		const keywordId keyword, const vString * const name)
+{
+	if (isKeyword (token, keyword))
+		return TRUE;
+
+	return isIdentifierMatch (token, name);
+}
+
+/* Allocate an empty token: no type, no keyword, empty name and scope
+ * strings, positioned at the current source line / input file position.
+ * The caller owns the result and releases it with deleteToken().
+ */
+static tokenInfo *newToken (void)
+{
+	tokenInfo *const fresh = xMalloc (1, tokenInfo);
+
+	fresh->string       = vStringNew ();
+	fresh->scope        = vStringNew ();
+	fresh->type         = TOKEN_NONE;
+	fresh->keyword      = KEYWORD_NONE;
+	fresh->lineNumber   = getSourceLineNumber ();
+	fresh->filePosition = getInputFilePosition ();
+
+	return fresh;
+}
+
+/* Release a token obtained from newToken(), including both of its strings.
+ * A NULL token is ignored.
+ */
+static void deleteToken (tokenInfo * const token)
+{
+	if (token == NULL)
+		return;
+
+	vStringDelete (token->string);
+	vStringDelete (token->scope);
+	eFree (token);
+}
+
+/*
+ * Parsing functions
+ */
+
+/* Read the remainder of a quoted string into "string".
+ *
+ * The opening delimiter has already been consumed by the caller.  Reading
+ * stops at the first unescaped "delimiter" (which is consumed but not
+ * stored) or at end of input.  A backslash makes the following character
+ * literal -- this maybe a ' or " -- and only that following character is
+ * stored.
+ *
+ * End of input directly after a backslash also terminates the string; EOF
+ * is never stored as if it were a character.
+ */
+static void parseString (vString * const string, const int delimiter)
+{
+	for (;;)
+	{
+		int c = fileGetc ();
+
+		if (c == '\\')
+			c = fileGetc ();	/* escaped: taken literally, even a delimiter */
+		else if (c == delimiter)
+			break;
+
+		if (c == EOF)
+			break;
+
+		vStringPut (string, c);
+	}
+	vStringTerminate (string);
+}
+
+/* Read a VHDL identifier beginning with "firstChar" and append it to
+ * "string".
+ *
+ * Characters are consumed while isIdentChar() accepts them.  The character
+ * that ends the identifier is pushed back onto the input unless it is
+ * white space, in which case it is dropped.
+ */
+static void parseIdentifier (vString * const string, const int firstChar)
+{
+	int next;
+
+	Assert (isIdentChar1 (firstChar));
+
+	vStringPut (string, firstChar);
+	for (next = fileGetc () ; isIdentChar (next) ; next = fileGetc ())
+		vStringPut (string, next);
+	vStringTerminate (string);
+
+	if (!isspace (next))
+		fileUngetc (next);	/* unget non-identifier character */
+}
+
+/* Read the next lexical token from the input into "token".
+ *
+ * The token's type, keyword and string are reset first; its line number and
+ * file position are refreshed while reading.  Blanks, tabs, newlines and
+ * "--" comments are skipped.  At end of input this function does not
+ * return: it longjmp()s to Exception with ExceptionEOF.
+ *
+ * Characters that are not handled explicitly and cannot start an identifier
+ * yield a token of type TOKEN_NONE.
+ */
+static void readToken (tokenInfo * const token)
+{
+ int c;
+
+ token->type = TOKEN_NONE;
+ token->keyword = KEYWORD_NONE;
+ vStringClear (token->string);
+
+ getNextChar:
+ /* skip white space, recording the position of each character read */
+ do
+ {
+ c = fileGetc ();
+ token->lineNumber = getSourceLineNumber ();
+ token->filePosition = getInputFilePosition ();
+ }
+ while (c == '\t' || c == ' ' || c == '\n');
+
+ switch (c)
+ {
+ case EOF:
+ /* unwinds every active parse function back to findVhdlTags() */
+ longjmp (Exception, (int) ExceptionEOF);
+ break;
+ case '(':
+ token->type = TOKEN_OPEN_PAREN;
+ break;
+ case ')':
+ token->type = TOKEN_CLOSE_PAREN;
+ break;
+ case ';':
+ token->type = TOKEN_SEMICOLON;
+ break;
+ case '.':
+ token->type = TOKEN_PERIOD;
+ break;
+ case ',':
+ token->type = TOKEN_COMMA;
+ break;
+ case '\'': /* only single char are inside simple quotes */
+ break; /* or it is for attributes so we don't care */
+ case '"':
+ token->type = TOKEN_STRING;
+ parseString (token->string, c);
+ /* position is taken after the string has been consumed */
+ token->lineNumber = getSourceLineNumber ();
+ token->filePosition = getInputFilePosition ();
+ break;
+ case '-':
+ c = fileGetc ();
+ if (c == '-') /* start of a comment */
+ {
+ fileSkipToCharacter ('\n');
+ goto getNextChar;
+ }
+ else
+ {
+ /* a lone '-': keep the following character unless it is white space */
+ if (!isspace (c))
+ fileUngetc (c);
+ token->type = TOKEN_OPERATOR;
+ }
+ break;
+ default:
+ if (!isIdentChar1 (c))
+ token->type = TOKEN_NONE;
+ else
+ {
+ parseIdentifier (token->string, c);
+ token->lineNumber = getSourceLineNumber ();
+ token->filePosition = getInputFilePosition ();
+ /* a word is a keyword when the keyword table knows it */
+ token->keyword = analyzeToken (token->string, Lang_vhdl);
+ if (isKeyword (token, KEYWORD_NONE))
+ token->type = TOKEN_IDENTIFIER;
+ else
+ token->type = TOKEN_KEYWORD;
+ }
+ break;
+ }
+}
+
+/* Consume tokens up to and including the next occurrence of "keyword".
+ *
+ * Note: if the input ends first, readToken() longjmp()s out of this
+ * function and the scratch token is not released.
+ */
+static void skipToKeyword (const keywordId keyword)
+{
+	tokenInfo *const scratch = newToken ();
+
+	for (;;)
+	{
+		readToken (scratch);
+		if (isKeyword (scratch, keyword))
+			break;
+	}
+	deleteToken (scratch);
+}
+
+/* Skip a parenthesised group.
+ *
+ * When "token" is an opening parenthesis, tokens are read until the
+ * matching closing parenthesis has been seen, taking nesting into account
+ * as in
+ *     ( name varchar(30), text binary(10) )
+ * and then one more token is read, so that on return "token" holds the
+ * first token after the group.  Any other token is left untouched.
+ */
+static void skipToMatched (tokenInfo * const token)
+{
+	int depth;
+
+	if (! isType (token, TOKEN_OPEN_PAREN))
+		return;
+
+	/* "depth" counts the parentheses that are still open */
+	for (depth = 1 ; depth > 0 ; )
+	{
+		readToken (token);
+		if (isType (token, TOKEN_OPEN_PAREN))
+			++depth;
+		else if (isType (token, TOKEN_CLOSE_PAREN))
+			--depth;
+	}
+
+	/* step past the closing parenthesis */
+	readToken (token);
+}
+
+/* Emit a tag of kind "kind" named after the token's string, located at the
+ * line number and file position recorded in the token.  Nothing is emitted
+ * when the kind is disabled.
+ */
+static void makeConstTag (tokenInfo * const token, const vhdlKind kind)
+{
+	tagEntryInfo entry;
+
+	if (! VhdlKinds[kind].enabled)
+		return;
+
+	initTagEntry (&entry, vStringValue (token->string));
+	entry.kind         = VhdlKinds[kind].letter;
+	entry.kindName     = VhdlKinds[kind].name;
+	entry.lineNumber   = token->lineNumber;
+	entry.filePosition = token->filePosition;
+	makeTagEntry (&entry);
+}
+
+/* Emit a tag for "token" unless "kind" is disabled.
+ *
+ * If a scope has been attached to the token, the token's string is first
+ * rewritten in place to "<scope>.<name>", so the tag -- and the token from
+ * then on -- carries the qualified name.
+ */
+static void makeVhdlTag (tokenInfo * const token, const vhdlKind kind)
+{
+	if (! VhdlKinds[kind].enabled)
+		return;
+
+	if (vStringLength (token->scope) > 0)
+	{
+		vString *qualified = vStringNew ();
+
+		vStringCopy (qualified, token->scope);
+		vStringCatS (qualified, ".");
+		vStringCatS (qualified, vStringValue (token->string));
+		vStringTerminate (qualified);
+
+		vStringCopy (token->string, qualified);
+		vStringDelete (qualified);
+	}
+
+	makeConstTag (token, kind);
+}
+
+/* One-time parser setup: remember the language id and register every row
+ * of VhdlKeywordTable as a keyword of that language.
+ */
+static void initialize (const langType language)
+{
+	const keywordDesc *entry = VhdlKeywordTable;
+	const keywordDesc *const end =
+		entry + sizeof (VhdlKeywordTable) / sizeof (VhdlKeywordTable[0]);
+
+	Lang_vhdl = language;
+
+	for ( ; entry != end ; ++entry)
+		addKeyword (entry->name, language, (int) entry->id);
+}
+
+/* Handle "package NAME" and "package body NAME": tag NAME as a package.
+ *
+ * On entry "token" is the "package" keyword.  For a package body the name
+ * is read into a separate token; otherwise the token following "package"
+ * is tagged if it is an identifier.
+ */
+static void parsePackage (tokenInfo * const token)
+{
+	tokenInfo *const bodyName = newToken ();
+	tokenInfo *tagged = NULL;
+
+	Assert (isKeyword (token, KEYWORD_PACKAGE));
+
+	readToken (token);
+	if (isKeyword (token, KEYWORD_BODY))
+	{
+		readToken (bodyName);
+		tagged = bodyName;
+	}
+	else if (isType (token, TOKEN_IDENTIFIER))
+		tagged = token;
+
+	if (tagged != NULL)
+		makeVhdlTag (tagged, VHDLTAG_PACKAGE);
+
+	deleteToken (bodyName);
+}
+
+/* Handle "entity NAME is ... end ...;" and "component NAME ... end ...;".
+ *
+ * On entry "token" is the "entity" or "component" keyword.  A component is
+ * always tagged; an entity only when its name is followed by "is".  After
+ * tagging, input is skipped up to the next "end" keyword and then to the
+ * following ';'.
+ */
+static void parseModule (tokenInfo * const token)
+{
+	tokenInfo *const name = newToken ();
+	const vhdlKind kind = isKeyword (token, KEYWORD_ENTITY) ?
+		VHDLTAG_ENTITY : VHDLTAG_COMPONENT;
+	boolean isDeclaration = TRUE;
+
+	Assert (isKeyword (token, KEYWORD_ENTITY) ||
+			isKeyword (token, KEYWORD_COMPONENT));
+
+	readToken (name);
+	if (kind == VHDLTAG_ENTITY)
+	{
+		/* an entity declaration requires "is" right after the name */
+		readToken (token);
+		isDeclaration = isKeyword (token, KEYWORD_IS);
+	}
+
+	if (isDeclaration)
+	{
+		makeVhdlTag (name, kind);
+		skipToKeyword (KEYWORD_END);
+		fileSkipToCharacter (';');
+	}
+	deleteToken (name);
+}
+
+/* Tag the fields of a record type.
+ *
+ * On entry "token" is the "record" keyword.  Each field is expected to look
+ * like "NAME : ... ;": the name is read, the next token (normally the
+ * colon) is consumed, the rest of the field is skipped up to ';' and NAME
+ * is tagged as a record member.  The loop ends when the token read in name
+ * position is "end"; the remainder of "end record ...;" is then skipped.
+ */
+static void parseRecord (tokenInfo * const token)
+{
+	tokenInfo *const field = newToken ();
+
+	Assert (isKeyword (token, KEYWORD_RECORD));
+
+	readToken (field);
+	for (;;)
+	{
+		readToken (token);	/* should be a colon */
+		fileSkipToCharacter (';');
+		makeVhdlTag (field, VHDLTAG_RECORD);
+
+		readToken (field);
+		if (isKeyword (field, KEYWORD_END))
+			break;
+	}
+	fileSkipToCharacter (';');
+	deleteToken (field);
+}
+
+/* Handle "type NAME is ..." and "subtype NAME is ...".
+ *
+ * On entry "token" is the "type" or "subtype" keyword.  NAME is tagged only
+ * when it is followed by "is"; if the definition is a record, its fields
+ * are tagged as well by parseRecord().
+ */
+static void parseTypes (tokenInfo * const token)
+{
+	tokenInfo *const name = newToken ();
+	const vhdlKind kind = isKeyword (token, KEYWORD_TYPE) ?
+		VHDLTAG_TYPE : VHDLTAG_SUBTYPE;
+
+	Assert (isKeyword (token, KEYWORD_TYPE) ||
+			isKeyword (token, KEYWORD_SUBTYPE));
+
+	readToken (name);
+	readToken (token);
+	if (isKeyword (token, KEYWORD_IS))
+	{
+		readToken (token);	/* first token of the type definition */
+		makeVhdlTag (name, kind);
+		if (isKeyword (token, KEYWORD_RECORD))
+			parseRecord (token);
+	}
+	deleteToken (name);
+}
+
+/* Handle "constant NAME ... ;": tag NAME and skip to the end of the
+ * declaration.  "local" selects the "local" kind (used for constants found
+ * inside a subprogram body) instead of the "constant" kind.
+ */
+static void parseConstant (boolean local)
+{
+	tokenInfo *const name = newToken ();
+
+	readToken (name);
+	makeVhdlTag (name, local ? VHDLTAG_LOCAL : VHDLTAG_CONSTANT);
+	fileSkipToCharacter (';');
+	deleteToken (name);
+}
+
+/* Handle a function or procedure declaration or body.
+ *
+ * On entry "token" is the "function" or "procedure" keyword.  After the
+ * name, an optional parenthesised parameter list is skipped; for functions
+ * the "return <type>" clause is skipped as well.  Then:
+ *  - ';' means this was only a declaration: the name is tagged as a
+ *    prototype;
+ *  - "is" starts a body: the name is tagged as a function/procedure and the
+ *    body is scanned with parseKeywords() in "local" mode until an "end"
+ *    that is followed either by the matching keyword ("function" or
+ *    "procedure") or by the subprogram's own name.  Every other "end ...;"
+ *    inside the body is skipped.
+ */
+static void parseSubProgram (tokenInfo * const token)
+{
+	tokenInfo *const name = newToken ();
+	const boolean isFunction = isKeyword (token, KEYWORD_FUNCTION);
+	const vhdlKind kind = isFunction ? VHDLTAG_FUNCTION : VHDLTAG_PROCEDURE;
+	/* keyword that may follow the "end" closing this subprogram */
+	const keywordId closer = isFunction ? KEYWORD_FUNCTION : KEYWORD_PROCEDURE;
+
+	Assert (isKeyword (token, KEYWORD_FUNCTION) ||
+			isKeyword (token, KEYWORD_PROCEDURE));
+
+	readToken (name);	/* the name of the function or procedure */
+	readToken (token);
+	if (isType (token, TOKEN_OPEN_PAREN))
+		skipToMatched (token);
+
+	if (isFunction && isKeyword (token, KEYWORD_RETURN))
+	{
+		/* Read datatype: everything up to "is" or ';' */
+		do
+			readToken (token);
+		while (! isKeyword (token, KEYWORD_IS) &&
+			   ! isType (token, TOKEN_SEMICOLON));
+	}
+
+	if (isType (token, TOKEN_SEMICOLON))
+		makeVhdlTag (name, VHDLTAG_PROTOTYPE);
+	else if (isKeyword (token, KEYWORD_IS))
+	{
+		boolean bodyClosed = FALSE;
+
+		makeVhdlTag (name, kind);
+		while (! bodyClosed)
+		{
+			readToken (token);
+			if (! isKeyword (token, KEYWORD_END))
+			{
+				parseKeywords (token, TRUE);
+				continue;
+			}
+			readToken (token);
+			bodyClosed = isKeywordOrIdent (token, closer, name->string);
+			fileSkipToCharacter (';');
+		}
+	}
+	deleteToken (name);
+}
+
+/* TODO */
+/* records */
+/* Dispatch on the keyword held by "token" to the routine that parses the
+ * construct it introduces.  Tokens with any other keyword (or none) are
+ * ignored.  "local" is forwarded to parseConstant() and tells it that the
+ * constant belongs to a subprogram body.
+ */
+static void parseKeywords (tokenInfo * const token, boolean local)
+{
+	switch (token->keyword)
+	{
+		case KEYWORD_END:
+			fileSkipToCharacter (';');
+			break;
+
+		case KEYWORD_CONSTANT:
+			parseConstant (local);
+			break;
+
+		case KEYWORD_TYPE:
+		case KEYWORD_SUBTYPE:
+			parseTypes (token);
+			break;
+
+		case KEYWORD_ENTITY:
+		case KEYWORD_COMPONENT:
+			parseModule (token);
+			break;
+
+		case KEYWORD_FUNCTION:
+		case KEYWORD_PROCEDURE:
+			parseSubProgram (token);
+			break;
+
+		case KEYWORD_PACKAGE:
+			parsePackage (token);
+			break;
+
+		default:
+			break;
+	}
+}
+
+/* Read tokens and dispatch them through parseKeywords() (top level, so not
+ * "local") until the token left in "token" afterwards is the "end" keyword.
+ */
+static void parseVhdlFile (tokenInfo * const token)
+{
+	for (;;)
+	{
+		readToken (token);
+		parseKeywords (token, FALSE);
+		if (isKeyword (token, KEYWORD_END))
+			break;
+	}
+}
+
+/* Parser entry point for VHDL.
+ *
+ * parseVhdlFile() is called over and over; the only way out of the loop is
+ * the longjmp() that readToken() performs at end of input, after which the
+ * setjmp() comparison below fails and the token is released.
+ */
+static void findVhdlTags (void)
+{
+	tokenInfo *const token = newToken ();
+
+	/* setjmp() is compared directly inside the controlling expression: the C
+	 * standard only defines its behaviour in such contexts, not when its
+	 * result is stored in a variable.
+	 */
+	if (setjmp (Exception) == ExceptionNone)
+	{
+		for (;;)
+			parseVhdlFile (token);
+	}
+
+	deleteToken (token);
+}
+
+/* Build the parser definition for VHDL: files with the "vhdl" or "vhd"
+ * extension, the VhdlKinds tag kinds, findVhdlTags() as the parser and
+ * initialize() as the one-time setup hook.
+ */
+extern parserDefinition *VhdlParser (void)
+{
+	static const char *const extensions[] = { "vhdl", "vhd", NULL };
+	parserDefinition *const parser = parserNew ("VHDL");
+
+	parser->extensions = extensions;
+	parser->kinds      = VhdlKinds;
+	parser->kindCount  = KIND_COUNT (VhdlKinds);
+	parser->initialize = initialize;
+	parser->parser     = findVhdlTags;
+
+	return parser;
+}
+
+/* vi:set tabstop=4 shiftwidth=4 noet: */
diff --git a/vim.c b/vim.c
new file mode 100644
index 0000000..951ee5f
--- /dev/null
+++ b/vim.c
@@ -0,0 +1,636 @@
+/*
+* $Id: vim.c 485 2006-10-24 12:06:19Z dfishburn $
+*
+* Copyright (c) 2000-2003, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* Thanks are due to Jay Glanville for significant improvements.
+*
+* This module contains functions for generating tags for user-defined
+* functions for the Vim editor.
+*/
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#include <string.h>
+#include <setjmp.h>
+#ifdef DEBUG
+#include <stdio.h>
+#endif
+
+
+#include "parse.h"
+#include "read.h"
+#include "vstring.h"
+
+#if 0
+typedef struct sLineInfo {
+ tokenType type;
+ keywordId keyword;
+ vString * string;
+ vString * scope;
+ unsigned long lineNumber;
+ fpos_t filePosition;
+} lineInfo;
+#endif
+
+/*
+* DATA DEFINITIONS
+*/
+/* Index into the VimKinds table; the order of the enumerators must match
+ * the order of the rows there.
+ */
+typedef enum {
+ K_AUGROUP,
+ K_COMMAND,
+ K_FUNCTION,
+ K_MAP,
+ K_VARIABLE
+} vimKind;
+
+/* Tag kinds produced by this parser, indexed by vimKind.  All kinds are
+ * listed with TRUE as their first member (presumably "enabled by default";
+ * kindOption is declared elsewhere).
+ */
+static kindOption VimKinds [] = {
+ { TRUE, 'a', "augroup", "autocommand groups" },
+ { TRUE, 'c', "command", "user-defined commands" },
+ { TRUE, 'f', "function", "function definitions" },
+ { TRUE, 'm', "map", "maps" },
+ { TRUE, 'v', "variable", "variable definitions" },
+};
+
+/*
+ * DATA DECLARATIONS
+ */
+
+#if 0
+typedef enum eException {
+ ExceptionNone, ExceptionEOF
+} exception_t;
+#endif
+
+/*
+ * DATA DEFINITIONS
+ */
+
+#if 0
+static jmp_buf Exception;
+#endif
+
+/*
+ * FUNCTION DEFINITIONS
+ */
+
+/* Classify the scope of a Vim function name and skip its scope prefix.
+ *
+ * name   start of the function name (NUL-terminated rest of the line)
+ * scope  optional output, may be NULL; receives
+ *          - the prefix letter for "x:" style prefixes (e.g. 's' for "s:"),
+ *          - '<' for a "<SID>" prefix (compared case-insensitively),
+ *          - 'd' when the name contains a '.' (dictionary function),
+ *          - 'a' when the name contains a '#' (autoload function); if both
+ *            occur, the one seen last wins,
+ *          - '\0' otherwise.
+ *
+ * Returns a pointer just past the "x:" or "<SID>" prefix, or "name" itself
+ * when there is no such prefix.
+ */
+static const unsigned char* skipPrefix (const unsigned char* name, int *scope)
+{
+	const unsigned char* result = name;
+	const size_t length = strlen ((const char*) name);
+	int found = '\0';
+
+	if (length > 3 && name[1] == ':')
+	{
+		found = *name;
+		result = name + 2;
+	}
+	else if (length > 5 && strncasecmp ((const char*) name, "<SID>", (size_t) 5) == 0)
+	{
+		found = *name;
+		result = name + 5;
+	}
+	else if (length > 0)	/* nothing to scan in an empty name */
+	{
+		/*
+		 * Vim7 check for dictionaries or autoload function names
+		 */
+		size_t i = 0;
+		do
+		{
+			if (name[i] == '.')
+				found = 'd';	/* Dictionary */
+			else if (name[i] == '#')
+				found = 'a';	/* autoload */
+			++i;
+		} while (isalnum ((int) name[i]) ||
+				 name[i] == '_' ||
+				 name[i] == '.' ||
+				 name[i] == '#');
+	}
+
+	/* single, NULL-safe store: "scope" is optional in every branch */
+	if (scope != NULL)
+		*scope = found;
+	return result;
+}
+
+/* Return TRUE when "line" starts with the command "full", or with an
+ * abbreviation of it at least "minLength" characters long, and the command
+ * word ends right there (the next character is not alphanumeric or '_').
+ */
+static boolean isMapCommandWord (const unsigned char *line,
+		const char *full, const size_t minLength)
+{
+	size_t matched = 0;
+
+	/* stops at the end of either string, so "line" is never overrun */
+	while (full [matched] != '\0' &&
+		   line [matched] == (unsigned char) full [matched])
+		++matched;
+
+	if (matched < minLength)
+		return FALSE;
+
+	return (boolean) !(isalnum ((int) line [matched]) || line [matched] == '_');
+}
+
+/* Return TRUE when "line" starts with one of Vim's map commands.
+ *
+ * There are many different short cuts for specifying a map; each command
+ * is accepted in its full form and in every abbreviation down to the
+ * minimum length listed in the table.  The command must be a whole word:
+ * lines such as "normal ...", "nohlsearch", "nmenu ..." or "mapclear" share
+ * a prefix with a map command but are not maps and are rejected.
+ */
+static boolean isMap (const unsigned char* line)
+{
+	static const struct {
+		const char *full;	/* complete command name */
+		size_t minLength;	/* shortest accepted abbreviation */
+	} commands [] = {
+		{ "map",      3 },
+		{ "nmap",     2 },
+		{ "vmap",     2 },
+		{ "omap",     2 },
+		{ "imap",     2 },
+		{ "lmap",     2 },
+		{ "cmap",     2 },
+		{ "noremap",  2 },
+		{ "nnoremap", 3 },
+		{ "vnoremap", 3 },
+		{ "onoremap", 3 },
+		{ "inoremap", 3 },
+		{ "lnoremap", 3 },
+		{ "cnoremap", 3 }
+	};
+	size_t i;
+
+	for (i = 0 ; i < sizeof (commands) / sizeof (commands [0]) ; ++i)
+	{
+		if (isMapCommandWord (line, commands [i].full, commands [i].minLength))
+			return TRUE;
+	}
+
+	return FALSE;
+}
+
+/* Return the next line of the input that is not a comment, with leading
+ * white space skipped, or NULL when the input is exhausted.  A comment line
+ * is one whose first non-blank character is a double quote.
+ */
+static const unsigned char * readVimLine (void)
+{
+	const unsigned char *text;
+
+	for (text = fileReadLine () ; text != NULL ; text = fileReadLine ())
+	{
+		while (isspace ((int) *text))
+			++text;
+
+		if ((int) *text != '"')
+			break;		/* not a comment: this is the line we want */
+	}
+
+	return text;
+}
+
+/* Handle a line that starts with "fu": tag the function it defines and
+ * consume the function body.
+ *
+ * The keyword may be abbreviated anywhere between "fu" and "function" and
+ * may be followed by '!'.  The name is tagged when it starts with an upper
+ * case letter or when skipPrefix() reports a script ('s', '<'), dictionary
+ * ('d') or autoload ('a') scope.
+ *
+ * Afterwards lines are read and discarded up to and including the first
+ * one starting with "endf" (but not "endfo"), or until end of input.
+ */
+static void parseFunction (const unsigned char *line)
+{
+	vString *name = vStringNew ();
+	/* boolean inFunction = FALSE; */
+	const char *rest = "nction";		/* what may follow the leading "fu" */
+	const unsigned char *cp = line + 2;
+	int scope;
+
+	/* step over as much of the keyword as is actually spelled out */
+	while (*rest != '\0' && (int) *cp == *rest)
+	{
+		++cp;
+		++rest;
+	}
+	if ((int) *cp == '!')
+		++cp;
+
+	if (isspace ((int) *cp))
+	{
+		while (*cp && isspace ((int) *cp))
+			++cp;
+
+		if (*cp)
+		{
+			boolean taggable;
+
+			cp = skipPrefix (cp, &scope);
+			taggable = (boolean) (isupper ((int) *cp) != 0);
+			switch (scope)
+			{
+				case 's':	/* script scope */
+				case '<':	/* script scope */
+				case 'd':	/* dictionary */
+				case 'a':	/* autoload */
+					taggable = TRUE;
+					break;
+				default:
+					break;
+			}
+
+			if (taggable)
+			{
+				do
+				{
+					vStringPut (name, (int) *cp);
+					++cp;
+				} while (isalnum ((int) *cp) || *cp == '_' || *cp == '.' || *cp == '#');
+				vStringTerminate (name);
+				makeSimpleTag (name, VimKinds, K_FUNCTION);
+				vStringClear (name);
+			}
+		}
+	}
+
+	/* TODO - update struct to indicate inside function */
+	for (line = readVimLine () ; line != NULL ; line = readVimLine ())
+	{
+		/*
+		 * Vim7 added the for/endfo[r] construct, so a line starting with
+		 * "endfo" must not be mistaken for "endf[unction]"
+		 */
+		if (strncmp ((const char*) line, "endfo", (size_t) 5) != 0 &&
+			strncmp ((const char*) line, "endf", (size_t) 4) == 0)
+			break;
+		/* TODO - call parseVimLine */
+	}
+	vStringDelete (name);
+}
+
+/* Handle a line that starts with "aug": tag the autocommand group it
+ * opens.
+ *
+ * The keyword may be abbreviated anywhere between "aug" and "augroup".  The
+ * group name is the following run of alphanumerics/underscores; a name that
+ * starts with "end" (any case), which closes a group, is not tagged.
+ */
+static void parseAutogroup (const unsigned char *line)
+{
+	vString *name = vStringNew ();
+	const char *rest = "roup";			/* what may follow the leading "aug" */
+	const unsigned char *cp = line + 3;
+
+	/* Found Autocommand Group (augroup) */
+	while (*rest != '\0' && (int) *cp == *rest)
+	{
+		++cp;
+		++rest;
+	}
+
+	if (isspace ((int) *cp))
+	{
+		while (*cp && isspace ((int) *cp))
+			++cp;
+
+		if (*cp != '\0' &&
+			strncasecmp ((const char*) cp, "end", (size_t) 3) != 0)
+		{
+			do
+			{
+				vStringPut (name, (int) *cp);
+				++cp;
+			} while (isalnum ((int) *cp) || *cp == '_');
+			vStringTerminate (name);
+			makeSimpleTag (name, VimKinds, K_AUGROUP);
+			vStringClear (name);
+		}
+	}
+	vStringDelete (name);
+}
+
+/* Handle a user-defined command definition and tag the command name.
+ *
+ * "line" either starts with "com" (but not "comp" or "comc"), or -- when
+ * this function calls itself for a continuation line -- with a backslash.
+ *
+ * Returns FALSE only when "line" is neither of those, i.e. it is not part
+ * of a command definition; TRUE otherwise, whether or not a tag was made.
+ */
+static boolean parseCommand (const unsigned char *line)
+{
+	vString *name = vStringNew ();
+	boolean cmdProcessed = TRUE;
+
+	/*
+	 * Found a user-defined command
+	 *
+	 * They can have many options preceded by a dash
+	 * command! -nargs=+ -complete Select :call s:DB_execSql("select " . <q-args>)
+	 * The name of the command should be the first word not preceded by a dash
+	 *
+	 */
+	const unsigned char *cp = line;
+
+	if ( (int) *cp == '\\' )
+	{
+		/*
+		 * We are recursively calling this function if the command
+		 * has been continued on to the next line
+		 *
+		 * Vim statements can be continued onto a newline using a \
+		 * to indicate the previous line is continuing.
+		 *
+		 * com -nargs=1 -bang -complete=customlist,EditFileComplete
+		 * 			\ EditFile edit<bang> <args>
+		 *
+		 * If the following lines do not have a line continuation
+		 * the command must not be spanning multiple lines and should
+		 * be syntactically incorrect.
+		 */
+		++cp;
+
+		while (*cp && isspace ((int) *cp))
+			++cp;
+	}
+	else if ( strncmp ((const char*) line, "comp", (size_t) 4) != 0 &&
+			  strncmp ((const char*) line, "comc", (size_t) 4) != 0 &&
+			  strncmp ((const char*) line, "com", (size_t) 3) == 0 )
+	{
+		/* step over "com" and as much of "mand" as is spelled out */
+		cp += 2;
+		if ((int) *++cp == 'm' && (int) *++cp == 'a' &&
+			(int) *++cp == 'n' && (int) *++cp == 'd')
+			++cp;
+
+		if ((int) *cp == '!')
+			++cp;
+
+		while (*cp && isspace ((int) *cp))
+			++cp;
+	}
+	else
+	{
+		/*
+		 * We are recursively calling this function.  If it does not start
+		 * with "com" or a line continuation character, we have moved off
+		 * the command line and should let the other routines parse this file.
+		 */
+		cmdProcessed = FALSE;
+		goto cleanUp;
+	}
+
+	/*
+	 * Strip off any spaces and options which are part of the command.
+	 * These should precede the command name.
+	 */
+	do
+	{
+		if (isspace ((int) *cp))
+		{
+			++cp;
+		}
+		else if (*cp == '-')
+		{
+			/*
+			 * Read until the next space which separates options or the name
+			 */
+			while (*cp && !isspace ((int) *cp))
+				++cp;
+		}
+		else if (*cp != '\0' && !isalnum ((int) *cp))
+		{
+			/*
+			 * Neither white space, an option nor the start of a name:
+			 * broken syntax.  Give up on this line; without this branch
+			 * the loop would never advance and spin forever.
+			 */
+			goto cleanUp;
+		}
+	} while ( *cp && !isalnum ((int) *cp) );
+
+	if ( ! *cp )
+	{
+		/*
+		 * We have reached the end of the line without finding the command name.
+		 * Read the next line and continue processing it as a command.
+		 * At end of input there is no next line and nothing left to tag.
+		 */
+		line = readVimLine();
+		if (line != NULL)
+			parseCommand(line);
+		goto cleanUp;
+	}
+
+	do
+	{
+		vStringPut (name, (int) *cp);
+		++cp;
+	} while (isalnum ((int) *cp) || *cp == '_');
+
+	vStringTerminate (name);
+	makeSimpleTag (name, VimKinds, K_COMMAND);
+	vStringClear (name);
+
+cleanUp:
+	vStringDelete (name);
+
+	return cmdProcessed;
+}
+
+/* Handle a line that starts with "let": tag the variable being assigned.
+ *
+ * Nothing is tagged when "let" is not followed by white space, or when the
+ * target is
+ *   &...   a Vim option
+ *   @...   a register
+ *   [...   a list or dictionary
+ *   v:...  a read-only Vim variable
+ * The name starts at the first alphanumeric character or '$' and extends
+ * over alphanumerics, '_', '#', ':' and '$'.
+ */
+static void parseLet (const unsigned char *line)
+{
+	vString *name = vStringNew ();
+
+	/* we've found a variable declared outside of a function!! */
+	const unsigned char *cp = line + 3;
+
+	if (! isspace ((int) *cp))
+		goto done;
+
+	while (*cp && isspace ((int) *cp))
+		++cp;
+
+	switch (*cp)
+	{
+		case '\0':	/* nothing after "let" */
+		case '&':	/* local buffer vim settings */
+		case '@':	/* registers */
+		case '[':	/* Lists or Dictionaries */
+			goto done;
+		default:
+			break;
+	}
+
+	/* v: - Vim variables, which are read only */
+	if ((int) cp [0] == 'v' && (int) cp [1] == ':')
+		goto done;
+
+	/* advance to the first character that can start the name */
+	while (*cp && *cp != '$' && !isalnum ((int) *cp))
+		++cp;
+
+	if (*cp == '\0')
+		goto done;
+
+	/* cp = skipPrefix (cp, &scope); */
+	vStringPut (name, (int) *cp);
+	for (++cp ;
+		 isalnum ((int) *cp) || *cp == '_' || *cp == '#' || *cp == ':' || *cp == '$' ;
+		 ++cp)
+		vStringPut (name, (int) *cp);
+
+	vStringTerminate (name);
+	makeSimpleTag (name, VimKinds, K_VARIABLE);
+	vStringClear (name);
+
+done:
+	vStringDelete (name);
+}
+
+/* Handle a map command: tag the mapped key sequence ({lhs}).
+ *
+ * The command word and an optional '!' are skipped, then any of the special
+ * arguments listed below.  What follows, up to the next space, is tagged.
+ * A map command with nothing after it (for example a bare "map", which
+ * only lists mappings) produces no tag.
+ *
+ * Always returns TRUE.
+ */
+static boolean parseMap (const unsigned char *line)
+{
+	vString *name = vStringNew ();
+
+	const unsigned char *cp = line;
+
+	/* Remove map */
+	while (*cp && isalnum ((int) *cp))
+		++cp;
+
+	if ((int) *cp == '!')
+		++cp;
+
+	/*
+	 * Maps follow this basic format
+	 *     map
+	 *     nnoremap <silent> <F8> :Tlist<CR>
+	 *     map <unique> <Leader>scdt <Plug>GetColumnDataType
+	 *     inoremap ,,, <esc>diwi<<esc>pa><cr></<esc>pa><esc>kA
+	 *     inoremap <buffer> ( <C-R>=PreviewFunctionSignature()<LF>
+	 *
+	 * The Vim help shows the various special arguments available to a map:
+	 * 1.2 SPECIAL ARGUMENTS                    *:map-arguments*
+	 *     <buffer>
+	 *     <silent>
+	 *     <script>
+	 *     <unique>
+	 *     <special>
+	 *     <expr>
+	 *
+	 * Strip the special arguments from the map command, this should leave
+	 * the map name which we will use as the "name".
+	 */
+
+	do
+	{
+		while (*cp && isspace ((int) *cp))
+			++cp;
+
+		if (strncmp ((const char*) cp, "<Leader>", (size_t) 8) == 0)
+			break;
+
+		if (
+			strncmp ((const char*) cp, "<buffer>", (size_t) 8) == 0 ||
+			strncmp ((const char*) cp, "<silent>", (size_t) 8) == 0 ||
+			strncmp ((const char*) cp, "<script>", (size_t) 8) == 0 ||
+			strncmp ((const char*) cp, "<unique>", (size_t) 8) == 0
+		   )
+		{
+			cp += 8;
+			continue;
+		}
+
+		if (strncmp ((const char*) cp, "<expr>", (size_t) 6) == 0)
+		{
+			cp += 6;
+			continue;
+		}
+
+		if (strncmp ((const char*) cp, "<special>", (size_t) 9) == 0)
+		{
+			cp += 9;
+			continue;
+		}
+
+		break;
+	} while (*cp);
+
+	/* Only copy when something is left: stepping over the terminating NUL
+	 * would read beyond the end of the line and emit an empty tag.
+	 */
+	if (*cp != '\0')
+	{
+		do
+		{
+			vStringPut (name, (int) *cp);
+			++cp;
+		} while (*cp && *cp != ' ');
+
+		vStringTerminate (name);
+		makeSimpleTag (name, VimKinds, K_MAP);
+		vStringClear (name);
+	}
+
+	vStringDelete (name);
+
+	return TRUE;
+}
+
+/* Examine one (comment-free, left-trimmed) line and run every sub-parser
+ * whose leading keyword matches: user commands ("com", excluding "comp" and
+ * "comc"), maps, functions ("fu"), autocommand groups ("aug") and variable
+ * assignments ("let").  The checks are independent of each other.
+ *
+ * Returns the result of parseCommand() when that parser ran, TRUE
+ * otherwise; the caller uses it to decide whether to fetch the next line.
+ */
+static boolean parseVimLine (const unsigned char *line)
+{
+	const char *const text = (const char*) line;
+	const boolean startsCommand = (boolean) (
+			strncmp (text, "com", (size_t) 3) == 0 &&
+			strncmp (text, "comp", (size_t) 4) != 0 &&
+			strncmp (text, "comc", (size_t) 4) != 0);
+	boolean readNextLine = TRUE;
+
+	if (startsCommand)
+	{
+		readNextLine = parseCommand(line);
+		/* TODO - Handle parseCommand returning FALSE */
+	}
+
+	if (isMap(line))
+		parseMap(line);
+
+	if (strncmp (text, "fu", (size_t) 2) == 0)
+		parseFunction(line);
+
+	if (strncmp (text, "aug", (size_t) 3) == 0)
+		parseAutogroup(line);
+
+	if (strncmp (text, "let", (size_t) 3) == 0)
+		parseLet(line);
+
+	return readNextLine;
+}
+
+/* Run parseVimLine() over every line of the input.
+ *
+ * The incoming value of "line" is not used; the parameter only serves as
+ * the loop variable.  A new line is fetched only when parseVimLine() asks
+ * for it by returning TRUE.
+ */
+static void parseVimFile (const unsigned char *line)
+{
+	boolean advance = TRUE;
+
+	for (line = readVimLine() ; line != NULL ; )
+	{
+		advance = parseVimLine(line);
+		if (advance)
+			line = readVimLine();
+	}
+}
+
+/* Parser entry point for Vim script: hand control to parseVimFile(), which
+ * reads the input itself and ignores the pointer passed in.
+ */
+static void findVimTags (void)
+{
+	/* TODO - change this into a structure */
+	const unsigned char *line = NULL;
+
+	parseVimFile (line);
+}
+
+/* Build the parser definition for Vim script: files with the "vim"
+ * extension, the VimKinds tag kinds and findVimTags() as the parser.
+ */
+extern parserDefinition* VimParser (void)
+{
+	static const char *const extensions [] = { "vim", NULL };
+	parserDefinition* const parser = parserNew ("Vim");
+
+	parser->extensions = extensions;
+	parser->kinds      = VimKinds;
+	parser->kindCount  = KIND_COUNT (VimKinds);
+	parser->parser     = findVimTags;
+
+	return parser;
+}
+
+/* vi:set tabstop=4 shiftwidth=4 noexpandtab: */
diff --git a/vstring.c b/vstring.c
new file mode 100644
index 0000000..7ab52cc
--- /dev/null
+++ b/vstring.c
@@ -0,0 +1,232 @@
+/*
+* $Id: vstring.c 558 2007-06-15 19:17:02Z elliotth $
+*
+* Copyright (c) 1998-2002, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* This module contains functions supporting resizeable strings.
+*/
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#include <limits.h> /* to define INT_MAX */
+#include <string.h>
+#include <ctype.h>
+
+#include "debug.h"
+#include "routines.h"
+#include "vstring.h"
+
+/*
+* DATA DEFINITIONS
+*/
+/* Buffer capacity, in characters, given to every string made by vStringNew(). */
+static const size_t vStringInitialSize = 32;
+
+/*
+* FUNCTION DEFINITIONS
+*/
+
+/* Reallocates the buffer of `string' to `newSize' characters and records the
+ * new capacity.  The stored length is not modified.
+ */
+static void vStringResize (vString *const string, const size_t newSize)
+{
+	string->buffer = xRealloc (string->buffer, newSize, char);
+	string->size = newSize;
+}
+
+/*
+* External interface
+*/
+
+/* Doubles the capacity of `string'.
+ *
+ * Growth is refused once the current size exceeds INT_MAX / 2.  Returns TRUE
+ * when the buffer was enlarged and FALSE when it was left unchanged (the
+ * original reported TRUE in both cases, which made the result useless to
+ * callers).
+ */
+extern boolean vStringAutoResize (vString *const string)
+{
+	if (string->size > INT_MAX / 2)
+		return FALSE;
+
+	vStringResize (string, string->size * 2);
+	return TRUE;
+}
+
+/* Resets `string' to the empty string while keeping its buffer allocated.
+ * The DebugStatement additionally zero-fills the whole buffer.
+ */
+extern void vStringClear (vString *const string)
+{
+	string->buffer [0] = '\0';
+	string->length = 0;
+	DebugStatement ( memset (string->buffer, 0, string->size); )
+}
+
+/* Releases `string' together with its buffer.  A NULL argument is accepted
+ * and ignored.
+ */
+extern void vStringDelete (vString *const string)
+{
+	if (string == NULL)
+		return;
+
+	if (string->buffer != NULL)
+		eFree (string->buffer);
+	eFree (string);
+}
+
+/* Allocates a new, empty string whose buffer holds vStringInitialSize
+ * characters.  The result is released with vStringDelete().
+ */
+extern vString *vStringNew (void)
+{
+	vString *const fresh = xMalloc (1, vString);
+
+	fresh->size = vStringInitialSize;
+	fresh->buffer = xMalloc (fresh->size, char);
+	fresh->length = 0;
+
+	vStringClear (fresh);
+
+	return fresh;
+}
+
+#ifndef VSTRING_PUTC_MACRO
+/* Function form of vStringPut(), compiled only when the macro form is not
+ * enabled.  Stores `c' at the end of `string' and keeps the buffer
+ * terminated; passing '\0' writes the terminator without advancing the
+ * length.
+ */
+extern void vStringPut (vString *const string, const int c)
+{
+	/* grow when only the slot for the terminator is left */
+	if (string->size == string->length + 1)
+		vStringAutoResize (string);
+
+	string->buffer [string->length] = c;
+	if (c == '\0')
+		return;
+
+	string->length += 1;
+	string->buffer [string->length] = '\0';
+}
+#endif
+
+/* Appends the NUL-terminated string `s' to `string', growing the buffer as
+ * needed.
+ *
+ * If the buffer cannot be enlarged any further, as much of `s' as still fits
+ * is appended and the rest is dropped; the original looped forever in that
+ * situation because the resize request kept being refused.
+ */
+extern void vStringCatS (vString *const string, const char *const s)
+{
+	size_t len = strlen (s);
+
+	while (string->length + len + 1 >= string->size)
+	{
+		const size_t before = string->size;
+
+		vStringAutoResize (string);
+		if (string->size == before)
+			break;		/* growth refused: stop instead of spinning */
+	}
+
+	/* Only reachable after a refused resize: clip to the room left before
+	 * the terminator.
+	 */
+	if (string->length + len + 1 > string->size)
+		len = string->size - string->length - 1;
+
+	memcpy (string->buffer + string->length, s, len);
+	string->length += len;
+	string->buffer [string->length] = '\0';
+}
+
+/* Returns a newly allocated string holding a copy of the characters of
+ * `string' up to its first NUL.
+ */
+extern vString *vStringNewCopy (const vString *const string)
+{
+	return vStringNewInit (string->buffer);
+}
+
+/* Returns a newly allocated string initialised with a copy of the
+ * NUL-terminated string `s'.
+ */
+extern vString *vStringNewInit (const char *const s)
+{
+	vString *const result = vStringNew ();
+
+	vStringCatS (result, s);
+
+	return result;
+}
+
+/* Appends at most `length' characters of `s' to `string', stopping early at
+ * a NUL in `s'.
+ *
+ * The count is tested before each character is read, so `s' does not have to
+ * be NUL-terminated when it holds at least `length' characters (the original
+ * read s [length] before noticing that the count was exhausted).
+ */
+extern void vStringNCatS (
+		vString *const string, const char *const s, const size_t length)
+{
+	size_t i;
+
+	for (i = 0 ; i < length && s [i] != '\0' ; ++i)
+		vStringPut (string, s [i]);
+
+	vStringTerminate (string);
+}
+
+/* Strip trailing newline from string.
+ * At most one '\n' is removed.  An empty string is left untouched; the
+ * original computed length - 1 on it, which wraps around and reads outside
+ * the buffer.
+ */
+extern void vStringStripNewline (vString *const string)
+{
+	if (string->length == 0)
+		return;
+
+	if (string->buffer [string->length - 1] == '\n')
+	{
+		string->length--;
+		string->buffer [string->length] = '\0';
+	}
+}
+
+/* Strip leading white space from string.
+ * The leading blanks are counted first and the remainder is moved to the
+ * front in a single step, instead of shifting the whole string once per
+ * blank.
+ */
+extern void vStringStripLeading (vString *const string)
+{
+	size_t skip = 0;
+
+	/* <ctype.h> functions require an unsigned char value (or EOF) */
+	while (skip < string->length &&
+		   isspace ((unsigned char) string->buffer [skip]))
+		++skip;
+
+	if (skip > 0)
+	{
+		string->length -= skip;
+		/* source and destination overlap, hence memmove */
+		memmove (string->buffer, string->buffer + skip, string->length);
+		string->buffer [string->length] = '\0';
+	}
+}
+
+/* Strip trailing white space from string.
+ * The length is tested before the last character is inspected, so an empty
+ * string never causes a read in front of the buffer.
+ */
+extern void vStringStripTrailing (vString *const string)
+{
+	/* <ctype.h> functions require an unsigned char value (or EOF) */
+	while (string->length > 0 &&
+		   isspace ((unsigned char) string->buffer [string->length - 1]))
+	{
+		string->length--;
+		string->buffer [string->length] = '\0';
+	}
+}
+
+/* Remove the final character of string, if there is one.
+ */
+extern void vStringChop (vString *const string)
+{
+	if (string->length == 0)
+		return;
+
+	string->length -= 1;
+	string->buffer [string->length] = '\0';
+}
+
+/* Replace the contents of string with a copy of the NUL-terminated string s.
+ */
+extern void vStringCopyS (vString *const string, const char *const s)
+{
+ vStringClear (string);
+ vStringCatS (string, s);
+}
+
+/* Replace the contents of string with the leading characters of s; the
+ * copying itself is done by vStringNCatS() with the given length.
+ */
+extern void vStringNCopyS (
+ vString *const string, const char *const s, const size_t length)
+{
+ vStringClear (string);
+ vStringNCatS (string, s, length);
+}
+
+/* Stores a lower-case copy of `src' in `dest'.
+ *
+ * `dest' is enlarged to the capacity of `src' when it is smaller.  The
+ * length of `dest' is updated to match the copied text; the original left it
+ * stale, so length-based operations on `dest' disagreed with its buffer.
+ */
+extern void vStringCopyToLower (vString *const dest, const vString *const src)
+{
+	const size_t length = src->length;
+	size_t i;
+
+	if (dest->size < src->size)
+		vStringResize (dest, src->size);
+
+	/* <ctype.h> functions require an unsigned char value (or EOF) */
+	for (i = 0 ; i < length ; ++i)
+		dest->buffer [i] = (char) tolower ((unsigned char) src->buffer [i]);
+
+	dest->buffer [length] = '\0';
+	dest->length = length;
+}
+
+/* Recompute the stored length from the buffer contents with strlen(), for
+ * use after the buffer has been written to directly.
+ */
+extern void vStringSetLength (vString *const string)
+{
+ string->length = strlen (string->buffer);
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/vstring.h b/vstring.h
new file mode 100644
index 0000000..611d3a9
--- /dev/null
+++ b/vstring.h
@@ -0,0 +1,85 @@
+/*
+* $Id: vstring.h 719 2009-07-07 03:46:59Z dhiebert $
+*
+* Copyright (c) 1998-2002, Darren Hiebert
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* Provides the external interface for resizeable strings.
+*/
+#ifndef _VSTRING_H
+#define _VSTRING_H
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#if defined(HAVE_STDLIB_H)
+# include <stdlib.h> /* to define size_t */
+#endif
+
+/*
+* MACROS
+*/
+/* Unless DEBUG is defined, vStringPut() is provided as the macro below
+ * rather than as a function.
+ */
+#ifndef DEBUG
+# define VSTRING_PUTC_MACRO 1
+#endif
+#ifdef VSTRING_PUTC_MACRO
+/* Appends character `c' to string `s', asking for a larger buffer first when
+ * only the terminator slot is left.  Both arguments are expanded more than
+ * once, so they must not have side effects.
+ */
+#define vStringPut(s,c) \
+ (void)(((s)->length + 1 == (s)->size ? vStringAutoResize (s) : 0), \
+ ((s)->buffer [(s)->length] = (c)), \
+ ((c) == '\0' ? 0 : ((s)->buffer [++(s)->length] = '\0')))
+#endif
+
+/* Field accessors and shorthands.  None of them checks bounds: vStringItem()
+ * and vStringChar() index the buffer directly, and vStringLast() reads
+ * buffer [length - 1], which is only meaningful for a non-empty string.
+ * The second argument of vStringCat(), vStringNCat(), vStringCopy() and
+ * vStringNCopy() is itself a vString pointer.
+ */
+#define vStringValue(vs) ((vs)->buffer)
+#define vStringItem(vs,i) ((vs)->buffer[i])
+#define vStringLast(vs) ((vs)->buffer[(vs)->length - 1])
+#define vStringLength(vs) ((vs)->length)
+#define vStringSize(vs) ((vs)->size)
+#define vStringCat(vs,s) vStringCatS((vs), vStringValue((s)))
+#define vStringNCat(vs,s,l) vStringNCatS((vs), vStringValue((s)), (l))
+#define vStringCopy(vs,s) vStringCopyS((vs), vStringValue((s)))
+#define vStringNCopy(vs,s,l) vStringNCopyS((vs), vStringValue((s)), (l))
+#define vStringChar(vs,i) ((vs)->buffer[i])
+#define vStringTerminate(vs) vStringPut(vs, '\0')
+#define vStringLower(vs) toLowerString((vs)->buffer)
+#define vStringUpper(vs) toUpperString((vs)->buffer)
+
+/*
+* DATA DECLARATIONS
+*/
+
+/* A growable character string.  `buffer' points to `size' allocated
+ * characters; `length' counts the characters in use and the string routines
+ * keep a terminating '\0' at buffer [length].
+ */
+typedef struct sVString {
+ size_t length; /* size of buffer used */
+ size_t size; /* allocated size of buffer */
+ char *buffer; /* location of buffer */
+} vString;
+
+/*
+* FUNCTION PROTOTYPES
+*/
+/* Capacity management, creation and destruction */
+extern boolean vStringAutoResize (vString *const string);
+extern void vStringClear (vString *const string);
+extern vString *vStringNew (void);
+extern void vStringDelete (vString *const string);
+/* Function form of vStringPut(), declared only when the macro is disabled */
+#ifndef VSTRING_PUTC_MACRO
+extern void vStringPut (vString *const string, const int c);
+#endif
+/* Trimming */
+extern void vStringStripNewline (vString *const string);
+extern void vStringStripLeading (vString *const string);
+extern void vStringChop (vString *const string);
+extern void vStringStripTrailing (vString *const string);
+/* Appending C strings */
+extern void vStringCatS (vString *const string, const char *const s);
+extern void vStringNCatS (vString *const string, const char *const s, const size_t length);
+/* Creating an initialised string */
+extern vString *vStringNewCopy (const vString *const string);
+extern vString *vStringNewInit (const char *const s);
+/* Replacing the contents */
+extern void vStringCopyS (vString *const string, const char *const s);
+extern void vStringNCopyS (vString *const string, const char *const s, const size_t length);
+extern void vStringCopyToLower (vString *const dest, const vString *const src);
+/* Recomputes the stored length from the buffer contents */
+extern void vStringSetLength (vString *const string);
+
+#endif /* _VSTRING_H */
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/yacc.c b/yacc.c
new file mode 100644
index 0000000..01a6414
--- /dev/null
+++ b/yacc.c
@@ -0,0 +1,40 @@
+/*
+* $Id: yacc.c 443 2006-05-30 04:37:13Z darren $
+*
+* Copyright (c) 2001-2002, Nick Hibma <n_hibma@van-laarhoven.org>
+*
+* This source code is released for free distribution under the terms of the
+* GNU General Public License.
+*
+* This module contains functions for generating tags for YACC language files.
+*/
+
+/*
+* INCLUDE FILES
+*/
+#include "general.h" /* must always come first */
+
+#include <string.h>
+#include "parse.h"
+
+/*
+* FUNCTION DEFINITIONS
+*/
+
+/* Registers the tag pattern for YACC files: an identifier at the start of a
+ * line, optionally followed by blanks or tabs, then a colon.  The identifier
+ * is tagged with kind "l" (label).
+ *
+ * The identifier tail uses `*' so that one-character rule names such as
+ * "e:" are tagged too; the original `+' demanded at least two characters.
+ */
+static void installYaccRegex (const langType language)
+{
+	addTagRegex (language,
+		"^([A-Za-z][A-Za-z_0-9]*)[ \t]*:", "\\1", "l,label,labels", NULL);
+}
+
+/* Builds the parser definition for the "YACC" language: files ending in ".y"
+ * are handled purely by the regular expression installed through
+ * installYaccRegex().
+ *
+ * The parameter list is spelled (void) so that the definition is a real
+ * prototype, matching the other parser constructors.
+ */
+extern parserDefinition* YaccParser (void)
+{
+	static const char *const extensions [] = { "y", NULL };
+	parserDefinition* const def = parserNew ("YACC");
+	def->extensions = extensions;
+	def->initialize = installYaccRegex;
+	def->regex = TRUE;
+	return def;
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */