1 files changed, 429 insertions, 0 deletions
diff --git a/boost/wave/cpplexer/re2clex/cpp_re2c_lexer.hpp b/boost/wave/cpplexer/re2clex/cpp_re2c_lexer.hpp
new file mode 100644
index 0000000000..748738544b
--- /dev/null
+++ b/boost/wave/cpplexer/re2clex/cpp_re2c_lexer.hpp
@@ -0,0 +1,429 @@
+/*=============================================================================
+    Boost.Wave: A Standard compliant C++ preprocessor library
+
+    Re2C based C++ lexer
+
+    http://www.boost.org/
+
+    Copyright (c) 2001-2011 Hartmut Kaiser. Distributed under the Boost
+    Software License, Version 1.0. (See accompanying file
+    LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+=============================================================================*/
+
+#if !defined(CPP_RE2C_LEXER_HPP_B81A2629_D5B1_4944_A97D_60254182B9A8_INCLUDED)
+#define CPP_RE2C_LEXER_HPP_B81A2629_D5B1_4944_A97D_60254182B9A8_INCLUDED
+
+#include <string>
+#include <cstdio>
+#include <cstdarg>
+#if defined(BOOST_SPIRIT_DEBUG)
+#include <iostream>
+#endif // defined(BOOST_SPIRIT_DEBUG)
+
+#include <boost/concept_check.hpp>
+#include <boost/assert.hpp>
+#include <boost/spirit/include/classic_core.hpp>
+
+#include <boost/wave/wave_config.hpp>
+#include <boost/wave/language_support.hpp>
+#include <boost/wave/token_ids.hpp>
+#include <boost/wave/util/file_position.hpp>
+#include <boost/wave/cpplexer/validate_universal_char.hpp>
+#include <boost/wave/cpplexer/cpplexer_exceptions.hpp>
+#include <boost/wave/cpplexer/token_cache.hpp>
+#include <boost/wave/cpplexer/convert_trigraphs.hpp>
+
+#include <boost/wave/cpplexer/cpp_lex_interface.hpp>
+#include <boost/wave/cpplexer/re2clex/scanner.hpp>
+#include <boost/wave/cpplexer/re2clex/cpp_re.hpp>
+#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
+#include <boost/wave/cpplexer/detect_include_guards.hpp>
+#endif
+
+#include <boost/wave/cpplexer/cpp_lex_interface_generator.hpp>
+
+// this must occur after all of the includes and before any code appears
+#ifdef BOOST_HAS_ABI_HEADERS
+#include BOOST_ABI_PREFIX
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+namespace boost {
+namespace wave {
+namespace cpplexer {
+namespace re2clex {
+
+///////////////////////////////////////////////////////////////////////////////
+// 
+//  encapsulation of the re2c based cpp lexer
+//
+///////////////////////////////////////////////////////////////////////////////
+
+template <typename IteratorT, 
+    typename PositionT = boost::wave::util::file_position_type,
+    typename TokenT = lex_token<PositionT> >
+class lexer 
+{
+public:
+    typedef TokenT token_type;
+    typedef typename token_type::string_type  string_type;
+
+    lexer(IteratorT const &first, IteratorT const &last, 
+        PositionT const &pos, boost::wave::language_support language_);
+    ~lexer();
+
+    token_type& get(token_type&);
+    void set_position(PositionT const &pos)
+    {
+        // set position has to change the file name and line number only
+        filename = pos.get_file();
+        scanner.line = pos.get_line();
+//        scanner.column = scanner.curr_column = pos.get_column();
+        scanner.file_name = filename.c_str();
+    }
+#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
+    bool has_include_guards(std::string& guard_name) const 
+    { 
+        return guards.detected(guard_name); 
+    }
+#endif
+
+// error reporting from the re2c generated lexer
+    static int report_error(Scanner const* s, int code, char const *, ...);
+
+private:
+    static char const *tok_names[];
+
+    Scanner scanner;
+    string_type filename;
+    string_type value;
+    bool at_eof;
+    boost::wave::language_support language;
+#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
+    include_guards<token_type> guards;
+#endif
+
+#if BOOST_WAVE_SUPPORT_THREADING == 0
+    static token_cache<string_type> const cache;
+#else
+    token_cache<string_type> const cache;
+#endif
+};
+
+///////////////////////////////////////////////////////////////////////////////
+// initialize cpp lexer 
+template <typename IteratorT, typename PositionT, typename TokenT>
+inline
+lexer<IteratorT, PositionT, TokenT>::lexer(IteratorT const &first, 
+        IteratorT const &last, PositionT const &pos, 
+        boost::wave::language_support language_) 
+  : filename(pos.get_file()), at_eof(false), language(language_)
+#if BOOST_WAVE_SUPPORT_THREADING != 0
+  , cache()
+#endif
+{
+    using namespace std;        // some systems have memset in std
+    memset(&scanner, '\0', sizeof(Scanner));
+    scanner.eol_offsets = aq_create();
+    if (first != last) {
+        scanner.first = scanner.act = (uchar *)&(*first);
+        scanner.last = scanner.first + std::distance(first, last);  
+    }
+    scanner.line = pos.get_line();
+    scanner.column = scanner.curr_column = pos.get_column();
+    scanner.error_proc = report_error;
+    scanner.file_name = filename.c_str();
+
+#if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
+    scanner.enable_ms_extensions = true;
+#else
+    scanner.enable_ms_extensions = false;
+#endif
+
+#if BOOST_WAVE_SUPPORT_VARIADICS_PLACEMARKERS != 0
+    scanner.act_in_c99_mode = boost::wave::need_c99(language_);
+#endif
+
+#if BOOST_WAVE_SUPPORT_IMPORT_KEYWORD != 0
+    scanner.enable_import_keyword = !boost::wave::need_c99(language_);
+#else
+    scanner.enable_import_keyword = false;
+#endif
+
+    scanner.detect_pp_numbers = boost::wave::need_prefer_pp_numbers(language_);
+    scanner.single_line_only = boost::wave::need_single_line(language_);
+
+#if BOOST_WAVE_SUPPORT_CPP0X != 0
+    scanner.act_in_cpp0x_mode = boost::wave::need_cpp0x(language_);
+#else
+    scanner.act_in_cpp0x_mode = false;
+#endif
+}
+
+template <typename IteratorT, typename PositionT, typename TokenT>
+inline
+lexer<IteratorT, PositionT, TokenT>::~lexer() 
+{
+    using namespace std;        // some systems have free in std
+    aq_terminate(scanner.eol_offsets);
+    free(scanner.bot);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+//  get the next token from the input stream
+template <typename IteratorT, typename PositionT, typename TokenT>
+inline TokenT&
+lexer<IteratorT, PositionT, TokenT>::get(TokenT& result)
+{
+    if (at_eof) 
+        return result = token_type();  // return T_EOI
+
+    unsigned int actline = scanner.line;
+    token_id id = token_id(scan(&scanner));
+
+    switch (static_cast<unsigned int>(id)) {
+    case T_IDENTIFIER:
+    // test identifier characters for validity (throws if invalid chars found)
+        value = string_type((char const *)scanner.tok, 
+            scanner.cur-scanner.tok);
+        if (!boost::wave::need_no_character_validation(language))
+            impl::validate_identifier_name(value, actline, scanner.column, filename); 
+        break;
+
+    case T_STRINGLIT:
+    case T_CHARLIT:
+    case T_RAWSTRINGLIT:
+    // test literal characters for validity (throws if invalid chars found)
+        value = string_type((char const *)scanner.tok, 
+            scanner.cur-scanner.tok);
+        if (boost::wave::need_convert_trigraphs(language))
+            value = impl::convert_trigraphs(value); 
+        if (!boost::wave::need_no_character_validation(language))
+            impl::validate_literal(value, actline, scanner.column, filename); 
+        break;
+
+#if BOOST_WAVE_SUPPORT_INCLUDE_NEXT != 0
+    case T_PP_HHEADER:
+    case T_PP_QHEADER:
+    case T_PP_INCLUDE:
+    // convert to the corresponding ..._next token, if appropriate
+      {
+          value = string_type((char const *)scanner.tok, 
+              scanner.cur-scanner.tok);
+
+      // Skip '#' and whitespace and see whether we find an 'include_next' here.
+          typename string_type::size_type start = value.find("include");
+          if (value.compare(start, 12, "include_next", 12) == 0)
+              id = token_id(id | AltTokenType);
+          break;
+      }
+#endif
+
+    case T_LONGINTLIT:  // supported in C++0x, C99 and long_long mode
+        value = string_type((char const *)scanner.tok, 
+            scanner.cur-scanner.tok);
+        if (!boost::wave::need_long_long(language)) {
+        // syntax error: not allowed in C++ mode
+            BOOST_WAVE_LEXER_THROW(lexing_exception, invalid_long_long_literal, 
+                value.c_str(), actline, scanner.column, filename.c_str());
+        }
+        break;
+
+    case T_OCTALINT:
+    case T_DECIMALINT:
+    case T_HEXAINT:
+    case T_INTLIT:
+    case T_FLOATLIT:
+    case T_FIXEDPOINTLIT:
+    case T_CCOMMENT:
+    case T_CPPCOMMENT:
+    case T_SPACE:
+    case T_SPACE2:
+    case T_ANY:
+    case T_PP_NUMBER:
+        value = string_type((char const *)scanner.tok, 
+            scanner.cur-scanner.tok);
+        break;
+
+    case T_EOF:
+    // T_EOF is returned as a valid token, the next call will return T_EOI,
+    // i.e. the actual end of input
+        at_eof = true;
+        value.clear();
+        break;
+
+    case T_OR_TRIGRAPH:
+    case T_XOR_TRIGRAPH:
+    case T_LEFTBRACE_TRIGRAPH:
+    case T_RIGHTBRACE_TRIGRAPH:
+    case T_LEFTBRACKET_TRIGRAPH:
+    case T_RIGHTBRACKET_TRIGRAPH:
+    case T_COMPL_TRIGRAPH:
+    case T_POUND_TRIGRAPH:
+        if (boost::wave::need_convert_trigraphs(language)) {
+            value = cache.get_token_value(BASEID_FROM_TOKEN(id));
+        }
+        else {
+            value = string_type((char const *)scanner.tok, 
+                scanner.cur-scanner.tok);
+        }
+        break;
+
+    case T_ANY_TRIGRAPH:
+        if (boost::wave::need_convert_trigraphs(language)) {
+            value = impl::convert_trigraph(
+                string_type((char const *)scanner.tok)); 
+        }
+        else {
+            value = string_type((char const *)scanner.tok, 
+                scanner.cur-scanner.tok);
+        }
+        break;
+
+    default:
+        if (CATEGORY_FROM_TOKEN(id) != EXTCATEGORY_FROM_TOKEN(id) ||
+            IS_CATEGORY(id, UnknownTokenType))
+        {
+            value = string_type((char const *)scanner.tok, 
+                scanner.cur-scanner.tok);
+        }
+        else {
+            value = cache.get_token_value(id);
+        }
+        break;
+    }
+
+//     std::cerr << boost::wave::get_token_name(id) << ": " << value << std::endl;
+
+    // the re2c lexer reports the new line number for newline tokens
+    result = token_type(id, value, PositionT(filename, actline, scanner.column));
+
+#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
+    return guards.detect_guard(result);
+#else
+    return result;
+#endif
+}
+
+template <typename IteratorT, typename PositionT, typename TokenT>
+inline int 
+lexer<IteratorT, PositionT, TokenT>::report_error(Scanner const *s, int errcode, 
+    char const *msg, ...)
+{
+    BOOST_ASSERT(0 != s);
+    BOOST_ASSERT(0 != msg);
+
+    using namespace std;    // some system have vsprintf in namespace std
+
+    char buffer[200];           // should be large enough
+    va_list params;
+    va_start(params, msg);
+    vsprintf(buffer, msg, params);
+    va_end(params);
+
+    BOOST_WAVE_LEXER_THROW_VAR(lexing_exception, errcode, buffer, s->line, 
+        s->column, s->file_name);
+//    BOOST_UNREACHABLE_RETURN(0);
+    return 0;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+//   
+//  lex_functor
+//   
+///////////////////////////////////////////////////////////////////////////////
+
+template <typename IteratorT, 
+    typename PositionT = boost::wave::util::file_position_type,
+    typename TokenT = typename lexer<IteratorT, PositionT>::token_type>
+class lex_functor 
+:   public lex_input_interface_generator<TokenT>
+{
+public:
+    typedef TokenT token_type;
+
+    lex_functor(IteratorT const &first, IteratorT const &last, 
+            PositionT const &pos, boost::wave::language_support language)
+    :   re2c_lexer(first, last, pos, language)
+    {}
+    virtual ~lex_functor() {}
+
+// get the next token from the input stream
+    token_type& get(token_type& result) { return re2c_lexer.get(result); }
+    void set_position(PositionT const &pos) { re2c_lexer.set_position(pos); }
+#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
+    bool has_include_guards(std::string& guard_name) const 
+        { return re2c_lexer.has_include_guards(guard_name); }
+#endif
+
+private:
+    lexer<IteratorT, PositionT, TokenT> re2c_lexer;
+};
+
+#if BOOST_WAVE_SUPPORT_THREADING == 0
+///////////////////////////////////////////////////////////////////////////////
+template <typename IteratorT, typename PositionT, typename TokenT>
+token_cache<typename lexer<IteratorT, PositionT, TokenT>::string_type> const
+    lexer<IteratorT, PositionT, TokenT>::cache = 
+        token_cache<typename lexer<IteratorT, PositionT, TokenT>::string_type>();
+#endif
+
+}   // namespace re2clex
+
+///////////////////////////////////////////////////////////////////////////////
+//  
+//  The new_lexer_gen<>::new_lexer function (declared in cpp_lex_interface.hpp)
+//  should be defined inline, if the lex_functor shouldn't be instantiated 
+//  separately from the lex_iterator.
+//
+//  Separate (explicit) instantiation helps to reduce compilation time.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#if BOOST_WAVE_SEPARATE_LEXER_INSTANTIATION != 0
+#define BOOST_WAVE_RE2C_NEW_LEXER_INLINE
+#else
+#define BOOST_WAVE_RE2C_NEW_LEXER_INLINE inline
+#endif 
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//  The 'new_lexer' function allows the opaque generation of a new lexer object.
+//  It is coupled to the iterator type to allow to decouple the lexer/iterator 
+//  configurations at compile time.
+//
+//  This function is declared inside the cpp_lex_token.hpp file, which is 
+//  referenced by the source file calling the lexer and the source file, which
+//  instantiates the lex_functor. But is is defined here, so it will be 
+//  instantiated only while compiling the source file, which instantiates the 
+//  lex_functor. While the cpp_re2c_token.hpp file may be included everywhere,
+//  this file (cpp_re2c_lexer.hpp) should be included only once. This allows
+//  to decouple the lexer interface from the lexer implementation and reduces 
+//  compilation time.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+template <typename IteratorT, typename PositionT, typename TokenT>
+BOOST_WAVE_RE2C_NEW_LEXER_INLINE
+lex_input_interface<TokenT> *
+new_lexer_gen<IteratorT, PositionT, TokenT>::new_lexer(IteratorT const &first,
+    IteratorT const &last, PositionT const &pos, 
+    boost::wave::language_support language)
+{
+    using re2clex::lex_functor;
+    return new lex_functor<IteratorT, PositionT, TokenT>(first, last, pos, language);
+}
+
+#undef BOOST_WAVE_RE2C_NEW_LEXER_INLINE
+
+///////////////////////////////////////////////////////////////////////////////
+}   // namespace cpplexer
+}   // namespace wave
+}   // namespace boost 
+
+// the suffix header occurs after all of the code
+#ifdef BOOST_HAS_ABI_HEADERS
+#include BOOST_ABI_SUFFIX
+#endif
+
+#endif // !defined(CPP_RE2C_LEXER_HPP_B81A2629_D5B1_4944_A97D_60254182B9A8_INCLUDED)