summaryrefslogtreecommitdiff
path: root/boost/spirit/home/lex/lexer/lexertl/functor.hpp
blob: d7b22413a81586f1e33c4e8825d00541806c0584 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
//  Copyright (c) 2001-2011 Hartmut Kaiser
// 
//  Distributed under the Boost Software License, Version 1.0. (See accompanying 
//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#if !defined(BOOST_SPIRIT_LEX_LEXER_FUNCTOR_NOV_18_2007_1112PM)
#define BOOST_SPIRIT_LEX_LEXER_FUNCTOR_NOV_18_2007_1112PM

#if defined(_MSC_VER)
#pragma once
#endif

#include <boost/mpl/bool.hpp>
#include <boost/detail/iterator.hpp>
#include <boost/detail/workaround.hpp>
#include <boost/spirit/home/lex/lexer/pass_flags.hpp>
#include <boost/assert.hpp>

// Select how the eof token of the functor class below is declared:
//
//   - BOOST_SPIRIT_STATIC_EOF defined, BOOST_SPIRIT_EOF_PREFIX == 'static':
//     the eof token is a static data member. This is chosen whenever 
//     __COMO_VERSION__ is non-zero or the BOOST_MSVC <= 1310 (VC7.1 and 
//     older) workaround does not apply.
//   - otherwise BOOST_SPIRIT_EOF_PREFIX expands to nothing and the eof token 
//     is an ordinary (non-static) data member.
//
// Both macros are local to this header; they are #undef'ed at its end.
#if 0 != __COMO_VERSION__ || !BOOST_WORKAROUND(BOOST_MSVC, <= 1310)
#define BOOST_SPIRIT_STATIC_EOF 1
#define BOOST_SPIRIT_EOF_PREFIX static
#else
#define BOOST_SPIRIT_EOF_PREFIX 
#endif

namespace boost { namespace spirit { namespace lex { namespace lexertl
{ 
    ///////////////////////////////////////////////////////////////////////////
    //
    //  functor is a template usable as the functor object for the 
    //  multi_pass iterator allowing to wrap a lexertl based dfa into an 
    //  iterator based interface.
    //  
    //    Token:      the type of the tokens produced by this functor
    //                this needs to expose a constructor with the following
    //                prototype:
    //
    //                Token(std::size_t id, std::size_t state, 
    //                      Iterator start, Iterator end)
    //
    //                where 'id' is the token id, 'state' is the lexer state
    //                this token has been matched in, and 'start' and 'end'  
    //                mark the start and the end of the token with respect 
    //                to the underlying character stream.
    //    FunctorData:
    //                this is expected to encapsulate the shared part of the 
    //                functor (see lex/lexer/lexertl/functor_data.hpp for an
    //                example and documentation).
    //    Iterator:   the type of the underlying iterator
    //    SupportsActors:
    //                this is expected to be an mpl::bool_, if mpl::true_ the
    //                functor invokes functors which (optionally) have 
    //                been attached to the token definitions.
    //    SupportsState:
    //                this is expected to be an mpl::bool_, if mpl::true_ the
    //                functor supports different lexer states, 
    //                otherwise no lexer state is supported.
    //
    ///////////////////////////////////////////////////////////////////////////
    template <typename Token
      , template <typename, typename, typename, typename> class FunctorData
      , typename Iterator = typename Token::iterator_type
      , typename SupportsActors = mpl::false_
      , typename SupportsState = typename Token::has_state>
    class functor
    {
    public:
        // the value type of the underlying iterator
        typedef typename 
            boost::detail::iterator_traits<Iterator>::value_type 
        char_type;

    private:
        // Required for compilers which do not implement the resolution of 
        // DR45, see
        // http://www.open-std.org/JTC1/SC22/WG21/docs/cwg_defects.html#45.
        typedef typename Token::token_value_type token_value_type;
        friend class FunctorData<Iterator, SupportsActors, SupportsState
          , token_value_type>;

        // Small guard object: its destructor copies 'src' to 'dst'. Both are 
        // held by reference, so the value which gets assigned is the one 
        // 'src' refers to at the time the guard goes out of scope.
        template <typename T>
        struct assign_on_exit
        {
            assign_on_exit(T& dst, T const& src)
              : dst_(dst), src_(src) 
            {}

            ~assign_on_exit() { dst_ = src_; }

            T& dst_;
            T const& src_;

        private:
            // declared only, silences MSVC warning C4512: assignment operator 
            // could not be generated
            assign_on_exit& operator= (assign_on_exit const&);
        };

    public:
        functor() {}

#if BOOST_WORKAROUND(BOOST_MSVC, <= 1310)
        // somehow VC7.1 needs this (meaningless) assignment operator, it 
        // does not copy anything
        functor& operator=(functor const&)
        {
            return *this;
        }
#endif

        ///////////////////////////////////////////////////////////////////////
        // The types and the eof token below form the interface expected by 
        // the iterator_policies::split_functor_input policy:
        //   result_type:  the token type handed out by get_next()
        //   unique:       this functor type itself
        //   shared:       the FunctorData instantiation get_next() works on
        typedef Token result_type;
        typedef functor unique;
        typedef FunctorData<Iterator, SupportsActors, SupportsState
          , token_value_type> shared;

        BOOST_SPIRIT_EOF_PREFIX result_type const eof;

        ///////////////////////////////////////////////////////////////////////
        // types re-exported from the shared functor data
        typedef Iterator iterator_type;
        typedef typename shared::semantic_actions_type semantic_actions_type;
        typedef typename shared::next_token_functor next_token_functor;
        typedef typename shared::get_state_name_type get_state_name_type;

        // this is needed to wrap the semantic actions in a proper way
        typedef typename shared::wrap_action_type wrap_action_type;

        ///////////////////////////////////////////////////////////////////////
        //  Match the next token, assign it to 'result' and return a reference 
        //  to 'result'.
        //
        //  The token assigned is
        //    - the eof token, if no input is left or the lexer reported the 
        //      id 0,
        //    - a token constructed from 0, if nothing matched or if a 
        //      semantic action forced the match to fail without changing the
        //      lexer state,
        //    - the matched token otherwise.
        //
        //  Matching is repeated as long as the semantic actions neither set a
        //  token value nor return pass_normal or pass_fail (the token is 
        //  skipped), and whenever a forced failure changed the lexer state.
        template <typename MultiPass>
        static result_type& get_next(MultiPass& mp, result_type& result)
        {
            typedef typename result_type::id_type id_type;

            shared& data = mp.shared()->ftor;
            for (;;) 
            {
                if (data.get_first() == data.get_last()) 
                {
                    // nothing left to match
#if defined(BOOST_SPIRIT_STATIC_EOF)
                    return result = eof;
#else
                    return result = mp.ftor.eof;
#endif
                }

                data.reset_value();
                Iterator match_end = data.get_first();
                std::size_t unique_id = boost::lexer::npos;
                bool bol_before = false;

                // the state has to be stored up front as the lexer matching 
                // might change it
                std::size_t entry_state = data.get_state();
                std::size_t token_id = 
                    data.next(match_end, unique_id, bol_before);

                if (boost::lexer::npos == token_id) 
                {
                    // no match
#if defined(BOOST_SPIRIT_LEXERTL_DEBUG)
                    std::string next;
                    Iterator it = data.get_first();
                    std::size_t count = 0; 
                    while (count < 10 && it != data.get_last())
                    {
                        next += *it;
                        ++it;
                        ++count;
                    }

                    std::cerr << "Not matched, in state: " << entry_state 
                              << ", lookahead: >" << next << "<" << std::endl;
#endif
                    return result = result_type(0);
                }

                if (0 == token_id) 
                {
                    // EOF reached
#if defined(BOOST_SPIRIT_STATIC_EOF)
                    return result = eof;
#else
                    return result = mp.ftor.eof;
#endif
                }

#if defined(BOOST_SPIRIT_LEXERTL_DEBUG)
                {
                    // collect up to 10 characters following the match
                    std::string next;
                    Iterator it = match_end;
                    std::size_t count = 0; 
                    while (count < 10 && it != data.get_last())
                    {
                        next += *it;
                        ++it;
                        ++count;
                    }

                    std::cerr << "Matched: " << token_id << ", in state: " 
                              << entry_state << ", string: >" 
                              << std::basic_string<char_type>(
                                    data.get_first(), match_end) 
                              << "<"
                              << ", lookahead: >" << next << "<" << std::endl;
                    if (data.get_state() != entry_state) 
                    {
                        std::cerr << "Switched to state: " 
                                  << data.get_state() << std::endl;
                    }
                }
#endif
                // a pending lex::more() requires to move data.first_ back to 
                // the start of the previously matched token
                bool adjusted = data.adjust_start();

                // store the end of the matched input sequence in the token 
                // data
                data.set_end(match_end);

                // run the semantic actions attached to the token definition 
                // (if any), these might change state, id, data.first_, and/or 
                // the end of the match
                BOOST_SCOPED_ENUM(pass_flags) pass = data.invoke_actions(
                    entry_state, token_id, unique_id, match_end);

                if (data.has_value()) 
                {
                    // a token value has been set using data.set_value(): 
                    // return the token carrying this value, 'data.first_' is 
                    // moved past the matched sequence once the token has been 
                    // assigned
                    assign_on_exit<Iterator> on_exit(
                        data.get_first(), match_end);
                    return result = result_type(
                        id_type(token_id), entry_state, data.get_value());
                }

                if (pass_flags::pass_normal == pass) 
                {
                    // return the token spanning the matched sequence, 
                    // 'data.first_' is moved past the matched sequence once 
                    // the token has been assigned
                    assign_on_exit<Iterator> on_exit(
                        data.get_first(), match_end);
                    return result = result_type(
                        id_type(token_id), entry_state, data.get_first()
                      , match_end);
                }

                if (pass_flags::pass_fail == pass) 
                {
#if defined(BOOST_SPIRIT_LEXERTL_DEBUG)
                    std::cerr << "Matching forced to fail" << std::endl; 
#endif
                    // undo the adjustment of data.first_ done above
                    if (adjusted)
                        data.revert_adjust_start();

                    // one of the semantic actions signaled no-match
                    data.reset_bol(bol_before);

                    // with an unchanged state repeating the match wouldn't
                    // move the input forward, causing an infinite loop
                    if (entry_state == data.get_state())
                        return result = result_type(0);

                    // the state has changed, retry matching 
                    continue;
                }

#if defined(BOOST_SPIRIT_LEXERTL_DEBUG)
                std::cerr << "Token ignored, continuing matching" << std::endl; 
#endif
                // the token is to be ignored: match again, starting right 
                // after the current match
                data.get_first() = match_end;
            }
        }

        // Setting the lexer state is propagated up to the iterator 
        // interface, which allows to manipulate the current lexer state 
        // through any of the exposed iterators. Returns the state which was
        // active before the call.
        template <typename MultiPass>
        static std::size_t set_state(MultiPass& mp, std::size_t state) 
        { 
            std::size_t previous = mp.shared()->ftor.get_state();
            mp.shared()->ftor.set_state(state);

#if defined(BOOST_SPIRIT_LEXERTL_DEBUG)
            std::cerr << "Switching state from: " << previous 
                      << " to: " << state
                      << std::endl;
#endif
            return previous; 
        }

        // Return the current lexer state of the shared functor data.
        template <typename MultiPass>
        static std::size_t get_state(MultiPass& mp) 
        { 
            return mp.shared()->ftor.get_state();
        }

        // Return the state id the shared functor data reports for the given 
        // state name.
        template <typename MultiPass>
        static std::size_t 
        map_state(MultiPass const& mp, char_type const* statename)  
        { 
            return mp.shared()->ftor.get_state_id(statename);
        }

        // nothing to clean up, but the function is required to exist
        template <typename MultiPass>
        static void destroy(MultiPass const&) {}
    };

#if defined(BOOST_SPIRIT_STATIC_EOF)
    ///////////////////////////////////////////////////////////////////////////
    //  Definition of the static eof token: a default constructed token. 
    //  functor<...>::result_type is a typedef for Token, which allows to 
    //  spell the type of the member directly.
    ///////////////////////////////////////////////////////////////////////////
    template <typename Token
      , template <typename, typename, typename, typename> class FunctorData
      , typename Iterator, typename SupportsActors, typename SupportsState>
    Token const
        functor<Token, FunctorData, Iterator, SupportsActors, SupportsState>::eof 
            = Token();
#endif

}}}}

#undef BOOST_SPIRIT_EOF_PREFIX
#undef BOOST_SPIRIT_STATIC_EOF

#endif