summaryrefslogtreecommitdiff
path: root/boost/xpressive/detail/utility/boyer_moore.hpp
blob: d05e38752c20f523a2cf4f02a231901ba994b2b6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
///////////////////////////////////////////////////////////////////////////////
/// \file boyer_moore.hpp
///   Contains the boyer-moore implementation. Note: this is *not* a general-
///   purpose boyer-moore implementation. It truncates the search string at
///   256 characters, but it is sufficient for the needs of xpressive.
//
//  Copyright 2008 Eric Niebler. Distributed under the Boost
//  Software License, Version 1.0. (See accompanying file
//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#ifndef BOOST_XPRESSIVE_DETAIL_BOYER_MOORE_HPP_EAN_10_04_2005
#define BOOST_XPRESSIVE_DETAIL_BOYER_MOORE_HPP_EAN_10_04_2005

// MS compatible compilers support #pragma once
#if defined(_MSC_VER)
# pragma once
# pragma warning(push)
# pragma warning(disable : 4100) // unreferenced formal parameter
#endif

#include <climits>  // for UCHAR_MAX
#include <cstddef>  // for std::ptrdiff_t
#include <utility>  // for std::max
#include <vector>
#include <boost/mpl/bool.hpp>
#include <boost/noncopyable.hpp>
#include <boost/iterator/iterator_traits.hpp>
#include <boost/type_traits/is_convertible.hpp>
#include <boost/xpressive/detail/detail_fwd.hpp>

namespace boost { namespace xpressive { namespace detail
{

///////////////////////////////////////////////////////////////////////////////
// boyer_moore
//
template<typename BidiIter, typename Traits>
struct boyer_moore
  : noncopyable
{
    typedef typename iterator_value<BidiIter>::type char_type;
    typedef Traits traits_type;
    typedef has_fold_case<Traits> case_fold;
    typedef typename Traits::string_type string_type;

    // initialize the Boyer-Moore search data structure, using the
    // search sub-sequence to prime the pump.
    boyer_moore(char_type const *begin, char_type const *end, Traits const &tr, bool icase)
      : begin_(begin)
      , last_(begin)
      , fold_()
      , find_fun_(
              icase
            ? (case_fold() ? &boyer_moore::find_nocase_fold_ : &boyer_moore::find_nocase_)
            : &boyer_moore::find_
        )
    {
        std::ptrdiff_t const uchar_max = UCHAR_MAX;
        std::ptrdiff_t diff = std::distance(begin, end);
        this->length_  = static_cast<unsigned char>((std::min)(diff, uchar_max));
        std::fill_n(static_cast<unsigned char *>(this->offsets_), uchar_max + 1, this->length_);
        --this->length_;

        icase ? this->init_(tr, case_fold()) : this->init_(tr, mpl::false_());
    }

    BidiIter find(BidiIter begin, BidiIter end, Traits const &tr) const
    {
        return (this->*this->find_fun_)(begin, end, tr);
    }

private:

    void init_(Traits const &tr, mpl::false_)
    {
        for(unsigned char offset = this->length_; offset; --offset, ++this->last_)
        {
            this->offsets_[tr.hash(*this->last_)] = offset;
        }
    }

    void init_(Traits const &tr, mpl::true_)
    {
        this->fold_.reserve(this->length_ + 1);
        for(unsigned char offset = this->length_; offset; --offset, ++this->last_)
        {
            this->fold_.push_back(tr.fold_case(*this->last_));
            for(typename string_type::const_iterator beg = this->fold_.back().begin(), end = this->fold_.back().end();
                beg != end; ++beg)
            {
                this->offsets_[tr.hash(*beg)] = offset;
            }
        }
        this->fold_.push_back(tr.fold_case(*this->last_));
    }

    // case-sensitive Boyer-Moore search
    BidiIter find_(BidiIter begin, BidiIter end, Traits const &tr) const
    {
        typedef typename boost::iterator_difference<BidiIter>::type diff_type;
        diff_type const endpos = std::distance(begin, end);
        diff_type offset = static_cast<diff_type>(this->length_);

        for(diff_type curpos = offset; curpos < endpos; curpos += offset)
        {
            std::advance(begin, offset);

            char_type const *pat_tmp = this->last_;
            BidiIter str_tmp = begin;

            for(; tr.translate(*str_tmp) == *pat_tmp; --pat_tmp, --str_tmp)
            {
                if(pat_tmp == this->begin_)
                {
                    return str_tmp;
                }
            }

            offset = this->offsets_[tr.hash(tr.translate(*begin))];
        }

        return end;
    }

    // case-insensitive Boyer-Moore search
    BidiIter find_nocase_(BidiIter begin, BidiIter end, Traits const &tr) const
    {
        typedef typename boost::iterator_difference<BidiIter>::type diff_type;
        diff_type const endpos = std::distance(begin, end);
        diff_type offset = static_cast<diff_type>(this->length_);

        for(diff_type curpos = offset; curpos < endpos; curpos += offset)
        {
            std::advance(begin, offset);

            char_type const *pat_tmp = this->last_;
            BidiIter str_tmp = begin;

            for(; tr.translate_nocase(*str_tmp) == *pat_tmp; --pat_tmp, --str_tmp)
            {
                if(pat_tmp == this->begin_)
                {
                    return str_tmp;
                }
            }

            offset = this->offsets_[tr.hash(tr.translate_nocase(*begin))];
        }

        return end;
    }

    // case-insensitive Boyer-Moore search with case-folding
    BidiIter find_nocase_fold_(BidiIter begin, BidiIter end, Traits const &tr) const
    {
        typedef typename boost::iterator_difference<BidiIter>::type diff_type;
        diff_type const endpos = std::distance(begin, end);
        diff_type offset = static_cast<diff_type>(this->length_);

        for(diff_type curpos = offset; curpos < endpos; curpos += offset)
        {
            std::advance(begin, offset);

            string_type const *pat_tmp = &this->fold_.back();
            BidiIter str_tmp = begin;

            for(; pat_tmp->end() != std::find(pat_tmp->begin(), pat_tmp->end(), *str_tmp);
                --pat_tmp, --str_tmp)
            {
                if(pat_tmp == &this->fold_.front())
                {
                    return str_tmp;
                }
            }

            offset = this->offsets_[tr.hash(*begin)];
        }

        return end;
    }

private:

    char_type const *begin_;
    char_type const *last_;
    std::vector<string_type> fold_;
    BidiIter (boyer_moore::*const find_fun_)(BidiIter, BidiIter, Traits const &) const;
    unsigned char length_;
    unsigned char offsets_[UCHAR_MAX + 1];
};

}}} // namespace boost::xpressive::detail

#if defined(_MSC_VER)
# pragma warning(pop)
#endif

#endif