diff options
Diffstat (limited to 'boost/locale/boundary/index.hpp')
-rw-r--r-- | boost/locale/boundary/index.hpp | 1113 |
1 files changed, 1113 insertions, 0 deletions
diff --git a/boost/locale/boundary/index.hpp b/boost/locale/boundary/index.hpp new file mode 100644 index 0000000000..cff6a97b65 --- /dev/null +++ b/boost/locale/boundary/index.hpp @@ -0,0 +1,1113 @@ +// +// Copyright (c) 2009-2011 Artyom Beilis (Tonkikh) +// +// Distributed under the Boost Software License, Version 1.0. (See +// accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) +// +#ifndef BOOST_LOCALE_BOUNDARY_INDEX_HPP_INCLUDED +#define BOOST_LOCALE_BOUNDARY_INDEX_HPP_INCLUDED + +#include <boost/locale/config.hpp> +#include <boost/locale/boundary/types.hpp> +#include <boost/locale/boundary/facets.hpp> +#include <boost/locale/boundary/segment.hpp> +#include <boost/locale/boundary/boundary_point.hpp> +#include <boost/iterator/iterator_facade.hpp> +#include <boost/shared_ptr.hpp> +#include <boost/cstdint.hpp> +#include <boost/assert.hpp> +#ifdef BOOST_MSVC +# pragma warning(push) +# pragma warning(disable : 4275 4251 4231 4660) +#endif +#include <string> +#include <locale> +#include <vector> +#include <iterator> +#include <algorithm> +#include <stdexcept> + +#include <iostream> + +namespace boost { + + namespace locale { + + namespace boundary { + /// + /// \defgroup boundary Boundary Analysis + /// + /// This module contains all operations required for %boundary analysis of text: character, word, like and sentence boundaries + /// + /// @{ + /// + + /// \cond INTERNAL + + namespace details { + + template<typename IteratorType,typename CategoryType = typename std::iterator_traits<IteratorType>::iterator_category> + struct mapping_traits { + typedef typename std::iterator_traits<IteratorType>::value_type char_type; + static index_type map(boundary_type t,IteratorType b,IteratorType e,std::locale const &l) + { + std::basic_string<char_type> str(b,e); + return std::use_facet<boundary_indexing<char_type> >(l).map(t,str.c_str(),str.c_str()+str.size()); + } + }; + + template<typename CharType,typename SomeIteratorType> + struct linear_iterator_traits { + static const bool is_linear = false; + }; + + template<typename CharType> + struct linear_iterator_traits<CharType,typename std::basic_string<CharType>::iterator> { + static const bool is_linear = true; + }; + + template<typename CharType> + struct linear_iterator_traits<CharType,typename std::basic_string<CharType>::const_iterator> { + static const bool is_linear = true; + }; + + template<typename CharType> + struct linear_iterator_traits<CharType,typename std::vector<CharType>::iterator> { + static const bool is_linear = true; + }; + + template<typename CharType> + struct linear_iterator_traits<CharType,typename std::vector<CharType>::const_iterator> { + static const bool is_linear = true; + }; + + template<typename CharType> + struct linear_iterator_traits<CharType,CharType *> { + static const bool is_linear = true; + }; + + template<typename CharType> + struct linear_iterator_traits<CharType,CharType const *> { + static const bool is_linear = true; + }; + + + template<typename IteratorType> + struct mapping_traits<IteratorType,std::random_access_iterator_tag> { + + typedef typename std::iterator_traits<IteratorType>::value_type char_type; + + + + static index_type map(boundary_type t,IteratorType b,IteratorType e,std::locale const &l) + { + index_type result; + + // + // Optimize for most common cases + // + // C++0x requires that string is continious in memory and all known + // string implementations + // do this because of c_str() support. + // + + if(linear_iterator_traits<char_type,IteratorType>::is_linear && b!=e) + { + char_type const *begin = &*b; + char_type const *end = begin + (e-b); + index_type tmp=std::use_facet<boundary_indexing<char_type> >(l).map(t,begin,end); + result.swap(tmp); + } + else { + std::basic_string<char_type> str(b,e); + index_type tmp = std::use_facet<boundary_indexing<char_type> >(l).map(t,str.c_str(),str.c_str()+str.size()); + result.swap(tmp); + } + return result; + } + }; + + template<typename BaseIterator> + class mapping { + public: + typedef BaseIterator base_iterator; + typedef typename std::iterator_traits<base_iterator>::value_type char_type; + + + mapping(boundary_type type, + base_iterator begin, + base_iterator end, + std::locale const &loc) + : + index_(new index_type()), + begin_(begin), + end_(end) + { + index_type idx=details::mapping_traits<base_iterator>::map(type,begin,end,loc); + index_->swap(idx); + } + + mapping() + { + } + + index_type const &index() const + { + return *index_; + } + + base_iterator begin() const + { + return begin_; + } + + base_iterator end() const + { + return end_; + } + + private: + boost::shared_ptr<index_type> index_; + base_iterator begin_,end_; + }; + + template<typename BaseIterator> + class segment_index_iterator : + public boost::iterator_facade< + segment_index_iterator<BaseIterator>, + segment<BaseIterator>, + boost::bidirectional_traversal_tag, + segment<BaseIterator> const & + > + { + public: + typedef BaseIterator base_iterator; + typedef mapping<base_iterator> mapping_type; + typedef segment<base_iterator> segment_type; + + segment_index_iterator() : current_(0,0),map_(0) + { + } + + segment_index_iterator(base_iterator p,mapping_type const *map,rule_type mask,bool full_select) : + map_(map), + mask_(mask), + full_select_(full_select) + { + set(p); + } + segment_index_iterator(bool is_begin,mapping_type const *map,rule_type mask,bool full_select) : + map_(map), + mask_(mask), + full_select_(full_select) + { + if(is_begin) + set_begin(); + else + set_end(); + } + + segment_type const &dereference() const + { + return value_; + } + + bool equal(segment_index_iterator const &other) const + { + return map_ == other.map_ && current_.second == other.current_.second; + } + + void increment() + { + std::pair<size_t,size_t> next = current_; + if(full_select_) { + next.first = next.second; + while(next.second < size()) { + next.second++; + if(valid_offset(next.second)) + break; + } + if(next.second == size()) + next.first = next.second - 1; + } + else { + while(next.second < size()) { + next.first = next.second; + next.second++; + if(valid_offset(next.second)) + break; + } + } + update_current(next); + } + + void decrement() + { + std::pair<size_t,size_t> next = current_; + if(full_select_) { + while(next.second >1) { + next.second--; + if(valid_offset(next.second)) + break; + } + next.first = next.second; + while(next.first >0) { + next.first--; + if(valid_offset(next.first)) + break; + } + } + else { + while(next.second >1) { + next.second--; + if(valid_offset(next.second)) + break; + } + next.first = next.second - 1; + } + update_current(next); + } + + private: + + void set_end() + { + current_.first = size() - 1; + current_.second = size(); + value_ = segment_type(map_->end(),map_->end(),0); + } + void set_begin() + { + current_.first = current_.second = 0; + value_ = segment_type(map_->begin(),map_->begin(),0); + increment(); + } + + void set(base_iterator p) + { + size_t dist=std::distance(map_->begin(),p); + index_type::const_iterator b=map_->index().begin(),e=map_->index().end(); + index_type::const_iterator + boundary_point=std::upper_bound(b,e,break_info(dist)); + while(boundary_point != e && (boundary_point->rule & mask_)==0) + boundary_point++; + + current_.first = current_.second = boundary_point - b; + + if(full_select_) { + while(current_.first > 0) { + current_.first --; + if(valid_offset(current_.first)) + break; + } + } + else { + if(current_.first > 0) + current_.first --; + } + value_.first = map_->begin(); + std::advance(value_.first,get_offset(current_.first)); + value_.second = value_.first; + std::advance(value_.second,get_offset(current_.second) - get_offset(current_.first)); + + update_rule(); + } + + void update_current(std::pair<size_t,size_t> pos) + { + std::ptrdiff_t first_diff = get_offset(pos.first) - get_offset(current_.first); + std::ptrdiff_t second_diff = get_offset(pos.second) - get_offset(current_.second); + std::advance(value_.first,first_diff); + std::advance(value_.second,second_diff); + current_ = pos; + update_rule(); + } + + void update_rule() + { + if(current_.second != size()) { + value_.rule(index()[current_.second].rule); + } + } + size_t get_offset(size_t ind) const + { + if(ind == size()) + return index().back().offset; + return index()[ind].offset; + } + + bool valid_offset(size_t offset) const + { + return offset == 0 + || offset == size() // make sure we not acess index[size] + || (index()[offset].rule & mask_)!=0; + } + + size_t size() const + { + return index().size(); + } + + index_type const &index() const + { + return map_->index(); + } + + + segment_type value_; + std::pair<size_t,size_t> current_; + mapping_type const *map_; + rule_type mask_; + bool full_select_; + }; + + template<typename BaseIterator> + class boundary_point_index_iterator : + public boost::iterator_facade< + boundary_point_index_iterator<BaseIterator>, + boundary_point<BaseIterator>, + boost::bidirectional_traversal_tag, + boundary_point<BaseIterator> const & + > + { + public: + typedef BaseIterator base_iterator; + typedef mapping<base_iterator> mapping_type; + typedef boundary_point<base_iterator> boundary_point_type; + + boundary_point_index_iterator() : current_(0),map_(0) + { + } + + boundary_point_index_iterator(bool is_begin,mapping_type const *map,rule_type mask) : + map_(map), + mask_(mask) + { + if(is_begin) + set_begin(); + else + set_end(); + } + boundary_point_index_iterator(base_iterator p,mapping_type const *map,rule_type mask) : + map_(map), + mask_(mask) + { + set(p); + } + + boundary_point_type const &dereference() const + { + return value_; + } + + bool equal(boundary_point_index_iterator const &other) const + { + return map_ == other.map_ && current_ == other.current_; + } + + void increment() + { + size_t next = current_; + while(next < size()) { + next++; + if(valid_offset(next)) + break; + } + update_current(next); + } + + void decrement() + { + size_t next = current_; + while(next>0) { + next--; + if(valid_offset(next)) + break; + } + update_current(next); + } + + private: + void set_end() + { + current_ = size(); + value_ = boundary_point_type(map_->end(),0); + } + void set_begin() + { + current_ = 0; + value_ = boundary_point_type(map_->begin(),0); + } + + void set(base_iterator p) + { + size_t dist = std::distance(map_->begin(),p); + + index_type::const_iterator b=index().begin(); + index_type::const_iterator e=index().end(); + index_type::const_iterator ptr = std::lower_bound(b,e,break_info(dist)); + + if(ptr==index().end()) + current_=size()-1; + else + current_=ptr - index().begin(); + + while(!valid_offset(current_)) + current_ ++; + + std::ptrdiff_t diff = get_offset(current_) - dist; + std::advance(p,diff); + value_.iterator(p); + update_rule(); + } + + void update_current(size_t pos) + { + std::ptrdiff_t diff = get_offset(pos) - get_offset(current_); + base_iterator i=value_.iterator(); + std::advance(i,diff); + current_ = pos; + value_.iterator(i); + update_rule(); + } + + void update_rule() + { + if(current_ != size()) { + value_.rule(index()[current_].rule); + } + } + size_t get_offset(size_t ind) const + { + if(ind == size()) + return index().back().offset; + return index()[ind].offset; + } + + bool valid_offset(size_t offset) const + { + return offset == 0 + || offset + 1 >= size() // last and first are always valid regardless of mark + || (index()[offset].rule & mask_)!=0; + } + + size_t size() const + { + return index().size(); + } + + index_type const &index() const + { + return map_->index(); + } + + + boundary_point_type value_; + size_t current_; + mapping_type const *map_; + rule_type mask_; + }; + + + } // details + + /// \endcond + + template<typename BaseIterator> + class segment_index; + + template<typename BaseIterator> + class boundary_point_index; + + + /// + /// \brief This class holds an index of segments in the text range and allows to iterate over them + /// + /// This class is provides \ref begin() and \ref end() member functions that return bidirectional iterators + /// to the \ref segment objects. + /// + /// It provides two options on way of selecting segments: + /// + /// - \ref rule(rule_type mask) - a mask that allows to select only specific types of segments according to + /// various masks %as \ref word_any. + /// \n + /// The default is to select any types of boundaries. + /// \n + /// For example: using word %boundary analysis, when the provided mask is \ref word_kana then the iterators + /// would iterate only over the words containing Kana letters and \ref word_any would select all types of + /// words excluding ranges that consist of white space and punctuation marks. So iterating over the text + /// "to be or not to be?" with \ref word_any rule would return segments "to", "be", "or", "not", "to", "be", instead + /// of default "to", " ", "be", " ", "or", " ", "not", " ", "to", " ", "be", "?". + /// - \ref full_select(bool how) - a flag that defines the way a range is selected if the rule of the previous + /// %boundary point does not fit the selected rule. + /// \n + /// For example: We want to fetch all sentences from the following text: "Hello! How\nare you?". + /// \n + /// This text contains three %boundary points separating it to sentences by different rules: + /// - The exclamation mark "!" ends the sentence "Hello!" + /// - The line feed that splits the sentence "How\nare you?" into two parts. + /// - The question mark that ends the second sentence. + /// \n + /// If you would only change the \ref rule() to \ref sentence_term then the segment_index would + /// provide two sentences "Hello!" and "are you?" %as only them actually terminated with required + /// terminator "!" or "?". But changing \ref full_select() to true, the selected segment would include + /// all the text up to previous valid %boundary point and would return two expected sentences: + /// "Hello!" and "How\nare you?". + /// + /// This class allows to find a segment according to the given iterator in range using \ref find() member + /// function. + /// + /// \note + /// + /// - Changing any of the options - \ref rule() or \ref full_select() and of course re-indexing the text + /// invalidates existing iterators and they can't be used any more. + /// - segment_index can be created from boundary_point_index or other segment_index that was created with + /// same \ref boundary_type. This is very fast operation %as they shared same index + /// and it does not require its regeneration. + /// + /// \see + /// + /// - \ref boundary_point_index + /// - \ref segment + /// - \ref boundary_point + /// + + template<typename BaseIterator> + class segment_index { + public: + + /// + /// The type of the iterator used to iterate over the original text + /// + typedef BaseIterator base_iterator; + #ifdef BOOST_LOCALE_DOXYGEN + /// + /// The bidirectional iterator that iterates over \ref value_type objects. + /// + /// - The iterators may be invalidated by use of any non-const member function + /// including but not limited to \ref rule(rule_type) and \ref full_select(bool). + /// - The returned value_type object is valid %as long %as iterator points to it. + /// So this following code is wrong %as t used after p was updated: + /// \code + /// segment_index<some_iterator>::iterator p=index.begin(); + /// segment<some_iterator> &t = *p; + /// ++p; + /// cout << t.str() << endl; + /// \endcode + /// + typedef unspecified_iterator_type iterator; + /// + /// \copydoc iterator + /// + typedef unspecified_iterator_type const_iterator; + #else + typedef details::segment_index_iterator<base_iterator> iterator; + typedef details::segment_index_iterator<base_iterator> const_iterator; + #endif + /// + /// The type dereferenced by the \ref iterator and \ref const_iterator. It is + /// an object that represents selected segment. + /// + typedef segment<base_iterator> value_type; + + /// + /// Default constructor. + /// + /// \note + /// + /// When this object is constructed by default it does not include a valid index, thus + /// calling \ref begin(), \ref end() or \ref find() member functions would lead to undefined + /// behavior + /// + segment_index() : mask_(0xFFFFFFFFu),full_select_(false) + { + } + /// + /// Create a segment_index for %boundary analysis \ref boundary_type "type" of the text + /// in range [begin,end) using a rule \a mask for locale \a loc. + /// + segment_index(boundary_type type, + base_iterator begin, + base_iterator end, + rule_type mask, + std::locale const &loc=std::locale()) + : + map_(type,begin,end,loc), + mask_(mask), + full_select_(false) + { + } + /// + /// Create a segment_index for %boundary analysis \ref boundary_type "type" of the text + /// in range [begin,end) selecting all possible segments (full mask) for locale \a loc. + /// + segment_index(boundary_type type, + base_iterator begin, + base_iterator end, + std::locale const &loc=std::locale()) + : + map_(type,begin,end,loc), + mask_(0xFFFFFFFFu), + full_select_(false) + { + } + + /// + /// Create a segment_index from a \ref boundary_point_index. It copies all indexing information + /// and used default rule (all possible segments) + /// + /// This operation is very cheap, so if you use boundary_point_index and segment_index on same text + /// range it is much better to create one from another rather then indexing the same + /// range twice. + /// + /// \note \ref rule() flags are not copied + /// + segment_index(boundary_point_index<base_iterator> const &); + /// + /// Copy an index from a \ref boundary_point_index. It copies all indexing information + /// and uses the default rule (all possible segments) + /// + /// This operation is very cheap, so if you use boundary_point_index and segment_index on same text + /// range it is much better to create one from another rather then indexing the same + /// range twice. + /// + /// \note \ref rule() flags are not copied + /// + segment_index const &operator = (boundary_point_index<base_iterator> const &); + + + /// + /// Create a new index for %boundary analysis \ref boundary_type "type" of the text + /// in range [begin,end) for locale \a loc. + /// + /// \note \ref rule() and \ref full_select() remain unchanged. + /// + void map(boundary_type type,base_iterator begin,base_iterator end,std::locale const &loc=std::locale()) + { + map_ = mapping_type(type,begin,end,loc); + } + + /// + /// Get the \ref iterator on the beginning of the segments range. + /// + /// Preconditions: the segment_index should have a mapping + /// + /// \note + /// + /// The returned iterator is invalidated by access to any non-const member functions of this object + /// + iterator begin() const + { + return iterator(true,&map_,mask_,full_select_); + } + + /// + /// Get the \ref iterator on the ending of the segments range. + /// + /// Preconditions: the segment_index should have a mapping + /// + /// The returned iterator is invalidated by access to any non-const member functions of this object + /// + iterator end() const + { + return iterator(false,&map_,mask_,full_select_); + } + + /// + /// Find a first valid segment following a position \a p. + /// + /// If \a p is inside a valid segment this segment is selected: + /// + /// For example: For \ref word %boundary analysis with \ref word_any rule(): + /// + /// - "to| be or ", would point to "be", + /// - "t|o be or ", would point to "to", + /// - "to be or| ", would point to end. + /// + /// + /// Preconditions: the segment_index should have a mapping and \a p should be valid iterator + /// to the text in the mapped range. + /// + /// The returned iterator is invalidated by access to any non-const member functions of this object + /// + iterator find(base_iterator p) const + { + return iterator(p,&map_,mask_,full_select_); + } + + /// + /// Get the mask of rules that are used + /// + rule_type rule() const + { + return mask_; + } + /// + /// Set the mask of rules that are used + /// + void rule(rule_type v) + { + mask_ = v; + } + + /// + /// Get the full_select property value - should segment include in the range + /// values that not belong to specific \ref rule() or not. + /// + /// The default value is false. + /// + /// For example for \ref sentence %boundary with rule \ref sentence_term the segments + /// of text "Hello! How\nare you?" are "Hello!\", "are you?" when full_select() is false + /// because "How\n" is selected %as sentence by a rule spits the text by line feed. If full_select() + /// is true the returned segments are "Hello! ", "How\nare you?" where "How\n" is joined with the + /// following part "are you?" + /// + + bool full_select() const + { + return full_select_; + } + + /// + /// Set the full_select property value - should segment include in the range + /// values that not belong to specific \ref rule() or not. + /// + /// The default value is false. + /// + /// For example for \ref sentence %boundary with rule \ref sentence_term the segments + /// of text "Hello! How\nare you?" are "Hello!\", "are you?" when full_select() is false + /// because "How\n" is selected %as sentence by a rule spits the text by line feed. If full_select() + /// is true the returned segments are "Hello! ", "How\nare you?" where "How\n" is joined with the + /// following part "are you?" + /// + + void full_select(bool v) + { + full_select_ = v; + } + + private: + friend class boundary_point_index<base_iterator>; + typedef details::mapping<base_iterator> mapping_type; + mapping_type map_; + rule_type mask_; + bool full_select_; + }; + + /// + /// \brief This class holds an index of \ref boundary_point "boundary points" and allows iterating + /// over them. + /// + /// This class is provides \ref begin() and \ref end() member functions that return bidirectional iterators + /// to the \ref boundary_point objects. + /// + /// It provides an option that affects selecting %boundary points according to different rules: + /// using \ref rule(rule_type mask) member function. It allows to set a mask that select only specific + /// types of %boundary points like \ref sentence_term. + /// + /// For example for a sentence %boundary analysis of a text "Hello! How\nare you?" when the default + /// rule is used the %boundary points would be: + /// + /// - "|Hello! How\nare you?" + /// - "Hello! |How\nare you?" + /// - "Hello! How\n|are you?" + /// - "Hello! How\nare you?|" + /// + /// However if \ref rule() is set to \ref sentence_term then the selected %boundary points would be: + /// + /// - "|Hello! How\nare you?" + /// - "Hello! |How\nare you?" + /// - "Hello! How\nare you?|" + /// + /// Such that a %boundary point defined by a line feed character would be ignored. + /// + /// This class allows to find a boundary_point according to the given iterator in range using \ref find() member + /// function. + /// + /// \note + /// - Even an empty text range [x,x) considered to have a one %boundary point x. + /// - \a a and \a b points of the range [a,b) are always considered %boundary points + /// regardless the rules used. + /// - Changing any of the option \ref rule() or course re-indexing the text + /// invalidates existing iterators and they can't be used any more. + /// - boundary_point_index can be created from segment_index or other boundary_point_index that was created with + /// same \ref boundary_type. This is very fast operation %as they shared same index + /// and it does not require its regeneration. + /// + /// \see + /// + /// - \ref segment_index + /// - \ref boundary_point + /// - \ref segment + /// + + + template<typename BaseIterator> + class boundary_point_index { + public: + /// + /// The type of the iterator used to iterate over the original text + /// + typedef BaseIterator base_iterator; + #ifdef BOOST_LOCALE_DOXYGEN + /// + /// The bidirectional iterator that iterates over \ref value_type objects. + /// + /// - The iterators may be invalidated by use of any non-const member function + /// including but not limited to \ref rule(rule_type) member function. + /// - The returned value_type object is valid %as long %as iterator points to it. + /// So this following code is wrong %as t used after p was updated: + /// \code + /// boundary_point_index<some_iterator>::iterator p=index.begin(); + /// boundary_point<some_iterator> &t = *p; + /// ++p; + /// rule_type r = t->rule(); + /// \endcode + /// + typedef unspecified_iterator_type iterator; + /// + /// \copydoc iterator + /// + typedef unspecified_iterator_type const_iterator; + #else + typedef details::boundary_point_index_iterator<base_iterator> iterator; + typedef details::boundary_point_index_iterator<base_iterator> const_iterator; + #endif + /// + /// The type dereferenced by the \ref iterator and \ref const_iterator. It is + /// an object that represents the selected \ref boundary_point "boundary point". + /// + typedef boundary_point<base_iterator> value_type; + + /// + /// Default constructor. + /// + /// \note + /// + /// When this object is constructed by default it does not include a valid index, thus + /// calling \ref begin(), \ref end() or \ref find() member functions would lead to undefined + /// behavior + /// + boundary_point_index() : mask_(0xFFFFFFFFu) + { + } + + /// + /// Create a segment_index for %boundary analysis \ref boundary_type "type" of the text + /// in range [begin,end) using a rule \a mask for locale \a loc. + /// + boundary_point_index(boundary_type type, + base_iterator begin, + base_iterator end, + rule_type mask, + std::locale const &loc=std::locale()) + : + map_(type,begin,end,loc), + mask_(mask) + { + } + /// + /// Create a segment_index for %boundary analysis \ref boundary_type "type" of the text + /// in range [begin,end) selecting all possible %boundary points (full mask) for locale \a loc. + /// + boundary_point_index(boundary_type type, + base_iterator begin, + base_iterator end, + std::locale const &loc=std::locale()) + : + map_(type,begin,end,loc), + mask_(0xFFFFFFFFu) + { + } + + /// + /// Create a boundary_point_index from a \ref segment_index. It copies all indexing information + /// and uses the default rule (all possible %boundary points) + /// + /// This operation is very cheap, so if you use boundary_point_index and segment_index on same text + /// range it is much better to create one from another rather then indexing the same + /// range twice. + /// + /// \note \ref rule() flags are not copied + /// + boundary_point_index(segment_index<base_iterator> const &other); + /// + /// Copy a boundary_point_index from a \ref segment_index. It copies all indexing information + /// and keeps the current \ref rule() unchanged + /// + /// This operation is very cheap, so if you use boundary_point_index and segment_index on same text + /// range it is much better to create one from another rather then indexing the same + /// range twice. + /// + /// \note \ref rule() flags are not copied + /// + boundary_point_index const &operator=(segment_index<base_iterator> const &other); + + /// + /// Create a new index for %boundary analysis \ref boundary_type "type" of the text + /// in range [begin,end) for locale \a loc. + /// + /// \note \ref rule() remains unchanged. + /// + void map(boundary_type type,base_iterator begin,base_iterator end,std::locale const &loc=std::locale()) + { + map_ = mapping_type(type,begin,end,loc); + } + + /// + /// Get the \ref iterator on the beginning of the %boundary points range. + /// + /// Preconditions: this boundary_point_index should have a mapping + /// + /// \note + /// + /// The returned iterator is invalidated by access to any non-const member functions of this object + /// + iterator begin() const + { + return iterator(true,&map_,mask_); + } + + /// + /// Get the \ref iterator on the ending of the %boundary points range. + /// + /// Preconditions: this boundary_point_index should have a mapping + /// + /// \note + /// + /// The returned iterator is invalidated by access to any non-const member functions of this object + /// + iterator end() const + { + return iterator(false,&map_,mask_); + } + + /// + /// Find a first valid %boundary point on a position \a p or following it. + /// + /// For example: For \ref word %boundary analysis of the text "to be or" + /// + /// - "|to be", would return %boundary point at "|to be", + /// - "t|o be", would point to "to| be" + /// + /// Preconditions: the boundary_point_index should have a mapping and \a p should be valid iterator + /// to the text in the mapped range. + /// + /// The returned iterator is invalidated by access to any non-const member functions of this object + /// + iterator find(base_iterator p) const + { + return iterator(p,&map_,mask_); + } + + /// + /// Get the mask of rules that are used + /// + rule_type rule() const + { + return mask_; + } + /// + /// Set the mask of rules that are used + /// + void rule(rule_type v) + { + mask_ = v; + } + + private: + + friend class segment_index<base_iterator>; + typedef details::mapping<base_iterator> mapping_type; + mapping_type map_; + rule_type mask_; + }; + + /// \cond INTERNAL + template<typename BaseIterator> + segment_index<BaseIterator>::segment_index(boundary_point_index<BaseIterator> const &other) : + map_(other.map_), + mask_(0xFFFFFFFFu), + full_select_(false) + { + } + + template<typename BaseIterator> + boundary_point_index<BaseIterator>::boundary_point_index(segment_index<BaseIterator> const &other) : + map_(other.map_), + mask_(0xFFFFFFFFu) + { + } + + template<typename BaseIterator> + segment_index<BaseIterator> const &segment_index<BaseIterator>::operator=(boundary_point_index<BaseIterator> const &other) + { + map_ = other.map_; + return *this; + } + + template<typename BaseIterator> + boundary_point_index<BaseIterator> const &boundary_point_index<BaseIterator>::operator=(segment_index<BaseIterator> const &other) + { + map_ = other.map_; + return *this; + } + /// \endcond + + typedef segment_index<std::string::const_iterator> ssegment_index; ///< convenience typedef + typedef segment_index<std::wstring::const_iterator> wssegment_index; ///< convenience typedef + #ifdef BOOST_HAS_CHAR16_T + typedef segment_index<std::u16string::const_iterator> u16ssegment_index;///< convenience typedef + #endif + #ifdef BOOST_HAS_CHAR32_T + typedef segment_index<std::u32string::const_iterator> u32ssegment_index;///< convenience typedef + #endif + + typedef segment_index<char const *> csegment_index; ///< convenience typedef + typedef segment_index<wchar_t const *> wcsegment_index; ///< convenience typedef + #ifdef BOOST_HAS_CHAR16_T + typedef segment_index<char16_t const *> u16csegment_index; ///< convenience typedef + #endif + #ifdef BOOST_HAS_CHAR32_T + typedef segment_index<char32_t const *> u32csegment_index; ///< convenience typedef + #endif + + typedef boundary_point_index<std::string::const_iterator> sboundary_point_index;///< convenience typedef + typedef boundary_point_index<std::wstring::const_iterator> wsboundary_point_index;///< convenience typedef + #ifdef BOOST_HAS_CHAR16_T + typedef boundary_point_index<std::u16string::const_iterator> u16sboundary_point_index;///< convenience typedef + #endif + #ifdef BOOST_HAS_CHAR32_T + typedef boundary_point_index<std::u32string::const_iterator> u32sboundary_point_index;///< convenience typedef + #endif + + typedef boundary_point_index<char const *> cboundary_point_index; ///< convenience typedef + typedef boundary_point_index<wchar_t const *> wcboundary_point_index; ///< convenience typedef + #ifdef BOOST_HAS_CHAR16_T + typedef boundary_point_index<char16_t const *> u16cboundary_point_index;///< convenience typedef + #endif + #ifdef BOOST_HAS_CHAR32_T + typedef boundary_point_index<char32_t const *> u32cboundary_point_index;///< convenience typedef + #endif + + + + } // boundary + + } // locale +} // boost + +/// +/// \example boundary.cpp +/// Example of using segment_index +/// \example wboundary.cpp +/// Example of using segment_index over wide strings +/// + +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#endif +// vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 |