summaryrefslogtreecommitdiff
path: root/libs/regex/doc/html/boost_regex/ref/non_std_strings/icu/unicode_types.html
blob: 34ca7a556c2b6d0a40d4f3b8119033274c48b561 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=US-ASCII">
<title>Unicode regular expression types</title>
<link rel="stylesheet" href="../../../../../../../../doc/src/boostbook.css" type="text/css">
<meta name="generator" content="DocBook XSL Stylesheets V1.76.1">
<link rel="home" href="../../../../index.html" title="Boost.Regex">
<link rel="up" href="../icu.html" title="Working With Unicode and ICU String Types">
<link rel="prev" href="intro.html" title="Introduction to using Regex with ICU">
<link rel="next" href="unicode_algo.html" title="Unicode Regular Expression Algorithms">
</head>
<body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF">
<table cellpadding="2" width="100%"><tr>
<td valign="top"><img alt="Boost C++ Libraries" width="277" height="86" src="../../../../../../../../boost.png"></td>
<td align="center"><a href="../../../../../../../../index.html">Home</a></td>
<td align="center"><a href="../../../../../../../../libs/libraries.htm">Libraries</a></td>
<td align="center"><a href="http://www.boost.org/users/people.html">People</a></td>
<td align="center"><a href="http://www.boost.org/users/faq.html">FAQ</a></td>
<td align="center"><a href="../../../../../../../../more/index.htm">More</a></td>
</tr></table>
<hr>
<div class="spirit-nav">
<a accesskey="p" href="intro.html"><img src="../../../../../../../../doc/src/images/prev.png" alt="Prev"></a><a accesskey="u" href="../icu.html"><img src="../../../../../../../../doc/src/images/up.png" alt="Up"></a><a accesskey="h" href="../../../../index.html"><img src="../../../../../../../../doc/src/images/home.png" alt="Home"></a><a accesskey="n" href="unicode_algo.html"><img src="../../../../../../../../doc/src/images/next.png" alt="Next"></a>
</div>
<div class="section">
<div class="titlepage"><div><div><h5 class="title">
<a name="boost_regex.ref.non_std_strings.icu.unicode_types"></a><a class="link" href="unicode_types.html" title="Unicode regular expression types">Unicode
          regular expression types</a>
</h5></div></div></div>
<p>
            Header <code class="computeroutput"><span class="special">&lt;</span><span class="identifier">boost</span><span class="special">/</span><span class="identifier">regex</span><span class="special">/</span><span class="identifier">icu</span><span class="special">.</span><span class="identifier">hpp</span><span class="special">&gt;</span></code> provides a regular expression traits
            class that handles UTF-32 characters:
          </p>
<pre class="programlisting"><span class="keyword">class</span> <span class="identifier">icu_regex_traits</span><span class="special">;</span>
</pre>
<p>
            and a regular expression type based upon that:
          </p>
<pre class="programlisting"><span class="keyword">typedef</span> <span class="identifier">basic_regex</span><span class="special">&lt;</span><span class="identifier">UChar32</span><span class="special">,</span><span class="identifier">icu_regex_traits</span><span class="special">&gt;</span> <span class="identifier">u32regex</span><span class="special">;</span>
</pre>
<p>
            The type <code class="computeroutput"><span class="identifier">u32regex</span></code> is
            regular expression type to use for all Unicode regular expressions; internally
            it uses UTF-32 code points, but can be created from, and used to search,
            either UTF-8, or UTF-16 encoded strings as well as UTF-32 ones.
          </p>
<p>
            The constructors, and assign member functions of <code class="computeroutput"><span class="identifier">u32regex</span></code>,
            require UTF-32 encoded strings, but there are a series of overloaded
            algorithms called <code class="computeroutput"><span class="identifier">make_u32regex</span></code>
            which allow regular expressions to be created from UTF-8, UTF-16, or
            UTF-32 encoded strings:
          </p>
<pre class="programlisting"><span class="keyword">template</span> <span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">InputIterator</span><span class="special">&gt;</span>
<span class="identifier">u32regex</span> <span class="identifier">make_u32regex</span><span class="special">(</span><span class="identifier">InputIterator</span> <span class="identifier">i</span><span class="special">,</span>
                       <span class="identifier">InputIterator</span> <span class="identifier">j</span><span class="special">,</span>
                       <span class="identifier">boost</span><span class="special">::</span><span class="identifier">regex_constants</span><span class="special">::</span><span class="identifier">syntax_option_type</span> <span class="identifier">opt</span><span class="special">);</span>
</pre>
<p>
            <span class="bold"><strong>Effects</strong></span>: Creates a regular expression
            object from the iterator sequence [i,j). The character encoding of the
            sequence is determined based upon sizeof(*i): 1 implies UTF-8, 2 implies
            UTF-16, and 4 implies UTF-32.
          </p>
<pre class="programlisting"><span class="identifier">u32regex</span> <span class="identifier">make_u32regex</span><span class="special">(</span><span class="keyword">const</span> <span class="keyword">char</span><span class="special">*</span> <span class="identifier">p</span><span class="special">,</span>
                       <span class="identifier">boost</span><span class="special">::</span><span class="identifier">regex_constants</span><span class="special">::</span><span class="identifier">syntax_option_type</span> <span class="identifier">opt</span>
                           <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">regex_constants</span><span class="special">::</span><span class="identifier">perl</span><span class="special">);</span>
</pre>
<p>
            <span class="bold"><strong>Effects</strong></span>: Creates a regular expression
            object from the Null-terminated UTF-8 characater sequence <span class="emphasis"><em>p</em></span>.
          </p>
<pre class="programlisting"><span class="identifier">u32regex</span> <span class="identifier">make_u32regex</span><span class="special">(</span><span class="keyword">const</span> <span class="keyword">unsigned</span> <span class="keyword">char</span><span class="special">*</span> <span class="identifier">p</span><span class="special">,</span>
                       <span class="identifier">boost</span><span class="special">::</span><span class="identifier">regex_constants</span><span class="special">::</span><span class="identifier">syntax_option_type</span> <span class="identifier">opt</span>
                           <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">regex_constants</span><span class="special">::</span><span class="identifier">perl</span><span class="special">);</span>
</pre>
<p>
            <span class="bold"><strong>Effects</strong></span>: Creates a regular expression
            object from the Null-terminated UTF-8 characater sequence p.
          </p>
<pre class="programlisting"><span class="identifier">u32regex</span> <span class="identifier">make_u32regex</span><span class="special">(</span><span class="keyword">const</span> <span class="keyword">wchar_t</span><span class="special">*</span> <span class="identifier">p</span><span class="special">,</span>
                       <span class="identifier">boost</span><span class="special">::</span><span class="identifier">regex_constants</span><span class="special">::</span><span class="identifier">syntax_option_type</span> <span class="identifier">opt</span>
                           <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">regex_constants</span><span class="special">::</span><span class="identifier">perl</span><span class="special">);</span>
</pre>
<p>
            <span class="bold"><strong>Effects</strong></span>: Creates a regular expression
            object from the Null-terminated characater sequence p. The character
            encoding of the sequence is determined based upon sizeof(wchar_t): 1
            implies UTF-8, 2 implies UTF-16, and 4 implies UTF-32.
          </p>
<pre class="programlisting"><span class="identifier">u32regex</span> <span class="identifier">make_u32regex</span><span class="special">(</span><span class="keyword">const</span> <span class="identifier">UChar</span><span class="special">*</span> <span class="identifier">p</span><span class="special">,</span>
                       <span class="identifier">boost</span><span class="special">::</span><span class="identifier">regex_constants</span><span class="special">::</span><span class="identifier">syntax_option_type</span> <span class="identifier">opt</span>
                           <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">regex_constants</span><span class="special">::</span><span class="identifier">perl</span><span class="special">);</span>
</pre>
<p>
            <span class="bold"><strong>Effects</strong></span>: Creates a regular expression
            object from the Null-terminated UTF-16 characater sequence p.
          </p>
<pre class="programlisting"><span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">C</span><span class="special">,</span> <span class="keyword">class</span> <span class="identifier">T</span><span class="special">,</span> <span class="keyword">class</span> <span class="identifier">A</span><span class="special">&gt;</span>
<span class="identifier">u32regex</span> <span class="identifier">make_u32regex</span><span class="special">(</span><span class="keyword">const</span> <span class="identifier">std</span><span class="special">::</span><span class="identifier">basic_string</span><span class="special">&lt;</span><span class="identifier">C</span><span class="special">,</span> <span class="identifier">T</span><span class="special">,</span> <span class="identifier">A</span><span class="special">&gt;&amp;</span> <span class="identifier">s</span><span class="special">,</span>
                       <span class="identifier">boost</span><span class="special">::</span><span class="identifier">regex_constants</span><span class="special">::</span><span class="identifier">syntax_option_type</span> <span class="identifier">opt</span>
                           <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">regex_constants</span><span class="special">::</span><span class="identifier">perl</span><span class="special">);</span>
</pre>
<p>
            <span class="bold"><strong>Effects</strong></span>: Creates a regular expression
            object from the string s. The character encoding of the string is determined
            based upon sizeof(C): 1 implies UTF-8, 2 implies UTF-16, and 4 implies
            UTF-32.
          </p>
<pre class="programlisting"><span class="identifier">u32regex</span> <span class="identifier">make_u32regex</span><span class="special">(</span><span class="keyword">const</span> <span class="identifier">UnicodeString</span><span class="special">&amp;</span> <span class="identifier">s</span><span class="special">,</span>
                       <span class="identifier">boost</span><span class="special">::</span><span class="identifier">regex_constants</span><span class="special">::</span><span class="identifier">syntax_option_type</span> <span class="identifier">opt</span>
                           <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">regex_constants</span><span class="special">::</span><span class="identifier">perl</span><span class="special">);</span>
</pre>
<p>
            <span class="bold"><strong>Effects</strong></span>: Creates a regular expression
            object from the UTF-16 encoding string s.
          </p>
</div>
<table xmlns:rev="http://www.cs.rpi.edu/~gregod/boost/tools/doc/revision" width="100%"><tr>
<td align="left"></td>
<td align="right"><div class="copyright-footer">Copyright &#169; 1998-2010 John Maddock<p>
        Distributed under the Boost Software License, Version 1.0. (See accompanying
        file LICENSE_1_0.txt or copy at <a href="http://www.boost.org/LICENSE_1_0.txt" target="_top">http://www.boost.org/LICENSE_1_0.txt</a>)
      </p>
</div></td>
</tr></table>
<hr>
<div class="spirit-nav">
<a accesskey="p" href="intro.html"><img src="../../../../../../../../doc/src/images/prev.png" alt="Prev"></a><a accesskey="u" href="../icu.html"><img src="../../../../../../../../doc/src/images/up.png" alt="Up"></a><a accesskey="h" href="../../../../index.html"><img src="../../../../../../../../doc/src/images/home.png" alt="Home"></a><a accesskey="n" href="unicode_algo.html"><img src="../../../../../../../../doc/src/images/next.png" alt="Next"></a>
</div>
</body>
</html>