summaryrefslogtreecommitdiff
path: root/src/corefx/System.Globalization.Native/normalization.cpp
blob: 014894a5edc57d82c58c123680350bf39e0f27fd (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
//

#include <stdint.h>
#include "icushim.h"

/*
 * Native mirror of the managed System.Text.NormalizationForm enumeration.
 * The numeric values cross the managed/native boundary, so they must stay
 * identical to the managed definition.
 */
enum class NormalizationForm : int32_t
{
    C = 1,  // mapped to the ICU NFC normalizer by GetNormalizerForForm
    D = 2,  // mapped to the ICU NFD normalizer by GetNormalizerForForm
    KC = 5, // mapped to the ICU NFKC normalizer by GetNormalizerForForm
    KD = 6  // mapped to the ICU NFKD normalizer by GetNormalizerForForm
};

/*
Function:
GetNormalizerForForm

Looks up the ICU normalizer instance that corresponds to normalizationForm.

Parameters:
normalizationForm: the requested normalization form.
pErrorCode: ICU status, forwarded to the unorm2_get*Instance call; set to
            U_ILLEGAL_ARGUMENT_ERROR when normalizationForm is not one of the
            known enumerators.

Return values:
The value returned by the matching unorm2_get*Instance function, or nullptr
when normalizationForm is not recognized.
*/
const UNormalizer2* GetNormalizerForForm(NormalizationForm normalizationForm, UErrorCode* pErrorCode)
{
    if (normalizationForm == NormalizationForm::C)
    {
        return unorm2_getNFCInstance(pErrorCode);
    }

    if (normalizationForm == NormalizationForm::D)
    {
        return unorm2_getNFDInstance(pErrorCode);
    }

    if (normalizationForm == NormalizationForm::KC)
    {
        return unorm2_getNFKCInstance(pErrorCode);
    }

    if (normalizationForm == NormalizationForm::KD)
    {
        return unorm2_getNFKDInstance(pErrorCode);
    }

    // The value came from outside and is not a declared enumerator.
    *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
    return nullptr;
}

/*
Function:
IsNormalized

Used by System.StringNormalizationExtensions.IsNormalized to detect whether a
string is already in the requested Unicode Normalization Form.

Parameters:
normalizationForm: the form to test against.
lpStr: the text to examine; passed unchanged to unorm2_isNormalized.
cwStrLength: the length handed to unorm2_isNormalized together with lpStr.

Return values:
0: lpStr is not normalized.
1: lpStr is normalized.
-1: the ICU status indicated a failure (including an unrecognized
    normalizationForm reported by GetNormalizerForForm, assuming ICU leaves an
    already-failed status untouched -- NOTE(review): confirm against ICU docs).
*/
extern "C" int32_t GlobalizationNative_IsNormalized(
    NormalizationForm normalizationForm, const UChar* lpStr, int32_t cwStrLength)
{
    // One status value is threaded through both calls and inspected once below.
    UErrorCode status = U_ZERO_ERROR;
    const UNormalizer2* normalizer = GetNormalizerForForm(normalizationForm, &status);
    const UBool result = unorm2_isNormalized(normalizer, lpStr, cwStrLength, &status);

    if (!U_SUCCESS(status))
    {
        return -1;
    }

    return (result == TRUE) ? 1 : 0;
}

/*
Function:
NormalizeString

Used by System.StringNormalizationExtensions.Normalize to convert a string
into the requested Unicode Normalization Form.

Parameters:
normalizationForm: the target form.
lpSrc / cwSrcLength: source text and its length, passed to unorm2_normalize.
lpDst / cwDstLength: destination buffer and its capacity, passed to
                     unorm2_normalize.

Return values:
0: internal error during normalization.
otherwise: the length reported by unorm2_normalize (not counting the null
           terminator). This value is also returned when ICU reports
           U_BUFFER_OVERFLOW_ERROR -- presumably the length required, so the
           caller can retry with a larger buffer (confirm against ICU docs).

NOTE(review): a successful call for which unorm2_normalize reports length 0
also returns 0 and cannot be told apart from the error case by the caller.
*/
extern "C" int32_t GlobalizationNative_NormalizeString(
    NormalizationForm normalizationForm, const UChar* lpSrc, int32_t cwSrcLength, UChar* lpDst, int32_t cwDstLength)
{
    UErrorCode status = U_ZERO_ERROR;
    const UNormalizer2* normalizer = GetNormalizerForForm(normalizationForm, &status);
    const int32_t resultLength = unorm2_normalize(normalizer, lpSrc, cwSrcLength, lpDst, cwDstLength, &status);

    // An overflowing destination is not reported as an internal error: the
    // length computed by ICU is still handed back.
    if (status == U_BUFFER_OVERFLOW_ERROR)
    {
        return resultLength;
    }

    if (U_SUCCESS(status))
    {
        return resultLength;
    }

    return 0;
}