diff options
Diffstat (limited to 'src/pal/tests/palsuite/locale_info/MultiByteToWideChar/test4/test4.cpp')
-rw-r--r-- | src/pal/tests/palsuite/locale_info/MultiByteToWideChar/test4/test4.cpp | 230 |
1 files changed, 230 insertions, 0 deletions
diff --git a/src/pal/tests/palsuite/locale_info/MultiByteToWideChar/test4/test4.cpp b/src/pal/tests/palsuite/locale_info/MultiByteToWideChar/test4/test4.cpp new file mode 100644 index 0000000000..1d7234640b --- /dev/null +++ b/src/pal/tests/palsuite/locale_info/MultiByteToWideChar/test4/test4.cpp @@ -0,0 +1,230 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +/*============================================================================ +** +** Source: test4.c +** +** Purpose: Tests MultiByteToWideChar with a UTF-8 encoding +** +** +**==========================================================================*/ + +#include <palsuite.h> + +int __cdecl main(int argc, char *argv[]) +{ + int ret; + int ret2; + + if (PAL_Initialize(argc, argv)) + { + return FAIL; + } + + const char * const utf8Strings[] = + { + // Correct strings + + // Empty string + "", + // 1 byte encoded 1 character long string + "A", + // 2 byte encoded 1 character long string + "\xC2\x80", + // 3 byte encoded 1 character long string + "\xE0\xA0\x80", + // 1 byte encoded characters only + "ABCDEFGHIJKLMNOPQRSTUVWXYZ", + // valid 2 byte encoded characters only + "\xC2\x80\xC3\xBF\xC7\x81\xDF\xBF", + // valid 3 byte encoded characters only + "\xE0\xA0\x80\xE1\xB6\x88\xE1\x80\x80\xEF\xBF\xBF", + // 1 byte and 2 byte encoded characters interleaved 1:1 starting and ending with 1 byte char + "\x41\xC2\x80\x42\xC3\xBF\x43\xC7\x81\x44\xDF\xBF\x45", + // 1 byte and 2 byte encoded characters interleaved 1:1 starting with 1 byte char, ending with 2 byte one + "\x41\xC2\x80\x42\xC3\xBF\x43\xC7\x81\x44\xDF\xBF", + // 1 byte and 2 byte encoded characters interleaved 1:1 starting with 2 byte char, ending with 1 byte one + "\xC2\x80\x42\xC3\xBF\x43\xC7\x81\x44\xDF\xBF\x45", + // 1 byte and 2 byte encoded characters interleaved 1:1 starting and ending with 2 byte char + "\xC2\x80\x42\xC3\xBF\x43\xC7\x81\x44\xDF\xBF", + // 1 byte and 2 byte encoded characters interleaved 2:2 starting and ending with 1 byte char + "\x41\x42\xC2\x80\xC3\xBF\x43\x44\xC7\x81\xDF\xBF\x45\x46", + // 1 byte and 2 byte encoded characters interleaved 2:2 starting with 1 byte char, ending with 2 byte one + "\x41\x42\xC2\x80\xC3\xBF\x43\x44\xC7\x81\xDF\xBF", + // 1 byte and 2 byte encoded characters interleaved 2:2 starting with 2 byte char, ending with 1 byte one + "\xC2\x80\xC3\xBF\x43\x44\xC7\x81\xDF\xBF\x45\x46", + // 1 byte and 2 byte encoded characters interleaved 2:2 starting and ending with 2 byte char + "\xC2\x80\xC3\xBF\x43\x44\xC7\x81\xDF\xBF", + // surrogates + "\xF0\x90\x80\x80\xF0\x90\x89\x80\xF3\x80\x8E\xB0\xF4\x8F\xBF\xBF", + + // Strings with errors + // Incomplete 2 byte encoded character 1 byte missing standalone + "\xC2", + // Incomplete 3 byte encoded character 1 byte missing standalone + "\xE0\xA0", + // Incomplete 3 byte encoded character 2 bytes missing standalone + "\xE0", + // Incomplete surrogate character 1 byte missing standalone + "\xF0\x90\x80", + // Incomplete surrogate character 2 bytes missing standalone + "\xF0\x90", + // Incomplete surrogate character 3 bytes missing standalone + "\xF0", + // Trailing byte with no lead byte standalone + "\x80", + // Incomplete 2 byte encoded character 1 byte missing between 1 byte chars + "\x41\xC2\x42", + // Incomplete 3 byte encoded character 1 byte missing between 1 byte chars + "\x41\xE0\xA0\x42", + // Incomplete 3 byte encoded character 2 bytes missing between 1 byte chars + "\x41\xE0\x42", + // Trailing byte with no lead byte between 1 byte chars + "\x41\x80\x42", + // Incomplete 2 byte encoded character 1 byte missing before 1 byte char + "\xC2\x42", + // Incomplete 3 byte encoded character 1 byte missing before 1 byte char + "\xE0\xA0\x42", + // Incomplete 3 byte encoded character 2 bytes missing before 1 byte char + "\xE0\x42", + // Trailing byte with no lead byte before 1 byte char + "\x80\x42", + // Incomplete 2 byte encoded character 1 byte missing after 1 byte char + "\x41\xC2", + // Incomplete 3 byte encoded character 1 byte missing after 1 byte char + "\x41\xE0\xA0", + // Incomplete 3 byte encoded character 2 bytes missing after 1 byte char + "\x41\xE0", + // Trailing byte with no lead byte after 1 byte char + "\x41\x80", + // Incomplete 2 byte encoded character 1 byte missing between 2 byte chars + "\xC2\x80\xC2\xC3\xBF", + // Incomplete 3 byte encoded character 1 byte missing between 2 byte chars + "\xC2\x80\xE0\xA0\xC3\xBF", + // Incomplete 3 byte encoded character 2 bytes missing between 2 byte chars + "\xC2\x80\xE0\xC3\xBF", + // Trailing byte with no lead byte between 2 byte chars + "\xC2\x80\x80\xC3\xBF", + // 2 byte encoded character in non-shortest form encodings (these are not allowed) + "\xC0\x80", + // 3 byte encoded character in non-shortest form encodings (these are not allowed) + "\xE0\x80\x80", + // 4 byte encoded character in non-shortest form encodings (these are not allowed) + "\xF0\x80\x80\x80", + }; + + const WCHAR * const unicodeStrings[] = + { + // Empty string + W(""), + // 1 byte encoded 1 character long string + W("A"), + // 2 byte encoded 1 character long string + W("\x0080"), + // 3 byte encoded 1 character long string + W("\x0800"), + // 1 byte encoded characters only + W("ABCDEFGHIJKLMNOPQRSTUVWXYZ"), + // 2 byte encoded characters only + W("\x0080\x00FF\x01C1\x07FF"), + // valid 3 byte encoded characters only + W("\x0800\x1D88\x1000\xFFFF"), + // 1 byte and 2 byte encoded characters interleaved 1:1 starting and ending with 1 byte char + W("\x0041\x0080\x0042\x00FF\x0043\x01C1\x0044\x07FF\x0045"), + // 1 byte and 2 byte encoded characters interleaved 1:1 starting with 1 byte char, ending with 2 byte one + W("\x0041\x0080\x0042\x00FF\x0043\x01C1\x0044\x07FF"), + // 1 byte and 2 byte encoded characters interleaved 1:1 starting with 2 byte char, ending with 1 byte one + W("\x0080\x0042\x00FF\x0043\x01C1\x0044\x07FF\x0045"), + // 1 byte and 2 byte encoded characters interleaved 1:1 starting and ending with 2 byte char + W("\x0080\x0042\x00FF\x0043\x01C1\x0044\x07FF"), + // 1 byte and 2 byte encoded characters interleaved 2:2 starting and ending with 1 byte char + W("\x0041\x0042\x0080\x00FF\x0043\x0044\x01C1\x07FF\x0045\x0046"), + // 1 byte and 2 byte encoded characters interleaved 2:2 starting with 1 byte char, ending with 2 byte one + W("\x0041\x0042\x0080\x00FF\x0043\x0044\x01C1\x07FF"), + // 1 byte and 2 byte encoded characters interleaved 2:2 starting with 2 byte char, ending with 1 byte one + W("\x0080\x00FF\x0043\x0044\x01C1\x07FF\x0045\x0046"), + // 1 byte and 2 byte encoded characters interleaved 2:2 starting and ending with 2 byte char + W("\x0080\x00FF\x0043\x0044\x01C1\x07FF"), + // surrogates + W("\xD800\xDC00\xD800\xDE40\xDAC0\xDFB0\xDBFF\xDFFF"), + + // Strings with errors + // Incomplete 2 byte encoded character standalone + W(""), + // Incomplete 3 byte encoded character 1 byte missing standalone + W(""), + // Incomplete 3 byte encoded character 2 bytes missing standalone + W(""), + // Incomplete surrogate character 1 byte missing standalone + W(""), + // Incomplete surrogate character 2 bytes missing standalone + W(""), + // Incomplete surrogate character 3 bytes missing standalone + W(""), + // Trailing byte with no lead byte standalone + W(""), + // Incomplete 2 byte encoded character 1 byte missing between 1 byte chars + W("\x0041\x0042"), + // Incomplete 3 byte encoded character 1 byte missing between 1 byte chars + W("\x0041\x0042"), + // Incomplete 3 byte encoded character 2 bytes missing between 1 byte chars + W("\x0041\x0042"), + // Trailing byte with no lead byte between 1 byte chars + W("\x0041\x0042"), + // Incomplete 2 byte encoded character 1 byte missing before 1 byte char + W("\x0042"), + // Incomplete 3 byte encoded character 1 byte missing before 1 byte char + W("\x0042"), + // Incomplete 3 byte encoded character 2 bytes missing before 1 byte char + W("\x0042"), + // Trailing byte with no lead byte before 1 byte char + W("\x0042"), + // Incomplete 2 byte encoded character 1 byte missing after 1 byte char + W("\x0041"), + // Incomplete 3 byte encoded character 1 byte missing after 1 byte char + W("\x0041"), + // Incomplete 3 byte encoded character 2 bytes missing after 1 byte char + W("\x0041"), + // Trailing byte with no lead byte after 1 byte char + W("\x0041"), + // Incomplete 2 byte encoded character 1 byte missing between 2 byte chars + W("\x0080\x00FF"), + // Incomplete 3 byte encoded character 1 byte missing between 2 byte chars + W("\x0080\x00FF"), + // Incomplete 3 byte encoded character 2 bytes missing between 2 byte chars + W("\x0080\x00FF"), + // Trailing byte with no lead byte between 2 byte chars + W("\x0080\x00FF"), + // 2 byte encoded character in non-shortest form encodings (these are not allowed) + W(""), + // 3 byte encoded character in non-shortest form encodings (these are not allowed) + W(""), + // 4 byte encoded character in non-shortest form encodings (these are not allowed) + W(""), + }; + + for (int i = 0; i < (sizeof(utf8Strings) / sizeof(utf8Strings[0])); i++) + { + ret = MultiByteToWideChar(CP_UTF8, 0, utf8Strings[i], -1, NULL, 0); + WCHAR* wideBuffer = (WCHAR*)malloc(ret * sizeof(WCHAR)); + ret2 = MultiByteToWideChar(CP_UTF8, 0, utf8Strings[i], -1, wideBuffer, ret); + if (ret != ret2) + { + Fail("MultiByteToWideChar string %d: returned different string length for empty and real dest buffers!\n" + "Got %d for the empty one, %d for real one.\n", i, ret2, ret); + } + + if (wcscmp(wideBuffer, unicodeStrings[i]) != 0) + { + Fail("MultiByteToWideChar string %d: the resulting string doesn't match the expected one!\n", i); + } + + free(wideBuffer); + } + + PAL_Terminate(); + + return PASS; +} |