diff options
-rw-r--r-- | src/pal/src/locale/utf8.cpp | 47 | ||||
-rw-r--r-- | src/pal/tests/palsuite/locale_info/MultiByteToWideChar/test4/test4.cpp | 52 | ||||
-rw-r--r-- | tests/src/Interop/StringMarshalling/UTF8/UTF8Test.cs | 52 |
3 files changed, 112 insertions, 39 deletions
diff --git a/src/pal/src/locale/utf8.cpp b/src/pal/src/locale/utf8.cpp index 87493a9673..d797f4557b 100644 --- a/src/pal/src/locale/utf8.cpp +++ b/src/pal/src/locale/utf8.cpp @@ -346,7 +346,7 @@ protected: throw ArgumentException("String 'chars' contains invalid Unicode code points."); // Now we aren't going to be false, so its OK to update chars - chars = &charTemp; + *chars = charTemp; } return true; @@ -412,7 +412,7 @@ protected: class DecoderReplacementFallbackBuffer : public DecoderFallbackBuffer { // Store our default string - WCHAR strDefault[4]; + WCHAR strDefault[2]; int strDefaultLength; int fallbackCount = -1; int fallbackIndex = -1; @@ -421,11 +421,8 @@ public: // Construction DecoderReplacementFallbackBuffer(DecoderReplacementFallback* fallback) { - // 2X in case we're a surrogate pair wcscpy_s(strDefault, sizeof(strDefault), fallback->GetDefaultString()); - wcscat_s(strDefault, sizeof(strDefault), fallback->GetDefaultString()); - strDefaultLength = 2 * PAL_wcslen((const WCHAR *)fallback->GetDefaultString()); - + strDefaultLength = PAL_wcslen((const WCHAR *)fallback->GetDefaultString()); } // Fallback Methods @@ -1081,9 +1078,14 @@ class UTF8Encoding return begin <= c && c <= end; } - size_t PtrDiff(void* ptr1, void* ptr2) + size_t PtrDiff(WCHAR* ptr1, WCHAR* ptr2) { - return (BYTE*)ptr2 - (BYTE*)ptr1; + return ptr1 - ptr2; + } + + size_t PtrDiff(BYTE* ptr1, BYTE* ptr2) + { + return ptr1 - ptr2; } void ThrowBytesOverflow() @@ -1118,6 +1120,28 @@ class UTF8Encoding } } + // During GetChars we had an invalid byte sequence + // pSrc is backed up to the start of the bad sequence if we didn't have room to + // fall it back. Otherwise pSrc remains where it is. + bool FallbackInvalidByteSequence(BYTE** pSrc, int ch, DecoderFallbackBuffer* fallback, WCHAR** pTarget) + { + // Get our byte[] + BYTE* pStart = *pSrc; + BYTE* bytesUnknown; + int size = GetBytesUnknown(pStart, ch, &bytesUnknown); + + // Do the actual fallback + if (!fallback->InternalFallback(bytesUnknown, *pSrc, pTarget, size)) + { + // Oops, it failed, back up to pStart + *pSrc = pStart; + return false; + } + + // It worked + return true; + } + int FallbackInvalidByteSequence(BYTE* pSrc, int ch, DecoderFallbackBuffer *fallback) { // Get our byte[] @@ -1211,7 +1235,7 @@ class UTF8Encoding public: UTF8Encoding(bool isThrowException) - : encoderReplacementFallback(W("\xFFFD")) + : encoderReplacementFallback(W("\xFFFD")), decoderReplacementFallback(W("\xFFFD")) { if (isThrowException) { @@ -1704,8 +1728,9 @@ public: fallback = decoderFallback->CreateFallbackBuffer(); fallback->InternalInitialize(bytes, pAllocatedBufferEnd); } - // This'll back us up the appropriate # of bytes if we didn't get anywhere - if (!FallbackInvalidByteSequence(pSrc, ch, fallback)) + + // That'll back us up the appropriate # of bytes if we didn't get anywhere + if (!FallbackInvalidByteSequence(&pSrc, ch, fallback, &pTarget)) { // Ran out of buffer space // Need to throw an exception? diff --git a/src/pal/tests/palsuite/locale_info/MultiByteToWideChar/test4/test4.cpp b/src/pal/tests/palsuite/locale_info/MultiByteToWideChar/test4/test4.cpp index 1d7234640b..7d382de298 100644 --- a/src/pal/tests/palsuite/locale_info/MultiByteToWideChar/test4/test4.cpp +++ b/src/pal/tests/palsuite/locale_info/MultiByteToWideChar/test4/test4.cpp @@ -152,57 +152,57 @@ int __cdecl main(int argc, char *argv[]) // Strings with errors // Incomplete 2 byte encoded character standalone - W(""), + W("\xFFFD"), // Incomplete 3 byte encoded character 1 byte missing standalone - W(""), + W("\xFFFD"), // Incomplete 3 byte encoded character 2 bytes missing standalone - W(""), + W("\xFFFD"), // Incomplete surrogate character 1 byte missing standalone - W(""), + W("\xFFFD"), // Incomplete surrogate character 2 bytes missing standalone - W(""), + W("\xFFFD"), // Incomplete surrogate character 3 bytes missing standalone - W(""), + W("\xFFFD"), // Trailing byte with no lead byte standalone - W(""), + W("\xFFFD"), // Incomplete 2 byte encoded character 1 byte missing between 1 byte chars - W("\x0041\x0042"), + W("\x0041\xFFFD\x0042"), // Incomplete 3 byte encoded character 1 byte missing between 1 byte chars - W("\x0041\x0042"), + W("\x0041\xFFFD\x0042"), // Incomplete 3 byte encoded character 2 bytes missing between 1 byte chars - W("\x0041\x0042"), + W("\x0041\xFFFD\x0042"), // Trailing byte with no lead byte between 1 byte chars - W("\x0041\x0042"), + W("\x0041\xFFFD\x0042"), // Incomplete 2 byte encoded character 1 byte missing before 1 byte char - W("\x0042"), + W("\xFFFD\x0042"), // Incomplete 3 byte encoded character 1 byte missing before 1 byte char - W("\x0042"), + W("\xFFFD\x0042"), // Incomplete 3 byte encoded character 2 bytes missing before 1 byte char - W("\x0042"), + W("\xFFFD\x0042"), // Trailing byte with no lead byte before 1 byte char - W("\x0042"), + W("\xFFFD\x0042"), // Incomplete 2 byte encoded character 1 byte missing after 1 byte char - W("\x0041"), + W("\x0041\xFFFD"), // Incomplete 3 byte encoded character 1 byte missing after 1 byte char - W("\x0041"), + W("\x0041\xFFFD"), // Incomplete 3 byte encoded character 2 bytes missing after 1 byte char - W("\x0041"), + W("\x0041\xFFFD"), // Trailing byte with no lead byte after 1 byte char - W("\x0041"), + W("\x0041\xFFFD"), // Incomplete 2 byte encoded character 1 byte missing between 2 byte chars - W("\x0080\x00FF"), + W("\x0080\xFFFD\x00FF"), // Incomplete 3 byte encoded character 1 byte missing between 2 byte chars - W("\x0080\x00FF"), + W("\x0080\xFFFD\x00FF"), // Incomplete 3 byte encoded character 2 bytes missing between 2 byte chars - W("\x0080\x00FF"), + W("\x0080\xFFFD\x00FF"), // Trailing byte with no lead byte between 2 byte chars - W("\x0080\x00FF"), + W("\x0080\xFFFD\x00FF"), // 2 byte encoded character in non-shortest form encodings (these are not allowed) - W(""), + W("\xFFFD\xFFFD"), // 3 byte encoded character in non-shortest form encodings (these are not allowed) - W(""), + W("\xFFFD\xFFFD"), // 4 byte encoded character in non-shortest form encodings (these are not allowed) - W(""), + W("\xFFFD\xFFFD\xFFFD"), }; for (int i = 0; i < (sizeof(utf8Strings) / sizeof(utf8Strings[0])); i++) diff --git a/tests/src/Interop/StringMarshalling/UTF8/UTF8Test.cs b/tests/src/Interop/StringMarshalling/UTF8/UTF8Test.cs index 7bfe19fdac..2e9e6858f8 100644 --- a/tests/src/Interop/StringMarshalling/UTF8/UTF8Test.cs +++ b/tests/src/Interop/StringMarshalling/UTF8/UTF8Test.cs @@ -126,8 +126,21 @@ class UTF8StructMarshalling public int index; } + unsafe struct UnmanagedStruct + { + public fixed byte psz[8]; + } + + [StructLayout(LayoutKind.Sequential, CharSet = CharSet.Ansi)] + struct ManagedStruct + { + [MarshalAs(UnmanagedType.ByValTStr, SizeConst = 8)] + public string str; + } + [DllImport("UTF8TestNative", CallingConvention = CallingConvention.Cdecl)] public static extern void TestStructWithUtf8Field(Utf8Struct utfStruct); + public static void TestUTF8StructMarshalling(string[] utf8Strings) { Utf8Struct utf8Struct = new Utf8Struct(); @@ -137,7 +150,40 @@ class UTF8StructMarshalling utf8Struct.index = i; TestStructWithUtf8Field(utf8Struct); } - } + if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) + CompareWithUTF8Encoding(); + } + + unsafe static void CompareWithUTF8Encoding() + { + // Compare results with UTF8Encoding + UnmanagedStruct ums; + ums.psz[0] = 0xFF; + ums.psz[1] = (byte)'a'; + ums.psz[2] = (byte)'b'; + ums.psz[3] = (byte)'c'; + ums.psz[4] = (byte)'d'; + ums.psz[5] = 0; + + IntPtr ptr = (IntPtr)(&ums); + ManagedStruct ms = Marshal.PtrToStructure<ManagedStruct>(ptr); + string actual = ms.str; + + UTF8Encoding uTF8Encoding = new UTF8Encoding(); + byte [] b = new byte[5]; + b[0] = 0xFF; + b[1] = (byte)'a'; + b[2] = (byte)'b'; + b[3] = (byte)'c'; + b[4] = (byte)'d'; + string expected = uTF8Encoding.GetString(b); + if (actual != expected) + { + Console.WriteLine("Actual:" + actual + " Length:" + actual.Length); + Console.WriteLine("Expected:" + expected + " Length:" + expected.Length); + throw new Exception("UTF8Encoding.GetString doesn't match with Utf8 String Marshaller result"); + } + } } // UTF8 string as delegate parameter @@ -165,6 +211,7 @@ class UTF8DelegateMarshalling } } + class Test { //test strings @@ -216,6 +263,7 @@ class Test // String.Empty tests UTF8StringTests.EmptyStringTest(); + return 100; } -}
\ No newline at end of file +} |