summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/pal/src/locale/utf8.cpp 47
-rw-r--r-- src/pal/tests/palsuite/locale_info/MultiByteToWideChar/test4/test4.cpp 52
-rw-r--r-- tests/src/Interop/StringMarshalling/UTF8/UTF8Test.cs 52
3 files changed, 112 insertions, 39 deletions
diff --git a/src/pal/src/locale/utf8.cpp b/src/pal/src/locale/utf8.cpp
index 87493a9673..d797f4557b 100644
--- a/src/pal/src/locale/utf8.cpp
+++ b/src/pal/src/locale/utf8.cpp
@@ -346,7 +346,7 @@ protected:
throw ArgumentException("String 'chars' contains invalid Unicode code points.");
// Now we aren't going to be false, so its OK to update chars
- chars = &charTemp;
+ *chars = charTemp;
}
return true;
@@ -412,7 +412,7 @@ protected:
class DecoderReplacementFallbackBuffer : public DecoderFallbackBuffer
{
// Store our default string
- WCHAR strDefault[4];
+ WCHAR strDefault[2];
int strDefaultLength;
int fallbackCount = -1;
int fallbackIndex = -1;
@@ -421,11 +421,8 @@ public:
// Construction
DecoderReplacementFallbackBuffer(DecoderReplacementFallback* fallback)
{
- // 2X in case we're a surrogate pair
- wcscpy_s(strDefault, sizeof(strDefault), fallback->GetDefaultString());
- wcscat_s(strDefault, sizeof(strDefault), fallback->GetDefaultString());
- strDefaultLength = 2 * PAL_wcslen((const WCHAR *)fallback->GetDefaultString());
-
+ // Copy the fallback's default string into our fixed buffer and remember its length.
+ // wcscpy_s takes the destination capacity in WCHAR elements, not bytes, so derive it
+ // from the array itself; passing sizeof(strDefault) would overstate the capacity.
+ wcscpy_s(strDefault, sizeof(strDefault) / sizeof(strDefault[0]), fallback->GetDefaultString());
+ strDefaultLength = PAL_wcslen((const WCHAR *)fallback->GetDefaultString());
}
// Fallback Methods
@@ -1081,9 +1078,14 @@ class UTF8Encoding
return begin <= c && c <= end;
}
- size_t PtrDiff(void* ptr1, void* ptr2)
+ // Distance ptr1 - ptr2 measured in WCHAR elements, converted to size_t.
+ size_t PtrDiff(WCHAR* ptr1, WCHAR* ptr2)
{
- return (BYTE*)ptr2 - (BYTE*)ptr1;
+ return static_cast<size_t>(ptr1 - ptr2);
+ }
+
+ // Distance ptr1 - ptr2 measured in BYTE elements, converted to size_t.
+ size_t PtrDiff(BYTE* ptr1, BYTE* ptr2)
+ {
+ return static_cast<size_t>(ptr1 - ptr2);
}
void ThrowBytesOverflow()
@@ -1118,6 +1120,28 @@ class UTF8Encoding
}
}
+ // Called from GetChars when an invalid byte sequence is encountered.
+ // The unknown bytes reported by GetBytesUnknown are handed to the decoder fallback,
+ // which writes through pTarget. Returns true when InternalFallback succeeds, leaving
+ // *pSrc where it is; returns false when it fails (no room to fall back), after
+ // writing the saved start pointer back into *pSrc.
+ bool FallbackInvalidByteSequence(BYTE** pSrc, int ch, DecoderFallbackBuffer* fallback, WCHAR** pTarget)
+ {
+     // Collect the bytes that make up the bad sequence
+     BYTE* pSequenceStart = *pSrc;
+     BYTE* pUnknownBytes;
+     int unknownByteCount = GetBytesUnknown(pSequenceStart, ch, &pUnknownBytes);
+
+     // Success path: the fallback consumed the sequence
+     if (fallback->InternalFallback(pUnknownBytes, *pSrc, pTarget, unknownByteCount))
+     {
+         return true;
+     }
+
+     // Fallback failed, so back the caller's source pointer up to the saved start
+     *pSrc = pSequenceStart;
+     return false;
+ }
+
int FallbackInvalidByteSequence(BYTE* pSrc, int ch, DecoderFallbackBuffer *fallback)
{
// Get our byte[]
@@ -1211,7 +1235,7 @@ class UTF8Encoding
public:
UTF8Encoding(bool isThrowException)
- : encoderReplacementFallback(W("\xFFFD"))
+ : encoderReplacementFallback(W("\xFFFD")), decoderReplacementFallback(W("\xFFFD"))
{
if (isThrowException)
{
@@ -1704,8 +1728,9 @@ public:
fallback = decoderFallback->CreateFallbackBuffer();
fallback->InternalInitialize(bytes, pAllocatedBufferEnd);
}
- // This'll back us up the appropriate # of bytes if we didn't get anywhere
- if (!FallbackInvalidByteSequence(pSrc, ch, fallback))
+
+ // That'll back us up the appropriate # of bytes if we didn't get anywhere
+ if (!FallbackInvalidByteSequence(&pSrc, ch, fallback, &pTarget))
{
// Ran out of buffer space
// Need to throw an exception?
diff --git a/src/pal/tests/palsuite/locale_info/MultiByteToWideChar/test4/test4.cpp b/src/pal/tests/palsuite/locale_info/MultiByteToWideChar/test4/test4.cpp
index 1d7234640b..7d382de298 100644
--- a/src/pal/tests/palsuite/locale_info/MultiByteToWideChar/test4/test4.cpp
+++ b/src/pal/tests/palsuite/locale_info/MultiByteToWideChar/test4/test4.cpp
@@ -152,57 +152,57 @@ int __cdecl main(int argc, char *argv[])
// Strings with errors
// Incomplete 2 byte encoded character standalone
- W(""),
+ W("\xFFFD"),
// Incomplete 3 byte encoded character 1 byte missing standalone
- W(""),
+ W("\xFFFD"),
// Incomplete 3 byte encoded character 2 bytes missing standalone
- W(""),
+ W("\xFFFD"),
// Incomplete surrogate character 1 byte missing standalone
- W(""),
+ W("\xFFFD"),
// Incomplete surrogate character 2 bytes missing standalone
- W(""),
+ W("\xFFFD"),
// Incomplete surrogate character 3 bytes missing standalone
- W(""),
+ W("\xFFFD"),
// Trailing byte with no lead byte standalone
- W(""),
+ W("\xFFFD"),
// Incomplete 2 byte encoded character 1 byte missing between 1 byte chars
- W("\x0041\x0042"),
+ W("\x0041\xFFFD\x0042"),
// Incomplete 3 byte encoded character 1 byte missing between 1 byte chars
- W("\x0041\x0042"),
+ W("\x0041\xFFFD\x0042"),
// Incomplete 3 byte encoded character 2 bytes missing between 1 byte chars
- W("\x0041\x0042"),
+ W("\x0041\xFFFD\x0042"),
// Trailing byte with no lead byte between 1 byte chars
- W("\x0041\x0042"),
+ W("\x0041\xFFFD\x0042"),
// Incomplete 2 byte encoded character 1 byte missing before 1 byte char
- W("\x0042"),
+ W("\xFFFD\x0042"),
// Incomplete 3 byte encoded character 1 byte missing before 1 byte char
- W("\x0042"),
+ W("\xFFFD\x0042"),
// Incomplete 3 byte encoded character 2 bytes missing before 1 byte char
- W("\x0042"),
+ W("\xFFFD\x0042"),
// Trailing byte with no lead byte before 1 byte char
- W("\x0042"),
+ W("\xFFFD\x0042"),
// Incomplete 2 byte encoded character 1 byte missing after 1 byte char
- W("\x0041"),
+ W("\x0041\xFFFD"),
// Incomplete 3 byte encoded character 1 byte missing after 1 byte char
- W("\x0041"),
+ W("\x0041\xFFFD"),
// Incomplete 3 byte encoded character 2 bytes missing after 1 byte char
- W("\x0041"),
+ W("\x0041\xFFFD"),
// Trailing byte with no lead byte after 1 byte char
- W("\x0041"),
+ W("\x0041\xFFFD"),
// Incomplete 2 byte encoded character 1 byte missing between 2 byte chars
- W("\x0080\x00FF"),
+ W("\x0080\xFFFD\x00FF"),
// Incomplete 3 byte encoded character 1 byte missing between 2 byte chars
- W("\x0080\x00FF"),
+ W("\x0080\xFFFD\x00FF"),
// Incomplete 3 byte encoded character 2 bytes missing between 2 byte chars
- W("\x0080\x00FF"),
+ W("\x0080\xFFFD\x00FF"),
// Trailing byte with no lead byte between 2 byte chars
- W("\x0080\x00FF"),
+ W("\x0080\xFFFD\x00FF"),
// 2 byte encoded character in non-shortest form encodings (these are not allowed)
- W(""),
+ W("\xFFFD\xFFFD"),
// 3 byte encoded character in non-shortest form encodings (these are not allowed)
- W(""),
+ W("\xFFFD\xFFFD"),
// 4 byte encoded character in non-shortest form encodings (these are not allowed)
- W(""),
+ W("\xFFFD\xFFFD\xFFFD"),
};
for (int i = 0; i < (sizeof(utf8Strings) / sizeof(utf8Strings[0])); i++)
diff --git a/tests/src/Interop/StringMarshalling/UTF8/UTF8Test.cs b/tests/src/Interop/StringMarshalling/UTF8/UTF8Test.cs
index 7bfe19fdac..2e9e6858f8 100644
--- a/tests/src/Interop/StringMarshalling/UTF8/UTF8Test.cs
+++ b/tests/src/Interop/StringMarshalling/UTF8/UTF8Test.cs
@@ -126,8 +126,21 @@ class UTF8StructMarshalling
public int index;
}
+ // Native-side image: a raw 8-byte inline buffer that the test fills by hand.
+ [StructLayout(LayoutKind.Sequential)]
+ unsafe struct UnmanagedStruct
+ {
+     public fixed byte psz[8];
+ }
+
+ // Managed-side view: a single string field marshalled as ByValTStr (SizeConst = 8)
+ // with the ANSI character set.
+ [StructLayout(LayoutKind.Sequential, CharSet = CharSet.Ansi)]
+ struct ManagedStruct
+ {
+     [MarshalAs(UnmanagedType.ByValTStr, SizeConst = 8)] public string str;
+ }
+
[DllImport("UTF8TestNative", CallingConvention = CallingConvention.Cdecl)]
public static extern void TestStructWithUtf8Field(Utf8Struct utfStruct);
+
public static void TestUTF8StructMarshalling(string[] utf8Strings)
{
Utf8Struct utf8Struct = new Utf8Struct();
@@ -137,7 +150,40 @@ class UTF8StructMarshalling
utf8Struct.index = i;
TestStructWithUtf8Field(utf8Struct);
}
- }
+ if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
+ CompareWithUTF8Encoding();
+ }
+
+ // Decodes the same bytes through the struct marshaller and through UTF8Encoding
+ // and throws if the two resulting strings differ.
+ unsafe static void CompareWithUTF8Encoding()
+ {
+     // 0xFF (never valid in UTF-8) followed by ASCII "abcd"; both decoders get these bytes.
+     byte[] utf8Bytes = { 0xFF, (byte)'a', (byte)'b', (byte)'c', (byte)'d' };
+
+     // Build the native image: the payload followed by a NUL terminator.
+     UnmanagedStruct nativeImage;
+     for (int i = 0; i < utf8Bytes.Length; i++)
+     {
+         nativeImage.psz[i] = utf8Bytes[i];
+     }
+     nativeImage.psz[utf8Bytes.Length] = 0;
+
+     // Decode once via Marshal.PtrToStructure and once via UTF8Encoding.GetString.
+     ManagedStruct marshalled = Marshal.PtrToStructure<ManagedStruct>((IntPtr)(&nativeImage));
+     string actual = marshalled.str;
+     string expected = new UTF8Encoding().GetString(utf8Bytes);
+
+     if (actual == expected)
+     {
+         return;
+     }
+
+     Console.WriteLine("Actual:" + actual + " Length:" + actual.Length);
+     Console.WriteLine("Expected:" + expected + " Length:" + expected.Length);
+     throw new Exception("UTF8Encoding.GetString doesn't match with Utf8 String Marshaller result");
+ }
}
// UTF8 string as delegate parameter
@@ -165,6 +211,7 @@ class UTF8DelegateMarshalling
}
}
+
class Test
{
//test strings
@@ -216,6 +263,7 @@ class Test
// String.Empty tests
UTF8StringTests.EmptyStringTest();
+
return 100;
}
-} \ No newline at end of file
+}