summaryrefslogtreecommitdiff
path: root/test
diff options
context:
space:
mode:
Diffstat (limited to 'test')
-rw-r--r--[-rwxr-xr-x]test/Makefile0
-rw-r--r--[-rwxr-xr-x]test/chard7.txt0
-rw-r--r--[-rwxr-xr-x]test/chardos.txt0
-rw-r--r--[-rwxr-xr-x]test/charu7.txt0
-rw-r--r--[-rwxr-xr-x]test/dos_bin.txtbin144 -> 144 bytes
-rw-r--r--[-rwxr-xr-x]test/gb18030.txt0
-rw-r--r--[-rwxr-xr-x]test/gb18030b.txt0
-rw-r--r--[-rwxr-xr-x]test/gb18030u.txt0
-rw-r--r--[-rwxr-xr-x]test/invalhig.txtbin30 -> 30 bytes
-rw-r--r--[-rwxr-xr-x]test/invallow.txtbin30 -> 30 bytes
-rw-r--r--test/setlocale.c89
-rw-r--r--test/setlocale.pngbin0 -> 18506 bytes
-rwxr-xr-xtest/testcmd.bat2
-rwxr-xr-xtest/testps16.ps1bin846 -> 840 bytes
-rwxr-xr-xtest/testps8.ps17
-rw-r--r--[-rwxr-xr-x]test/testu16.c37
-rw-r--r--[-rwxr-xr-x]test/uni_nl_ëäï.txt0
-rw-r--r--[-rwxr-xr-x]test/unix_bin.txtbin136 -> 136 bytes
-rw-r--r--[-rwxr-xr-x]test/utf16.txtbin66 -> 66 bytes
-rw-r--r--[-rwxr-xr-x]test/utf16be.txtbin1926 -> 1926 bytes
-rw-r--r--[-rwxr-xr-x]test/utf16bin.txtbin290 -> 290 bytes
-rw-r--r--[-rwxr-xr-x]test/utf16le.txtbin1926 -> 1926 bytes
-rw-r--r--[-rwxr-xr-x]test/utf16m.txtbin54 -> 54 bytes
-rw-r--r--[-rwxr-xr-x]test/utf16u.txtbin54 -> 54 bytes
-rw-r--r--[-rwxr-xr-x]test/utf8dos.txt0
-rw-r--r--[-rwxr-xr-x]test/utf8dosn.txt0
-rw-r--r--[-rwxr-xr-x]test/utf8unix.txt0
-rw-r--r--[-rwxr-xr-x]test/utf8unxb.txt0
28 files changed, 105 insertions, 30 deletions
diff --git a/test/Makefile b/test/Makefile
index e2a55e9..e2a55e9 100755..100644
--- a/test/Makefile
+++ b/test/Makefile
diff --git a/test/chard7.txt b/test/chard7.txt
index 3b3c9fc..3b3c9fc 100755..100644
--- a/test/chard7.txt
+++ b/test/chard7.txt
diff --git a/test/chardos.txt b/test/chardos.txt
index ff85358..ff85358 100755..100644
--- a/test/chardos.txt
+++ b/test/chardos.txt
diff --git a/test/charu7.txt b/test/charu7.txt
index 0073580..0073580 100755..100644
--- a/test/charu7.txt
+++ b/test/charu7.txt
diff --git a/test/dos_bin.txt b/test/dos_bin.txt
index 82c9864..82c9864 100755..100644
--- a/test/dos_bin.txt
+++ b/test/dos_bin.txt
Binary files differ
diff --git a/test/gb18030.txt b/test/gb18030.txt
index 9e294aa..9e294aa 100755..100644
--- a/test/gb18030.txt
+++ b/test/gb18030.txt
diff --git a/test/gb18030b.txt b/test/gb18030b.txt
index 2b08b78..2b08b78 100755..100644
--- a/test/gb18030b.txt
+++ b/test/gb18030b.txt
diff --git a/test/gb18030u.txt b/test/gb18030u.txt
index 7687092..7687092 100755..100644
--- a/test/gb18030u.txt
+++ b/test/gb18030u.txt
diff --git a/test/invalhig.txt b/test/invalhig.txt
index da9ce0f..da9ce0f 100755..100644
--- a/test/invalhig.txt
+++ b/test/invalhig.txt
Binary files differ
diff --git a/test/invallow.txt b/test/invallow.txt
index c870871..c870871 100755..100644
--- a/test/invallow.txt
+++ b/test/invallow.txt
Binary files differ
diff --git a/test/setlocale.c b/test/setlocale.c
new file mode 100644
index 0000000..005f0fb
--- /dev/null
+++ b/test/setlocale.c
@@ -0,0 +1,89 @@
+#include <stdio.h>
+#include <fcntl.h>
+#include <windows.h>
+#include <locale.h>
+
+/*
+
+ This program proves that when you set the locale to "", the Chinese ANSI CP936
+ encoded text is printed wrongly in a simplified Chinese regional setting.
+ UTF-8 is also printed wrongly.
+
+To test this program you first need to change the Windows locale setting to
+simplified Chinese. There is no problem doing that, because the "display
+language" will stay the original language, and you can still use your Windows.
+
+Control Panel > Region and Language > Administrative > Change system locale
+
+Select simplified Chinese and reboot your PC.
+
+For example output see setlocale.png.
+
+ */
+
+/* Print ANSI CP936 plus UTF-16 text, or UTF-8 text when utf8 is set to 1.
+   Only one of the two is printed per build: when UTF-8 is printed after
+   UTF-16 in one program, the UTF-16 text is displayed wrongly. */
+void print_string(const char *str, const char *ustr, const wchar_t *wstr) {
+  int utf8 = 0; /* Set to 1 to print the UTF-8 text instead. */
+  if ( ! utf8 ) {
+    int prevmode;
+    /* When the locale is set to "" the following line will produce wrong output */
+    printf ("ANSI CP936 %s\n",str);
+
+    /* UTF-16 will produce correct output in all cases. Flush around each
+       mode change so buffered text is not emitted in the wrong mode.
+       NOTE(review): _setmode is declared in <io.h>; confirm it is included. */
+    fflush(stdout);
+    prevmode = _setmode(_fileno(stdout), _O_U16TEXT);
+    wprintf(L"UTF-16 %ls\n",wstr);
+    fflush(stdout);
+    if (prevmode != -1) { /* -1 means the switch failed; nothing to restore */
+      _setmode(_fileno(stdout), prevmode);
+    }
+  } else {
+    UINT outputCP = GetConsoleOutputCP();
+
+    /* UTF-8 will produce wrong output when the locale is "".
+       When the locale is "C" wrong output with raster font, and correct output
+       with TrueType font. */
+    SetConsoleOutputCP(CP_UTF8);
+    wprintf(L"UTF-8 %S\n",ustr);
+    SetConsoleOutputCP(outputCP);
+
+    /* The code below produces wrong output in all cases */
+    // prevmode = _setmode(_fileno(stdout), _O_U8TEXT);
+    // wprintf(L"UTF-8 %S",ustr);
+    // _setmode(_fileno(stdout), prevmode);
+  }
+}
+
+int main(void) {
+  /* ANSI CP936 string (meaning: Western-European). */
+  char str[5] = { (char)0xce, (char)0xf7, (char)0xc5, (char)0xb7, '\0' };
+  char ustr[15];    /* UTF-8 */
+  wchar_t wstr[10]; /* UTF-16 */
+  /* Convert CP936 to UTF-16. The output size is counted in wchar_t
+     elements, not bytes, so sizeof(wstr) alone would overstate it. */
+  if (MultiByteToWideChar(936, 0, str, -1, wstr,
+                          (int)(sizeof(wstr) / sizeof(wstr[0]))) == 0) {
+    fprintf(stderr, "MultiByteToWideChar failed: %lu\n", GetLastError());
+    return 1;
+  }
+  /* Convert UTF-16 to UTF-8. Here the output size is a byte count. */
+  if (WideCharToMultiByte(CP_UTF8, 0, wstr, -1, ustr, (int)sizeof(ustr),
+                          NULL, NULL) == 0) {
+    fprintf(stderr, "WideCharToMultiByte failed: %lu\n", GetLastError());
+    return 1;
+  }
+
+  setlocale (LC_ALL, "");
+  printf("==> setlocale (LC_ALL, \"\");\n");
+  print_string(str, ustr, wstr);
+
+  setlocale (LC_ALL, "C");
+  printf("\n==> setlocale (LC_ALL, \"C\");\n");
+  print_string(str, ustr, wstr);
+
+  return 0;
+}
diff --git a/test/setlocale.png b/test/setlocale.png
new file mode 100644
index 0000000..9920216
--- /dev/null
+++ b/test/setlocale.png
Binary files differ
diff --git a/test/testcmd.bat b/test/testcmd.bat
index 9431352..8acea08 100755
--- a/test/testcmd.bat
+++ b/test/testcmd.bat
@@ -8,7 +8,7 @@ REM This script will only run on Windows 7 and higher.
REM switch to UTF-8 code page
chcp 65001
-dos2unix -i uni_el_αρχείο.txt uni_zh_文件.txt
+dos2unix -D unicode -i uni_el_αρχείο.txt uni_zh_文件.txt
REM set code page back to original value
chcp 850
diff --git a/test/testps16.ps1 b/test/testps16.ps1
index 059ec03..c927442 100755
--- a/test/testps16.ps1
+++ b/test/testps16.ps1
Binary files differ
diff --git a/test/testps8.ps1 b/test/testps8.ps1
index 7bf72db..dc2bcfe 100755
--- a/test/testps8.ps1
+++ b/test/testps8.ps1
@@ -8,5 +8,8 @@ dos2unix -i uni_el_αρχείο.txt uni_zh_文件.txt
echo "test select-string:"
-# The following is not working. Why?
-dos2unix -i uni* | select-string -encoding utf8 -pattern αρχ \ No newline at end of file
+# select-string requires a BOM.
+$env:DOS2UNIX_DISPLAY_ENC = "utf8bom"
+dos2unix -i uni* | select-string -encoding utf8 -pattern αρχ
+
+$env:DOS2UNIX_DISPLAY_ENC = ""
diff --git a/test/testu16.c b/test/testu16.c
index 8ed706a..d0a8f9e 100755..100644
--- a/test/testu16.c
+++ b/test/testu16.c
@@ -3,44 +3,27 @@
#include <fcntl.h>
#include <io.h>
-/* This program demonstrates that when Unicode printed text is redirected to
- a file, the file is not in correct UTF-16.
+/* This program demonstrates Unicode UTF-16 printed text redirected to a
+ correct UTF-16 file.
+
+ .\testu16.exe > out.txt
+
*/
int main () {
int prevmode;
-/*
- When the output of this program is redirected to a file in Windows Command
- Prompt you get for line breaks 0d0a 00, while it should be 0d00 0a00.
-
-c:\test>.\testu16.exe > o.txt
-
-c:\test>xxd o.txt
-0000000: 6f00 6e00 6500 0d0a 0074 0077 006f 000d o.n.e....t.w.o..
-0000010: 0a00 7400 6800 7200 6500 6500 0d0a 00 ..t.h.r.e.e....
-
-
-
- When the output is redirected to a file in PowerShell, null characters
- 0000 are inserted. It looks like UTF-32 with an UTF-16 BOM and UTF-16 line
- breaks.
-
-PS C:\test> .\testu16.exe > p.txt
-PS C:\test> xxd p.txt
-0000000: fffe 6f00 0000 6e00 0000 6500 0000 0d00 ..o...n...e.....
-0000010: 0a00 0000 7400 0000 7700 0000 6f00 0000 ....t...w...o...
-0000020: 0d00 0a00 0000 7400 0000 6800 0000 7200 ......t...h...r.
-0000030: 0000 6500 0000 6500 0000 0d00 0a00 0000 ..e...e.........
-0000040: 0d00 0a00 ....
-
-*/
prevmode = _setmode(_fileno(stdout), _O_U16TEXT);
+ /* We need to print a UTF-16 BOM for correct redirection in PowerShell. */
+ fwprintf(stdout, L"\xfeff");
fwprintf(stdout,L"one\n");
fwprintf(stdout,L"two\n");
fwprintf(stdout,L"three\n");
+ /* Flushing stdout is required to get correct UTF-16.
+ This is required for both CMD.exe and PowerShell. */
+ fflush(stdout);
_setmode(_fileno(stdout), prevmode);
diff --git a/test/uni_nl_ëäï.txt b/test/uni_nl_ëäï.txt
index a9cd879..a9cd879 100755..100644
--- a/test/uni_nl_ëäï.txt
+++ b/test/uni_nl_ëäï.txt
diff --git a/test/unix_bin.txt b/test/unix_bin.txt
index b2cafbc..b2cafbc 100755..100644
--- a/test/unix_bin.txt
+++ b/test/unix_bin.txt
Binary files differ
diff --git a/test/utf16.txt b/test/utf16.txt
index 868b5af..868b5af 100755..100644
--- a/test/utf16.txt
+++ b/test/utf16.txt
Binary files differ
diff --git a/test/utf16be.txt b/test/utf16be.txt
index 1be5db9..1be5db9 100755..100644
--- a/test/utf16be.txt
+++ b/test/utf16be.txt
Binary files differ
diff --git a/test/utf16bin.txt b/test/utf16bin.txt
index 6c4216b..6c4216b 100755..100644
--- a/test/utf16bin.txt
+++ b/test/utf16bin.txt
Binary files differ
diff --git a/test/utf16le.txt b/test/utf16le.txt
index db9f535..db9f535 100755..100644
--- a/test/utf16le.txt
+++ b/test/utf16le.txt
Binary files differ
diff --git a/test/utf16m.txt b/test/utf16m.txt
index cd523f8..cd523f8 100755..100644
--- a/test/utf16m.txt
+++ b/test/utf16m.txt
Binary files differ
diff --git a/test/utf16u.txt b/test/utf16u.txt
index 2bd9b3b..2bd9b3b 100755..100644
--- a/test/utf16u.txt
+++ b/test/utf16u.txt
Binary files differ
diff --git a/test/utf8dos.txt b/test/utf8dos.txt
index 76afa4f..76afa4f 100755..100644
--- a/test/utf8dos.txt
+++ b/test/utf8dos.txt
diff --git a/test/utf8dosn.txt b/test/utf8dosn.txt
index 154aa88..154aa88 100755..100644
--- a/test/utf8dosn.txt
+++ b/test/utf8dosn.txt
diff --git a/test/utf8unix.txt b/test/utf8unix.txt
index 8b93c0c..8b93c0c 100755..100644
--- a/test/utf8unix.txt
+++ b/test/utf8unix.txt
diff --git a/test/utf8unxb.txt b/test/utf8unxb.txt
index f670e9b..f670e9b 100755..100644
--- a/test/utf8unxb.txt
+++ b/test/utf8unxb.txt