summaryrefslogtreecommitdiff
path: root/test
diff options
context:
space:
mode:
authorDongHun Kwak <dh0128.kwak@samsung.com>2016-10-20 13:28:30 +0900
committerDongHun Kwak <dh0128.kwak@samsung.com>2016-10-20 13:28:33 +0900
commit6ad0574136d3b23b486db4fa359672b9f731e235 (patch)
treeae96a2384615b0132969083bc097c918acf4e9f7 /test
parentde90208a6d3f0b5c5944c4aa4d4ea95fc2611470 (diff)
downloaddos2unix-6ad0574136d3b23b486db4fa359672b9f731e235.tar.gz
dos2unix-6ad0574136d3b23b486db4fa359672b9f731e235.tar.bz2
dos2unix-6ad0574136d3b23b486db4fa359672b9f731e235.zip
Imported Upstream version 7.0
Change-Id: Ib83b48cbb624fbb64cd653899c84ddffdcd21860 Signed-off-by: DongHun Kwak <dh0128.kwak@samsung.com>
Diffstat (limited to 'test')
-rwxr-xr-xtest/Makefile35
-rwxr-xr-xtest/ascii.t84
-rwxr-xr-xtest/chard7.txt14
-rwxr-xr-xtest/chardos.txt14
-rwxr-xr-xtest/charu7.txt14
-rw-r--r--test/charunix.txt14
-rw-r--r--test/cp_1252.txt14
-rw-r--r--test/cp_437.txt14
-rw-r--r--test/cp_850.txt14
-rw-r--r--test/cp_860.txt14
-rw-r--r--test/cp_863.txt14
-rw-r--r--test/cp_865.txt14
-rw-r--r--test/dos.txt6
-rw-r--r--test/dos_bom.txt6
-rw-r--r--test/dos_dbl.txt12
-rwxr-xr-xtest/iso.t84
-rw-r--r--test/iso_1252.txt14
-rw-r--r--test/iso_437.txt14
-rw-r--r--test/iso_850.txt14
-rw-r--r--test/iso_860.txt14
-rw-r--r--test/iso_863.txt14
-rw-r--r--test/iso_865.txt14
-rw-r--r--test/mac.txt1
-rw-r--r--test/mac_dbl.txt1
-rwxr-xr-xtest/misc.t92
-rw-r--r--test/mixed.txt13
-rw-r--r--test/mixedd2u.txt13
-rw-r--r--test/mixedm2u.txt18
-rw-r--r--test/mixedu2d.txt13
-rw-r--r--test/mixedu2m.txt7
-rwxr-xr-xtest/symlink.t119
-rw-r--r--test/unix.txt6
-rw-r--r--test/unix_bom.txt6
-rw-r--r--test/unix_dbl.txt12
-rwxr-xr-xtest/utf16.t74
-rwxr-xr-xtest/utf16.txtbin0 -> 66 bytes
-rwxr-xr-xtest/utf16be.txt (renamed from test/utf16_be.txt)bin1926 -> 1926 bytes
-rw-r--r--test/utf16ben.txt (renamed from test/utf16_be_nobom.txt)bin1924 -> 1924 bytes
-rwxr-xr-xtest/utf16le.txt (renamed from test/utf16_le.txt)bin1926 -> 1926 bytes
-rw-r--r--test/utf16len.txt (renamed from test/utf16_le_nobom.txt)bin1924 -> 1924 bytes
-rwxr-xr-xtest/utf16m.txtbin0 -> 54 bytes
-rwxr-xr-xtest/utf16u.txtbin0 -> 54 bytes
-rwxr-xr-xtest/utf8.t33
-rwxr-xr-xtest/utf8dos.txt51
-rwxr-xr-xtest/utf8dosn.txt51
-rwxr-xr-xtest/utf8unix.txt51
-rwxr-xr-xtest/utf8unxb.txt51
47 files changed, 1063 insertions, 0 deletions
diff --git a/test/Makefile b/test/Makefile
new file mode 100755
index 0000000..25832a5
--- /dev/null
+++ b/test/Makefile
@@ -0,0 +1,35 @@
+TESTS = ascii.t iso.t utf8.t utf16.t misc.t
+# symlink.t is only added when uname does not report a MINGW environment.
+ifneq ($(findstring MINGW,$(shell uname)),MINGW)
+TESTS += symlink.t
+endif
+
+.PHONY: all check test ascii iso misc utf8 utf16 symlink clean
+all: test
+
+check: test
+
+test:
+	prove -v $(TESTS)
+
+# Each target below runs the single test script named after it ($@.t).
+ascii:
+	prove -v $@.t
+
+iso:
+	prove -v $@.t
+
+misc:
+	prove -v $@.t
+
+utf8:
+	prove -v $@.t
+
+utf16:
+	prove -v $@.t
+
+symlink:
+	prove -v $@.t
+
+clean:
+	rm -f out*.txt in*.txt
diff --git a/test/ascii.t b/test/ascii.t
new file mode 100755
index 0000000..64f6c7b
--- /dev/null
+++ b/test/ascii.t
@@ -0,0 +1,84 @@
+#!/usr/bin/perl
+
+# Requires perl-Test-Simple installation.
+use Test::More tests => 30;
+
+$suffix = "";
+if (-e "../dos2unix.exe") {
+ $suffix = ".exe";
+}
+$DOS2UNIX = "../dos2unix" . $suffix;
+$MAC2UNIX = "../mac2unix" . $suffix;
+$UNIX2DOS = "../unix2dos" . $suffix;
+$UNIX2MAC = "../unix2mac" . $suffix;
+
+system("$DOS2UNIX -v -n dos.txt out_unix.txt; cmp out_unix.txt unix.txt");
+ok( $? == 0, 'DOS to Unix conversion' );
+
+system("$MAC2UNIX -v -n mac.txt out_unix.txt; cmp out_unix.txt unix.txt");
+ok( $? == 0, 'Mac to Unix conversion' ); # label matches the mac2unix command under test
+
+system("$UNIX2DOS -v -n unix.txt out_dos.txt; cmp out_dos.txt dos.txt");
+ok( $? == 0, 'Unix to DOS conversion' );
+
+system("$UNIX2MAC -v -n unix.txt out_mac.txt; cmp out_mac.txt mac.txt");
+ok( $? == 0, 'Unix to Mac conversion' );
+
+system("cp -f dos.txt out_unix.txt; $DOS2UNIX -v out_unix.txt; cmp out_unix.txt unix.txt");
+ok( $? == 0, 'DOS to Unix conversion, old file mode' );
+
+system("cp -f unix.txt out_dos.txt; $UNIX2DOS -v out_dos.txt; cmp out_dos.txt dos.txt");
+ok( $? == 0, 'Unix to DOS conversion, old file mode' );
+
+system("$DOS2UNIX -v -n unix.txt out_unix.txt; cmp out_unix.txt unix.txt");
+ok( $? == 0, 'dos2unix must not change unix line breaks');
+system("$DOS2UNIX -v -n mac.txt out_unix.txt; cmp out_unix.txt mac.txt");
+ok( $? == 0, 'dos2unix must not change mac line breaks');
+system("$MAC2UNIX -v -n unix.txt out_unix.txt; cmp out_unix.txt unix.txt");
+ok( $? == 0, 'mac2unix must not change unix line breaks');
+system("$MAC2UNIX -v -n dos.txt out_unix.txt; cmp out_unix.txt dos.txt");
+ok( $? == 0, 'mac2unix must not change dos line breaks');
+system("$UNIX2DOS -v -n dos.txt out_dos.txt; cmp out_dos.txt dos.txt");
+ok( $? == 0, 'unix2dos must not change dos line breaks');
+system("$UNIX2DOS -v -n mac.txt out_dos.txt; cmp out_dos.txt mac.txt");
+ok( $? == 0, 'unix2dos must not change mac line breaks');
+system("$UNIX2MAC -v -n dos.txt out_mac.txt; cmp out_mac.txt dos.txt");
+ok( $? == 0, 'unix2mac must not change dos line breaks');
+system("$UNIX2MAC -v -n mac.txt out_mac.txt; cmp out_mac.txt mac.txt");
+ok( $? == 0, 'unix2mac must not change mac line breaks');
+
+system("$DOS2UNIX -v -n mixed.txt out.txt; cmp out.txt mixedd2u.txt");
+ok( $? == 0, 'DOS to Unix conversion mixed');
+system("$MAC2UNIX -v -n mixed.txt out.txt; cmp out.txt mixedm2u.txt");
+ok( $? == 0, 'Mac to Unix conversion mixed'); # label matches the mac2unix command under test
+system("$UNIX2DOS -v -n mixed.txt out.txt; cmp out.txt mixedu2d.txt");
+ok( $? == 0, 'Unix to DOS conversion mixed');
+system("$UNIX2MAC -v -n mixed.txt out.txt; cmp out.txt mixedu2m.txt");
+ok( $? == 0, 'Unix to Mac conversion mixed');
+
+system("$DOS2UNIX -v -l -n dos.txt out_unix.txt; cmp out_unix.txt unix_dbl.txt");
+ok( $? == 0, 'DOS to Unix conversion with line doubling');
+system("$MAC2UNIX -v -l -n mac.txt out_unix.txt; cmp out_unix.txt unix_dbl.txt");
+ok( $? == 0, 'Mac to Unix conversion with line doubling'); # label matches the mac2unix command under test
+system("$UNIX2DOS -v -l -n unix.txt out_dos.txt; cmp out_dos.txt dos_dbl.txt");
+ok( $? == 0, 'Unix to DOS conversion with line doubling');
+system("$UNIX2MAC -v -l -n unix.txt out_mac.txt; cmp out_mac.txt mac_dbl.txt");
+ok( $? == 0, 'Unix to Mac conversion with line doubling');
+
+system("$DOS2UNIX -v -l -n unix.txt out_unix.txt; cmp out_unix.txt unix.txt");
+ok( $? == 0, 'dos2unix -l must not change unix line breaks');
+system("$DOS2UNIX -v -l -n mac.txt out_unix.txt; cmp out_unix.txt mac.txt");
+ok( $? == 0, 'dos2unix -l must not change mac line breaks');
+system("$MAC2UNIX -v -l -n unix.txt out_unix.txt; cmp out_unix.txt unix.txt");
+ok( $? == 0, 'mac2unix -l must not change unix line breaks');
+system("$MAC2UNIX -v -l -n dos.txt out_unix.txt; cmp out_unix.txt dos.txt");
+ok( $? == 0, 'mac2unix -l must not change dos line breaks');
+system("$UNIX2DOS -v -l -n dos.txt out_dos.txt; cmp out_dos.txt dos.txt");
+ok( $? == 0, 'unix2dos -l must not change dos line breaks');
+system("$UNIX2DOS -v -l -n mac.txt out_dos.txt; cmp out_dos.txt mac.txt");
+ok( $? == 0, 'unix2dos -l must not change mac line breaks');
+system("$UNIX2MAC -v -l -n dos.txt out_mac.txt; cmp out_mac.txt dos.txt");
+ok( $? == 0, 'unix2mac -l must not change dos line breaks');
+system("$UNIX2MAC -v -l -n mac.txt out_mac.txt; cmp out_mac.txt mac.txt");
+ok( $? == 0, 'unix2mac -l must not change mac line breaks');
+
diff --git a/test/chard7.txt b/test/chard7.txt
new file mode 100755
index 0000000..3b3c9fc
--- /dev/null
+++ b/test/chard7.txt
@@ -0,0 +1,14 @@
+ !"#$%&'()*+,-./
+0123456789:;<=>?
+@ABCDEFGHIJKLMNO
+PQRSTUVWXYZ[\]^_
+`abcdefghijklmno
+pqrstuvwxyz{|}~
+
+
+
+
+
+
+
+
diff --git a/test/chardos.txt b/test/chardos.txt
new file mode 100755
index 0000000..ff85358
--- /dev/null
+++ b/test/chardos.txt
@@ -0,0 +1,14 @@
+ !"#$%&'()*+,-./
+0123456789:;<=>?
+@ABCDEFGHIJKLMNO
+PQRSTUVWXYZ[\]^_
+`abcdefghijklmno
+pqrstuvwxyz{|}~
+
+
+
+
+
+
+
+
diff --git a/test/charu7.txt b/test/charu7.txt
new file mode 100755
index 0000000..0073580
--- /dev/null
+++ b/test/charu7.txt
@@ -0,0 +1,14 @@
+ !"#$%&'()*+,-./
+0123456789:;<=>?
+@ABCDEFGHIJKLMNO
+PQRSTUVWXYZ[\]^_
+`abcdefghijklmno
+pqrstuvwxyz{|}~
+
+
+
+
+
+
+
+
diff --git a/test/charunix.txt b/test/charunix.txt
new file mode 100644
index 0000000..e78f391
--- /dev/null
+++ b/test/charunix.txt
@@ -0,0 +1,14 @@
+ !"#$%&'()*+,-./
+0123456789:;<=>?
+@ABCDEFGHIJKLMNO
+PQRSTUVWXYZ[\]^_
+`abcdefghijklmno
+pqrstuvwxyz{|}~
+
+
+
+
+
+
+
+
diff --git a/test/cp_1252.txt b/test/cp_1252.txt
new file mode 100644
index 0000000..5448e22
--- /dev/null
+++ b/test/cp_1252.txt
@@ -0,0 +1,14 @@
+ !"#$%&'()*+,-./
+0123456789:;<=>?
+@ABCDEFGHIJKLMNO
+PQRSTUVWXYZ[\]^_
+`abcdefghijklmno
+pqrstuvwxyz{|}~
+................
+................
+
+
+
+
+
+
diff --git a/test/cp_437.txt b/test/cp_437.txt
new file mode 100644
index 0000000..ed80363
--- /dev/null
+++ b/test/cp_437.txt
@@ -0,0 +1,14 @@
+ !"#$%&'()*+,-./
+0123456789:;<=>?
+@ABCDEFGHIJKLMNO
+PQRSTUVWXYZ[\]^_
+`abcdefghijklmno
+pqrstuvwxyz{|}~
+................
+................
+........
+......
+...........
+............
+.
+.....
diff --git a/test/cp_850.txt b/test/cp_850.txt
new file mode 100644
index 0000000..6cf3427
--- /dev/null
+++ b/test/cp_850.txt
@@ -0,0 +1,14 @@
+ !"#$%&'()*+,-./
+0123456789:;<=>?
+@ABCDEFGHIJKLMNO
+PQRSTUVWXYZ[\]^_
+`abcdefghijklmno
+pqrstuvwxyz{|}~
+................
+................
+
+ǎԐ
+ѥ噞
diff --git a/test/cp_860.txt b/test/cp_860.txt
new file mode 100644
index 0000000..69904de
--- /dev/null
+++ b/test/cp_860.txt
@@ -0,0 +1,14 @@
+ !"#$%&'()*+,-./
+0123456789:;<=>?
+@ABCDEFGHIJKLMNO
+PQRSTUVWXYZ[\]^_
+`abcdefghijklmno
+pqrstuvwxyz{|}~
+................
+................
+.........
+......
+......
+.......
+......
+.......
diff --git a/test/cp_863.txt b/test/cp_863.txt
new file mode 100644
index 0000000..cba6911
--- /dev/null
+++ b/test/cp_863.txt
@@ -0,0 +1,14 @@
+ !"#$%&'()*+,-./
+0123456789:;<=>?
+@ABCDEFGHIJKLMNO
+PQRSTUVWXYZ[\]^_
+`abcdefghijklmno
+pqrstuvwxyz{|}~
+................
+................
+...
+.
+....
+.........
+.
+.....
diff --git a/test/cp_865.txt b/test/cp_865.txt
new file mode 100644
index 0000000..6148fe7
--- /dev/null
+++ b/test/cp_865.txt
@@ -0,0 +1,14 @@
+ !"#$%&'()*+,-./
+0123456789:;<=>?
+@ABCDEFGHIJKLMNO
+PQRSTUVWXYZ[\]^_
+`abcdefghijklmno
+pqrstuvwxyz{|}~
+................
+................
+.........
+.......
+...........
+...........
+.
+....
diff --git a/test/dos.txt b/test/dos.txt
new file mode 100644
index 0000000..fc8b4db
--- /dev/null
+++ b/test/dos.txt
@@ -0,0 +1,6 @@
+hello
+world
+
+take
+a
+break
diff --git a/test/dos_bom.txt b/test/dos_bom.txt
new file mode 100644
index 0000000..1216471
--- /dev/null
+++ b/test/dos_bom.txt
@@ -0,0 +1,6 @@
+hello
+world
+
+take
+a
+break
diff --git a/test/dos_dbl.txt b/test/dos_dbl.txt
new file mode 100644
index 0000000..651b0b3
--- /dev/null
+++ b/test/dos_dbl.txt
@@ -0,0 +1,12 @@
+hello
+
+world
+
+
+
+take
+
+a
+
+break
+
diff --git a/test/iso.t b/test/iso.t
new file mode 100755
index 0000000..69af494
--- /dev/null
+++ b/test/iso.t
@@ -0,0 +1,84 @@
+#!/usr/bin/perl
+
+# Requires perl-Test-Simple installation.
+use Test::More tests => 12;
+
+$suffix = "";
+if (-e "../dos2unix.exe") {
+ $suffix = ".exe";
+}
+$DOS2UNIX = "../dos2unix" . $suffix;
+$MAC2UNIX = "../mac2unix" . $suffix;
+$UNIX2DOS = "../unix2dos" . $suffix;
+$UNIX2MAC = "../unix2mac" . $suffix;
+
+# To check for instance cp850 to iso88591 conversion
+# you can do a visual check like this (on Windows).
+#
+# In a Windows Command Prompt, set font to Lucida Console.
+# Then set the code page to 850:
+# chcp 850
+# Display complete cp850 code page:
+# type chardos.txt
+#
+# In a Cygwin Mintty terminal, under Options->Text
+# set Character set to ISO-8859-1
+# Display converted character set:
+# cat iso_850.txt
+#
+# You now see the same characters as in the Windows Command Prompt
+# with the non-convertible characters replaced with a dot.
+
+system("$DOS2UNIX -v -437 -n chardos.txt out_unix.txt; cmp out_unix.txt iso_437.txt");
+ok( $? == 0, 'DOS to Unix conversion, cp437 to iso88591' );
+
+system("$DOS2UNIX -v -850 -n chardos.txt out_unix.txt; cmp out_unix.txt iso_850.txt");
+ok( $? == 0, 'DOS to Unix conversion, cp850 to iso88591' );
+
+system("$DOS2UNIX -v -860 -n chardos.txt out_unix.txt; cmp out_unix.txt iso_860.txt");
+ok( $? == 0, 'DOS to Unix conversion, cp860 to iso88591' );
+
+system("$DOS2UNIX -v -863 -n chardos.txt out_unix.txt; cmp out_unix.txt iso_863.txt");
+ok( $? == 0, 'DOS to Unix conversion, cp863 to iso88591' );
+
+system("$DOS2UNIX -v -865 -n chardos.txt out_unix.txt; cmp out_unix.txt iso_865.txt");
+ok( $? == 0, 'DOS to Unix conversion, cp865 to iso88591' );
+
+system("$DOS2UNIX -v -1252 -n chardos.txt out_unix.txt; cmp out_unix.txt iso_1252.txt");
+ok( $? == 0, 'DOS to Unix conversion, cp1252 to iso88591' );
+
+
+# To check for instance iso88591 to cp850 conversion
+# you can do a visual check like this (on Windows).
+#
+# In a Cygwin Mintty terminal, under Options->Text
+# set Character set to ISO-8859-1
+# Display complete ISO-8859-1 character set:
+# cat charunix.txt
+#
+# In a Windows Command Prompt, set font to Lucida Console.
+# Then set the code page to 850:
+# chcp 850
+# Display converted cp850 code page:
+# type cp_850.txt
+#
+# You now see the same characters as in the Mintty terminal
+# with the non-convertible characters replaced with a dot.
+
+system("$UNIX2DOS -v -437 -n charunix.txt out_dos.txt; cmp out_dos.txt cp_437.txt");
+ok( $? == 0, 'Unix to DOS conversion, iso88591 to cp437' );
+
+system("$UNIX2DOS -v -850 -n charunix.txt out_dos.txt; cmp out_dos.txt cp_850.txt");
+ok( $? == 0, 'Unix to DOS conversion, iso88591 to cp850' );
+
+system("$UNIX2DOS -v -860 -n charunix.txt out_dos.txt; cmp out_dos.txt cp_860.txt");
+ok( $? == 0, 'Unix to DOS conversion, iso88591 to cp860' );
+
+system("$UNIX2DOS -v -863 -n charunix.txt out_dos.txt; cmp out_dos.txt cp_863.txt");
+ok( $? == 0, 'Unix to DOS conversion, iso88591 to cp863' );
+
+system("$UNIX2DOS -v -865 -n charunix.txt out_dos.txt; cmp out_dos.txt cp_865.txt");
+ok( $? == 0, 'Unix to DOS conversion, iso88591 to cp865' );
+
+system("$UNIX2DOS -v -1252 -n charunix.txt out_dos.txt; cmp out_dos.txt cp_1252.txt");
+ok( $? == 0, 'Unix to DOS conversion, iso88591 to cp1252' );
diff --git a/test/iso_1252.txt b/test/iso_1252.txt
new file mode 100644
index 0000000..99b9fce
--- /dev/null
+++ b/test/iso_1252.txt
@@ -0,0 +1,14 @@
+ !"#$%&'()*+,-./
+0123456789:;<=>?
+@ABCDEFGHIJKLMNO
+PQRSTUVWXYZ[\]^_
+`abcdefghijklmno
+pqrstuvwxyz{|}~
+................
+................
+
+
+
+
+
+
diff --git a/test/iso_437.txt b/test/iso_437.txt
new file mode 100644
index 0000000..ea47879
--- /dev/null
+++ b/test/iso_437.txt
@@ -0,0 +1,14 @@
+ !"#$%&'()*+,-./
+0123456789:;<=>?
+@ABCDEFGHIJKLMNO
+PQRSTUVWXYZ[\]^_
+`abcdefghijklmno
+pqrstuvwxyz{|}~
+
+ܢ..
+Ѫ.
+................
+................
+................
+..............
+..........
diff --git a/test/iso_850.txt b/test/iso_850.txt
new file mode 100644
index 0000000..4ed6141
--- /dev/null
+++ b/test/iso_850.txt
@@ -0,0 +1,14 @@
+ !"#$%&'()*+,-./
+0123456789:;<=>?
+@ABCDEFGHIJKLMNO
+PQRSTUVWXYZ[\]^_
+`abcdefghijklmno
+pqrstuvwxyz{|}~
+
+.
+..........
+.............
+......
+յݯ
+..
diff --git a/test/iso_860.txt b/test/iso_860.txt
new file mode 100644
index 0000000..045bc44
--- /dev/null
+++ b/test/iso_860.txt
@@ -0,0 +1,14 @@
+ !"#$%&'()*+,-./
+0123456789:;<=>?
+@ABCDEFGHIJKLMNO
+PQRSTUVWXYZ[\]^_
+`abcdefghijklmno
+pqrstuvwxyz{|}~
+
+ܢ.
+ѪҬ
+................
+................
+................
+..............
+..........
diff --git a/test/iso_863.txt b/test/iso_863.txt
new file mode 100644
index 0000000..ec1e86f
--- /dev/null
+++ b/test/iso_863.txt
@@ -0,0 +1,14 @@
+ !"#$%&'()*+,-./
+0123456789:;<=>?
+@ABCDEFGHIJKLMNO
+PQRSTUVWXYZ[\]^_
+`abcdefghijklmno
+pqrstuvwxyz{|}~
+.
+ܢ.
+.
+................
+................
+................
+..............
+..........
diff --git a/test/iso_865.txt b/test/iso_865.txt
new file mode 100644
index 0000000..4a4fad6
--- /dev/null
+++ b/test/iso_865.txt
@@ -0,0 +1,14 @@
+ !"#$%&'()*+,-./
+0123456789:;<=>?
+@ABCDEFGHIJKLMNO
+PQRSTUVWXYZ[\]^_
+`abcdefghijklmno
+pqrstuvwxyz{|}~
+
+..
+Ѫ.
+................
+................
+................
+..............
+..........
diff --git a/test/mac.txt b/test/mac.txt
new file mode 100644
index 0000000..50c0936
--- /dev/null
+++ b/test/mac.txt
@@ -0,0 +1 @@
+hello world take a break \ No newline at end of file
diff --git a/test/mac_dbl.txt b/test/mac_dbl.txt
new file mode 100644
index 0000000..ebff9eb
--- /dev/null
+++ b/test/mac_dbl.txt
@@ -0,0 +1 @@
+hello world take a break \ No newline at end of file
diff --git a/test/misc.t b/test/misc.t
new file mode 100755
index 0000000..b8da955
--- /dev/null
+++ b/test/misc.t
@@ -0,0 +1,92 @@
+#!/usr/bin/perl
+
+# Requires perl-Test-Simple installation.
+use Test::More tests => 16;
+
+$suffix = "";
+if (-e "../dos2unix.exe") {
+ $suffix = ".exe";
+}
+$DOS2UNIX = "../dos2unix" . $suffix;
+$MAC2UNIX = "../mac2unix" . $suffix;
+$UNIX2DOS = "../unix2dos" . $suffix;
+$UNIX2MAC = "../unix2mac" . $suffix;
+
+$ENV{'LC_ALL'} = 'en_US.UTF-8';
+
+system("$DOS2UNIX -v -7 -n chardos.txt out_unix.txt; cmp out_unix.txt charu7.txt");
+ok( $? == 0, '7bit');
+
+system("$DOS2UNIX -v < dos.txt > out_unix.txt; cmp out_unix.txt unix.txt");
+ok( $? == 0, 'DOS to Unix conversion, stdin/out' );
+
+system("$UNIX2DOS -v < unix.txt > out_dos.txt; cmp out_dos.txt dos.txt");
+ok( $? == 0, 'Unix to DOS conversion, stdin/out' );
+
+system("cat utf16le.txt | $DOS2UNIX -v > out_unix.txt; cmp out_unix.txt utf8unix.txt");
+ok( $? == 0, 'UTF-16LE with BOM to UTF-8, stdin/out' );
+
+system("cat utf16u.txt | $UNIX2DOS -v -u > out_dos.txt; cmp out_dos.txt utf16.txt");
+ok( $? == 0, 'UTF-16LE with BOM to UTF-16LE, stdin/out' );
+
+system("$DOS2UNIX -v -n utf16len.txt out_bin.txt");
+# file out_bin.txt may not exist.
+if (-e "out_bin.txt") {
+ $exists = "1";
+} else {
+ $exists = "0";
+}
+ok( $exists == 0, 'dos2unix skip binary file.' );
+
+system("$UNIX2DOS -v -n utf16len.txt out_bin.txt");
+# file out_bin.txt may not exist.
+if (-e "out_bin.txt") {
+ $exists = "1";
+} else {
+ $exists = "0";
+}
+ok( $exists == 0, 'unix2dos skip binary file.' );
+
+system("$DOS2UNIX -v < utf16len.txt > out.txt");
+$result = ($? >> 8);
+ok( $result == 1, 'Dos2unix stdio returns error on binary input.' );
+
+system("$UNIX2DOS -v < utf16len.txt > out.txt");
+$result = ($? >> 8);
+ok( $result == 1, 'Unix2dos stdio returns error on binary input.' );
+
+system("rm -f out_forc.txt");
+system("$DOS2UNIX -v -f -n utf16len.txt out_forc.txt");
+# With -f (force) the binary input must be converted, so out_forc.txt must exist.
+if (-e "out_forc.txt") {
+ $exists = "1";
+} else {
+ $exists = "0";
+}
+ok( $exists == 1, 'dos2unix force binary file.' );
+
+system("rm -f out_forc.txt");
+system("$UNIX2DOS -v -f -n utf16len.txt out_forc.txt");
+# With -f (force) the binary input must be converted, so out_forc.txt must exist.
+if (-e "out_forc.txt") {
+ $exists = "1";
+} else {
+ $exists = "0";
+}
+ok( $exists == 1, 'unix2dos force binary file.' );
+
+system("$DOS2UNIX -v -7 -n utf16le.txt out_unix.txt chardos.txt out_u7.txt; cmp out_unix.txt utf8unix.txt");
+ok( $? == 0, '7bit disabled for utf16');
+
+system("cmp out_u7.txt charu7.txt");
+ok( $? == 0, '7bit enabled again, dos2unix');
+
+system("$UNIX2DOS -v -7 -n utf8unxb.txt out_dos.txt charunix.txt out_d7.txt; cmp out_dos.txt utf8dos.txt");
+ok( $? == 0, '7bit disabled for utf8 with BOM');
+
+system("cmp out_d7.txt chard7.txt");
+ok( $? == 0, '7bit enabled again, unix2dos');
+
+system("$UNIX2DOS -v -u -m -n unix.txt out_dos.txt; cmp out_dos.txt dos_bom.txt");
+ok( $? == 0, 'Option -u must not disable -m on ASCII input');
+
diff --git a/test/mixed.txt b/test/mixed.txt
new file mode 100644
index 0000000..51ac4c3
--- /dev/null
+++ b/test/mixed.txt
@@ -0,0 +1,13 @@
+hello
+world
+
+take
+a
+break
+hello
+world
+
+take
+a
+break
+hello world take a break \ No newline at end of file
diff --git a/test/mixedd2u.txt b/test/mixedd2u.txt
new file mode 100644
index 0000000..45b737b
--- /dev/null
+++ b/test/mixedd2u.txt
@@ -0,0 +1,13 @@
+hello
+world
+
+take
+a
+break
+hello
+world
+
+take
+a
+break
+hello world take a break \ No newline at end of file
diff --git a/test/mixedm2u.txt b/test/mixedm2u.txt
new file mode 100644
index 0000000..f0aa9e4
--- /dev/null
+++ b/test/mixedm2u.txt
@@ -0,0 +1,18 @@
+hello
+world
+
+take
+a
+break
+hello
+world
+
+take
+a
+break
+hello
+world
+
+take
+a
+break
diff --git a/test/mixedu2d.txt b/test/mixedu2d.txt
new file mode 100644
index 0000000..7f2d646
--- /dev/null
+++ b/test/mixedu2d.txt
@@ -0,0 +1,13 @@
+hello
+world
+
+take
+a
+break
+hello
+world
+
+take
+a
+break
+hello world take a break \ No newline at end of file
diff --git a/test/mixedu2m.txt b/test/mixedu2m.txt
new file mode 100644
index 0000000..12c97a8
--- /dev/null
+++ b/test/mixedu2m.txt
@@ -0,0 +1,7 @@
+hello world take a break hello
+world
+
+take
+a
+break
+hello world take a break \ No newline at end of file
diff --git a/test/symlink.t b/test/symlink.t
new file mode 100755
index 0000000..6baa0a4
--- /dev/null
+++ b/test/symlink.t
@@ -0,0 +1,119 @@
+#!/usr/bin/perl
+
+# Requires perl-Test-Simple installation.
+use Test::More tests => 14;
+
+$suffix = "";
+if (-e "../dos2unix.exe") {
+ $suffix = ".exe";
+}
+$DOS2UNIX = "../dos2unix" . $suffix;
+$MAC2UNIX = "../mac2unix" . $suffix;
+$UNIX2DOS = "../unix2dos" . $suffix;
+$UNIX2MAC = "../unix2mac" . $suffix;
+
+# dos2unix skip symlink
+
+system("cp -f dos.txt out_link.txt");
+system("rm -f in_link.txt; ln -s out_link.txt in_link.txt");
+
+system("$DOS2UNIX -v in_link.txt; cmp out_link.txt dos.txt");
+ok( $? == 0, 'dos2unix, skip symlink, check symlink target.' );
+
+if (-l "in_link.txt") {
+ $symlink = "1";
+} else {
+ $symlink = "0";
+}
+
+ok( $symlink == 1, 'dos2unix, skip symlink, check symlink.' );
+
+
+# dos2unix replace symlink
+
+
+system("$DOS2UNIX -v -R in_link.txt; cmp out_link.txt dos.txt");
+ok( $? == 0, 'dos2unix, replace symlink, check symlink target.' );
+
+if (-l "in_link.txt") {
+ $symlink = "1";
+} else {
+ $symlink = "0";
+}
+
+ok( $symlink == 0, 'dos2unix, replace symlink, check symlink.' );
+
+system("cmp in_link.txt unix.txt");
+ok( $? == 0, 'dos2unix, replace symlink, check conversion.' );
+
+
+# dos2unix follow symlink
+
+
+system("cp -f dos.txt out_link.txt");
+system("rm -f in_link.txt; ln -s out_link.txt in_link.txt");
+
+system("$DOS2UNIX -v -F in_link.txt; cmp out_link.txt unix.txt");
+ok( $? == 0, 'dos2unix, follow symlink, check symlink target.' );
+
+if (-l "in_link.txt") {
+ $symlink = "1";
+} else {
+ $symlink = "0";
+}
+
+ok( $symlink == 1, 'dos2unix, follow symlink, check symlink.' );
+
+
+
+# unix2dos skip symlink
+
+system("cp -f unix.txt out_link.txt");
+system("rm -f in_link.txt; ln -s out_link.txt in_link.txt");
+
+system("$UNIX2DOS -v in_link.txt; cmp out_link.txt unix.txt");
+ok( $? == 0, 'unix2dos, skip symlink, check symlink target.' );
+
+if (-l "in_link.txt") {
+ $symlink = "1";
+} else {
+ $symlink = "0";
+}
+
+ok( $symlink == 1, 'unix2dos, skip symlink, check symlink.' );
+
+
+# unix2dos replace symlink
+
+
+system("$UNIX2DOS -v -R in_link.txt; cmp out_link.txt unix.txt");
+ok( $? == 0, 'unix2dos, replace symlink, check symlink target.' );
+
+if (-l "in_link.txt") {
+ $symlink = "1";
+} else {
+ $symlink = "0";
+}
+
+ok( $symlink == 0, 'unix2dos, replace symlink, check symlink.' );
+
+system("cmp in_link.txt dos.txt");
+ok( $? == 0, 'unix2dos, replace symlink, check conversion.' );
+
+
+# unix2dos follow symlink
+
+
+system("cp -f unix.txt out_link.txt");
+system("rm -f in_link.txt; ln -s out_link.txt in_link.txt");
+
+system("$UNIX2DOS -v -F in_link.txt; cmp out_link.txt dos.txt");
+ok( $? == 0, 'unix2dos, follow symlink, check symlink target.' );
+
+if (-l "in_link.txt") {
+ $symlink = "1";
+} else {
+ $symlink = "0";
+}
+
+ok( $symlink == 1, 'unix2dos, follow symlink, check symlink.' );
diff --git a/test/unix.txt b/test/unix.txt
new file mode 100644
index 0000000..f8fe2cd
--- /dev/null
+++ b/test/unix.txt
@@ -0,0 +1,6 @@
+hello
+world
+
+take
+a
+break
diff --git a/test/unix_bom.txt b/test/unix_bom.txt
new file mode 100644
index 0000000..f33510b
--- /dev/null
+++ b/test/unix_bom.txt
@@ -0,0 +1,6 @@
+hello
+world
+
+take
+a
+break
diff --git a/test/unix_dbl.txt b/test/unix_dbl.txt
new file mode 100644
index 0000000..35f45a4
--- /dev/null
+++ b/test/unix_dbl.txt
@@ -0,0 +1,12 @@
+hello
+
+world
+
+
+
+take
+
+a
+
+break
+
diff --git a/test/utf16.t b/test/utf16.t
new file mode 100755
index 0000000..3f4960a
--- /dev/null
+++ b/test/utf16.t
@@ -0,0 +1,74 @@
+#!/usr/bin/perl
+
+# Requires perl-Test-Simple installation.
+use Test::More tests => 19;
+
+$suffix = "";
+if (-e "../dos2unix.exe") {
+ $suffix = ".exe";
+}
+
+$system = `uname -s`;
+if ($system =~ m/MINGW/)
+{
+ $unix=0;
+} else {
+ $unix=1;
+}
+
+$DOS2UNIX = "../dos2unix" . $suffix;
+$MAC2UNIX = "../mac2unix" . $suffix;
+$UNIX2DOS = "../unix2dos" . $suffix;
+$UNIX2MAC = "../unix2mac" . $suffix;
+
+$ENV{'LC_ALL'} = 'en_US.UTF-8';
+
+system("$DOS2UNIX -v -n utf16le.txt out_unix.txt; cmp out_unix.txt utf8unix.txt");
+ok( $? == 0, 'DOS UTF-16LE to Unix UTF-8' );
+system("$DOS2UNIX -v -n utf16be.txt out_unix.txt; cmp out_unix.txt utf8unix.txt");
+ok( $? == 0, 'DOS UTF-16BE to Unix UTF-8' );
+system("$UNIX2DOS -v -n utf16le.txt out_dos.txt; cmp out_dos.txt utf8dos.txt");
+ok( $? == 0, 'DOS UTF-16LE to DOS UTF-8' );
+system("$UNIX2DOS -v -n utf16be.txt out_dos.txt; cmp out_dos.txt utf8dos.txt");
+ok( $? == 0, 'DOS UTF-16BE to DOS UTF-8' );
+
+system("$DOS2UNIX -v -ul -n utf16len.txt out_unix.txt; cmp out_unix.txt utf8unix.txt");
+ok( $? == 0, 'UTF-16LE without BOM to UTF-8' );
+system("$DOS2UNIX -v -ub -n utf16ben.txt out_unix.txt; cmp out_unix.txt utf8unix.txt");
+ok( $? == 0, 'UTF-16BE without BOM to UTF-8' );
+system("$DOS2UNIX -v -ul -n utf16be.txt out_unix.txt; cmp out_unix.txt utf8unix.txt");
+ok( $? == 0, 'BOM overrides -ul' );
+system("$DOS2UNIX -v -ub -n utf16le.txt out_unix.txt; cmp out_unix.txt utf8unix.txt");
+ok( $? == 0, 'BOM overrides -ub' );
+
+system("$DOS2UNIX -v -b -n utf16le.txt out_unix.txt; cmp out_unix.txt utf8unxb.txt");
+ok( $? == 0, 'DOS UTF-16LE to Unix UTF-8, keep BOM' );
+system("$UNIX2DOS -v -r -n utf16le.txt out_dos.txt; cmp out_dos.txt utf8dosn.txt");
+ok( $? == 0, 'DOS UTF-16LE to DOS UTF-8, remove BOM' );
+
+system("$MAC2UNIX -v -n utf16le.txt out_unix.txt; cmp out_unix.txt utf8dosn.txt");
+ok( $? == 0, 'mac2unix does not change utf16 DOS line breaks.' );
+system("$UNIX2MAC -v -n utf16le.txt out_mac.txt; cmp out_mac.txt utf8dos.txt");
+ok( $? == 0, 'unix2mac does not change utf16 DOS line breaks.' );
+
+system("$UNIX2DOS -v -u -n utf16le.txt out_dos.txt; cmp out_dos.txt utf16le.txt");
+ok( $? == 0, 'DOS UTF-16LE to DOS UTF-16' );
+system("$UNIX2DOS -v -u -n utf16be.txt out_dos.txt; cmp out_dos.txt utf16be.txt");
+ok( $? == 0, 'DOS UTF-16BE to DOS UTF-16' );
+system("$DOS2UNIX -v -b -u -n utf16.txt out_unix.txt; cmp out_unix.txt utf16u.txt");
+ok( $? == 0, 'DOS UTF-16LE to Unix UTF-16' );
+system("$MAC2UNIX -v -b -u -n utf16m.txt out_unix.txt; cmp out_unix.txt utf16u.txt");
+ok( $? == 0, 'Mac UTF-16LE to Unix UTF-16' );
+system("$UNIX2DOS -v -b -u -n utf16u.txt out_dos.txt; cmp out_dos.txt utf16.txt");
+ok( $? == 0, 'Unix UTF-16 to DOS UTF-16LE' );
+system("$UNIX2MAC -v -b -u -n utf16u.txt out_mac.txt; cmp out_mac.txt utf16m.txt");
+ok( $? == 0, 'Unix UTF-16 to Mac UTF-16LE' );
+
+$ENV{'LC_ALL'} = 'en_US.ISO-8859-1';
+
+system("$DOS2UNIX -v -n utf16le.txt out_unix.txt");
+$result = ($? >> 8);
+if ( $unix ) { $expected = 1; } else { $expected = 0 };
+print "UNIX" . $unix . "\n";
+print "EXP" . $expected . "\n";
+ok( $result == $expected, 'DOS UTF-16LE to Unix UTF-8, env is not UTF-8' );
diff --git a/test/utf16.txt b/test/utf16.txt
new file mode 100755
index 0000000..868b5af
--- /dev/null
+++ b/test/utf16.txt
Binary files differ
diff --git a/test/utf16_be.txt b/test/utf16be.txt
index 1be5db9..1be5db9 100755
--- a/test/utf16_be.txt
+++ b/test/utf16be.txt
Binary files differ
diff --git a/test/utf16_be_nobom.txt b/test/utf16ben.txt
index 2efd41a..2efd41a 100644
--- a/test/utf16_be_nobom.txt
+++ b/test/utf16ben.txt
Binary files differ
diff --git a/test/utf16_le.txt b/test/utf16le.txt
index db9f535..db9f535 100755
--- a/test/utf16_le.txt
+++ b/test/utf16le.txt
Binary files differ
diff --git a/test/utf16_le_nobom.txt b/test/utf16len.txt
index 1861149..1861149 100644
--- a/test/utf16_le_nobom.txt
+++ b/test/utf16len.txt
Binary files differ
diff --git a/test/utf16m.txt b/test/utf16m.txt
new file mode 100755
index 0000000..cd523f8
--- /dev/null
+++ b/test/utf16m.txt
Binary files differ
diff --git a/test/utf16u.txt b/test/utf16u.txt
new file mode 100755
index 0000000..2bd9b3b
--- /dev/null
+++ b/test/utf16u.txt
Binary files differ
diff --git a/test/utf8.t b/test/utf8.t
new file mode 100755
index 0000000..454ed99
--- /dev/null
+++ b/test/utf8.t
@@ -0,0 +1,33 @@
+#!/usr/bin/perl
+
+# Requires perl-Test-Simple installation.
+use Test::More tests => 6;
+
+$suffix = "";
+if (-e "../dos2unix.exe") {
+ $suffix = ".exe";
+}
+$DOS2UNIX = "../dos2unix" . $suffix;
+$MAC2UNIX = "../mac2unix" . $suffix;
+$UNIX2DOS = "../unix2dos" . $suffix;
+$UNIX2MAC = "../unix2mac" . $suffix;
+
+system("$DOS2UNIX -v -n dos_bom.txt out_unix.txt; cmp out_unix.txt unix.txt");
+ok( $? == 0, 'dos2unix removes BOM' );
+
+system("$DOS2UNIX -v -b -n dos_bom.txt out_unix.txt; cmp out_unix.txt unix_bom.txt");
+ok( $? == 0, 'dos2unix -b keeps BOM' );
+
+system("$DOS2UNIX -v -m -n dos.txt out_unix.txt; cmp out_unix.txt unix_bom.txt");
+ok( $? == 0, 'dos2unix -m adds BOM' );
+
+system("$UNIX2DOS -v -n unix_bom.txt out_dos.txt; cmp out_dos.txt dos_bom.txt");
+ok( $? == 0, 'unix2dos keeps BOM' );
+
+system("$UNIX2DOS -v -r -n unix_bom.txt out_dos.txt; cmp out_dos.txt dos.txt");
+ok( $? == 0, 'unix2dos -r removes BOM' );
+
+system("$UNIX2DOS -v -m -n unix.txt out_dos.txt; cmp out_dos.txt dos_bom.txt");
+ok( $? == 0, 'unix2dos -m adds BOM' );
+
+
diff --git a/test/utf8dos.txt b/test/utf8dos.txt
new file mode 100755
index 0000000..76afa4f
--- /dev/null
+++ b/test/utf8dos.txt
@@ -0,0 +1,51 @@
+萨尔茨堡,奥地利共和国萨尔茨堡州的首府,2007年人口约15万,是继维也纳、格拉茨和林茨之后的奥地利第四大城市。萨尔茨堡位于奥地利的西部,是阿尔卑斯山脉的门庭,城市的建筑风格以巴洛克为主,城市的历史相当悠久,据史料记载,萨尔茨堡是现今奥地利管辖地域内历史最悠久的城市。萨尔茨堡是音乐天才莫扎特的出生地,莫扎特不到36年的短暂生命中超过一半的岁月是在萨尔茨堡度过的。萨尔茨堡也是指挥家赫伯特·冯·卡拉扬的故乡,电影《音乐之声》的拍摄地。萨尔茨堡老城在1996年被联合国教科文组织列入世界遗产名录。
+
+𝌆
+Unicode
+Scalar
+Value UTF-8 NCR
+U+2070E 𠜎 𠜎
+U+20731 𠜱 𠜱
+U+20779 𠝹 𠝹
+U+20C53 𠱓 𠱓
+U+20C78 𠱸 𠱸
+U+20C96 𠲖 𠲖
+U+20CCF 𠳏 𠳏
+U+20CD5 𠳕 𠳕
+U+20D15 𠴕 𠴕
+U+20D7C 𠵼 𠵼
+U+20D7F 𠵿 𠵿
+U+20E0E 𠸎 𠸎
+U+20E0F 𠸏 𠸏
+U+20E77 𠹷 𠹷
+U+20E9D 𠺝 𠺝
+U+20EA2 𠺢 𠺢
+U+20ED7 𠻗 𠻗
+U+20EF9 𠻹 𠻹
+U+20EFA 𠻺 𠻺
+U+20F2D 𠼭 𠼭
+U+20F2E 𠼮 𠼮
+U+20F4C 𠽌 𠽌
+U+20FB4 𠾴 𠾴
+U+20FBC 𠾼 𠾼
+U+20FEA 𠿪 𠿪
+U+2105C 𡁜 𡁜
+U+2106F 𡁯 𡁯
+U+21075 𡁵 𡁵
+U+21076 𡁶 𡁶
+U+2107B 𡁻 𡁻
+U+210C1 𡃁 𡃁
+U+210C9 𡃉 𡃉
+U+211D9 𡇙 𡇙
+U+220C7 𢃇 𢃇
+U+227B5 𢞵 𢞵
+U+22AD5 𢫕 𢫕
+U+22B43 𢭃 𢭃
+U+22BCA 𢯊 𢯊
+U+22C51 𢱑 𢱑
+U+22C55 𢱕 𢱕
+U+22CC2 𢳂 𢳂
+U+22D08 𢴈 𢴈
+U+22D4C 𢵌 𢵌
+U+22D67 𢵧 𢵧
+U+22EB3 𢺳 𢺳 \ No newline at end of file
diff --git a/test/utf8dosn.txt b/test/utf8dosn.txt
new file mode 100755
index 0000000..154aa88
--- /dev/null
+++ b/test/utf8dosn.txt
@@ -0,0 +1,51 @@
+萨尔茨堡,奥地利共和国萨尔茨堡州的首府,2007年人口约15万,是继维也纳、格拉茨和林茨之后的奥地利第四大城市。萨尔茨堡位于奥地利的西部,是阿尔卑斯山脉的门庭,城市的建筑风格以巴洛克为主,城市的历史相当悠久,据史料记载,萨尔茨堡是现今奥地利管辖地域内历史最悠久的城市。萨尔茨堡是音乐天才莫扎特的出生地,莫扎特不到36年的短暂生命中超过一半的岁月是在萨尔茨堡度过的。萨尔茨堡也是指挥家赫伯特·冯·卡拉扬的故乡,电影《音乐之声》的拍摄地。萨尔茨堡老城在1996年被联合国教科文组织列入世界遗产名录。
+
+𝌆
+Unicode
+Scalar
+Value UTF-8 NCR
+U+2070E 𠜎 𠜎
+U+20731 𠜱 𠜱
+U+20779 𠝹 𠝹
+U+20C53 𠱓 𠱓
+U+20C78 𠱸 𠱸
+U+20C96 𠲖 𠲖
+U+20CCF 𠳏 𠳏
+U+20CD5 𠳕 𠳕
+U+20D15 𠴕 𠴕
+U+20D7C 𠵼 𠵼
+U+20D7F 𠵿 𠵿
+U+20E0E 𠸎 𠸎
+U+20E0F 𠸏 𠸏
+U+20E77 𠹷 𠹷
+U+20E9D 𠺝 𠺝
+U+20EA2 𠺢 𠺢
+U+20ED7 𠻗 𠻗
+U+20EF9 𠻹 𠻹
+U+20EFA 𠻺 𠻺
+U+20F2D 𠼭 𠼭
+U+20F2E 𠼮 𠼮
+U+20F4C 𠽌 𠽌
+U+20FB4 𠾴 𠾴
+U+20FBC 𠾼 𠾼
+U+20FEA 𠿪 𠿪
+U+2105C 𡁜 𡁜
+U+2106F 𡁯 𡁯
+U+21075 𡁵 𡁵
+U+21076 𡁶 𡁶
+U+2107B 𡁻 𡁻
+U+210C1 𡃁 𡃁
+U+210C9 𡃉 𡃉
+U+211D9 𡇙 𡇙
+U+220C7 𢃇 𢃇
+U+227B5 𢞵 𢞵
+U+22AD5 𢫕 𢫕
+U+22B43 𢭃 𢭃
+U+22BCA 𢯊 𢯊
+U+22C51 𢱑 𢱑
+U+22C55 𢱕 𢱕
+U+22CC2 𢳂 𢳂
+U+22D08 𢴈 𢴈
+U+22D4C 𢵌 𢵌
+U+22D67 𢵧 𢵧
+U+22EB3 𢺳 𢺳 \ No newline at end of file
diff --git a/test/utf8unix.txt b/test/utf8unix.txt
new file mode 100755
index 0000000..8b93c0c
--- /dev/null
+++ b/test/utf8unix.txt
@@ -0,0 +1,51 @@
+萨尔茨堡,奥地利共和国萨尔茨堡州的首府,2007年人口约15万,是继维也纳、格拉茨和林茨之后的奥地利第四大城市。萨尔茨堡位于奥地利的西部,是阿尔卑斯山脉的门庭,城市的建筑风格以巴洛克为主,城市的历史相当悠久,据史料记载,萨尔茨堡是现今奥地利管辖地域内历史最悠久的城市。萨尔茨堡是音乐天才莫扎特的出生地,莫扎特不到36年的短暂生命中超过一半的岁月是在萨尔茨堡度过的。萨尔茨堡也是指挥家赫伯特·冯·卡拉扬的故乡,电影《音乐之声》的拍摄地。萨尔茨堡老城在1996年被联合国教科文组织列入世界遗产名录。
+
+𝌆
+Unicode
+Scalar
+Value UTF-8 NCR
+U+2070E 𠜎 𠜎
+U+20731 𠜱 𠜱
+U+20779 𠝹 𠝹
+U+20C53 𠱓 𠱓
+U+20C78 𠱸 𠱸
+U+20C96 𠲖 𠲖
+U+20CCF 𠳏 𠳏
+U+20CD5 𠳕 𠳕
+U+20D15 𠴕 𠴕
+U+20D7C 𠵼 𠵼
+U+20D7F 𠵿 𠵿
+U+20E0E 𠸎 𠸎
+U+20E0F 𠸏 𠸏
+U+20E77 𠹷 𠹷
+U+20E9D 𠺝 𠺝
+U+20EA2 𠺢 𠺢
+U+20ED7 𠻗 𠻗
+U+20EF9 𠻹 𠻹
+U+20EFA 𠻺 𠻺
+U+20F2D 𠼭 𠼭
+U+20F2E 𠼮 𠼮
+U+20F4C 𠽌 𠽌
+U+20FB4 𠾴 𠾴
+U+20FBC 𠾼 𠾼
+U+20FEA 𠿪 𠿪
+U+2105C 𡁜 𡁜
+U+2106F 𡁯 𡁯
+U+21075 𡁵 𡁵
+U+21076 𡁶 𡁶
+U+2107B 𡁻 𡁻
+U+210C1 𡃁 𡃁
+U+210C9 𡃉 𡃉
+U+211D9 𡇙 𡇙
+U+220C7 𢃇 𢃇
+U+227B5 𢞵 𢞵
+U+22AD5 𢫕 𢫕
+U+22B43 𢭃 𢭃
+U+22BCA 𢯊 𢯊
+U+22C51 𢱑 𢱑
+U+22C55 𢱕 𢱕
+U+22CC2 𢳂 𢳂
+U+22D08 𢴈 𢴈
+U+22D4C 𢵌 𢵌
+U+22D67 𢵧 𢵧
+U+22EB3 𢺳 𢺳 \ No newline at end of file
diff --git a/test/utf8unxb.txt b/test/utf8unxb.txt
new file mode 100755
index 0000000..f670e9b
--- /dev/null
+++ b/test/utf8unxb.txt
@@ -0,0 +1,51 @@
+萨尔茨堡,奥地利共和国萨尔茨堡州的首府,2007年人口约15万,是继维也纳、格拉茨和林茨之后的奥地利第四大城市。萨尔茨堡位于奥地利的西部,是阿尔卑斯山脉的门庭,城市的建筑风格以巴洛克为主,城市的历史相当悠久,据史料记载,萨尔茨堡是现今奥地利管辖地域内历史最悠久的城市。萨尔茨堡是音乐天才莫扎特的出生地,莫扎特不到36年的短暂生命中超过一半的岁月是在萨尔茨堡度过的。萨尔茨堡也是指挥家赫伯特·冯·卡拉扬的故乡,电影《音乐之声》的拍摄地。萨尔茨堡老城在1996年被联合国教科文组织列入世界遗产名录。
+
+𝌆
+Unicode
+Scalar
+Value UTF-8 NCR
+U+2070E 𠜎 𠜎
+U+20731 𠜱 𠜱
+U+20779 𠝹 𠝹
+U+20C53 𠱓 𠱓
+U+20C78 𠱸 𠱸
+U+20C96 𠲖 𠲖
+U+20CCF 𠳏 𠳏
+U+20CD5 𠳕 𠳕
+U+20D15 𠴕 𠴕
+U+20D7C 𠵼 𠵼
+U+20D7F 𠵿 𠵿
+U+20E0E 𠸎 𠸎
+U+20E0F 𠸏 𠸏
+U+20E77 𠹷 𠹷
+U+20E9D 𠺝 𠺝
+U+20EA2 𠺢 𠺢
+U+20ED7 𠻗 𠻗
+U+20EF9 𠻹 𠻹
+U+20EFA 𠻺 𠻺
+U+20F2D 𠼭 𠼭
+U+20F2E 𠼮 𠼮
+U+20F4C 𠽌 𠽌
+U+20FB4 𠾴 𠾴
+U+20FBC 𠾼 𠾼
+U+20FEA 𠿪 𠿪
+U+2105C 𡁜 𡁜
+U+2106F 𡁯 𡁯
+U+21075 𡁵 𡁵
+U+21076 𡁶 𡁶
+U+2107B 𡁻 𡁻
+U+210C1 𡃁 𡃁
+U+210C9 𡃉 𡃉
+U+211D9 𡇙 𡇙
+U+220C7 𢃇 𢃇
+U+227B5 𢞵 𢞵
+U+22AD5 𢫕 𢫕
+U+22B43 𢭃 𢭃
+U+22BCA 𢯊 𢯊
+U+22C51 𢱑 𢱑
+U+22C55 𢱕 𢱕
+U+22CC2 𢳂 𢳂
+U+22D08 𢴈 𢴈
+U+22D4C 𢵌 𢵌
+U+22D67 𢵧 𢵧
+U+22EB3 𢺳 𢺳 \ No newline at end of file