diff options
author | DongHun Kwak <dh0128.kwak@samsung.com> | 2016-10-20 13:28:30 +0900 |
---|---|---|
committer | DongHun Kwak <dh0128.kwak@samsung.com> | 2016-10-20 13:28:33 +0900 |
commit | 6ad0574136d3b23b486db4fa359672b9f731e235 (patch) | |
tree | ae96a2384615b0132969083bc097c918acf4e9f7 /test | |
parent | de90208a6d3f0b5c5944c4aa4d4ea95fc2611470 (diff) | |
download | dos2unix-6ad0574136d3b23b486db4fa359672b9f731e235.tar.gz dos2unix-6ad0574136d3b23b486db4fa359672b9f731e235.tar.bz2 dos2unix-6ad0574136d3b23b486db4fa359672b9f731e235.zip |
Imported Upstream version 7.0
Change-Id: Ib83b48cbb624fbb64cd653899c84ddffdcd21860
Signed-off-by: DongHun Kwak <dh0128.kwak@samsung.com>
Diffstat (limited to 'test')
-rwxr-xr-x | test/Makefile | 35 | ||||
-rwxr-xr-x | test/ascii.t | 84 | ||||
-rwxr-xr-x | test/chard7.txt | 14 | ||||
-rwxr-xr-x | test/chardos.txt | 14 | ||||
-rwxr-xr-x | test/charu7.txt | 14 | ||||
-rw-r--r-- | test/charunix.txt | 14 | ||||
-rw-r--r-- | test/cp_1252.txt | 14 | ||||
-rw-r--r-- | test/cp_437.txt | 14 | ||||
-rw-r--r-- | test/cp_850.txt | 14 | ||||
-rw-r--r-- | test/cp_860.txt | 14 | ||||
-rw-r--r-- | test/cp_863.txt | 14 | ||||
-rw-r--r-- | test/cp_865.txt | 14 | ||||
-rw-r--r-- | test/dos.txt | 6 | ||||
-rw-r--r-- | test/dos_bom.txt | 6 | ||||
-rw-r--r-- | test/dos_dbl.txt | 12 | ||||
-rwxr-xr-x | test/iso.t | 84 | ||||
-rw-r--r-- | test/iso_1252.txt | 14 | ||||
-rw-r--r-- | test/iso_437.txt | 14 | ||||
-rw-r--r-- | test/iso_850.txt | 14 | ||||
-rw-r--r-- | test/iso_860.txt | 14 | ||||
-rw-r--r-- | test/iso_863.txt | 14 | ||||
-rw-r--r-- | test/iso_865.txt | 14 | ||||
-rw-r--r-- | test/mac.txt | 1 | ||||
-rw-r--r-- | test/mac_dbl.txt | 1 | ||||
-rwxr-xr-x | test/misc.t | 92 | ||||
-rw-r--r-- | test/mixed.txt | 13 | ||||
-rw-r--r-- | test/mixedd2u.txt | 13 | ||||
-rw-r--r-- | test/mixedm2u.txt | 18 | ||||
-rw-r--r-- | test/mixedu2d.txt | 13 | ||||
-rw-r--r-- | test/mixedu2m.txt | 7 | ||||
-rwxr-xr-x | test/symlink.t | 119 | ||||
-rw-r--r-- | test/unix.txt | 6 | ||||
-rw-r--r-- | test/unix_bom.txt | 6 | ||||
-rw-r--r-- | test/unix_dbl.txt | 12 | ||||
-rwxr-xr-x | test/utf16.t | 74 | ||||
-rwxr-xr-x | test/utf16.txt | bin | 0 -> 66 bytes | |||
-rwxr-xr-x | test/utf16be.txt (renamed from test/utf16_be.txt) | bin | 1926 -> 1926 bytes | |||
-rw-r--r-- | test/utf16ben.txt (renamed from test/utf16_be_nobom.txt) | bin | 1924 -> 1924 bytes | |||
-rwxr-xr-x | test/utf16le.txt (renamed from test/utf16_le.txt) | bin | 1926 -> 1926 bytes | |||
-rw-r--r-- | test/utf16len.txt (renamed from test/utf16_le_nobom.txt) | bin | 1924 -> 1924 bytes | |||
-rwxr-xr-x | test/utf16m.txt | bin | 0 -> 54 bytes | |||
-rwxr-xr-x | test/utf16u.txt | bin | 0 -> 54 bytes | |||
-rwxr-xr-x | test/utf8.t | 33 | ||||
-rwxr-xr-x | test/utf8dos.txt | 51 | ||||
-rwxr-xr-x | test/utf8dosn.txt | 51 | ||||
-rwxr-xr-x | test/utf8unix.txt | 51 | ||||
-rwxr-xr-x | test/utf8unxb.txt | 51 |
47 files changed, 1063 insertions, 0 deletions
diff --git a/test/Makefile b/test/Makefile new file mode 100755 index 0000000..25832a5 --- /dev/null +++ b/test/Makefile @@ -0,0 +1,35 @@ +TESTS = ascii.t iso.t utf8.t utf16.t misc.t + +ifneq ($(findstring MINGW,$(shell uname)),MINGW) +TESTS += symlink.t +endif + + +all: test + +check: test + +test: + prove -v $(TESTS) + + +ascii: + prove -v $@.t + +iso: + prove -v $@.t + +misc: + prove -v $@.t + +utf8: + prove -v $@.t + +utf16: + prove -v $@.t + +symlink: + prove -v $@.t + +clean: + rm -f out*.txt in*.txt diff --git a/test/ascii.t b/test/ascii.t new file mode 100755 index 0000000..64f6c7b --- /dev/null +++ b/test/ascii.t @@ -0,0 +1,84 @@ +#!/usr/bin/perl + +# Requires perl-Test-Simple installation. +use Test::More tests => 30; + +$suffix = ""; +if (-e "../dos2unix.exe") { + $suffix = ".exe"; +} +$DOS2UNIX = "../dos2unix" . $suffix; +$MAC2UNIX = "../mac2unix" . $suffix; +$UNIX2DOS = "../unix2dos" . $suffix; +$UNIX2MAC = "../unix2mac" . $suffix; + +system("$DOS2UNIX -v -n dos.txt out_unix.txt; cmp out_unix.txt unix.txt"); +ok( $? == 0, 'DOS to Unix conversion' ); + +system("$MAC2UNIX -v -n mac.txt out_unix.txt; cmp out_unix.txt unix.txt"); +ok( $? == 0, 'DOS to Unix conversion' ); + +system("$UNIX2DOS -v -n unix.txt out_dos.txt; cmp out_dos.txt dos.txt"); +ok( $? == 0, 'Unix to DOS conversion' ); + +system("$UNIX2MAC -v -n unix.txt out_mac.txt; cmp out_mac.txt mac.txt"); +ok( $? == 0, 'Unix to Mac conversion' ); + +system("cp -f dos.txt out_unix.txt; $DOS2UNIX -v out_unix.txt; cmp out_unix.txt unix.txt"); +ok( $? == 0, 'DOS to Unix conversion, old file mode' ); + +system("cp -f unix.txt out_dos.txt; $UNIX2DOS -v out_dos.txt; cmp out_dos.txt dos.txt"); +ok( $? == 0, 'Unix to DOS conversion, old file mode' ); + +system("$DOS2UNIX -v -n unix.txt out_unix.txt; cmp out_unix.txt unix.txt"); +ok( $? == 0, 'dos2unix must not change unix line breaks'); +system("$DOS2UNIX -v -n mac.txt out_unix.txt; cmp out_unix.txt mac.txt"); +ok( $? == 0, 'dos2unix must not change mac line breaks'); +system("$MAC2UNIX -v -n unix.txt out_unix.txt; cmp out_unix.txt unix.txt"); +ok( $? == 0, 'mac2unix must not change unix line breaks'); +system("$MAC2UNIX -v -n dos.txt out_unix.txt; cmp out_unix.txt dos.txt"); +ok( $? == 0, 'mac2unix must not change dos line breaks'); +system("$UNIX2DOS -v -n dos.txt out_dos.txt; cmp out_dos.txt dos.txt"); +ok( $? == 0, 'unix2dos must not change dos line breaks'); +system("$UNIX2DOS -v -n mac.txt out_dos.txt; cmp out_dos.txt mac.txt"); +ok( $? == 0, 'unix2dos must not change mac line breaks'); +system("$UNIX2MAC -v -n dos.txt out_mac.txt; cmp out_mac.txt dos.txt"); +ok( $? == 0, 'unix2mac must not change dos line breaks'); +system("$UNIX2MAC -v -n mac.txt out_mac.txt; cmp out_mac.txt mac.txt"); +ok( $? == 0, 'unix2mac must not change mac line breaks'); + +system("$DOS2UNIX -v -n mixed.txt out.txt; cmp out.txt mixedd2u.txt"); +ok( $? == 0, 'DOS to Unix conversion mixed'); +system("$MAC2UNIX -v -n mixed.txt out.txt; cmp out.txt mixedm2u.txt"); +ok( $? == 0, 'DOS to Unix conversion mixed'); +system("$UNIX2DOS -v -n mixed.txt out.txt; cmp out.txt mixedu2d.txt"); +ok( $? == 0, 'Unix to DOS conversion mixed'); +system("$UNIX2MAC -v -n mixed.txt out.txt; cmp out.txt mixedu2m.txt"); +ok( $? == 0, 'Unix to Mac conversion mixed'); + +system("$DOS2UNIX -v -l -n dos.txt out_unix.txt; cmp out_unix.txt unix_dbl.txt"); +ok( $? == 0, 'DOS to Unix conversion with line doubling'); +system("$MAC2UNIX -v -l -n mac.txt out_unix.txt; cmp out_unix.txt unix_dbl.txt"); +ok( $? == 0, 'DOS to Unix conversion with line doubling'); +system("$UNIX2DOS -v -l -n unix.txt out_dos.txt; cmp out_dos.txt dos_dbl.txt"); +ok( $? == 0, 'Unix to DOS conversion with line doubling'); +system("$UNIX2MAC -v -l -n unix.txt out_mac.txt; cmp out_mac.txt mac_dbl.txt"); +ok( $? == 0, 'Unix to Mac conversion with line doubling'); + +system("$DOS2UNIX -v -l -n unix.txt out_unix.txt; cmp out_unix.txt unix.txt"); +ok( $? == 0, 'dos2unix -l must not change unix line breaks'); +system("$DOS2UNIX -v -l -n mac.txt out_unix.txt; cmp out_unix.txt mac.txt"); +ok( $? == 0, 'dos2unix -l must not change mac line breaks'); +system("$MAC2UNIX -v -l -n unix.txt out_unix.txt; cmp out_unix.txt unix.txt"); +ok( $? == 0, 'mac2unix -l must not change unix line breaks'); +system("$MAC2UNIX -v -l -n dos.txt out_unix.txt; cmp out_unix.txt dos.txt"); +ok( $? == 0, 'mac2unix -l must not change dos line breaks'); +system("$UNIX2DOS -v -l -n dos.txt out_dos.txt; cmp out_dos.txt dos.txt"); +ok( $? == 0, 'unix2dos -l must not change dos line breaks'); +system("$UNIX2DOS -v -l -n mac.txt out_dos.txt; cmp out_dos.txt mac.txt"); +ok( $? == 0, 'unix2dos -l must not change mac line breaks'); +system("$UNIX2MAC -v -l -n dos.txt out_mac.txt; cmp out_mac.txt dos.txt"); +ok( $? == 0, 'unix2mac -l must not change dos line breaks'); +system("$UNIX2MAC -v -l -n mac.txt out_mac.txt; cmp out_mac.txt mac.txt"); +ok( $? == 0, 'unix2mac -l must not change mac line breaks'); + diff --git a/test/chard7.txt b/test/chard7.txt new file mode 100755 index 0000000..3b3c9fc --- /dev/null +++ b/test/chard7.txt @@ -0,0 +1,14 @@ + !"#$%&'()*+,-./
+0123456789:;<=>?
+@ABCDEFGHIJKLMNO
+PQRSTUVWXYZ[\]^_
+`abcdefghijklmno
+pqrstuvwxyz{|}~
+
+
+
+
+
+
+
+
diff --git a/test/chardos.txt b/test/chardos.txt new file mode 100755 index 0000000..ff85358 --- /dev/null +++ b/test/chardos.txt @@ -0,0 +1,14 @@ + !"#$%&'()*+,-./
+0123456789:;<=>?
+@ABCDEFGHIJKLMNO
+PQRSTUVWXYZ[\]^_
+`abcdefghijklmno
+pqrstuvwxyz{|}~
+
+
+
+
+
+
+
+
diff --git a/test/charu7.txt b/test/charu7.txt new file mode 100755 index 0000000..0073580 --- /dev/null +++ b/test/charu7.txt @@ -0,0 +1,14 @@ + !"#$%&'()*+,-./ +0123456789:;<=>? +@ABCDEFGHIJKLMNO +PQRSTUVWXYZ[\]^_ +`abcdefghijklmno +pqrstuvwxyz{|}~ + + + + + + + + diff --git a/test/charunix.txt b/test/charunix.txt new file mode 100644 index 0000000..e78f391 --- /dev/null +++ b/test/charunix.txt @@ -0,0 +1,14 @@ + !"#$%&'()*+,-./ +0123456789:;<=>? +@ABCDEFGHIJKLMNO +PQRSTUVWXYZ[\]^_ +`abcdefghijklmno +pqrstuvwxyz{|}~ + + + + + + + + diff --git a/test/cp_1252.txt b/test/cp_1252.txt new file mode 100644 index 0000000..5448e22 --- /dev/null +++ b/test/cp_1252.txt @@ -0,0 +1,14 @@ + !"#$%&'()*+,-./
+0123456789:;<=>?
+@ABCDEFGHIJKLMNO
+PQRSTUVWXYZ[\]^_
+`abcdefghijklmno
+pqrstuvwxyz{|}~
+................
+................
+
+
+
+
+
+
diff --git a/test/cp_437.txt b/test/cp_437.txt new file mode 100644 index 0000000..ed80363 --- /dev/null +++ b/test/cp_437.txt @@ -0,0 +1,14 @@ + !"#$%&'()*+,-./
+0123456789:;<=>?
+@ABCDEFGHIJKLMNO
+PQRSTUVWXYZ[\]^_
+`abcdefghijklmno
+pqrstuvwxyz{|}~
+................
+................
+........
+......
+...........
+............
+.
+.....
diff --git a/test/cp_850.txt b/test/cp_850.txt new file mode 100644 index 0000000..6cf3427 --- /dev/null +++ b/test/cp_850.txt @@ -0,0 +1,14 @@ + !"#$%&'()*+,-./
+0123456789:;<=>?
+@ABCDEFGHIJKLMNO
+PQRSTUVWXYZ[\]^_
+`abcdefghijklmno
+pqrstuvwxyz{|}~
+................
+................
+Ͼ
+
+ǎԐ
+ѥ噞
+Ƅ
+Ф
diff --git a/test/cp_860.txt b/test/cp_860.txt new file mode 100644 index 0000000..69904de --- /dev/null +++ b/test/cp_860.txt @@ -0,0 +1,14 @@ + !"#$%&'()*+,-./
+0123456789:;<=>?
+@ABCDEFGHIJKLMNO
+PQRSTUVWXYZ[\]^_
+`abcdefghijklmno
+pqrstuvwxyz{|}~
+................
+................
+.........
+......
+......
+.......
+......
+.......
diff --git a/test/cp_863.txt b/test/cp_863.txt new file mode 100644 index 0000000..cba6911 --- /dev/null +++ b/test/cp_863.txt @@ -0,0 +1,14 @@ + !"#$%&'()*+,-./
+0123456789:;<=>?
+@ABCDEFGHIJKLMNO
+PQRSTUVWXYZ[\]^_
+`abcdefghijklmno
+pqrstuvwxyz{|}~
+................
+................
+...
+.
+....
+.........
+.
+.....
diff --git a/test/cp_865.txt b/test/cp_865.txt new file mode 100644 index 0000000..6148fe7 --- /dev/null +++ b/test/cp_865.txt @@ -0,0 +1,14 @@ + !"#$%&'()*+,-./
+0123456789:;<=>?
+@ABCDEFGHIJKLMNO
+PQRSTUVWXYZ[\]^_
+`abcdefghijklmno
+pqrstuvwxyz{|}~
+................
+................
+.........
+.......
+...........
+...........
+.
+....
diff --git a/test/dos.txt b/test/dos.txt new file mode 100644 index 0000000..fc8b4db --- /dev/null +++ b/test/dos.txt @@ -0,0 +1,6 @@ +hello
+world
+
+take
+a
+break
diff --git a/test/dos_bom.txt b/test/dos_bom.txt new file mode 100644 index 0000000..1216471 --- /dev/null +++ b/test/dos_bom.txt @@ -0,0 +1,6 @@ +hello
+world
+
+take
+a
+break
diff --git a/test/dos_dbl.txt b/test/dos_dbl.txt new file mode 100644 index 0000000..651b0b3 --- /dev/null +++ b/test/dos_dbl.txt @@ -0,0 +1,12 @@ +hello
+
+world
+
+
+
+take
+
+a
+
+break
+
diff --git a/test/iso.t b/test/iso.t new file mode 100755 index 0000000..69af494 --- /dev/null +++ b/test/iso.t @@ -0,0 +1,84 @@ +#!/usr/bin/perl + +# Requires perl-Test-Simple installation. +use Test::More tests => 12; + +$suffix = ""; +if (-e "../dos2unix.exe") { + $suffix = ".exe"; +} +$DOS2UNIX = "../dos2unix" . $suffix; +$MAC2UNIX = "../mac2unix" . $suffix; +$UNIX2DOS = "../unix2dos" . $suffix; +$UNIX2MAC = "../unix2mac" . $suffix; + +# To check for instance cp850 to iso88591 conversion +# you can do a visual check like this (on Windows). +# +# In a Windows Command Prompt, set font to Lucida Console. +# Then set the code page to 850: +# chcp 850 +# Display complete cp850 code page: +# type chardos.txt +# +# In a Cygwin Mintty terminal, under Options->Text +# set Character set to ISO-8859-1 +# Display converted character set: +# cat iso_850.txt +# +# You now see the same characters as in the Windows Command Prompt +# with the non-convertable characters replaced with a dot. + +system("$DOS2UNIX -v -437 -n chardos.txt out_unix.txt; cmp out_unix.txt iso_437.txt"); +ok( $? == 0, 'DOS to Unix conversion, cp437 to iso88591' ); + +system("$DOS2UNIX -v -850 -n chardos.txt out_unix.txt; cmp out_unix.txt iso_850.txt"); +ok( $? == 0, 'DOS to Unix conversion, cp850 to iso88591' ); + +system("$DOS2UNIX -v -860 -n chardos.txt out_unix.txt; cmp out_unix.txt iso_860.txt"); +ok( $? == 0, 'DOS to Unix conversion, cp860 to iso88591' ); + +system("$DOS2UNIX -v -863 -n chardos.txt out_unix.txt; cmp out_unix.txt iso_863.txt"); +ok( $? == 0, 'DOS to Unix conversion, cp863 to iso88591' ); + +system("$DOS2UNIX -v -865 -n chardos.txt out_unix.txt; cmp out_unix.txt iso_865.txt"); +ok( $? == 0, 'DOS to Unix conversion, cp865 to iso88591' ); + +system("$DOS2UNIX -v -1252 -n chardos.txt out_unix.txt; cmp out_unix.txt iso_1252.txt"); +ok( $? == 0, 'DOS to Unix conversion, cp1252 to iso88591' ); + + +# To check for instance iso88591 to cp850 conversion +# you can do a visual check like this (on Windows). +# +# In a Cygwin Mintty terminal, under Options->Text +# set Character set to ISO-8859-1 +# Display complete ISO-8859-1 character set: +# cat charunix.txt +# +# In a Windows Command Prompt, set font to Lucida Console. +# Then set the code page to 850: +# chcp 850 +# Display converted cp850 code page: +# type cp_850.txt +# +# You now see the same characters as in the Mintty terminal +# with the non-convertable characters replaced with a dot. + +system("$UNIX2DOS -v -437 -n charunix.txt out_dos.txt; cmp out_dos.txt cp_437.txt"); +ok( $? == 0, 'Unix to DOS conversion, iso88591 to cp437' ); + +system("$UNIX2DOS -v -850 -n charunix.txt out_dos.txt; cmp out_dos.txt cp_850.txt"); +ok( $? == 0, 'Unix to DOS conversion, iso88591 to cp850' ); + +system("$UNIX2DOS -v -860 -n charunix.txt out_dos.txt; cmp out_dos.txt cp_860.txt"); +ok( $? == 0, 'Unix to DOS conversion, iso88591 to cp860' ); + +system("$UNIX2DOS -v -863 -n charunix.txt out_dos.txt; cmp out_dos.txt cp_863.txt"); +ok( $? == 0, 'Unix to DOS conversion, iso88591 to cp863' ); + +system("$UNIX2DOS -v -865 -n charunix.txt out_dos.txt; cmp out_dos.txt cp_865.txt"); +ok( $? == 0, 'Unix to DOS conversion, iso88591 to cp865' ); + +system("$UNIX2DOS -v -1252 -n charunix.txt out_dos.txt; cmp out_dos.txt cp_1252.txt"); +ok( $? == 0, 'Unix to DOS conversion, iso88591 to cp1252' ); diff --git a/test/iso_1252.txt b/test/iso_1252.txt new file mode 100644 index 0000000..99b9fce --- /dev/null +++ b/test/iso_1252.txt @@ -0,0 +1,14 @@ + !"#$%&'()*+,-./ +0123456789:;<=>? +@ABCDEFGHIJKLMNO +PQRSTUVWXYZ[\]^_ +`abcdefghijklmno +pqrstuvwxyz{|}~ +................ +................ + + + + + + diff --git a/test/iso_437.txt b/test/iso_437.txt new file mode 100644 index 0000000..ea47879 --- /dev/null +++ b/test/iso_437.txt @@ -0,0 +1,14 @@ + !"#$%&'()*+,-./ +0123456789:;<=>? +@ABCDEFGHIJKLMNO +PQRSTUVWXYZ[\]^_ +`abcdefghijklmno +pqrstuvwxyz{|}~ + +ܢ.. +Ѫ. +................ +................ +................ +.............. +.......... diff --git a/test/iso_850.txt b/test/iso_850.txt new file mode 100644 index 0000000..4ed6141 --- /dev/null +++ b/test/iso_850.txt @@ -0,0 +1,14 @@ + !"#$%&'()*+,-./ +0123456789:;<=>? +@ABCDEFGHIJKLMNO +PQRSTUVWXYZ[\]^_ +`abcdefghijklmno +pqrstuvwxyz{|}~ + +. +Ѫ +.......... +............. +...... +յݯ +.. diff --git a/test/iso_860.txt b/test/iso_860.txt new file mode 100644 index 0000000..045bc44 --- /dev/null +++ b/test/iso_860.txt @@ -0,0 +1,14 @@ + !"#$%&'()*+,-./ +0123456789:;<=>? +@ABCDEFGHIJKLMNO +PQRSTUVWXYZ[\]^_ +`abcdefghijklmno +pqrstuvwxyz{|}~ + +ܢ. +ѪҬ +................ +................ +................ +.............. +.......... diff --git a/test/iso_863.txt b/test/iso_863.txt new file mode 100644 index 0000000..ec1e86f --- /dev/null +++ b/test/iso_863.txt @@ -0,0 +1,14 @@ + !"#$%&'()*+,-./ +0123456789:;<=>? +@ABCDEFGHIJKLMNO +PQRSTUVWXYZ[\]^_ +`abcdefghijklmno +pqrstuvwxyz{|}~ +. +ܢ. +. +................ +................ +................ +.............. +.......... diff --git a/test/iso_865.txt b/test/iso_865.txt new file mode 100644 index 0000000..4a4fad6 --- /dev/null +++ b/test/iso_865.txt @@ -0,0 +1,14 @@ + !"#$%&'()*+,-./ +0123456789:;<=>? +@ABCDEFGHIJKLMNO +PQRSTUVWXYZ[\]^_ +`abcdefghijklmno +pqrstuvwxyz{|}~ + +.. +Ѫ. +................ +................ +................ +.............. +.......... diff --git a/test/mac.txt b/test/mac.txt new file mode 100644 index 0000000..50c0936 --- /dev/null +++ b/test/mac.txt @@ -0,0 +1 @@ +hello
world
take
a
break
\ No newline at end of file diff --git a/test/mac_dbl.txt b/test/mac_dbl.txt new file mode 100644 index 0000000..ebff9eb --- /dev/null +++ b/test/mac_dbl.txt @@ -0,0 +1 @@ +hello
world
take
a
break
\ No newline at end of file diff --git a/test/misc.t b/test/misc.t new file mode 100755 index 0000000..b8da955 --- /dev/null +++ b/test/misc.t @@ -0,0 +1,92 @@ +#!/usr/bin/perl + +# Requires perl-Test-Simple installation. +use Test::More tests => 16; + +$suffix = ""; +if (-e "../dos2unix.exe") { + $suffix = ".exe"; +} +$DOS2UNIX = "../dos2unix" . $suffix; +$MAC2UNIX = "../mac2unix" . $suffix; +$UNIX2DOS = "../unix2dos" . $suffix; +$UNIX2MAC = "../unix2mac" . $suffix; + +$ENV{'LC_ALL'} = 'en_US.UTF-8'; + +system("$DOS2UNIX -v -7 -n chardos.txt out_unix.txt; cmp out_unix.txt charu7.txt"); +ok( $? == 0, '7bit'); + +system("$DOS2UNIX -v < dos.txt > out_unix.txt; cmp out_unix.txt unix.txt"); +ok( $? == 0, 'DOS to Unix conversion, stdin/out' ); + +system("$UNIX2DOS -v < unix.txt > out_dos.txt; cmp out_dos.txt dos.txt"); +ok( $? == 0, 'Unix to DOS conversion, stdin/out' ); + +system("cat utf16le.txt | $DOS2UNIX -v > out_unix.txt; cmp out_unix.txt utf8unix.txt"); +ok( $? == 0, 'UTF-16LE with BOM to UTF-8, stdin/out' ); + +system("cat utf16u.txt | $UNIX2DOS -v -u > out_dos.txt; cmp out_dos.txt utf16.txt"); +ok( $? == 0, 'UTF-16LE with BOM to UTF-16LE, stdin/out' ); + +system("$DOS2UNIX -v -n utf16len.txt out_bin.txt"); +# file out_bin.txt may not exist. +if (-e "out_bin.txt") { + $exists = "1"; +} else { + $exists = "0"; +} +ok( $exists == 0, 'dos2unix skip binary file.' ); + +system("$UNIX2DOS -v -n utf16len.txt out_bin.txt"); +# file out_bin.txt may not exist. +if (-e "out_bin.txt") { + $exists = "1"; +} else { + $exists = "0"; +} +ok( $exists == 0, 'unix2dos skip binary file.' ); + +system("$DOS2UNIX -v < utf16len.txt > out.txt"); +$result = ($? >> 8); +ok( $result == 1, 'Dos2unix stdio returns error on binary input.' ); + +system("$UNIX2DOS -v < utf16len.txt > out.txt"); +$result = ($? >> 8); +ok( $result == 1, 'Unix2dos stdio returns error on binary input.' ); + +system("rm -f out_forc.txt"); +system("$DOS2UNIX -v -n utf16len.txt out_forc.txt"); +# file out_bin.txt may not exist. +if (-e "out_forc.txt") { + $exists = "1"; +} else { + $exists = "0"; +} +ok( $exists == 0, 'dos2unix force binary file.' ); + +system("rm -f out_forc.txt"); +system("$UNIX2DOS -v -n utf16len.txt out_forc.txt"); +# file out_bin.txt may not exist. +if (-e "out_forc.txt") { + $exists = "1"; +} else { + $exists = "0"; +} +ok( $exists == 0, 'unix2dos force binary file.' ); + +system("$DOS2UNIX -v -7 -n utf16le.txt out_unix.txt chardos.txt out_u7.txt; cmp out_unix.txt utf8unix.txt"); +ok( $? == 0, '7bit disabled for utf16'); + +system("cmp out_u7.txt charu7.txt"); +ok( $? == 0, '7bit enabled again, dos2unix'); + +system("$UNIX2DOS -v -7 -n utf8unxb.txt out_dos.txt charunix.txt out_d7.txt; cmp out_dos.txt utf8dos.txt"); +ok( $? == 0, '7bit disabled for utf8 with BOM'); + +system("cmp out_d7.txt chard7.txt"); +ok( $? == 0, '7bit enabled again, unix2dos'); + +system("$UNIX2DOS -v -u -m -n unix.txt out_dos.txt; cmp out_dos.txt dos_bom.txt"); +ok( $? == 0, 'Option -u must not disable -m on ASCII input'); + diff --git a/test/mixed.txt b/test/mixed.txt new file mode 100644 index 0000000..51ac4c3 --- /dev/null +++ b/test/mixed.txt @@ -0,0 +1,13 @@ +hello +world + +take +a +break +hello
+world
+
+take
+a
+break
+hello
world
take
a
break
\ No newline at end of file diff --git a/test/mixedd2u.txt b/test/mixedd2u.txt new file mode 100644 index 0000000..45b737b --- /dev/null +++ b/test/mixedd2u.txt @@ -0,0 +1,13 @@ +hello +world + +take +a +break +hello +world + +take +a +break +hello
world
take
a
break
\ No newline at end of file diff --git a/test/mixedm2u.txt b/test/mixedm2u.txt new file mode 100644 index 0000000..f0aa9e4 --- /dev/null +++ b/test/mixedm2u.txt @@ -0,0 +1,18 @@ +hello +world + +take +a +break +hello
+world
+
+take
+a
+break
+hello +world + +take +a +break diff --git a/test/mixedu2d.txt b/test/mixedu2d.txt new file mode 100644 index 0000000..7f2d646 --- /dev/null +++ b/test/mixedu2d.txt @@ -0,0 +1,13 @@ +hello
+world
+
+take
+a
+break
+hello
+world
+
+take
+a
+break
+hello
world
take
a
break
\ No newline at end of file diff --git a/test/mixedu2m.txt b/test/mixedu2m.txt new file mode 100644 index 0000000..12c97a8 --- /dev/null +++ b/test/mixedu2m.txt @@ -0,0 +1,7 @@ +hello
world
take
a
break
hello
+world
+
+take
+a
+break
+hello
world
take
a
break
\ No newline at end of file diff --git a/test/symlink.t b/test/symlink.t new file mode 100755 index 0000000..6baa0a4 --- /dev/null +++ b/test/symlink.t @@ -0,0 +1,119 @@ +#!/usr/bin/perl + +# Requires perl-Test-Simple installation. +use Test::More tests => 14; + +$suffix = ""; +if (-e "../dos2unix.exe") { + $suffix = ".exe"; +} +$DOS2UNIX = "../dos2unix" . $suffix; +$MAC2UNIX = "../mac2unix" . $suffix; +$UNIX2DOS = "../unix2dos" . $suffix; +$UNIX2MAC = "../unix2mac" . $suffix; + +# dos2unix skip symlink + +system("cp -f dos.txt out_link.txt"); +system("rm -f in_link.txt; ln -s out_link.txt in_link.txt"); + +system("$DOS2UNIX -v in_link.txt; cmp out_link.txt dos.txt"); +ok( $? == 0, 'dos2unix, skip symlink, check symlink target.' ); + +if (-l "in_link.txt") { + $symlink = "1"; +} else { + $symlink = "0"; +} + +ok( $symlink == 1, 'dos2unix, skip symlink, check symlink.' ); + + +# dos2unix replace symlink + + +system("$DOS2UNIX -v -R in_link.txt; cmp out_link.txt dos.txt"); +ok( $? == 0, 'dos2unix, replace symlink, check symlink target.' ); + +if (-l "in_link.txt") { + $symlink = "1"; +} else { + $symlink = "0"; +} + +ok( $symlink == 0, 'dos2unix, replace symlink, check symlink.' ); + +system("cmp in_link.txt unix.txt"); +ok( $? == 0, 'dos2unix, replace symlink, check conversion.' ); + + +# dos2unix follow symlink + + +system("cp -f dos.txt out_link.txt"); +system("rm -f in_link.txt; ln -s out_link.txt in_link.txt"); + +system("$DOS2UNIX -v -F in_link.txt; cmp out_link.txt unix.txt"); +ok( $? == 0, 'dos2unix, follow symlink, check symlink target.' ); + +if (-l "in_link.txt") { + $symlink = "1"; +} else { + $symlink = "0"; +} + +ok( $symlink == 1, 'dos2unix, follow symlink, check symlink.' ); + + + +# unix2dos skip symlink + +system("cp -f unix.txt out_link.txt"); +system("rm -f in_link.txt; ln -s out_link.txt in_link.txt"); + +system("$UNIX2DOS -v in_link.txt; cmp out_link.txt unix.txt"); +ok( $? == 0, 'unix2dos, skip symlink, check symlink target.' ); + +if (-l "in_link.txt") { + $symlink = "1"; +} else { + $symlink = "0"; +} + +ok( $symlink == 1, 'unix2dos, skip symlink, check symlink.' ); + + +# unix2dos replace symlink + + +system("$UNIX2DOS -v -R in_link.txt; cmp out_link.txt unix.txt"); +ok( $? == 0, 'unix2dos, replace symlink, check symlink target.' ); + +if (-l "in_link.txt") { + $symlink = "1"; +} else { + $symlink = "0"; +} + +ok( $symlink == 0, 'unix2dos, replace symlink, check symlink.' ); + +system("cmp in_link.txt dos.txt"); +ok( $? == 0, 'unix2dos, replace symlink, check conversion.' ); + + +# unix2dos follow symlink + + +system("cp -f unix.txt out_link.txt"); +system("rm -f in_link.txt; ln -s out_link.txt in_link.txt"); + +system("$UNIX2DOS -v -F in_link.txt; cmp out_link.txt dos.txt"); +ok( $? == 0, 'unix2dos, follow symlink, check symlink target.' ); + +if (-l "in_link.txt") { + $symlink = "1"; +} else { + $symlink = "0"; +} + +ok( $symlink == 1, 'unix2dos, follow symlink, check symlink.' ); diff --git a/test/unix.txt b/test/unix.txt new file mode 100644 index 0000000..f8fe2cd --- /dev/null +++ b/test/unix.txt @@ -0,0 +1,6 @@ +hello +world + +take +a +break diff --git a/test/unix_bom.txt b/test/unix_bom.txt new file mode 100644 index 0000000..f33510b --- /dev/null +++ b/test/unix_bom.txt @@ -0,0 +1,6 @@ +hello +world + +take +a +break diff --git a/test/unix_dbl.txt b/test/unix_dbl.txt new file mode 100644 index 0000000..35f45a4 --- /dev/null +++ b/test/unix_dbl.txt @@ -0,0 +1,12 @@ +hello + +world + + + +take + +a + +break + diff --git a/test/utf16.t b/test/utf16.t new file mode 100755 index 0000000..3f4960a --- /dev/null +++ b/test/utf16.t @@ -0,0 +1,74 @@ +#!/usr/bin/perl + +# Requires perl-Test-Simple installation. +use Test::More tests => 19; + +$suffix = ""; +if (-e "../dos2unix.exe") { + $suffix = ".exe"; +} + +$system = `uname -s`; +if ($system =~ m/MINGW/) +{ + $unix=0; +} else { + $unix=1; +} + +$DOS2UNIX = "../dos2unix" . $suffix; +$MAC2UNIX = "../mac2unix" . $suffix; +$UNIX2DOS = "../unix2dos" . $suffix; +$UNIX2MAC = "../unix2mac" . $suffix; + +$ENV{'LC_ALL'} = 'en_US.UTF-8'; + +system("$DOS2UNIX -v -n utf16le.txt out_unix.txt; cmp out_unix.txt utf8unix.txt"); +ok( $? == 0, 'DOS UTF-16LE to Unix UTF-8' ); +system("$DOS2UNIX -v -n utf16be.txt out_unix.txt; cmp out_unix.txt utf8unix.txt"); +ok( $? == 0, 'DOS UTF-16BE to Unix UTF-8' ); +system("$UNIX2DOS -v -n utf16le.txt out_dos.txt; cmp out_dos.txt utf8dos.txt"); +ok( $? == 0, 'DOS UTF-16LE to DOS UTF-8' ); +system("$UNIX2DOS -v -n utf16be.txt out_dos.txt; cmp out_dos.txt utf8dos.txt"); +ok( $? == 0, 'DOS UTF-16BE to DOS UTF-8' ); + +system("$DOS2UNIX -v -ul -n utf16len.txt out_unix.txt; cmp out_unix.txt utf8unix.txt"); +ok( $? == 0, 'UTF-16LE without BOM to UTF-8' ); +system("$DOS2UNIX -v -ub -n utf16ben.txt out_unix.txt; cmp out_unix.txt utf8unix.txt"); +ok( $? == 0, 'UTF-16BE without BOM to UTF-8' ); +system("$DOS2UNIX -v -ul -n utf16be.txt out_unix.txt; cmp out_unix.txt utf8unix.txt"); +ok( $? == 0, 'BOM overrides -ul' ); +system("$DOS2UNIX -v -ub -n utf16le.txt out_unix.txt; cmp out_unix.txt utf8unix.txt"); +ok( $? == 0, 'BOM overrides -ub' ); + +system("$DOS2UNIX -v -b -n utf16le.txt out_unix.txt; cmp out_unix.txt utf8unxb.txt"); +ok( $? == 0, 'DOS UTF-16LE to Unix UTF-8, keep BOM' ); +system("$UNIX2DOS -v -r -n utf16le.txt out_dos.txt; cmp out_dos.txt utf8dosn.txt"); +ok( $? == 0, 'DOS UTF-16LE to DOS UTF-8, remove BOM' ); + +system("$MAC2UNIX -v -n utf16le.txt out_unix.txt; cmp out_unix.txt utf8dosn.txt"); +ok( $? == 0, 'mac2unix does not change utf16 DOS line breaks.' ); +system("$UNIX2MAC -v -n utf16le.txt out_mac.txt; cmp out_mac.txt utf8dos.txt"); +ok( $? == 0, 'unix2mac does not change utf16 DOS line breaks.' ); + +system("$UNIX2DOS -v -u -n utf16le.txt out_dos.txt; cmp out_dos.txt utf16le.txt"); +ok( $? == 0, 'DOS UTF-16LE to DOS UTF-16' ); +system("$UNIX2DOS -v -u -n utf16be.txt out_dos.txt; cmp out_dos.txt utf16be.txt"); +ok( $? == 0, 'DOS UTF-16BE to DOS UTF-16' ); +system("$DOS2UNIX -v -b -u -n utf16.txt out_unix.txt; cmp out_unix.txt utf16u.txt"); +ok( $? == 0, 'DOS UTF-16LE to Unix UTF-16' ); +system("$MAC2UNIX -v -b -u -n utf16m.txt out_unix.txt; cmp out_unix.txt utf16u.txt"); +ok( $? == 0, 'Mac UTF-16LE to Unix UTF-16' ); +system("$UNIX2DOS -v -b -u -n utf16u.txt out_dos.txt; cmp out_dos.txt utf16.txt"); +ok( $? == 0, 'Unix UTF-16 to DOS UTF-16LE' ); +system("$UNIX2MAC -v -b -u -n utf16u.txt out_mac.txt; cmp out_mac.txt utf16m.txt"); +ok( $? == 0, 'Unix UTF-16 to Mac UTF-16LE' ); + +$ENV{'LC_ALL'} = 'en_US.ISO-8859-1'; + +system("$DOS2UNIX -v -n utf16le.txt out_unix.txt"); +$result = ($? >> 8); +if ( $unix ) { $expected = 1; } else { $expected = 0 }; +print "UNIX" . $unix . "\n"; +print "EXP" . $expected . "\n"; +ok( $result == $expected, 'DOS UTF-16LE to Unix UTF-8, env is not UTF-8' ); diff --git a/test/utf16.txt b/test/utf16.txt Binary files differnew file mode 100755 index 0000000..868b5af --- /dev/null +++ b/test/utf16.txt diff --git a/test/utf16_be.txt b/test/utf16be.txt Binary files differindex 1be5db9..1be5db9 100755 --- a/test/utf16_be.txt +++ b/test/utf16be.txt diff --git a/test/utf16_be_nobom.txt b/test/utf16ben.txt Binary files differindex 2efd41a..2efd41a 100644 --- a/test/utf16_be_nobom.txt +++ b/test/utf16ben.txt diff --git a/test/utf16_le.txt b/test/utf16le.txt Binary files differindex db9f535..db9f535 100755 --- a/test/utf16_le.txt +++ b/test/utf16le.txt diff --git a/test/utf16_le_nobom.txt b/test/utf16len.txt Binary files differindex 1861149..1861149 100644 --- a/test/utf16_le_nobom.txt +++ b/test/utf16len.txt diff --git a/test/utf16m.txt b/test/utf16m.txt Binary files differnew file mode 100755 index 0000000..cd523f8 --- /dev/null +++ b/test/utf16m.txt diff --git a/test/utf16u.txt b/test/utf16u.txt Binary files differnew file mode 100755 index 0000000..2bd9b3b --- /dev/null +++ b/test/utf16u.txt diff --git a/test/utf8.t b/test/utf8.t new file mode 100755 index 0000000..454ed99 --- /dev/null +++ b/test/utf8.t @@ -0,0 +1,33 @@ +#!/usr/bin/perl + +# Requires perl-Test-Simple installation. +use Test::More tests => 6; + +$suffix = ""; +if (-e "../dos2unix.exe") { + $suffix = ".exe"; +} +$DOS2UNIX = "../dos2unix" . $suffix; +$MAC2UNIX = "../mac2unix" . $suffix; +$UNIX2DOS = "../unix2dos" . $suffix; +$UNIX2MAC = "../unix2mac" . $suffix; + +system("$DOS2UNIX -v -n dos_bom.txt out_unix.txt; cmp out_unix.txt unix.txt"); +ok( $? == 0, 'dos2unix removes BOM' ); + +system("$DOS2UNIX -v -b -n dos_bom.txt out_unix.txt; cmp out_unix.txt unix_bom.txt"); +ok( $? == 0, 'dos2unix -b keeps BOM' ); + +system("$DOS2UNIX -v -m -n dos.txt out_unix.txt; cmp out_unix.txt unix_bom.txt"); +ok( $? == 0, 'dos2unix -m adds BOM' ); + +system("$UNIX2DOS -v -n unix_bom.txt out_dos.txt; cmp out_dos.txt dos_bom.txt"); +ok( $? == 0, 'unix2dos keeps BOM' ); + +system("$UNIX2DOS -v -r -n unix_bom.txt out_dos.txt; cmp out_dos.txt dos.txt"); +ok( $? == 0, 'unix2dos -r removes BOM' ); + +system("$UNIX2DOS -v -m -n unix.txt out_dos.txt; cmp out_dos.txt dos_bom.txt"); +ok( $? == 0, 'unix2dos -m adds BOM' ); + + diff --git a/test/utf8dos.txt b/test/utf8dos.txt new file mode 100755 index 0000000..76afa4f --- /dev/null +++ b/test/utf8dos.txt @@ -0,0 +1,51 @@ +萨尔茨堡,奥地利共和国萨尔茨堡州的首府,2007年人口约15万,是继维也纳、格拉茨和林茨之后的奥地利第四大城市。萨尔茨堡位于奥地利的西部,是阿尔卑斯山脉的门庭,城市的建筑风格以巴洛克为主,城市的历史相当悠久,据史料记载,萨尔茨堡是现今奥地利管辖地域内历史最悠久的城市。萨尔茨堡是音乐天才莫扎特的出生地,莫扎特不到36年的短暂生命中超过一半的岁月是在萨尔茨堡度过的。萨尔茨堡也是指挥家赫伯特·冯·卡拉扬的故乡,电影《音乐之声》的拍摄地。萨尔茨堡老城在1996年被联合国教科文组织列入世界遗产名录。
+
+𝌆
+Unicode
+Scalar
+Value UTF-8 NCR
+U+2070E 𠜎 𠜎
+U+20731 𠜱 𠜱
+U+20779 𠝹 𠝹
+U+20C53 𠱓 𠱓
+U+20C78 𠱸 𠱸
+U+20C96 𠲖 𠲖
+U+20CCF 𠳏 𠳏
+U+20CD5 𠳕 𠳕
+U+20D15 𠴕 𠴕
+U+20D7C 𠵼 𠵼
+U+20D7F 𠵿 𠵿
+U+20E0E 𠸎 𠸎
+U+20E0F 𠸏 𠸏
+U+20E77 𠹷 𠹷
+U+20E9D 𠺝 𠺝
+U+20EA2 𠺢 𠺢
+U+20ED7 𠻗 𠻗
+U+20EF9 𠻹 𠻹
+U+20EFA 𠻺 𠻺
+U+20F2D 𠼭 𠼭
+U+20F2E 𠼮 𠼮
+U+20F4C 𠽌 𠽌
+U+20FB4 𠾴 𠾴
+U+20FBC 𠾼 𠾼
+U+20FEA 𠿪 𠿪
+U+2105C 𡁜 𡁜
+U+2106F 𡁯 𡁯
+U+21075 𡁵 𡁵
+U+21076 𡁶 𡁶
+U+2107B 𡁻 𡁻
+U+210C1 𡃁 𡃁
+U+210C9 𡃉 𡃉
+U+211D9 𡇙 𡇙
+U+220C7 𢃇 𢃇
+U+227B5 𢞵 𢞵
+U+22AD5 𢫕 𢫕
+U+22B43 𢭃 𢭃
+U+22BCA 𢯊 𢯊
+U+22C51 𢱑 𢱑
+U+22C55 𢱕 𢱕
+U+22CC2 𢳂 𢳂
+U+22D08 𢴈 𢴈
+U+22D4C 𢵌 𢵌
+U+22D67 𢵧 𢵧
+U+22EB3 𢺳 𢺳
\ No newline at end of file diff --git a/test/utf8dosn.txt b/test/utf8dosn.txt new file mode 100755 index 0000000..154aa88 --- /dev/null +++ b/test/utf8dosn.txt @@ -0,0 +1,51 @@ +萨尔茨堡,奥地利共和国萨尔茨堡州的首府,2007年人口约15万,是继维也纳、格拉茨和林茨之后的奥地利第四大城市。萨尔茨堡位于奥地利的西部,是阿尔卑斯山脉的门庭,城市的建筑风格以巴洛克为主,城市的历史相当悠久,据史料记载,萨尔茨堡是现今奥地利管辖地域内历史最悠久的城市。萨尔茨堡是音乐天才莫扎特的出生地,莫扎特不到36年的短暂生命中超过一半的岁月是在萨尔茨堡度过的。萨尔茨堡也是指挥家赫伯特·冯·卡拉扬的故乡,电影《音乐之声》的拍摄地。萨尔茨堡老城在1996年被联合国教科文组织列入世界遗产名录。
+
+𝌆
+Unicode
+Scalar
+Value UTF-8 NCR
+U+2070E 𠜎 𠜎
+U+20731 𠜱 𠜱
+U+20779 𠝹 𠝹
+U+20C53 𠱓 𠱓
+U+20C78 𠱸 𠱸
+U+20C96 𠲖 𠲖
+U+20CCF 𠳏 𠳏
+U+20CD5 𠳕 𠳕
+U+20D15 𠴕 𠴕
+U+20D7C 𠵼 𠵼
+U+20D7F 𠵿 𠵿
+U+20E0E 𠸎 𠸎
+U+20E0F 𠸏 𠸏
+U+20E77 𠹷 𠹷
+U+20E9D 𠺝 𠺝
+U+20EA2 𠺢 𠺢
+U+20ED7 𠻗 𠻗
+U+20EF9 𠻹 𠻹
+U+20EFA 𠻺 𠻺
+U+20F2D 𠼭 𠼭
+U+20F2E 𠼮 𠼮
+U+20F4C 𠽌 𠽌
+U+20FB4 𠾴 𠾴
+U+20FBC 𠾼 𠾼
+U+20FEA 𠿪 𠿪
+U+2105C 𡁜 𡁜
+U+2106F 𡁯 𡁯
+U+21075 𡁵 𡁵
+U+21076 𡁶 𡁶
+U+2107B 𡁻 𡁻
+U+210C1 𡃁 𡃁
+U+210C9 𡃉 𡃉
+U+211D9 𡇙 𡇙
+U+220C7 𢃇 𢃇
+U+227B5 𢞵 𢞵
+U+22AD5 𢫕 𢫕
+U+22B43 𢭃 𢭃
+U+22BCA 𢯊 𢯊
+U+22C51 𢱑 𢱑
+U+22C55 𢱕 𢱕
+U+22CC2 𢳂 𢳂
+U+22D08 𢴈 𢴈
+U+22D4C 𢵌 𢵌
+U+22D67 𢵧 𢵧
+U+22EB3 𢺳 𢺳
\ No newline at end of file diff --git a/test/utf8unix.txt b/test/utf8unix.txt new file mode 100755 index 0000000..8b93c0c --- /dev/null +++ b/test/utf8unix.txt @@ -0,0 +1,51 @@ +萨尔茨堡,奥地利共和国萨尔茨堡州的首府,2007年人口约15万,是继维也纳、格拉茨和林茨之后的奥地利第四大城市。萨尔茨堡位于奥地利的西部,是阿尔卑斯山脉的门庭,城市的建筑风格以巴洛克为主,城市的历史相当悠久,据史料记载,萨尔茨堡是现今奥地利管辖地域内历史最悠久的城市。萨尔茨堡是音乐天才莫扎特的出生地,莫扎特不到36年的短暂生命中超过一半的岁月是在萨尔茨堡度过的。萨尔茨堡也是指挥家赫伯特·冯·卡拉扬的故乡,电影《音乐之声》的拍摄地。萨尔茨堡老城在1996年被联合国教科文组织列入世界遗产名录。 + +𝌆 +Unicode +Scalar +Value UTF-8 NCR +U+2070E 𠜎 𠜎 +U+20731 𠜱 𠜱 +U+20779 𠝹 𠝹 +U+20C53 𠱓 𠱓 +U+20C78 𠱸 𠱸 +U+20C96 𠲖 𠲖 +U+20CCF 𠳏 𠳏 +U+20CD5 𠳕 𠳕 +U+20D15 𠴕 𠴕 +U+20D7C 𠵼 𠵼 +U+20D7F 𠵿 𠵿 +U+20E0E 𠸎 𠸎 +U+20E0F 𠸏 𠸏 +U+20E77 𠹷 𠹷 +U+20E9D 𠺝 𠺝 +U+20EA2 𠺢 𠺢 +U+20ED7 𠻗 𠻗 +U+20EF9 𠻹 𠻹 +U+20EFA 𠻺 𠻺 +U+20F2D 𠼭 𠼭 +U+20F2E 𠼮 𠼮 +U+20F4C 𠽌 𠽌 +U+20FB4 𠾴 𠾴 +U+20FBC 𠾼 𠾼 +U+20FEA 𠿪 𠿪 +U+2105C 𡁜 𡁜 +U+2106F 𡁯 𡁯 +U+21075 𡁵 𡁵 +U+21076 𡁶 𡁶 +U+2107B 𡁻 𡁻 +U+210C1 𡃁 𡃁 +U+210C9 𡃉 𡃉 +U+211D9 𡇙 𡇙 +U+220C7 𢃇 𢃇 +U+227B5 𢞵 𢞵 +U+22AD5 𢫕 𢫕 +U+22B43 𢭃 𢭃 +U+22BCA 𢯊 𢯊 +U+22C51 𢱑 𢱑 +U+22C55 𢱕 𢱕 +U+22CC2 𢳂 𢳂 +U+22D08 𢴈 𢴈 +U+22D4C 𢵌 𢵌 +U+22D67 𢵧 𢵧 +U+22EB3 𢺳 𢺳
\ No newline at end of file diff --git a/test/utf8unxb.txt b/test/utf8unxb.txt new file mode 100755 index 0000000..f670e9b --- /dev/null +++ b/test/utf8unxb.txt @@ -0,0 +1,51 @@ +萨尔茨堡,奥地利共和国萨尔茨堡州的首府,2007年人口约15万,是继维也纳、格拉茨和林茨之后的奥地利第四大城市。萨尔茨堡位于奥地利的西部,是阿尔卑斯山脉的门庭,城市的建筑风格以巴洛克为主,城市的历史相当悠久,据史料记载,萨尔茨堡是现今奥地利管辖地域内历史最悠久的城市。萨尔茨堡是音乐天才莫扎特的出生地,莫扎特不到36年的短暂生命中超过一半的岁月是在萨尔茨堡度过的。萨尔茨堡也是指挥家赫伯特·冯·卡拉扬的故乡,电影《音乐之声》的拍摄地。萨尔茨堡老城在1996年被联合国教科文组织列入世界遗产名录。 + +𝌆 +Unicode +Scalar +Value UTF-8 NCR +U+2070E 𠜎 𠜎 +U+20731 𠜱 𠜱 +U+20779 𠝹 𠝹 +U+20C53 𠱓 𠱓 +U+20C78 𠱸 𠱸 +U+20C96 𠲖 𠲖 +U+20CCF 𠳏 𠳏 +U+20CD5 𠳕 𠳕 +U+20D15 𠴕 𠴕 +U+20D7C 𠵼 𠵼 +U+20D7F 𠵿 𠵿 +U+20E0E 𠸎 𠸎 +U+20E0F 𠸏 𠸏 +U+20E77 𠹷 𠹷 +U+20E9D 𠺝 𠺝 +U+20EA2 𠺢 𠺢 +U+20ED7 𠻗 𠻗 +U+20EF9 𠻹 𠻹 +U+20EFA 𠻺 𠻺 +U+20F2D 𠼭 𠼭 +U+20F2E 𠼮 𠼮 +U+20F4C 𠽌 𠽌 +U+20FB4 𠾴 𠾴 +U+20FBC 𠾼 𠾼 +U+20FEA 𠿪 𠿪 +U+2105C 𡁜 𡁜 +U+2106F 𡁯 𡁯 +U+21075 𡁵 𡁵 +U+21076 𡁶 𡁶 +U+2107B 𡁻 𡁻 +U+210C1 𡃁 𡃁 +U+210C9 𡃉 𡃉 +U+211D9 𡇙 𡇙 +U+220C7 𢃇 𢃇 +U+227B5 𢞵 𢞵 +U+22AD5 𢫕 𢫕 +U+22B43 𢭃 𢭃 +U+22BCA 𢯊 𢯊 +U+22C51 𢱑 𢱑 +U+22C55 𢱕 𢱕 +U+22CC2 𢳂 𢳂 +U+22D08 𢴈 𢴈 +U+22D4C 𢵌 𢵌 +U+22D67 𢵧 𢵧 +U+22EB3 𢺳 𢺳
\ No newline at end of file |