diff options
author | DongHun Kwak <dh0128.kwak@samsung.com> | 2016-10-20 13:27:55 +0900 |
---|---|---|
committer | DongHun Kwak <dh0128.kwak@samsung.com> | 2016-10-20 13:27:58 +0900 |
commit | d4aefcf501a8b41f2a0d90c90d460251a26052a4 (patch) | |
tree | a2a288cf6ef41b7fd9aaaf19f0d9957afdc81354 /man/man1 | |
parent | 842ebc0fa15fca613e0c9fce10dd63fa09d27666 (diff) | |
download | dos2unix-d4aefcf501a8b41f2a0d90c90d460251a26052a4.tar.gz dos2unix-d4aefcf501a8b41f2a0d90c90d460251a26052a4.tar.bz2 dos2unix-d4aefcf501a8b41f2a0d90c90d460251a26052a4.zip |
Imported Upstream version 6.0.5
Change-Id: I8a6d17ecabae8b5ef78f76e88256f2635b481a1c
Signed-off-by: DongHun Kwak <dh0128.kwak@samsung.com>
Diffstat (limited to 'man/man1')
-rw-r--r-- | man/man1/Makefile | 88 | ||||
-rw-r--r-- | man/man1/dos2unix-man.pot | 1234 | ||||
-rw-r--r-- | man/man1/dos2unix.1 | 595 | ||||
-rw-r--r-- | man/man1/dos2unix.htm | 516 | ||||
-rw-r--r-- | man/man1/dos2unix.pod | 30 | ||||
-rw-r--r-- | man/man1/dos2unix.txt | 468 |
6 files changed, 2857 insertions, 74 deletions
diff --git a/man/man1/Makefile b/man/man1/Makefile index c542a86..33ff009 100644 --- a/man/man1/Makefile +++ b/man/man1/Makefile @@ -3,6 +3,7 @@ # Copyright information # # Copyright (C) 2010 Jari Aalto +# Copyright (C) 2010-2014 Erwin Waterlander # # License # @@ -48,6 +49,8 @@ ifneq (,) This makefile requires GNU Make. endif +.PRECIOUS: %.pod + # This variable *must* be set when calling PACKAGE ?= dos2unix @@ -64,79 +67,43 @@ MANPAGE = $(MANDEST)$(PACKAGE).$(MANSECT) POD2MAN = pod2man POD2MAN_FLAGS = -MAN_NONLATIN ?= 1 -PODFILES = $(wildcard ../*/man1/dos2unix.pod) +POFILES = $(wildcard ../*/man1/dos2unix.po) +PODFILES = $(patsubst %.po,%.pod,$(POFILES)) MAN_OBJECTS = dos2unix.1 $(patsubst %.pod,%.1,$(PODFILES)) -ifeq ($(MAN_NONLATIN),1) -PODFILES_NONLATIN = $(wildcard ../nonlatin/*/man1/dos2unix.pod) -MAN_OBJECTS_NONLATIN = dos2unix.1 $(patsubst %.pod,%.1,$(PODFILES_NONLATIN)) -endif -all: $(MAN_OBJECTS) $(MAN_OBJECTS_NONLATIN) +all: $(MAN_OBJECTS) dos2unix-man.pot -MAN_OBJECTS_NONLATIN = $(patsubst %.pod,%.1,$(PODFILES_NONLATIN)) -# The .pod files for Latin scripts are encoded in Latin-1 (ISO-8859-1/CP1252). -# UTF-8 man pages are not properly displayed in a Windows Command Prompt, -# therefore we keep them in Latin-1 format. +# Issues: # Cygwin 1.7 expects man pages to be in Latin-1 format. # The perl version of MinGW and DJGPP is 5.8.8. The pod2man command of perl -# 5.8.8 does not yet have the options -u, --utf8, and does support the =encoding -# command. +# 5.8.8 does not yet have the options -u, --utf8, and does support the +# =encoding command. The =encoding command and the --utf8 option are supported +# since perl 5.10.1. +# Erwin W. -# There are different *roff implementations. For now I assume we are using -# groff (GNU-roff) which is wide spread, default on Linux, Cygwin, MinGW, and -# DJGPP. 
The groff specific escape sequences may not work with other *roff -# implementations, but they display OK when used in a Windows Command Prompt -# using DJGPP's or MinGW's groff. Although sometimes characters are displayed -# without their diacritics. +# Dos2unix 6.0.5: Forget Latin-1, we do now only UTF-8. -# One day everything will be in UTF-8... -# For the English manual it all makes no difference, because the English text -# is plain ASCII. +dos2unix-man.pot : dos2unix.pod + po4a-updatepo -f pod -m $< -p $@ -%.1 : %.pod - # make target - create manual page from a *.pod page - podchecker $< - LC_CTYPE=C $(POD2MAN) $(POD2MAN_FLAGS) \ - --center="$(PODCENTER)" \ - --name="$(PACKAGE)" \ - --section="$(MANSECT)" \ - $< \ - | perl -p -e 's/[Pp]erl v[0-9.]+/$(PACKAGE)/;' \ - > $@ && \ - rm -f pod*.tmp -# fix for bug http://rt.perl.org/rt3//Public/Bug/Display.html?id=79410 -# "Pod2man creates wrong ROFF esc sequences for Latin-1 characters." -# Create groff (specific) escape sequences which work also on DOS/Windows. -# See also: https://rt.cpan.org/Public/Bug/Display.html?id=73804 - perl -pli.bak \ - -e s/A\\\\\\*\'/\\\\[\'A]/g\; \ - -e s/a\\\\\\*\'/\\\\[\'a]/g\; \ - -e s/E\\\\\\*\'/\\\\[\'E]/g\; \ - -e s/e\\\\\\*:/\\\\[:e]/g\; \ - -e s/e\\\\\\*\'/\\\\[\'e]/g\; \ - -e s/i\\\\\\*\'/\\\\[\'i]/g\; \ - -e s/n\\\\\\*~/\\\\[~n]/g\; \ - -e s/O\\\\\\*\'/\\\\[\'O]/g\; \ - -e s/o\\\\\\*\'/\\\\[\'o]/g\; \ - -e s/u\\\\\\*\'/\\\\[\'u]/g\; \ - $@ - - - -# The .pod files under the 'nonlatin' folder are encoded in UTF-8. -# For these manuals pod2man needs to support the =encoding command -# and the --utf8 option. This is supported since perl 5.10.1. -# Erwin W. +%.po : dos2unix.pod + po4a-updatepo -f pod -m $< -p $@ --msgmerge-opt --backup=numbered -nonlatin: $(MAN_OBJECTS_NONLATIN) +# Create pod files from po. +# Fix problem that =encoding is before =pod command. 
+%.pod : %.po + po4a-translate -f pod -m dos2unix.pod -p $< > $@ + sed -i '/=encoding UTF-8/d' $@ + sed -i 's/=pod/=pod\n\n=encoding UTF-8/' $@ +# An empty recipe for dos2unix.pod to break circular dependency. +dos2unix.pod : ; -../nonlatin/ru/%.1 : ../nonlatin/ru/%.pod +%.1 : %.pod # make target - create manual page from a *.pod page podchecker $< $(POD2MAN) $(POD2MAN_FLAGS) \ @@ -149,8 +116,11 @@ nonlatin: $(MAN_OBJECTS_NONLATIN) > $@ && \ rm -f pod*.tmp + + clean: rm -f $(MAN_OBJECTS) - rm -f $(MAN_OBJECTS_NONLATIN) + rm -f dos2unix-man.pot + rm -f $(PODFILES) # End of of Makefile part diff --git a/man/man1/dos2unix-man.pot b/man/man1/dos2unix-man.pot new file mode 100644 index 0000000..b2b2eee --- /dev/null +++ b/man/man1/dos2unix-man.pot @@ -0,0 +1,1234 @@ +# SOME DESCRIPTIVE TITLE +# Copyright (C) YEAR Free Software Foundation, Inc. +# This file is distributed under the same license as the PACKAGE package. +# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"POT-Creation-Date: 2014-04-17 22:37+0300\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n" +"Language-Team: LANGUAGE <LL@li.org>\n" +"Language: \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" + +#. type: =head1 +#: dos2unix.pod:52 +msgid "NAME" +msgstr "" + +#. type: textblock +#: dos2unix.pod:54 +msgid "dos2unix - DOS/Mac to Unix and vice versa text file format converter" +msgstr "" + +#. type: =head1 +#: dos2unix.pod:56 +msgid "SYNOPSIS" +msgstr "" + +#. type: verbatim +#: dos2unix.pod:58 +#, no-wrap +msgid "" +" dos2unix [options] [FILE ...] [-n INFILE OUTFILE ...]\n" +" unix2dos [options] [FILE ...] [-n INFILE OUTFILE ...]\n" +"\n" +msgstr "" + +#. type: =head1 +#: dos2unix.pod:61 +msgid "DESCRIPTION" +msgstr "" + +#. 
type: textblock +#: dos2unix.pod:63 +msgid "" +"The Dos2unix package includes utilities C<dos2unix> and C<unix2dos> to " +"convert plain text files in DOS or Mac format to Unix format and vice versa." +msgstr "" + +#. type: textblock +#: dos2unix.pod:66 +msgid "" +"In DOS/Windows text files a line break, also known as newline, is a " +"combination of two characters: a Carriage Return (CR) followed by a Line " +"Feed (LF). In Unix text files a line break is a single character: the Line " +"Feed (LF). In Mac text files, prior to Mac OS X, a line break was single " +"Carriage Return (CR) character. Nowadays Mac OS uses Unix style (LF) line " +"breaks." +msgstr "" + +#. type: textblock +#: dos2unix.pod:72 +msgid "" +"Besides line breaks Dos2unix can also convert the encoding of files. A few " +"DOS code pages can be converted to Unix Latin-1. And Windows Unicode " +"(UTF-16) files can be converted to Unix Unicode (UTF-8) files." +msgstr "" + +#. type: textblock +#: dos2unix.pod:76 +msgid "Binary files are automatically skipped, unless conversion is forced." +msgstr "" + +#. type: textblock +#: dos2unix.pod:78 +msgid "Non-regular files, such as directories and FIFOs, are automatically skipped." +msgstr "" + +#. type: textblock +#: dos2unix.pod:80 +msgid "" +"Symbolic links and their targets are by default kept untouched. Symbolic " +"links can optionally be replaced, or the output can be written to the " +"symbolic link target. Writing to a symbolic link target is not supported on " +"Windows." +msgstr "" + +#. type: textblock +#: dos2unix.pod:84 +msgid "" +"Dos2unix was modelled after dos2unix under SunOS/Solaris. There is one " +"important difference with the original SunOS/Solaris version. This version " +"does by default in-place conversion (old file mode), while the original " +"SunOS/Solaris version only supports paired conversion (new file mode). See " +"also options C<-o> and C<-n>." +msgstr "" + +#. 
type: =head1 +#: dos2unix.pod:90 +msgid "OPTIONS" +msgstr "" + +#. type: =item +#: dos2unix.pod:94 +msgid "B<-->" +msgstr "" + +#. type: textblock +#: dos2unix.pod:96 +msgid "" +"Treat all following options as file names. Use this option if you want to " +"convert files whose names start with a dash. For instance to convert a file " +"named \"-foo\", you can use this command:" +msgstr "" + +#. type: verbatim +#: dos2unix.pod:100 +#, no-wrap +msgid "" +" dos2unix -- -foo\n" +"\n" +msgstr "" + +#. type: textblock +#: dos2unix.pod:102 +msgid "Or in new file mode:" +msgstr "" + +#. type: verbatim +#: dos2unix.pod:104 +#, no-wrap +msgid "" +" dos2unix -n -- -foo out.txt\n" +"\n" +msgstr "" + +#. type: =item +#: dos2unix.pod:106 +msgid "B<-ascii>" +msgstr "" + +#. type: textblock +#: dos2unix.pod:108 +msgid "Convert only line breaks. This is the default conversion mode." +msgstr "" + +#. type: =item +#: dos2unix.pod:110 +msgid "B<-iso>" +msgstr "" + +#. type: textblock +#: dos2unix.pod:112 +msgid "" +"Conversion between DOS and ISO-8859-1 character set. See also section " +"CONVERSION MODES." +msgstr "" + +#. type: =item +#: dos2unix.pod:115 +msgid "B<-1252>" +msgstr "" + +#. type: textblock +#: dos2unix.pod:117 +msgid "Use Windows code page 1252 (Western European)." +msgstr "" + +#. type: =item +#: dos2unix.pod:119 +msgid "B<-437>" +msgstr "" + +#. type: textblock +#: dos2unix.pod:121 +msgid "" +"Use DOS code page 437 (US). This is the default code page used for ISO " +"conversion." +msgstr "" + +#. type: =item +#: dos2unix.pod:123 +msgid "B<-850>" +msgstr "" + +#. type: textblock +#: dos2unix.pod:125 +msgid "Use DOS code page 850 (Western European)." +msgstr "" + +#. type: =item +#: dos2unix.pod:127 +msgid "B<-860>" +msgstr "" + +#. type: textblock +#: dos2unix.pod:129 +msgid "Use DOS code page 860 (Portuguese)." +msgstr "" + +#. type: =item +#: dos2unix.pod:131 +msgid "B<-863>" +msgstr "" + +#. 
type: textblock +#: dos2unix.pod:133 +msgid "Use DOS code page 863 (French Canadian)." +msgstr "" + +#. type: =item +#: dos2unix.pod:135 +msgid "B<-865>" +msgstr "" + +#. type: textblock +#: dos2unix.pod:137 +msgid "Use DOS code page 865 (Nordic)." +msgstr "" + +#. type: =item +#: dos2unix.pod:139 +msgid "B<-7>" +msgstr "" + +#. type: textblock +#: dos2unix.pod:141 +msgid "Convert 8 bit characters to 7 bit space." +msgstr "" + +#. type: =item +#: dos2unix.pod:143 +msgid "B<-c, --convmode CONVMODE>" +msgstr "" + +#. type: textblock +#: dos2unix.pod:145 +msgid "" +"Set conversion mode. Where CONVMODE is one of: I<ascii>, I<7bit>, I<iso>, " +"I<mac> with ascii being the default." +msgstr "" + +#. type: =item +#: dos2unix.pod:149 +msgid "B<-f, --force>" +msgstr "" + +#. type: textblock +#: dos2unix.pod:151 +msgid "Force conversion of binary files." +msgstr "" + +#. type: =item +#: dos2unix.pod:153 +msgid "B<-h, --help>" +msgstr "" + +#. type: textblock +#: dos2unix.pod:155 +msgid "Display help and exit." +msgstr "" + +#. type: =item +#: dos2unix.pod:157 +msgid "B<-k, --keepdate>" +msgstr "" + +#. type: textblock +#: dos2unix.pod:159 +msgid "Keep the date stamp of output file same as input file." +msgstr "" + +#. type: =item +#: dos2unix.pod:161 +msgid "B<-L, --license>" +msgstr "" + +#. type: textblock +#: dos2unix.pod:163 +msgid "Display program's license." +msgstr "" + +#. type: =item +#: dos2unix.pod:165 +msgid "B<-l, --newline>" +msgstr "" + +#. type: textblock +#: dos2unix.pod:167 +msgid "Add additional newline." +msgstr "" + +#. type: textblock +#: dos2unix.pod:169 +msgid "" +"B<dos2unix>: Only DOS line breaks are changed to two Unix line breaks. In " +"Mac mode only Mac line breaks are changed to two Unix line breaks." +msgstr "" + +#. type: textblock +#: dos2unix.pod:173 +msgid "" +"B<unix2dos>: Only Unix line breaks are changed to two DOS line breaks. In " +"Mac mode Unix line breaks are changed to two Mac line breaks." +msgstr "" + +#. 
type: =item +#: dos2unix.pod:176 +msgid "B<-m, --add-bom>" +msgstr "" + +#. type: textblock +#: dos2unix.pod:178 +msgid "" +"Write an UTF-8 Byte Order Mark in the output file. Never use this option " +"when the output encoding is other than UTF-8. See also section UNICODE." +msgstr "" + +#. type: =item +#: dos2unix.pod:181 +msgid "B<-n, --newfile INFILE OUTFILE ...>" +msgstr "" + +#. type: textblock +#: dos2unix.pod:183 +msgid "" +"New file mode. Convert file INFILE and write output to file OUTFILE. File " +"names must be given in pairs and wildcard names should I<not> be used or you " +"I<will> lose your files." +msgstr "" + +#. type: textblock +#: dos2unix.pod:187 +msgid "" +"The person who starts the conversion in new file (paired) mode will be the " +"owner of the converted file. The read/write permissions of the new file will " +"be the permissions of the original file minus the umask(1) of the person who " +"runs the conversion." +msgstr "" + +#. type: =item +#: dos2unix.pod:192 +msgid "B<-o, --oldfile FILE ...>" +msgstr "" + +#. type: textblock +#: dos2unix.pod:194 +msgid "" +"Old file mode. Convert file FILE and overwrite output to it. The program " +"defaults to run in this mode. Wildcard names may be used." +msgstr "" + +#. type: textblock +#: dos2unix.pod:197 +msgid "" +"In old file (in-place) mode the converted file gets the same owner, group, " +"and read/write permissions as the original file. Also when the file is " +"converted by another user who has write permissions on the file (e.g. user " +"root). The conversion will be aborted when it is not possible to preserve " +"the original values. Change of owner could mean that the original owner is " +"not able to read the file any more. Change of group could be a security " +"risk, the file could be made readable for persons for whom it is not " +"intended. Preservation of owner, group, and read/write permissions is only " +"supported on Unix." +msgstr "" + +#. 
type: =item +#: dos2unix.pod:206 +msgid "B<-q, --quiet>" +msgstr "" + +#. type: textblock +#: dos2unix.pod:208 +msgid "" +"Quiet mode. Suppress all warnings and messages. The return value is zero. " +"Except when wrong command-line options are used." +msgstr "" + +#. type: =item +#: dos2unix.pod:211 +msgid "B<-s, --safe>" +msgstr "" + +#. type: textblock +#: dos2unix.pod:213 +msgid "Skip binary files (default)." +msgstr "" + +#. type: =item +#: dos2unix.pod:215 +msgid "B<-ul, --assume-utf16le>" +msgstr "" + +#. type: textblock +#: dos2unix.pod:217 +msgid "Assume that the input file format is UTF-16LE." +msgstr "" + +#. type: textblock +#: dos2unix.pod:219 +msgid "" +"When there is a Byte Order Mark in the input file the BOM has priority over " +"this option." +msgstr "" + +#. type: textblock +#: dos2unix.pod:222 +msgid "" +"When you made a wrong assumption (the input file was not in UTF-16LE format) " +"and the conversion succeeded, you will get an UTF-8 output file with wrong " +"text. You can undo the wrong conversion with iconv(1) by converting the " +"UTF-8 output file back to UTF-16LE. This will bring back the original file." +msgstr "" + +#. type: textblock +#: dos2unix.pod:227 +msgid "" +"The assumption of UTF-16LE works as a I<conversion mode>. By switching to " +"the default I<ascii> mode the UTF-16LE assumption is turned off." +msgstr "" + +#. type: =item +#: dos2unix.pod:230 +msgid "B<-ub, --assume-utf16be>" +msgstr "" + +#. type: textblock +#: dos2unix.pod:232 +msgid "Assume that the input file format is UTF-16BE." +msgstr "" + +#. type: textblock +#: dos2unix.pod:234 +msgid "This option works the same as option C<-ul>." +msgstr "" + +#. type: =item +#: dos2unix.pod:236 +msgid "B<-F, --follow-symlink>" +msgstr "" + +#. type: textblock +#: dos2unix.pod:238 +msgid "Follow symbolic links and convert the targets." +msgstr "" + +#. type: =item +#: dos2unix.pod:240 +msgid "B<-R, --replace-symlink>" +msgstr "" + +#. 
type: textblock +#: dos2unix.pod:242 +msgid "" +"Replace symbolic links with converted files (original target files remain " +"unchanged)." +msgstr "" + +#. type: =item +#: dos2unix.pod:245 +msgid "B<-S, --skip-symlink>" +msgstr "" + +#. type: textblock +#: dos2unix.pod:247 +msgid "Keep symbolic links and targets unchanged (default)." +msgstr "" + +#. type: =item +#: dos2unix.pod:249 +msgid "B<-V, --version>" +msgstr "" + +#. type: textblock +#: dos2unix.pod:251 +msgid "Display version information and exit." +msgstr "" + +#. type: =head1 +#: dos2unix.pod:255 +msgid "MAC MODE" +msgstr "" + +#. type: textblock +#: dos2unix.pod:257 +msgid "" +"In normal mode line breaks are converted from DOS to Unix and vice versa. " +"Mac line breaks are not converted." +msgstr "" + +#. type: textblock +#: dos2unix.pod:260 +msgid "" +"In Mac mode line breaks are converted from Mac to Unix and vice versa. DOS " +"line breaks are not changed." +msgstr "" + +#. type: textblock +#: dos2unix.pod:263 +msgid "" +"To run in Mac mode use the command-line option C<-c mac> or use the commands " +"C<mac2unix> or C<unix2mac>." +msgstr "" + +#. type: =head1 +#: dos2unix.pod:266 +msgid "CONVERSION MODES" +msgstr "" + +#. type: =item +#: dos2unix.pod:270 +msgid "B<ascii>" +msgstr "" + +#. type: textblock +#: dos2unix.pod:272 +msgid "" +"In mode C<ascii> only line breaks are converted. This is the default " +"conversion mode." +msgstr "" + +#. type: textblock +#: dos2unix.pod:275 +msgid "" +"Although the name of this mode is ASCII, which is a 7 bit standard, the " +"actual mode is 8 bit. Use always this mode when converting Unicode UTF-8 " +"files." +msgstr "" + +#. type: =item +#: dos2unix.pod:279 +msgid "B<7bit>" +msgstr "" + +#. type: textblock +#: dos2unix.pod:281 +msgid "" +"In this mode all 8 bit non-ASCII characters (with values from 128 to 255) " +"are converted to a 7 bit space." +msgstr "" + +#. type: =item +#: dos2unix.pod:284 +msgid "B<iso>" +msgstr "" + +#. 
type: textblock +#: dos2unix.pod:286 +msgid "" +"Characters are converted between a DOS character set (code page) and ISO " +"character set ISO-8859-1 (Latin-1) on Unix. DOS characters without " +"ISO-8859-1 equivalent, for which conversion is not possible, are converted " +"to a dot. The same counts for ISO-8859-1 characters without DOS counterpart." +msgstr "" + +#. type: textblock +#: dos2unix.pod:291 +msgid "" +"When only option C<-iso> is used dos2unix will try to determine the active " +"code page. When this is not possible dos2unix will use default code page " +"CP437, which is mainly used in the USA. To force a specific code page use " +"options C<-437> (US), C<-850> (Western European), C<-860> (Portuguese), " +"C<-863> (French Canadian), or C<-865> (Nordic). Windows code page CP1252 " +"(Western European) is also supported with option C<-1252>. For other code " +"pages use dos2unix in combination with iconv(1). Iconv can convert between " +"a long list of character encodings." +msgstr "" + +#. type: textblock +#: dos2unix.pod:300 +msgid "" +"Never use ISO conversion on Unicode text files. It will corrupt UTF-8 " +"encoded files." +msgstr "" + +#. type: textblock +#: dos2unix.pod:302 +msgid "Some examples:" +msgstr "" + +#. type: textblock +#: dos2unix.pod:304 +msgid "Convert from DOS default code page to Unix Latin-1" +msgstr "" + +#. type: verbatim +#: dos2unix.pod:306 +#, no-wrap +msgid "" +" dos2unix -iso -n in.txt out.txt\n" +"\n" +msgstr "" + +#. type: textblock +#: dos2unix.pod:308 +msgid "Convert from DOS CP850 to Unix Latin-1" +msgstr "" + +#. type: verbatim +#: dos2unix.pod:310 +#, no-wrap +msgid "" +" dos2unix -850 -n in.txt out.txt\n" +"\n" +msgstr "" + +#. type: textblock +#: dos2unix.pod:312 +msgid "Convert from Windows CP1252 to Unix Latin-1" +msgstr "" + +#. type: verbatim +#: dos2unix.pod:314 +#, no-wrap +msgid "" +" dos2unix -1252 -n in.txt out.txt\n" +"\n" +msgstr "" + +#. 
type: textblock +#: dos2unix.pod:316 +msgid "Convert from Windows CP1252 to Unix UTF-8 (Unicode)" +msgstr "" + +#. type: verbatim +#: dos2unix.pod:318 +#, no-wrap +msgid "" +" iconv -f CP1252 -t UTF-8 in.txt | dos2unix > out.txt\n" +"\n" +msgstr "" + +#. type: textblock +#: dos2unix.pod:320 +msgid "Convert from Unix Latin-1 to DOS default code page" +msgstr "" + +#. type: verbatim +#: dos2unix.pod:322 +#, no-wrap +msgid "" +" unix2dos -iso -n in.txt out.txt\n" +"\n" +msgstr "" + +#. type: textblock +#: dos2unix.pod:324 +msgid "Convert from Unix Latin-1 to DOS CP850" +msgstr "" + +#. type: verbatim +#: dos2unix.pod:326 +#, no-wrap +msgid "" +" unix2dos -850 -n in.txt out.txt\n" +"\n" +msgstr "" + +#. type: textblock +#: dos2unix.pod:328 +msgid "Convert from Unix Latin-1 to Windows CP1252" +msgstr "" + +#. type: verbatim +#: dos2unix.pod:330 +#, no-wrap +msgid "" +" unix2dos -1252 -n in.txt out.txt\n" +"\n" +msgstr "" + +#. type: textblock +#: dos2unix.pod:332 +msgid "Convert from Unix UTF-8 (Unicode) to Windows CP1252" +msgstr "" + +#. type: verbatim +#: dos2unix.pod:334 +#, no-wrap +msgid "" +" unix2dos < in.txt | iconv -f UTF-8 -t CP1252 > out.txt\n" +"\n" +msgstr "" + +#. type: textblock +#: dos2unix.pod:336 +msgid "" +"See also L<http://czyborra.com/charsets/codepages.html> and " +"L<http://czyborra.com/charsets/iso8859.html>." +msgstr "" + +#. type: =head1 +#: dos2unix.pod:341 +msgid "UNICODE" +msgstr "" + +#. type: =head2 +#: dos2unix.pod:343 +msgid "Encodings" +msgstr "" + +#. type: textblock +#: dos2unix.pod:345 +msgid "" +"There exist different Unicode encodings. On Unix and Linux Unicode files are " +"typically encoded in UTF-8 encoding. On Windows Unicode text files can be " +"encoded in UTF-8, UTF-16, or UTF-16 big endian, but are mostly encoded in " +"UTF-16 format." +msgstr "" + +#. type: =head2 +#: dos2unix.pod:350 +msgid "Conversion" +msgstr "" + +#. 
type: textblock +#: dos2unix.pod:352 +msgid "" +"Unicode text files can have DOS, Unix or Mac line breaks, like regular text " +"files." +msgstr "" + +#. type: textblock +#: dos2unix.pod:355 +msgid "" +"All versions of dos2unix and unix2dos can convert UTF-8 encoded files, " +"because UTF-8 was designed for backward compatibility with ASCII." +msgstr "" + +#. type: textblock +#: dos2unix.pod:358 +msgid "" +"Dos2unix and unix2dos with Unicode UTF-16 support, can read little and big " +"endian UTF-16 encoded text files. To see if dos2unix was built with UTF-16 " +"support type C<dos2unix -V>." +msgstr "" + +#. type: textblock +#: dos2unix.pod:362 +msgid "" +"The Windows versions of dos2unix and unix2dos convert UTF-16 encoded files " +"always to UTF-8 encoded files. Unix versions of dos2unix/unix2dos convert " +"UTF-16 encoded files to the locale character encoding when it is set to " +"UTF-8. Use the locale(1) command to find out what the locale character " +"encoding is." +msgstr "" + +#. type: textblock +#: dos2unix.pod:367 +msgid "" +"Because UTF-8 formatted text files are well supported on both Windows and " +"Unix, dos2unix and unix2dos have no option to write UTF-16 files. All UTF-16 " +"characters can be encoded in UTF-8. Conversion from UTF-16 to UTF-8 is " +"without loss. UTF-16 files will be skipped on Unix when the locale character " +"encoding is not UTF-8, to prevent accidental loss of text. When an UTF-16 to " +"UTF-8 conversion error occurs, for instance when the UTF-16 input file " +"contains an error, the file will be skipped." +msgstr "" + +#. type: textblock +#: dos2unix.pod:375 +msgid "ISO and 7-bit mode conversion do not work on UTF-16 files." +msgstr "" + +#. type: =head2 +#: dos2unix.pod:377 +msgid "Byte Order Mark" +msgstr "" + +#. type: textblock +#: dos2unix.pod:379 +msgid "" +"On Windows Unicode text files typically have a Byte Order Mark (BOM), " +"because many Windows programs (including Notepad) add BOMs by default. 
See " +"also L<http://en.wikipedia.org/wiki/Byte_order_mark>." +msgstr "" + +#. type: textblock +#: dos2unix.pod:383 +msgid "" +"On Unix Unicode files typically don't have a BOM. It is assumed that text " +"files are encoded in the locale character encoding." +msgstr "" + +#. type: textblock +#: dos2unix.pod:386 +msgid "" +"Dos2unix can only detect if a file is in UTF-16 format if the file has a " +"BOM. When an UTF-16 file doesn't have a BOM, dos2unix will see the file as " +"a binary file." +msgstr "" + +#. type: textblock +#: dos2unix.pod:390 +msgid "Use option C<-ul> or C<-ub> to convert an UTF-16 file without BOM." +msgstr "" + +#. type: textblock +#: dos2unix.pod:392 +msgid "Dos2unix never writes a BOM in the output file, unless you use option C<-m>." +msgstr "" + +#. type: textblock +#: dos2unix.pod:394 +msgid "" +"Unix2dos writes a BOM in the output file when the input file has a BOM, or " +"when option C<-m> is used." +msgstr "" + +#. type: =head2 +#: dos2unix.pod:397 +msgid "Unicode examples" +msgstr "" + +#. type: textblock +#: dos2unix.pod:399 +msgid "Convert from Windows UTF-16 (with BOM) to Unix UTF-8" +msgstr "" + +#. type: verbatim +#: dos2unix.pod:401 +#, no-wrap +msgid "" +" dos2unix -n in.txt out.txt\n" +"\n" +msgstr "" + +#. type: textblock +#: dos2unix.pod:403 +msgid "Convert from Windows UTF-16LE (without BOM) to Unix UTF-8" +msgstr "" + +#. type: verbatim +#: dos2unix.pod:405 +#, no-wrap +msgid "" +" dos2unix -ul -n in.txt out.txt\n" +"\n" +msgstr "" + +#. type: textblock +#: dos2unix.pod:407 +msgid "Convert from Unix UTF-8 to Windows UTF-8 with BOM" +msgstr "" + +#. type: verbatim +#: dos2unix.pod:409 +#, no-wrap +msgid "" +" unix2dos -m -n in.txt out.txt\n" +"\n" +msgstr "" + +#. type: textblock +#: dos2unix.pod:411 +msgid "Convert from Unix UTF-8 to Windows UTF-16" +msgstr "" + +#. type: verbatim +#: dos2unix.pod:413 +#, no-wrap +msgid "" +" unix2dos < in.txt | iconv -f UTF-8 -t UTF-16 > out.txt\n" +"\n" +msgstr "" + +#. 
type: =head1 +#: dos2unix.pod:415 +msgid "EXAMPLES" +msgstr "" + +#. type: textblock +#: dos2unix.pod:417 +msgid "Read input from 'stdin' and write output to 'stdout'." +msgstr "" + +#. type: verbatim +#: dos2unix.pod:419 +#, no-wrap +msgid "" +" dos2unix\n" +" dos2unix -l -c mac\n" +"\n" +msgstr "" + +#. type: textblock +#: dos2unix.pod:422 +msgid "Convert and replace a.txt. Convert and replace b.txt." +msgstr "" + +#. type: verbatim +#: dos2unix.pod:424 +#, no-wrap +msgid "" +" dos2unix a.txt b.txt\n" +" dos2unix -o a.txt b.txt\n" +"\n" +msgstr "" + +#. type: textblock +#: dos2unix.pod:427 +msgid "Convert and replace a.txt in ascii conversion mode." +msgstr "" + +#. type: verbatim +#: dos2unix.pod:429 +#, no-wrap +msgid "" +" dos2unix a.txt\n" +"\n" +msgstr "" + +#. type: textblock +#: dos2unix.pod:431 +msgid "" +"Convert and replace a.txt in ascii conversion mode. Convert and replace " +"b.txt in 7bit conversion mode." +msgstr "" + +#. type: verbatim +#: dos2unix.pod:434 +#, no-wrap +msgid "" +" dos2unix a.txt -c 7bit b.txt\n" +" dos2unix -c ascii a.txt -c 7bit b.txt\n" +" dos2unix -ascii a.txt -7 b.txt\n" +"\n" +msgstr "" + +#. type: textblock +#: dos2unix.pod:438 +msgid "Convert a.txt from Mac to Unix format." +msgstr "" + +#. type: verbatim +#: dos2unix.pod:440 +#, no-wrap +msgid "" +" dos2unix -c mac a.txt\n" +" mac2unix a.txt\n" +"\n" +msgstr "" + +#. type: textblock +#: dos2unix.pod:443 +msgid "Convert a.txt from Unix to Mac format." +msgstr "" + +#. type: verbatim +#: dos2unix.pod:445 +#, no-wrap +msgid "" +" unix2dos -c mac a.txt\n" +" unix2mac a.txt\n" +"\n" +msgstr "" + +#. type: textblock +#: dos2unix.pod:448 +msgid "Convert and replace a.txt while keeping original date stamp." +msgstr "" + +#. type: verbatim +#: dos2unix.pod:450 +#, no-wrap +msgid "" +" dos2unix -k a.txt\n" +" dos2unix -k -o a.txt\n" +"\n" +msgstr "" + +#. type: textblock +#: dos2unix.pod:453 +msgid "Convert a.txt and write to e.txt." +msgstr "" + +#. 
type: verbatim +#: dos2unix.pod:455 +#, no-wrap +msgid "" +" dos2unix -n a.txt e.txt\n" +"\n" +msgstr "" + +#. type: textblock +#: dos2unix.pod:457 +msgid "Convert a.txt and write to e.txt, keep date stamp of e.txt same as a.txt." +msgstr "" + +#. type: verbatim +#: dos2unix.pod:459 +#, no-wrap +msgid "" +" dos2unix -k -n a.txt e.txt\n" +"\n" +msgstr "" + +#. type: textblock +#: dos2unix.pod:461 +msgid "Convert and replace a.txt. Convert b.txt and write to e.txt." +msgstr "" + +#. type: verbatim +#: dos2unix.pod:463 +#, no-wrap +msgid "" +" dos2unix a.txt -n b.txt e.txt\n" +" dos2unix -o a.txt -n b.txt e.txt\n" +"\n" +msgstr "" + +#. type: textblock +#: dos2unix.pod:466 +msgid "" +"Convert c.txt and write to e.txt. Convert and replace a.txt. Convert and " +"replace b.txt. Convert d.txt and write to f.txt." +msgstr "" + +#. type: verbatim +#: dos2unix.pod:469 +#, no-wrap +msgid "" +" dos2unix -n c.txt e.txt -o a.txt b.txt -n d.txt f.txt\n" +"\n" +msgstr "" + +#. type: =head1 +#: dos2unix.pod:471 +msgid "RECURSIVE CONVERSION" +msgstr "" + +#. type: textblock +#: dos2unix.pod:473 +msgid "" +"Use dos2unix in combination with the find(1) and xargs(1) commands to " +"recursively convert text files in a directory tree structure. For instance " +"to convert all .txt files in the directory tree under the current directory " +"type:" +msgstr "" + +#. type: verbatim +#: dos2unix.pod:477 +#, no-wrap +msgid "" +" find . -name *.txt |xargs dos2unix\n" +"\n" +msgstr "" + +#. type: =head1 +#: dos2unix.pod:479 +msgid "LOCALIZATION" +msgstr "" + +#. type: =item +#: dos2unix.pod:483 +msgid "B<LANG>" +msgstr "" + +#. type: textblock +#: dos2unix.pod:485 +msgid "" +"The primary language is selected with the environment variable LANG. The " +"LANG variable consists out of several parts. The first part is in small " +"letters the language code. The second is optional and is the country code in " +"capital letters, preceded with an underscore. 
There is also an optional " +"third part: character encoding, preceded with a dot. A few examples for " +"POSIX standard type shells:" +msgstr "" + +#. type: verbatim +#: dos2unix.pod:492 +#, no-wrap +msgid "" +" export LANG=nl Dutch\n" +" export LANG=nl_NL Dutch, The Netherlands\n" +" export LANG=nl_BE Dutch, Belgium\n" +" export LANG=es_ES Spanish, Spain\n" +" export LANG=es_MX Spanish, Mexico\n" +" export LANG=en_US.iso88591 English, USA, Latin-1 encoding\n" +" export LANG=en_GB.UTF-8 English, UK, UTF-8 encoding\n" +"\n" +msgstr "" + +#. type: textblock +#: dos2unix.pod:500 +msgid "" +"For a complete list of language and country codes see the gettext manual: " +"L<http://www.gnu.org/software/gettext/manual/gettext.html#Language-Codes>" +msgstr "" + +#. type: textblock +#: dos2unix.pod:503 +msgid "" +"On Unix systems you can use to command locale(1) to get locale specific " +"information." +msgstr "" + +#. type: =item +#: dos2unix.pod:506 +msgid "B<LANGUAGE>" +msgstr "" + +#. type: textblock +#: dos2unix.pod:508 +msgid "" +"With the LANGUAGE environment variable you can specify a priority list of " +"languages, separated by colons. Dos2unix gives preference to LANGUAGE over " +"LANG. For instance, first Dutch and then German: C<LANGUAGE=nl:de>. You " +"have to first enable localization, by setting LANG (or LC_ALL) to a value " +"other than \"C\", before you can use a language priority list through the " +"LANGUAGE variable. See also the gettext manual: " +"L<http://www.gnu.org/software/gettext/manual/gettext.html#The-LANGUAGE-variable>" +msgstr "" + +#. type: textblock +#: dos2unix.pod:516 +msgid "" +"If you select a language which is not available you will get the standard " +"English messages." +msgstr "" + +#. type: =item +#: dos2unix.pod:520 +msgid "B<DOS2UNIX_LOCALEDIR>" +msgstr "" + +#. type: textblock +#: dos2unix.pod:522 +msgid "" +"With the environment variable DOS2UNIX_LOCALEDIR the LOCALEDIR set during " +"compilation can be overruled. 
LOCALEDIR is used to find the language " +"files. The GNU default value is C</usr/local/share/locale>. Option " +"B<--version> will display the LOCALEDIR that is used." +msgstr "" + +#. type: textblock +#: dos2unix.pod:527 +msgid "Example (POSIX shell):" +msgstr "" + +#. type: verbatim +#: dos2unix.pod:529 +#, no-wrap +msgid "" +" export DOS2UNIX_LOCALEDIR=$HOME/share/locale\n" +"\n" +msgstr "" + +#. type: =head1 +#: dos2unix.pod:534 +msgid "RETURN VALUE" +msgstr "" + +#. type: textblock +#: dos2unix.pod:536 +msgid "" +"On success, zero is returned. When a system error occurs the last system " +"error will be returned. For other errors 1 is returned." +msgstr "" + +#. type: textblock +#: dos2unix.pod:539 +msgid "" +"The return value is always zero in quiet mode, except when wrong " +"command-line options are used." +msgstr "" + +#. type: =head1 +#: dos2unix.pod:542 +msgid "STANDARDS" +msgstr "" + +#. type: textblock +#: dos2unix.pod:544 +msgid "L<http://en.wikipedia.org/wiki/Text_file>" +msgstr "" + +#. type: textblock +#: dos2unix.pod:546 +msgid "L<http://en.wikipedia.org/wiki/Carriage_return>" +msgstr "" + +#. type: textblock +#: dos2unix.pod:548 +msgid "L<http://en.wikipedia.org/wiki/Newline>" +msgstr "" + +#. type: textblock +#: dos2unix.pod:550 +msgid "L<http://en.wikipedia.org/wiki/Unicode>" +msgstr "" + +#. type: =head1 +#: dos2unix.pod:552 +msgid "AUTHORS" +msgstr "" + +#. type: textblock +#: dos2unix.pod:554 +msgid "" +"Benjamin Lin - <blin@socs.uts.edu.au> Bernd Johannes Wuebben (mac2unix mode) " +"- <wuebben@kde.org>, Christian Wurll (add extra newline) - " +"<wurll@ira.uka.de>, Erwin Waterlander - <waterlan@xs4all.nl> (Maintainer)" +msgstr "" + +#. type: textblock +#: dos2unix.pod:559 +msgid "Project page: L<http://waterlan.home.xs4all.nl/dos2unix.html>" +msgstr "" + +#. type: textblock +#: dos2unix.pod:561 +msgid "SourceForge page: L<http://sourceforge.net/projects/dos2unix/>" +msgstr "" + +#. 
type: textblock +#: dos2unix.pod:563 +msgid "Freecode: L<http://freecode.com/projects/dos2unix>" +msgstr "" + +#. type: =head1 +#: dos2unix.pod:565 +msgid "SEE ALSO" +msgstr "" + +#. type: textblock +#: dos2unix.pod:567 +msgid "file(1) find(1) iconv(1) locale(1) xargs(1)" +msgstr "" diff --git a/man/man1/dos2unix.1 b/man/man1/dos2unix.1 new file mode 100644 index 0000000..6ff221d --- /dev/null +++ b/man/man1/dos2unix.1 @@ -0,0 +1,595 @@ +.\" Automatically generated by Pod::Man 2.27 (Pod::Simple 3.20) +.\" +.\" Standard preamble: +.\" ======================================================================== +.de Sp \" Vertical space (when we can't use .PP) +.if t .sp .5v +.if n .sp +.. +.de Vb \" Begin verbatim text +.ft CW +.nf +.ne \\$1 +.. +.de Ve \" End verbatim text +.ft R +.fi +.. +.\" Set up some character translations and predefined strings. \*(-- will +.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left +.\" double quote, and \*(R" will give a right double quote. \*(C+ will +.\" give a nicer C++. Capital omega is used to do unbreakable dashes and +.\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff, +.\" nothing in troff, for use with C<>. +.tr \(*W- +.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' +.ie n \{\ +. ds -- \(*W- +. ds PI pi +. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch +. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch +. ds L" "" +. ds R" "" +. ds C` "" +. ds C' "" +'br\} +.el\{\ +. ds -- \|\(em\| +. ds PI \(*p +. ds L" `` +. ds R" '' +. ds C` +. ds C' +'br\} +.\" +.\" Escape single quotes in literal strings from groff's Unicode transform. +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.\" +.\" If the F register is turned on, we'll generate index entries on stderr for +.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index +.\" entries marked with X<> in POD. 
Of course, you'll have to process the +.\" output yourself in some meaningful fashion. +.\" +.\" Avoid warning from groff about undefined register 'F'. +.de IX +.. +.nr rF 0 +.if \n(.g .if rF .nr rF 1 +.if (\n(rF:(\n(.g==0)) \{ +. if \nF \{ +. de IX +. tm Index:\\$1\t\\n%\t"\\$2" +.. +. if !\nF==2 \{ +. nr % 0 +. nr F 2 +. \} +. \} +.\} +.rr rF +.\" ======================================================================== +.\" +.IX Title "dos2unix 1" +.TH dos2unix 1 "2014-03-21" "dos2unix" "2014-04-17" +.\" For nroff, turn off justification. Always turn off hyphenation; it makes +.\" way too many mistakes in technical documents. +.if n .ad l +.nh +.SH "NAME" +dos2unix \- DOS/Mac to Unix and vice versa text file format converter +.SH "SYNOPSIS" +.IX Header "SYNOPSIS" +.Vb 2 +\& dos2unix [options] [FILE ...] [\-n INFILE OUTFILE ...] +\& unix2dos [options] [FILE ...] [\-n INFILE OUTFILE ...] +.Ve +.SH "DESCRIPTION" +.IX Header "DESCRIPTION" +The Dos2unix package includes utilities \f(CW\*(C`dos2unix\*(C'\fR and \f(CW\*(C`unix2dos\*(C'\fR to convert +plain text files in \s-1DOS\s0 or Mac format to Unix format and vice versa. +.PP +In DOS/Windows text files a line break, also known as newline, is a combination +of two characters: a Carriage Return (\s-1CR\s0) followed by a Line Feed (\s-1LF\s0). In Unix +text files a line break is a single character: the Line Feed (\s-1LF\s0). In Mac text +files, prior to Mac \s-1OS X,\s0 a line break was single Carriage Return (\s-1CR\s0) +character. Nowadays Mac \s-1OS\s0 uses Unix style (\s-1LF\s0) line breaks. +.PP +Besides line breaks Dos2unix can also convert the encoding of files. A few +\&\s-1DOS\s0 code pages can be converted to Unix Latin\-1. And Windows Unicode (\s-1UTF\-16\s0) +files can be converted to Unix Unicode (\s-1UTF\-8\s0) files. +.PP +Binary files are automatically skipped, unless conversion is forced. +.PP +Non-regular files, such as directories and FIFOs, are automatically skipped. 
+.PP +Symbolic links and their targets are by default kept untouched. Symbolic links +can optionally be replaced, or the output can be written to the symbolic link +target. Writing to a symbolic link target is not supported on Windows. +.PP +Dos2unix was modelled after dos2unix under SunOS/Solaris. There is one +important difference with the original SunOS/Solaris version. This version does +by default in-place conversion (old file mode), while the original +SunOS/Solaris version only supports paired conversion (new file mode). See also +options \f(CW\*(C`\-o\*(C'\fR and \f(CW\*(C`\-n\*(C'\fR. +.SH "OPTIONS" +.IX Header "OPTIONS" +.IP "\fB\-\-\fR" 4 +.IX Item "--" +Treat all following options as file names. Use this option if you want to +convert files whose names start with a dash. For instance to convert +a file named \*(L"\-foo\*(R", you can use this command: +.Sp +.Vb 1 +\& dos2unix \-\- \-foo +.Ve +.Sp +Or in new file mode: +.Sp +.Vb 1 +\& dos2unix \-n \-\- \-foo out.txt +.Ve +.IP "\fB\-ascii\fR" 4 +.IX Item "-ascii" +Convert only line breaks. This is the default conversion mode. +.IP "\fB\-iso\fR" 4 +.IX Item "-iso" +Conversion between \s-1DOS\s0 and \s-1ISO\-8859\-1\s0 character set. See also section +\&\s-1CONVERSION MODES.\s0 +.IP "\fB\-1252\fR" 4 +.IX Item "-1252" +Use Windows code page 1252 (Western European). +.IP "\fB\-437\fR" 4 +.IX Item "-437" +Use \s-1DOS\s0 code page 437 (\s-1US\s0). This is the default code page used for \s-1ISO\s0 conversion. +.IP "\fB\-850\fR" 4 +.IX Item "-850" +Use \s-1DOS\s0 code page 850 (Western European). +.IP "\fB\-860\fR" 4 +.IX Item "-860" +Use \s-1DOS\s0 code page 860 (Portuguese). +.IP "\fB\-863\fR" 4 +.IX Item "-863" +Use \s-1DOS\s0 code page 863 (French Canadian). +.IP "\fB\-865\fR" 4 +.IX Item "-865" +Use \s-1DOS\s0 code page 865 (Nordic). +.IP "\fB\-7\fR" 4 +.IX Item "-7" +Convert 8 bit characters to 7 bit space. 
+.IP "\fB\-c, \-\-convmode \s-1CONVMODE\s0\fR" 4 +.IX Item "-c, --convmode CONVMODE" +Set conversion mode. Where \s-1CONVMODE\s0 is one of: +\&\fIascii\fR, \fI7bit\fR, \fIiso\fR, \fImac\fR +with ascii being the default. +.IP "\fB\-f, \-\-force\fR" 4 +.IX Item "-f, --force" +Force conversion of binary files. +.IP "\fB\-h, \-\-help\fR" 4 +.IX Item "-h, --help" +Display help and exit. +.IP "\fB\-k, \-\-keepdate\fR" 4 +.IX Item "-k, --keepdate" +Keep the date stamp of output file same as input file. +.IP "\fB\-L, \-\-license\fR" 4 +.IX Item "-L, --license" +Display program's license. +.IP "\fB\-l, \-\-newline\fR" 4 +.IX Item "-l, --newline" +Add additional newline. +.Sp +\&\fBdos2unix\fR: Only \s-1DOS\s0 line breaks are changed to two Unix line breaks. +In Mac mode only Mac line breaks are changed to two Unix +line breaks. +.Sp +\&\fBunix2dos\fR: Only Unix line breaks are changed to two \s-1DOS\s0 line breaks. +In Mac mode Unix line breaks are changed to two Mac line breaks. +.IP "\fB\-m, \-\-add\-bom\fR" 4 +.IX Item "-m, --add-bom" +Write an \s-1UTF\-8\s0 Byte Order Mark in the output file. Never use this option when +the output encoding is other than \s-1UTF\-8.\s0 See also section \s-1UNICODE.\s0 +.IP "\fB\-n, \-\-newfile \s-1INFILE OUTFILE ...\s0\fR" 4 +.IX Item "-n, --newfile INFILE OUTFILE ..." +New file mode. Convert file \s-1INFILE\s0 and write output to file \s-1OUTFILE.\s0 +File names must be given in pairs and wildcard names should \fInot\fR be +used or you \fIwill\fR lose your files. +.Sp +The person who starts the conversion in new file (paired) mode will be the owner +of the converted file. The read/write permissions of the new file will be the +permissions of the original file minus the \fIumask\fR\|(1) of the person who runs the +conversion. +.IP "\fB\-o, \-\-oldfile \s-1FILE ...\s0\fR" 4 +.IX Item "-o, --oldfile FILE ..." +Old file mode. Convert file \s-1FILE\s0 and overwrite output to it. The program +defaults to run in this mode. 
Wildcard names may be used. +.Sp +In old file (in-place) mode the converted file gets the same owner, group, and +read/write permissions as the original file. Also when the file is converted by +another user who has write permissions on the file (e.g. user root). The +conversion will be aborted when it is not possible to preserve the original +values. Change of owner could mean that the original owner is not able to read +the file any more. Change of group could be a security risk, the file could be +made readable for persons for whom it is not intended. Preservation of owner, +group, and read/write permissions is only supported on Unix. +.IP "\fB\-q, \-\-quiet\fR" 4 +.IX Item "-q, --quiet" +Quiet mode. Suppress all warnings and messages. The return value is zero. +Except when wrong command-line options are used. +.IP "\fB\-s, \-\-safe\fR" 4 +.IX Item "-s, --safe" +Skip binary files (default). +.IP "\fB\-ul, \-\-assume\-utf16le\fR" 4 +.IX Item "-ul, --assume-utf16le" +Assume that the input file format is \s-1UTF\-16LE.\s0 +.Sp +When there is a Byte Order Mark in the input file the \s-1BOM\s0 has priority over +this option. +.Sp +When you made a wrong assumption (the input file was not in \s-1UTF\-16LE\s0 format) and +the conversion succeeded, you will get an \s-1UTF\-8\s0 output file with wrong text. +You can undo the wrong conversion with \fIiconv\fR\|(1) by converting the \s-1UTF\-8\s0 output +file back to \s-1UTF\-16LE.\s0 This will bring back the original file. +.Sp +The assumption of \s-1UTF\-16LE\s0 works as a \fIconversion mode\fR. By switching to the default +\&\fIascii\fR mode the \s-1UTF\-16LE\s0 assumption is turned off. +.IP "\fB\-ub, \-\-assume\-utf16be\fR" 4 +.IX Item "-ub, --assume-utf16be" +Assume that the input file format is \s-1UTF\-16BE.\s0 +.Sp +This option works the same as option \f(CW\*(C`\-ul\*(C'\fR. +.IP "\fB\-F, \-\-follow\-symlink\fR" 4 +.IX Item "-F, --follow-symlink" +Follow symbolic links and convert the targets. 
+.IP "\fB\-R, \-\-replace\-symlink\fR" 4 +.IX Item "-R, --replace-symlink" +Replace symbolic links with converted files +(original target files remain unchanged). +.IP "\fB\-S, \-\-skip\-symlink\fR" 4 +.IX Item "-S, --skip-symlink" +Keep symbolic links and targets unchanged (default). +.IP "\fB\-V, \-\-version\fR" 4 +.IX Item "-V, --version" +Display version information and exit. +.SH "MAC MODE" +.IX Header "MAC MODE" +In normal mode line breaks are converted from \s-1DOS\s0 to Unix and vice versa. +Mac line breaks are not converted. +.PP +In Mac mode line breaks are converted from Mac to Unix and vice versa. \s-1DOS\s0 +line breaks are not changed. +.PP +To run in Mac mode use the command-line option \f(CW\*(C`\-c mac\*(C'\fR or use the +commands \f(CW\*(C`mac2unix\*(C'\fR or \f(CW\*(C`unix2mac\*(C'\fR. +.SH "CONVERSION MODES" +.IX Header "CONVERSION MODES" +.IP "\fBascii\fR" 4 +.IX Item "ascii" +In mode \f(CW\*(C`ascii\*(C'\fR only line breaks are converted. This is the default conversion +mode. +.Sp +Although the name of this mode is \s-1ASCII,\s0 which is a 7 bit standard, the +actual mode is 8 bit. Use always this mode when converting Unicode \s-1UTF\-8\s0 +files. +.IP "\fB7bit\fR" 4 +.IX Item "7bit" +In this mode all 8 bit non-ASCII characters (with values from 128 to 255) +are converted to a 7 bit space. +.IP "\fBiso\fR" 4 +.IX Item "iso" +Characters are converted between a \s-1DOS\s0 character set (code page) and \s-1ISO\s0 +character set \s-1ISO\-8859\-1 \s0(Latin\-1) on Unix. \s-1DOS\s0 characters without \s-1ISO\-8859\-1\s0 +equivalent, for which conversion is not possible, are converted to a dot. The +same counts for \s-1ISO\-8859\-1\s0 characters without \s-1DOS\s0 counterpart. +.Sp +When only option \f(CW\*(C`\-iso\*(C'\fR is used dos2unix will try to determine the active code +page. When this is not possible dos2unix will use default code page \s-1CP437,\s0 +which is mainly used in the \s-1USA. 
\s0 To force a specific code page use options +\&\f(CW\*(C`\-437\*(C'\fR (\s-1US\s0), \f(CW\*(C`\-850\*(C'\fR (Western European), \f(CW\*(C`\-860\*(C'\fR (Portuguese), \f(CW\*(C`\-863\*(C'\fR (French +Canadian), or \f(CW\*(C`\-865\*(C'\fR (Nordic). Windows code page \s-1CP1252 \s0(Western European) is +also supported with option \f(CW\*(C`\-1252\*(C'\fR. For other code pages use dos2unix in +combination with \fIiconv\fR\|(1). Iconv can convert between a long list of character +encodings. +.Sp +Never use \s-1ISO\s0 conversion on Unicode text files. It will corrupt \s-1UTF\-8\s0 encoded files. +.Sp +Some examples: +.Sp +Convert from \s-1DOS\s0 default code page to Unix Latin\-1 +.Sp +.Vb 1 +\& dos2unix \-iso \-n in.txt out.txt +.Ve +.Sp +Convert from \s-1DOS CP850\s0 to Unix Latin\-1 +.Sp +.Vb 1 +\& dos2unix \-850 \-n in.txt out.txt +.Ve +.Sp +Convert from Windows \s-1CP1252\s0 to Unix Latin\-1 +.Sp +.Vb 1 +\& dos2unix \-1252 \-n in.txt out.txt +.Ve +.Sp +Convert from Windows \s-1CP1252\s0 to Unix \s-1UTF\-8 \s0(Unicode) +.Sp +.Vb 1 +\& iconv \-f CP1252 \-t UTF\-8 in.txt | dos2unix > out.txt +.Ve +.Sp +Convert from Unix Latin\-1 to \s-1DOS\s0 default code page +.Sp +.Vb 1 +\& unix2dos \-iso \-n in.txt out.txt +.Ve +.Sp +Convert from Unix Latin\-1 to \s-1DOS CP850\s0 +.Sp +.Vb 1 +\& unix2dos \-850 \-n in.txt out.txt +.Ve +.Sp +Convert from Unix Latin\-1 to Windows \s-1CP1252\s0 +.Sp +.Vb 1 +\& unix2dos \-1252 \-n in.txt out.txt +.Ve +.Sp +Convert from Unix \s-1UTF\-8 \s0(Unicode) to Windows \s-1CP1252\s0 +.Sp +.Vb 1 +\& unix2dos < in.txt | iconv \-f UTF\-8 \-t CP1252 > out.txt +.Ve +.Sp +See also <http://czyborra.com/charsets/codepages.html> +and <http://czyborra.com/charsets/iso8859.html>. +.SH "UNICODE" +.IX Header "UNICODE" +.SS "Encodings" +.IX Subsection "Encodings" +There exist different Unicode encodings. On Unix and Linux Unicode files are +typically encoded in \s-1UTF\-8\s0 encoding. 
On Windows Unicode text files can be +encoded in \s-1UTF\-8, UTF\-16,\s0 or \s-1UTF\-16\s0 big endian, but are mostly encoded in +\&\s-1UTF\-16\s0 format. +.SS "Conversion" +.IX Subsection "Conversion" +Unicode text files can have \s-1DOS,\s0 Unix or Mac line breaks, like regular text +files. +.PP +All versions of dos2unix and unix2dos can convert \s-1UTF\-8\s0 encoded files, because +\&\s-1UTF\-8\s0 was designed for backward compatibility with \s-1ASCII.\s0 +.PP +Dos2unix and unix2dos with Unicode \s-1UTF\-16\s0 support, can read little and big +endian \s-1UTF\-16\s0 encoded text files. To see if dos2unix was built with \s-1UTF\-16\s0 +support type \f(CW\*(C`dos2unix \-V\*(C'\fR. +.PP +The Windows versions of dos2unix and unix2dos convert \s-1UTF\-16\s0 encoded files +always to \s-1UTF\-8\s0 encoded files. Unix versions of dos2unix/unix2dos convert +\&\s-1UTF\-16\s0 encoded files to the locale character encoding when it is set to \s-1UTF\-8.\s0 +Use the \fIlocale\fR\|(1) command to find out what the locale character encoding is. +.PP +Because \s-1UTF\-8\s0 formatted text files are well supported on both Windows and Unix, +dos2unix and unix2dos have no option to write \s-1UTF\-16\s0 files. All \s-1UTF\-16\s0 +characters can be encoded in \s-1UTF\-8.\s0 Conversion from \s-1UTF\-16\s0 to \s-1UTF\-8\s0 is without +loss. \s-1UTF\-16\s0 files will be skipped on Unix when the locale character encoding +is not \s-1UTF\-8,\s0 to prevent accidental loss of text. When an \s-1UTF\-16\s0 to \s-1UTF\-8\s0 +conversion error occurs, for instance when the \s-1UTF\-16\s0 input file contains +an error, the file will be skipped. +.PP +\&\s-1ISO\s0 and 7\-bit mode conversion do not work on \s-1UTF\-16\s0 files. +.SS "Byte Order Mark" +.IX Subsection "Byte Order Mark" +On Windows Unicode text files typically have a Byte Order Mark (\s-1BOM\s0), because +many Windows programs (including Notepad) add BOMs by default. See also +<http://en.wikipedia.org/wiki/Byte_order_mark>. 
+.PP +On Unix Unicode files typically don't have a \s-1BOM.\s0 It is assumed that text files +are encoded in the locale character encoding. +.PP +Dos2unix can only detect if a file is in \s-1UTF\-16\s0 format if the file has a \s-1BOM.\s0 +When an \s-1UTF\-16\s0 file doesn't have a \s-1BOM,\s0 dos2unix will see the file as a binary +file. +.PP +Use option \f(CW\*(C`\-ul\*(C'\fR or \f(CW\*(C`\-ub\*(C'\fR to convert an \s-1UTF\-16\s0 file without \s-1BOM.\s0 +.PP +Dos2unix never writes a \s-1BOM\s0 in the output file, unless you use option \f(CW\*(C`\-m\*(C'\fR. +.PP +Unix2dos writes a \s-1BOM\s0 in the output file when the input file has a \s-1BOM,\s0 or +when option \f(CW\*(C`\-m\*(C'\fR is used. +.SS "Unicode examples" +.IX Subsection "Unicode examples" +Convert from Windows \s-1UTF\-16 \s0(with \s-1BOM\s0) to Unix \s-1UTF\-8\s0 +.PP +.Vb 1 +\& dos2unix \-n in.txt out.txt +.Ve +.PP +Convert from Windows \s-1UTF\-16LE \s0(without \s-1BOM\s0) to Unix \s-1UTF\-8\s0 +.PP +.Vb 1 +\& dos2unix \-ul \-n in.txt out.txt +.Ve +.PP +Convert from Unix \s-1UTF\-8\s0 to Windows \s-1UTF\-8\s0 with \s-1BOM\s0 +.PP +.Vb 1 +\& unix2dos \-m \-n in.txt out.txt +.Ve +.PP +Convert from Unix \s-1UTF\-8\s0 to Windows \s-1UTF\-16\s0 +.PP +.Vb 1 +\& unix2dos < in.txt | iconv \-f UTF\-8 \-t UTF\-16 > out.txt +.Ve +.SH "EXAMPLES" +.IX Header "EXAMPLES" +Read input from 'stdin' and write output to 'stdout'. +.PP +.Vb 2 +\& dos2unix +\& dos2unix \-l \-c mac +.Ve +.PP +Convert and replace a.txt. Convert and replace b.txt. +.PP +.Vb 2 +\& dos2unix a.txt b.txt +\& dos2unix \-o a.txt b.txt +.Ve +.PP +Convert and replace a.txt in ascii conversion mode. +.PP +.Vb 1 +\& dos2unix a.txt +.Ve +.PP +Convert and replace a.txt in ascii conversion mode. +Convert and replace b.txt in 7bit conversion mode. +.PP +.Vb 3 +\& dos2unix a.txt \-c 7bit b.txt +\& dos2unix \-c ascii a.txt \-c 7bit b.txt +\& dos2unix \-ascii a.txt \-7 b.txt +.Ve +.PP +Convert a.txt from Mac to Unix format. 
+.PP +.Vb 2 +\& dos2unix \-c mac a.txt +\& mac2unix a.txt +.Ve +.PP +Convert a.txt from Unix to Mac format. +.PP +.Vb 2 +\& unix2dos \-c mac a.txt +\& unix2mac a.txt +.Ve +.PP +Convert and replace a.txt while keeping original date stamp. +.PP +.Vb 2 +\& dos2unix \-k a.txt +\& dos2unix \-k \-o a.txt +.Ve +.PP +Convert a.txt and write to e.txt. +.PP +.Vb 1 +\& dos2unix \-n a.txt e.txt +.Ve +.PP +Convert a.txt and write to e.txt, keep date stamp of e.txt same as a.txt. +.PP +.Vb 1 +\& dos2unix \-k \-n a.txt e.txt +.Ve +.PP +Convert and replace a.txt. Convert b.txt and write to e.txt. +.PP +.Vb 2 +\& dos2unix a.txt \-n b.txt e.txt +\& dos2unix \-o a.txt \-n b.txt e.txt +.Ve +.PP +Convert c.txt and write to e.txt. Convert and replace a.txt. +Convert and replace b.txt. Convert d.txt and write to f.txt. +.PP +.Vb 1 +\& dos2unix \-n c.txt e.txt \-o a.txt b.txt \-n d.txt f.txt +.Ve +.SH "RECURSIVE CONVERSION" +.IX Header "RECURSIVE CONVERSION" +Use dos2unix in combination with the \fIfind\fR\|(1) and \fIxargs\fR\|(1) commands to +recursively convert text files in a directory tree structure. For instance to +convert all .txt files in the directory tree under the current directory type: +.PP +.Vb 1 +\& find . \-name \*(Aq*.txt\*(Aq |xargs dos2unix +.Ve +.SH "LOCALIZATION" +.IX Header "LOCALIZATION" +.IP "\fB\s-1LANG\s0\fR" 4 +.IX Item "LANG" +The primary language is selected with the environment variable \s-1LANG.\s0 The \s-1LANG\s0 +variable consists of several parts. The first part is in small letters the +language code. The second is optional and is the country code in capital +letters, preceded with an underscore. There is also an optional third part: +character encoding, preceded with a dot. 
A few examples for \s-1POSIX\s0 standard type +shells: +.Sp +.Vb 7 +\& export LANG=nl Dutch +\& export LANG=nl_NL Dutch, The Netherlands +\& export LANG=nl_BE Dutch, Belgium +\& export LANG=es_ES Spanish, Spain +\& export LANG=es_MX Spanish, Mexico +\& export LANG=en_US.iso88591 English, USA, Latin\-1 encoding +\& export LANG=en_GB.UTF\-8 English, UK, UTF\-8 encoding +.Ve +.Sp +For a complete list of language and country codes see the gettext manual: +<http://www.gnu.org/software/gettext/manual/gettext.html#Language\-Codes> +.Sp +On Unix systems you can use the command \fIlocale\fR\|(1) to get locale specific +information. +.IP "\fB\s-1LANGUAGE\s0\fR" 4 +.IX Item "LANGUAGE" +With the \s-1LANGUAGE\s0 environment variable you can specify a priority list of +languages, separated by colons. Dos2unix gives preference to \s-1LANGUAGE\s0 over \s-1LANG.\s0 +For instance, first Dutch and then German: \f(CW\*(C`LANGUAGE=nl:de\*(C'\fR. You have to first +enable localization, by setting \s-1LANG \s0(or \s-1LC_ALL\s0) to a value other than +\&\*(L"C\*(R", before you can use a language priority list through the \s-1LANGUAGE\s0 +variable. See also the gettext manual: +<http://www.gnu.org/software/gettext/manual/gettext.html#The\-LANGUAGE\-variable> +.Sp +If you select a language which is not available you will get the +standard English messages. +.IP "\fB\s-1DOS2UNIX_LOCALEDIR\s0\fR" 4 +.IX Item "DOS2UNIX_LOCALEDIR" +With the environment variable \s-1DOS2UNIX_LOCALEDIR\s0 the \s-1LOCALEDIR\s0 set +during compilation can be overruled. \s-1LOCALEDIR\s0 is used to find the +language files. The \s-1GNU\s0 default value is \f(CW\*(C`/usr/local/share/locale\*(C'\fR. +Option \fB\-\-version\fR will display the \s-1LOCALEDIR\s0 that is used. +.Sp +Example (\s-1POSIX\s0 shell): +.Sp +.Vb 1 +\& export DOS2UNIX_LOCALEDIR=$HOME/share/locale +.Ve +.SH "RETURN VALUE" +.IX Header "RETURN VALUE" +On success, zero is returned. When a system error occurs the last system error will be +returned. 
For other errors 1 is returned. +.PP +The return value is always zero in quiet mode, except when wrong command-line options +are used. +.SH "STANDARDS" +.IX Header "STANDARDS" +<http://en.wikipedia.org/wiki/Text_file> +.PP +<http://en.wikipedia.org/wiki/Carriage_return> +.PP +<http://en.wikipedia.org/wiki/Newline> +.PP +<http://en.wikipedia.org/wiki/Unicode> +.SH "AUTHORS" +.IX Header "AUTHORS" +Benjamin Lin \- <blin@socs.uts.edu.au> +Bernd Johannes Wuebben (mac2unix mode) \- <wuebben@kde.org>, +Christian Wurll (add extra newline) \- <wurll@ira.uka.de>, +Erwin Waterlander \- <waterlan@xs4all.nl> (Maintainer) +.PP +Project page: <http://waterlan.home.xs4all.nl/dos2unix.html> +.PP +SourceForge page: <http://sourceforge.net/projects/dos2unix/> +.PP +Freecode: <http://freecode.com/projects/dos2unix> +.SH "SEE ALSO" +.IX Header "SEE ALSO" +\&\fIfile\fR\|(1) +\&\fIfind\fR\|(1) +\&\fIiconv\fR\|(1) +\&\fIlocale\fR\|(1) +\&\fIxargs\fR\|(1) diff --git a/man/man1/dos2unix.htm b/man/man1/dos2unix.htm new file mode 100644 index 0000000..33da9bf --- /dev/null +++ b/man/man1/dos2unix.htm @@ -0,0 +1,516 @@ +<?xml version="1.0" ?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml"> +<head> +<title>dos2unix 6.0.5 - DOS/MAC to UNIX and vice versa text file format converter</title> +<meta http-equiv="content-type" content="text/html; charset=utf-8" /> +<link rev="made" href="mailto:root@localhost" /> +</head> + +<body style="background-color: white"> + + + +<ul id="index"> + <li><a href="#NAME">NAME</a></li> + <li><a href="#SYNOPSIS">SYNOPSIS</a></li> + <li><a href="#DESCRIPTION">DESCRIPTION</a></li> + <li><a href="#OPTIONS">OPTIONS</a></li> + <li><a href="#MAC-MODE">MAC MODE</a></li> + <li><a href="#CONVERSION-MODES">CONVERSION MODES</a></li> + <li><a href="#UNICODE">UNICODE</a> + <ul> + <li><a href="#Encodings">Encodings</a></li> + <li><a href="#Conversion">Conversion</a></li> + 
<li><a href="#Byte-Order-Mark">Byte Order Mark</a></li> + <li><a href="#Unicode-examples">Unicode examples</a></li> + </ul> + </li> + <li><a href="#EXAMPLES">EXAMPLES</a></li> + <li><a href="#RECURSIVE-CONVERSION">RECURSIVE CONVERSION</a></li> + <li><a href="#LOCALIZATION">LOCALIZATION</a></li> + <li><a href="#RETURN-VALUE">RETURN VALUE</a></li> + <li><a href="#STANDARDS">STANDARDS</a></li> + <li><a href="#AUTHORS">AUTHORS</a></li> + <li><a href="#SEE-ALSO">SEE ALSO</a></li> +</ul> + +<h1 id="NAME">NAME</h1> + +<p>dos2unix - DOS/Mac to Unix and vice versa text file format converter</p> + +<h1 id="SYNOPSIS">SYNOPSIS</h1> + +<pre><code> dos2unix [options] [FILE ...] [-n INFILE OUTFILE ...] + unix2dos [options] [FILE ...] [-n INFILE OUTFILE ...]</code></pre> + +<h1 id="DESCRIPTION">DESCRIPTION</h1> + +<p>The Dos2unix package includes utilities <code>dos2unix</code> and <code>unix2dos</code> to convert plain text files in DOS or Mac format to Unix format and vice versa.</p> + +<p>In DOS/Windows text files a line break, also known as newline, is a combination of two characters: a Carriage Return (CR) followed by a Line Feed (LF). In Unix text files a line break is a single character: the Line Feed (LF). In Mac text files, prior to Mac OS X, a line break was single Carriage Return (CR) character. Nowadays Mac OS uses Unix style (LF) line breaks.</p> + +<p>Besides line breaks Dos2unix can also convert the encoding of files. A few DOS code pages can be converted to Unix Latin-1. And Windows Unicode (UTF-16) files can be converted to Unix Unicode (UTF-8) files.</p> + +<p>Binary files are automatically skipped, unless conversion is forced.</p> + +<p>Non-regular files, such as directories and FIFOs, are automatically skipped.</p> + +<p>Symbolic links and their targets are by default kept untouched. Symbolic links can optionally be replaced, or the output can be written to the symbolic link target. 
Writing to a symbolic link target is not supported on Windows.</p> + +<p>Dos2unix was modelled after dos2unix under SunOS/Solaris. There is one important difference with the original SunOS/Solaris version. This version does by default in-place conversion (old file mode), while the original SunOS/Solaris version only supports paired conversion (new file mode). See also options <code>-o</code> and <code>-n</code>.</p> + +<h1 id="OPTIONS">OPTIONS</h1> + +<dl> + +<dt id="pod--"><b>--</b></dt> +<dd> + +<p>Treat all following options as file names. Use this option if you want to convert files whose names start with a dash. For instance to convert a file named "-foo", you can use this command:</p> + +<pre><code> dos2unix -- -foo</code></pre> + +<p>Or in new file mode:</p> + +<pre><code> dos2unix -n -- -foo out.txt</code></pre> + +</dd> +<dt id="ascii"><b>-ascii</b></dt> +<dd> + +<p>Convert only line breaks. This is the default conversion mode.</p> + +</dd> +<dt id="iso"><b>-iso</b></dt> +<dd> + +<p>Conversion between DOS and ISO-8859-1 character set. See also section CONVERSION MODES.</p> + +</dd> +<dt id="pod-1252"><b>-1252</b></dt> +<dd> + +<p>Use Windows code page 1252 (Western European).</p> + +</dd> +<dt id="pod-437"><b>-437</b></dt> +<dd> + +<p>Use DOS code page 437 (US). This is the default code page used for ISO conversion.</p> + +</dd> +<dt id="pod-850"><b>-850</b></dt> +<dd> + +<p>Use DOS code page 850 (Western European).</p> + +</dd> +<dt id="pod-860"><b>-860</b></dt> +<dd> + +<p>Use DOS code page 860 (Portuguese).</p> + +</dd> +<dt id="pod-863"><b>-863</b></dt> +<dd> + +<p>Use DOS code page 863 (French Canadian).</p> + +</dd> +<dt id="pod-865"><b>-865</b></dt> +<dd> + +<p>Use DOS code page 865 (Nordic).</p> + +</dd> +<dt id="pod-7"><b>-7</b></dt> +<dd> + +<p>Convert 8 bit characters to 7 bit space.</p> + +</dd> +<dt id="c---convmode-CONVMODE"><b>-c, --convmode CONVMODE</b></dt> +<dd> + +<p>Set conversion mode. 
Where CONVMODE is one of: <i>ascii</i>, <i>7bit</i>, <i>iso</i>, <i>mac</i> with ascii being the default.</p> + +</dd> +<dt id="f---force"><b>-f, --force</b></dt> +<dd> + +<p>Force conversion of binary files.</p> + +</dd> +<dt id="h---help"><b>-h, --help</b></dt> +<dd> + +<p>Display help and exit.</p> + +</dd> +<dt id="k---keepdate"><b>-k, --keepdate</b></dt> +<dd> + +<p>Keep the date stamp of output file same as input file.</p> + +</dd> +<dt id="L---license"><b>-L, --license</b></dt> +<dd> + +<p>Display program's license.</p> + +</dd> +<dt id="l---newline"><b>-l, --newline</b></dt> +<dd> + +<p>Add additional newline.</p> + +<p><b>dos2unix</b>: Only DOS line breaks are changed to two Unix line breaks. In Mac mode only Mac line breaks are changed to two Unix line breaks.</p> + +<p><b>unix2dos</b>: Only Unix line breaks are changed to two DOS line breaks. In Mac mode Unix line breaks are changed to two Mac line breaks.</p> + +</dd> +<dt id="m---add-bom"><b>-m, --add-bom</b></dt> +<dd> + +<p>Write an UTF-8 Byte Order Mark in the output file. Never use this option when the output encoding is other than UTF-8. See also section UNICODE.</p> + +</dd> +<dt id="n---newfile-INFILE-OUTFILE-..."><b>-n, --newfile INFILE OUTFILE ...</b></dt> +<dd> + +<p>New file mode. Convert file INFILE and write output to file OUTFILE. File names must be given in pairs and wildcard names should <i>not</i> be used or you <i>will</i> lose your files.</p> + +<p>The person who starts the conversion in new file (paired) mode will be the owner of the converted file. The read/write permissions of the new file will be the permissions of the original file minus the umask(1) of the person who runs the conversion.</p> + +</dd> +<dt id="o---oldfile-FILE-..."><b>-o, --oldfile FILE ...</b></dt> +<dd> + +<p>Old file mode. Convert file FILE and overwrite output to it. The program defaults to run in this mode. 
Wildcard names may be used.</p> + +<p>In old file (in-place) mode the converted file gets the same owner, group, and read/write permissions as the original file. Also when the file is converted by another user who has write permissions on the file (e.g. user root). The conversion will be aborted when it is not possible to preserve the original values. Change of owner could mean that the original owner is not able to read the file any more. Change of group could be a security risk, the file could be made readable for persons for whom it is not intended. Preservation of owner, group, and read/write permissions is only supported on Unix.</p> + +</dd> +<dt id="q---quiet"><b>-q, --quiet</b></dt> +<dd> + +<p>Quiet mode. Suppress all warnings and messages. The return value is zero. Except when wrong command-line options are used.</p> + +</dd> +<dt id="s---safe"><b>-s, --safe</b></dt> +<dd> + +<p>Skip binary files (default).</p> + +</dd> +<dt id="ul---assume-utf16le"><b>-ul, --assume-utf16le</b></dt> +<dd> + +<p>Assume that the input file format is UTF-16LE.</p> + +<p>When there is a Byte Order Mark in the input file the BOM has priority over this option.</p> + +<p>When you made a wrong assumption (the input file was not in UTF-16LE format) and the conversion succeeded, you will get an UTF-8 output file with wrong text. You can undo the wrong conversion with iconv(1) by converting the UTF-8 output file back to UTF-16LE. This will bring back the original file.</p> + +<p>The assumption of UTF-16LE works as a <i>conversion mode</i>. 
By switching to the default <i>ascii</i> mode the UTF-16LE assumption is turned off.</p> + +</dd> +<dt id="ub---assume-utf16be"><b>-ub, --assume-utf16be</b></dt> +<dd> + +<p>Assume that the input file format is UTF-16BE.</p> + +<p>This option works the same as option <code>-ul</code>.</p> + +</dd> +<dt id="F---follow-symlink"><b>-F, --follow-symlink</b></dt> +<dd> + +<p>Follow symbolic links and convert the targets.</p> + +</dd> +<dt id="R---replace-symlink"><b>-R, --replace-symlink</b></dt> +<dd> + +<p>Replace symbolic links with converted files (original target files remain unchanged).</p> + +</dd> +<dt id="S---skip-symlink"><b>-S, --skip-symlink</b></dt> +<dd> + +<p>Keep symbolic links and targets unchanged (default).</p> + +</dd> +<dt id="V---version"><b>-V, --version</b></dt> +<dd> + +<p>Display version information and exit.</p> + +</dd> +</dl> + +<h1 id="MAC-MODE">MAC MODE</h1> + +<p>In normal mode line breaks are converted from DOS to Unix and vice versa. Mac line breaks are not converted.</p> + +<p>In Mac mode line breaks are converted from Mac to Unix and vice versa. DOS line breaks are not changed.</p> + +<p>To run in Mac mode use the command-line option <code>-c mac</code> or use the commands <code>mac2unix</code> or <code>unix2mac</code>.</p> + +<h1 id="CONVERSION-MODES">CONVERSION MODES</h1> + +<dl> + +<dt id="ascii1"><b>ascii</b></dt> +<dd> + +<p>In mode <code>ascii</code> only line breaks are converted. This is the default conversion mode.</p> + +<p>Although the name of this mode is ASCII, which is a 7 bit standard, the actual mode is 8 bit. Use always this mode when converting Unicode UTF-8 files.</p> + +</dd> +<dt id="bit"><b>7bit</b></dt> +<dd> + +<p>In this mode all 8 bit non-ASCII characters (with values from 128 to 255) are converted to a 7 bit space.</p> + +</dd> +<dt id="iso1"><b>iso</b></dt> +<dd> + +<p>Characters are converted between a DOS character set (code page) and ISO character set ISO-8859-1 (Latin-1) on Unix. 
DOS characters without ISO-8859-1 equivalent, for which conversion is not possible, are converted to a dot. The same counts for ISO-8859-1 characters without DOS counterpart.</p> + +<p>When only option <code>-iso</code> is used dos2unix will try to determine the active code page. When this is not possible dos2unix will use default code page CP437, which is mainly used in the USA. To force a specific code page use options <code>-437</code> (US), <code>-850</code> (Western European), <code>-860</code> (Portuguese), <code>-863</code> (French Canadian), or <code>-865</code> (Nordic). Windows code page CP1252 (Western European) is also supported with option <code>-1252</code>. For other code pages use dos2unix in combination with iconv(1). Iconv can convert between a long list of character encodings.</p> + +<p>Never use ISO conversion on Unicode text files. It will corrupt UTF-8 encoded files.</p> + +<p>Some examples:</p> + +<p>Convert from DOS default code page to Unix Latin-1</p> + +<pre><code> dos2unix -iso -n in.txt out.txt</code></pre> + +<p>Convert from DOS CP850 to Unix Latin-1</p> + +<pre><code> dos2unix -850 -n in.txt out.txt</code></pre> + +<p>Convert from Windows CP1252 to Unix Latin-1</p> + +<pre><code> dos2unix -1252 -n in.txt out.txt</code></pre> + +<p>Convert from Windows CP1252 to Unix UTF-8 (Unicode)</p> + +<pre><code> iconv -f CP1252 -t UTF-8 in.txt | dos2unix > out.txt</code></pre> + +<p>Convert from Unix Latin-1 to DOS default code page</p> + +<pre><code> unix2dos -iso -n in.txt out.txt</code></pre> + +<p>Convert from Unix Latin-1 to DOS CP850</p> + +<pre><code> unix2dos -850 -n in.txt out.txt</code></pre> + +<p>Convert from Unix Latin-1 to Windows CP1252</p> + +<pre><code> unix2dos -1252 -n in.txt out.txt</code></pre> + +<p>Convert from Unix UTF-8 (Unicode) to Windows CP1252</p> + +<pre><code> unix2dos < in.txt | iconv -f UTF-8 -t CP1252 > out.txt</code></pre> + +<p>See also <a 
href="http://czyborra.com/charsets/codepages.html">http://czyborra.com/charsets/codepages.html</a> and <a href="http://czyborra.com/charsets/iso8859.html">http://czyborra.com/charsets/iso8859.html</a>.</p> + +</dd> +</dl> + +<h1 id="UNICODE">UNICODE</h1> + +<h2 id="Encodings">Encodings</h2> + +<p>There exist different Unicode encodings. On Unix and Linux Unicode files are typically encoded in UTF-8 encoding. On Windows Unicode text files can be encoded in UTF-8, UTF-16, or UTF-16 big endian, but are mostly encoded in UTF-16 format.</p> + +<h2 id="Conversion">Conversion</h2> + +<p>Unicode text files can have DOS, Unix or Mac line breaks, like regular text files.</p> + +<p>All versions of dos2unix and unix2dos can convert UTF-8 encoded files, because UTF-8 was designed for backward compatibility with ASCII.</p> + +<p>Dos2unix and unix2dos with Unicode UTF-16 support, can read little and big endian UTF-16 encoded text files. To see if dos2unix was built with UTF-16 support type <code>dos2unix -V</code>.</p> + +<p>The Windows versions of dos2unix and unix2dos convert UTF-16 encoded files always to UTF-8 encoded files. Unix versions of dos2unix/unix2dos convert UTF-16 encoded files to the locale character encoding when it is set to UTF-8. Use the locale(1) command to find out what the locale character encoding is.</p> + +<p>Because UTF-8 formatted text files are well supported on both Windows and Unix, dos2unix and unix2dos have no option to write UTF-16 files. All UTF-16 characters can be encoded in UTF-8. Conversion from UTF-16 to UTF-8 is without loss. UTF-16 files will be skipped on Unix when the locale character encoding is not UTF-8, to prevent accidental loss of text. 
When an UTF-16 to UTF-8 conversion error occurs, for instance when the UTF-16 input file contains an error, the file will be skipped.</p> + +<p>ISO and 7-bit mode conversion do not work on UTF-16 files.</p> + +<h2 id="Byte-Order-Mark">Byte Order Mark</h2> + +<p>On Windows Unicode text files typically have a Byte Order Mark (BOM), because many Windows programs (including Notepad) add BOMs by default. See also <a href="http://en.wikipedia.org/wiki/Byte_order_mark">http://en.wikipedia.org/wiki/Byte_order_mark</a>.</p> + +<p>On Unix Unicode files typically don't have a BOM. It is assumed that text files are encoded in the locale character encoding.</p> + +<p>Dos2unix can only detect if a file is in UTF-16 format if the file has a BOM. When an UTF-16 file doesn't have a BOM, dos2unix will see the file as a binary file.</p> + +<p>Use option <code>-ul</code> or <code>-ub</code> to convert an UTF-16 file without BOM.</p> + +<p>Dos2unix never writes a BOM in the output file, unless you use option <code>-m</code>.</p> + +<p>Unix2dos writes a BOM in the output file when the input file has a BOM, or when option <code>-m</code> is used.</p> + +<h2 id="Unicode-examples">Unicode examples</h2> + +<p>Convert from Windows UTF-16 (with BOM) to Unix UTF-8</p> + +<pre><code> dos2unix -n in.txt out.txt</code></pre> + +<p>Convert from Windows UTF-16LE (without BOM) to Unix UTF-8</p> + +<pre><code> dos2unix -ul -n in.txt out.txt</code></pre> + +<p>Convert from Unix UTF-8 to Windows UTF-8 with BOM</p> + +<pre><code> unix2dos -m -n in.txt out.txt</code></pre> + +<p>Convert from Unix UTF-8 to Windows UTF-16</p> + +<pre><code> unix2dos < in.txt | iconv -f UTF-8 -t UTF-16 > out.txt</code></pre> + +<h1 id="EXAMPLES">EXAMPLES</h1> + +<p>Read input from 'stdin' and write output to 'stdout'.</p> + +<pre><code> dos2unix + dos2unix -l -c mac</code></pre> + +<p>Convert and replace a.txt. 
Convert and replace b.txt.</p> + +<pre><code> dos2unix a.txt b.txt + dos2unix -o a.txt b.txt</code></pre> + +<p>Convert and replace a.txt in ascii conversion mode.</p> + +<pre><code> dos2unix a.txt</code></pre> + +<p>Convert and replace a.txt in ascii conversion mode. Convert and replace b.txt in 7bit conversion mode.</p> + +<pre><code> dos2unix a.txt -c 7bit b.txt + dos2unix -c ascii a.txt -c 7bit b.txt + dos2unix -ascii a.txt -7 b.txt</code></pre> + +<p>Convert a.txt from Mac to Unix format.</p> + +<pre><code> dos2unix -c mac a.txt + mac2unix a.txt</code></pre> + +<p>Convert a.txt from Unix to Mac format.</p> + +<pre><code> unix2dos -c mac a.txt + unix2mac a.txt</code></pre> + +<p>Convert and replace a.txt while keeping original date stamp.</p> + +<pre><code> dos2unix -k a.txt + dos2unix -k -o a.txt</code></pre> + +<p>Convert a.txt and write to e.txt.</p> + +<pre><code> dos2unix -n a.txt e.txt</code></pre> + +<p>Convert a.txt and write to e.txt, keep date stamp of e.txt same as a.txt.</p> + +<pre><code> dos2unix -k -n a.txt e.txt</code></pre> + +<p>Convert and replace a.txt. Convert b.txt and write to e.txt.</p> + +<pre><code> dos2unix a.txt -n b.txt e.txt + dos2unix -o a.txt -n b.txt e.txt</code></pre> + +<p>Convert c.txt and write to e.txt. Convert and replace a.txt. Convert and replace b.txt. Convert d.txt and write to f.txt.</p> + +<pre><code> dos2unix -n c.txt e.txt -o a.txt b.txt -n d.txt f.txt</code></pre> + +<h1 id="RECURSIVE-CONVERSION">RECURSIVE CONVERSION</h1> + +<p>Use dos2unix in combination with the find(1) and xargs(1) commands to recursively convert text files in a directory tree structure. For instance to convert all .txt files in the directory tree under the current directory type:</p> + +<pre><code> find . -name '*.txt' |xargs dos2unix</code></pre> + +<h1 id="LOCALIZATION">LOCALIZATION</h1> + +<dl> + +<dt id="LANG"><b>LANG</b></dt> +<dd> + +<p>The primary language is selected with the environment variable LANG. 
The LANG variable consists of several parts. The first part is in small letters the language code. The second is optional and is the country code in capital letters, preceded with an underscore. There is also an optional third part: character encoding, preceded with a dot. A few examples for POSIX standard type shells:</p> + +<pre><code> export LANG=nl Dutch + export LANG=nl_NL Dutch, The Netherlands + export LANG=nl_BE Dutch, Belgium + export LANG=es_ES Spanish, Spain + export LANG=es_MX Spanish, Mexico + export LANG=en_US.iso88591 English, USA, Latin-1 encoding + export LANG=en_GB.UTF-8 English, UK, UTF-8 encoding</code></pre> + +<p>For a complete list of language and country codes see the gettext manual: <a href="http://www.gnu.org/software/gettext/manual/gettext.html#Language-Codes">http://www.gnu.org/software/gettext/manual/gettext.html#Language-Codes</a></p> + +<p>On Unix systems you can use the command locale(1) to get locale specific information.</p> + +</dd> +<dt id="LANGUAGE"><b>LANGUAGE</b></dt> +<dd> + +<p>With the LANGUAGE environment variable you can specify a priority list of languages, separated by colons. Dos2unix gives preference to LANGUAGE over LANG. For instance, first Dutch and then German: <code>LANGUAGE=nl:de</code>. You have to first enable localization, by setting LANG (or LC_ALL) to a value other than "C", before you can use a language priority list through the LANGUAGE variable. See also the gettext manual: <a href="http://www.gnu.org/software/gettext/manual/gettext.html#The-LANGUAGE-variable">http://www.gnu.org/software/gettext/manual/gettext.html#The-LANGUAGE-variable</a></p> + +<p>If you select a language which is not available you will get the standard English messages.</p> + +</dd> +<dt id="DOS2UNIX_LOCALEDIR"><b>DOS2UNIX_LOCALEDIR</b></dt> +<dd> + +<p>With the environment variable DOS2UNIX_LOCALEDIR the LOCALEDIR set during compilation can be overruled. LOCALEDIR is used to find the language files. 
The GNU default value is <code>/usr/local/share/locale</code>. Option <b>--version</b> will display the LOCALEDIR that is used.</p> + +<p>Example (POSIX shell):</p> + +<pre><code> export DOS2UNIX_LOCALEDIR=$HOME/share/locale</code></pre> + +</dd> +</dl> + +<h1 id="RETURN-VALUE">RETURN VALUE</h1> + +<p>On success, zero is returned. When a system error occurs the last system error will be returned. For other errors 1 is returned.</p> + +<p>The return value is always zero in quiet mode, except when wrong command-line options are used.</p> + +<h1 id="STANDARDS">STANDARDS</h1> + +<p><a href="http://en.wikipedia.org/wiki/Text_file">http://en.wikipedia.org/wiki/Text_file</a></p> + +<p><a href="http://en.wikipedia.org/wiki/Carriage_return">http://en.wikipedia.org/wiki/Carriage_return</a></p> + +<p><a href="http://en.wikipedia.org/wiki/Newline">http://en.wikipedia.org/wiki/Newline</a></p> + +<p><a href="http://en.wikipedia.org/wiki/Unicode">http://en.wikipedia.org/wiki/Unicode</a></p> + +<h1 id="AUTHORS">AUTHORS</h1> + +<p>Benjamin Lin - <blin@socs.uts.edu.au> Bernd Johannes Wuebben (mac2unix mode) - <wuebben@kde.org>, Christian Wurll (add extra newline) - <wurll@ira.uka.de>, Erwin Waterlander - <waterlan@xs4all.nl> (Maintainer)</p> + +<p>Project page: <a href="http://waterlan.home.xs4all.nl/dos2unix.html">http://waterlan.home.xs4all.nl/dos2unix.html</a></p> + +<p>SourceForge page: <a href="http://sourceforge.net/projects/dos2unix/">http://sourceforge.net/projects/dos2unix/</a></p> + +<p>Freecode: <a href="http://freecode.com/projects/dos2unix">http://freecode.com/projects/dos2unix</a></p> + +<h1 id="SEE-ALSO">SEE ALSO</h1> + +<p>file(1) find(1) iconv(1) locale(1) xargs(1)</p> + + +</body> + +</html> + + diff --git a/man/man1/dos2unix.pod b/man/man1/dos2unix.pod index 4bcef37..32a0d29 100644 --- a/man/man1/dos2unix.pod +++ b/man/man1/dos2unix.pod @@ -47,6 +47,8 @@ =pod +=encoding UTF-8 + =head1 NAME dos2unix - DOS/Mac to Unix and vice versa text file format converter @@ 
-75,14 +77,15 @@ Binary files are automatically skipped, unless conversion is forced. Non-regular files, such as directories and FIFOs, are automatically skipped. -Symbolic links and their targets are by default kept untouched. -Symbolic links can optionally be replaced, or the output can be written -to the symbolic link target. -Symbolic links on Windows are not supported. Windows symbolic links -always replaced, keeping the targets unchanged. +Symbolic links and their targets are by default kept untouched. Symbolic links +can optionally be replaced, or the output can be written to the symbolic link +target. Writing to a symbolic link target is not supported on Windows. -Dos2unix was modelled after dos2unix under SunOS/Solaris and has similar -conversion modes. +Dos2unix was modelled after dos2unix under SunOS/Solaris. There is one +important difference with the original SunOS/Solaris version. This version does +by default in-place conversion (old file mode), while the original +SunOS/Solaris version only supports paired conversion (new file mode). See also +options C<-o> and C<-n>. =head1 OPTIONS @@ -262,15 +265,12 @@ commands C<mac2unix> or C<unix2mac>. =head1 CONVERSION MODES -Conversion modes I<ascii>, I<7bit>, and I<iso> -are similar to those of dos2unix/unix2dos under SunOS/Solaris. - =over 4 =item B<ascii> -In mode C<ascii> only line breaks are converted. This is the default -conversion mode. +In mode C<ascii> only line breaks are converted. This is the default conversion +mode. Although the name of this mode is ASCII, which is a 7 bit standard, the actual mode is 8 bit. Use always this mode when converting Unicode UTF-8 @@ -297,7 +297,7 @@ also supported with option C<-1252>. For other code pages use dos2unix in combination with iconv(1). Iconv can convert between a long list of character encodings. -Never use ISO converion on Unicode text files. It will corrupt UTF-8 encoded files. +Never use ISO conversion on Unicode text files. 
It will corrupt UTF-8 encoded files. Some examples: @@ -317,7 +317,7 @@ Convert from Windows CP1252 to Unix UTF-8 (Unicode) iconv -f CP1252 -t UTF-8 in.txt | dos2unix > out.txt -Convert from Unix Latin-1 to DOS default code page. +Convert from Unix Latin-1 to DOS default code page unix2dos -iso -n in.txt out.txt @@ -353,7 +353,7 @@ Unicode text files can have DOS, Unix or Mac line breaks, like regular text files. All versions of dos2unix and unix2dos can convert UTF-8 encoded files, because -UTF-8 was designed for backward compatiblity with ASCII. +UTF-8 was designed for backward compatibility with ASCII. Dos2unix and unix2dos with Unicode UTF-16 support, can read little and big endian UTF-16 encoded text files. To see if dos2unix was built with UTF-16 diff --git a/man/man1/dos2unix.txt b/man/man1/dos2unix.txt new file mode 100644 index 0000000..3301138 --- /dev/null +++ b/man/man1/dos2unix.txt @@ -0,0 +1,468 @@ +NAME + dos2unix - DOS/Mac to Unix and vice versa text file format converter + +SYNOPSIS + dos2unix [options] [FILE ...] [-n INFILE OUTFILE ...] + unix2dos [options] [FILE ...] [-n INFILE OUTFILE ...] + +DESCRIPTION + The Dos2unix package includes utilities "dos2unix" and "unix2dos" to + convert plain text files in DOS or Mac format to Unix format and vice + versa. + + In DOS/Windows text files a line break, also known as newline, is a + combination of two characters: a Carriage Return (CR) followed by a Line + Feed (LF). In Unix text files a line break is a single character: the + Line Feed (LF). In Mac text files, prior to Mac OS X, a line break was + single Carriage Return (CR) character. Nowadays Mac OS uses Unix style + (LF) line breaks. + + Besides line breaks Dos2unix can also convert the encoding of files. A + few DOS code pages can be converted to Unix Latin-1. And Windows Unicode + (UTF-16) files can be converted to Unix Unicode (UTF-8) files. + + Binary files are automatically skipped, unless conversion is forced. 
+ + Non-regular files, such as directories and FIFOs, are automatically + skipped. + + Symbolic links and their targets are by default kept untouched. Symbolic + links can optionally be replaced, or the output can be written to the + symbolic link target. Writing to a symbolic link target is not supported + on Windows. + + Dos2unix was modelled after dos2unix under SunOS/Solaris. There is one + important difference with the original SunOS/Solaris version. This + version does by default in-place conversion (old file mode), while the + original SunOS/Solaris version only supports paired conversion (new file + mode). See also options "-o" and "-n". + +OPTIONS + -- Treat all following options as file names. Use this option if you + want to convert files whose names start with a dash. For instance to + convert a file named "-foo", you can use this command: + + dos2unix -- -foo + + Or in new file mode: + + dos2unix -n -- -foo out.txt + + -ascii + Convert only line breaks. This is the default conversion mode. + + -iso + Conversion between DOS and ISO-8859-1 character set. See also + section CONVERSION MODES. + + -1252 + Use Windows code page 1252 (Western European). + + -437 + Use DOS code page 437 (US). This is the default code page used for + ISO conversion. + + -850 + Use DOS code page 850 (Western European). + + -860 + Use DOS code page 860 (Portuguese). + + -863 + Use DOS code page 863 (French Canadian). + + -865 + Use DOS code page 865 (Nordic). + + -7 Convert 8 bit characters to 7 bit space. + + -c, --convmode CONVMODE + Set conversion mode. Where CONVMODE is one of: *ascii*, *7bit*, + *iso*, *mac* with ascii being the default. + + -f, --force + Force conversion of binary files. + + -h, --help + Display help and exit. + + -k, --keepdate + Keep the date stamp of output file same as input file. + + -L, --license + Display program's license. + + -l, --newline + Add additional newline. + + dos2unix: Only DOS line breaks are changed to two Unix line breaks. 
+ In Mac mode only Mac line breaks are changed to two Unix line + breaks. + + unix2dos: Only Unix line breaks are changed to two DOS line breaks. + In Mac mode Unix line breaks are changed to two Mac line breaks. + + -m, --add-bom + Write an UTF-8 Byte Order Mark in the output file. Never use this + option when the output encoding is other than UTF-8. See also + section UNICODE. + + -n, --newfile INFILE OUTFILE ... + New file mode. Convert file INFILE and write output to file OUTFILE. + File names must be given in pairs and wildcard names should *not* be + used or you *will* lose your files. + + The person who starts the conversion in new file (paired) mode will + be the owner of the converted file. The read/write permissions of + the new file will be the permissions of the original file minus the + umask(1) of the person who runs the conversion. + + -o, --oldfile FILE ... + Old file mode. Convert file FILE and overwrite output to it. The + program defaults to run in this mode. Wildcard names may be used. + + In old file (in-place) mode the converted file gets the same owner, + group, and read/write permissions as the original file. Also when + the file is converted by another user who has write permissions on + the file (e.g. user root). The conversion will be aborted when it is + not possible to preserve the original values. Change of owner could + mean that the original owner is not able to read the file any more. + Change of group could be a security risk, the file could be made + readable for persons for whom it is not intended. Preservation of + owner, group, and read/write permissions is only supported on Unix. + + -q, --quiet + Quiet mode. Suppress all warnings and messages. The return value is + zero. Except when wrong command-line options are used. + + -s, --safe + Skip binary files (default). + + -ul, --assume-utf16le + Assume that the input file format is UTF-16LE. 
+ + When there is a Byte Order Mark in the input file the BOM has + priority over this option. + + When you made a wrong assumption (the input file was not in UTF-16LE + format) and the conversion succeeded, you will get an UTF-8 output + file with wrong text. You can undo the wrong conversion with + iconv(1) by converting the UTF-8 output file back to UTF-16LE. This + will bring back the original file. + + The assumption of UTF-16LE works as a *conversion mode*. By + switching to the default *ascii* mode the UTF-16LE assumption is + turned off. + + -ub, --assume-utf16be + Assume that the input file format is UTF-16BE. + + This option works the same as option "-ul". + + -F, --follow-symlink + Follow symbolic links and convert the targets. + + -R, --replace-symlink + Replace symbolic links with converted files (original target files + remain unchanged). + + -S, --skip-symlink + Keep symbolic links and targets unchanged (default). + + -V, --version + Display version information and exit. + +MAC MODE + In normal mode line breaks are converted from DOS to Unix and vice + versa. Mac line breaks are not converted. + + In Mac mode line breaks are converted from Mac to Unix and vice versa. + DOS line breaks are not changed. + + To run in Mac mode use the command-line option "-c mac" or use the + commands "mac2unix" or "unix2mac". + +CONVERSION MODES + ascii + In mode "ascii" only line breaks are converted. This is the default + conversion mode. + + Although the name of this mode is ASCII, which is a 7 bit standard, + the actual mode is 8 bit. Use always this mode when converting + Unicode UTF-8 files. + + 7bit + In this mode all 8 bit non-ASCII characters (with values from 128 to + 255) are converted to a 7 bit space. + + iso Characters are converted between a DOS character set (code page) and + ISO character set ISO-8859-1 (Latin-1) on Unix. DOS characters + without ISO-8859-1 equivalent, for which conversion is not possible, + are converted to a dot. 
The same counts for ISO-8859-1 characters + without DOS counterpart. + + When only option "-iso" is used dos2unix will try to determine the + active code page. When this is not possible dos2unix will use + default code page CP437, which is mainly used in the USA. To force a + specific code page use options -437 (US), -850 (Western European), + -860 (Portuguese), -863 (French Canadian), or -865 (Nordic). Windows + code page CP1252 (Western European) is also supported with option + -1252. For other code pages use dos2unix in combination with + iconv(1). Iconv can convert between a long list of character + encodings. + + Never use ISO conversion on Unicode text files. It will corrupt + UTF-8 encoded files. + + Some examples: + + Convert from DOS default code page to Unix Latin-1 + + dos2unix -iso -n in.txt out.txt + + Convert from DOS CP850 to Unix Latin-1 + + dos2unix -850 -n in.txt out.txt + + Convert from Windows CP1252 to Unix Latin-1 + + dos2unix -1252 -n in.txt out.txt + + Convert from Windows CP1252 to Unix UTF-8 (Unicode) + + iconv -f CP1252 -t UTF-8 in.txt | dos2unix > out.txt + + Convert from Unix Latin-1 to DOS default code page + + unix2dos -iso -n in.txt out.txt + + Convert from Unix Latin-1 to DOS CP850 + + unix2dos -850 -n in.txt out.txt + + Convert from Unix Latin-1 to Windows CP1252 + + unix2dos -1252 -n in.txt out.txt + + Convert from Unix UTF-8 (Unicode) to Windows CP1252 + + unix2dos < in.txt | iconv -f UTF-8 -t CP1252 > out.txt + + See also <http://czyborra.com/charsets/codepages.html> and + <http://czyborra.com/charsets/iso8859.html>. + +UNICODE + Encodings + There exist different Unicode encodings. On Unix and Linux Unicode files + are typically encoded in UTF-8 encoding. On Windows Unicode text files + can be encoded in UTF-8, UTF-16, or UTF-16 big endian, but are mostly + encoded in UTF-16 format. + + Conversion + Unicode text files can have DOS, Unix or Mac line breaks, like regular + text files. 
+ + All versions of dos2unix and unix2dos can convert UTF-8 encoded files, + because UTF-8 was designed for backward compatibility with ASCII. + + Dos2unix and unix2dos with Unicode UTF-16 support, can read little and + big endian UTF-16 encoded text files. To see if dos2unix was built with + UTF-16 support type "dos2unix -V". + + The Windows versions of dos2unix and unix2dos convert UTF-16 encoded + files always to UTF-8 encoded files. Unix versions of dos2unix/unix2dos + convert UTF-16 encoded files to the locale character encoding when it is + set to UTF-8. Use the locale(1) command to find out what the locale + character encoding is. + + Because UTF-8 formatted text files are well supported on both Windows + and Unix, dos2unix and unix2dos have no option to write UTF-16 files. + All UTF-16 characters can be encoded in UTF-8. Conversion from UTF-16 to + UTF-8 is without loss. UTF-16 files will be skipped on Unix when the + locale character encoding is not UTF-8, to prevent accidental loss of + text. When an UTF-16 to UTF-8 conversion error occurs, for instance when + the UTF-16 input file contains an error, the file will be skipped. + + ISO and 7-bit mode conversion do not work on UTF-16 files. + + Byte Order Mark + On Windows Unicode text files typically have a Byte Order Mark (BOM), + because many Windows programs (including Notepad) add BOMs by default. + See also <http://en.wikipedia.org/wiki/Byte_order_mark>. + + On Unix Unicode files typically don't have a BOM. It is assumed that + text files are encoded in the locale character encoding. + + Dos2unix can only detect if a file is in UTF-16 format if the file has a + BOM. When an UTF-16 file doesn't have a BOM, dos2unix will see the file + as a binary file. + + Use option "-ul" or "-ub" to convert an UTF-16 file without BOM. + + Dos2unix never writes a BOM in the output file, unless you use option + "-m". 
+ + Unix2dos writes a BOM in the output file when the input file has a BOM, + or when option "-m" is used. + + Unicode examples + Convert from Windows UTF-16 (with BOM) to Unix UTF-8 + + dos2unix -n in.txt out.txt + + Convert from Windows UTF-16LE (without BOM) to Unix UTF-8 + + dos2unix -ul -n in.txt out.txt + + Convert from Unix UTF-8 to Windows UTF-8 with BOM + + unix2dos -m -n in.txt out.txt + + Convert from Unix UTF-8 to Windows UTF-16 + + unix2dos < in.txt | iconv -f UTF-8 -t UTF-16 > out.txt + +EXAMPLES + Read input from 'stdin' and write output to 'stdout'. + + dos2unix + dos2unix -l -c mac + + Convert and replace a.txt. Convert and replace b.txt. + + dos2unix a.txt b.txt + dos2unix -o a.txt b.txt + + Convert and replace a.txt in ascii conversion mode. + + dos2unix a.txt + + Convert and replace a.txt in ascii conversion mode. Convert and replace + b.txt in 7bit conversion mode. + + dos2unix a.txt -c 7bit b.txt + dos2unix -c ascii a.txt -c 7bit b.txt + dos2unix -ascii a.txt -7 b.txt + + Convert a.txt from Mac to Unix format. + + dos2unix -c mac a.txt + mac2unix a.txt + + Convert a.txt from Unix to Mac format. + + unix2dos -c mac a.txt + unix2mac a.txt + + Convert and replace a.txt while keeping original date stamp. + + dos2unix -k a.txt + dos2unix -k -o a.txt + + Convert a.txt and write to e.txt. + + dos2unix -n a.txt e.txt + + Convert a.txt and write to e.txt, keep date stamp of e.txt same as + a.txt. + + dos2unix -k -n a.txt e.txt + + Convert and replace a.txt. Convert b.txt and write to e.txt. + + dos2unix a.txt -n b.txt e.txt + dos2unix -o a.txt -n b.txt e.txt + + Convert c.txt and write to e.txt. Convert and replace a.txt. Convert and + replace b.txt. Convert d.txt and write to f.txt. + + dos2unix -n c.txt e.txt -o a.txt b.txt -n d.txt f.txt + +RECURSIVE CONVERSION + Use dos2unix in combination with the find(1) and xargs(1) commands to + recursively convert text files in a directory tree structure. 
For + instance to convert all .txt files in the directory tree under the + current directory type: + + find . -name '*.txt' |xargs dos2unix + +LOCALIZATION + LANG + The primary language is selected with the environment variable LANG. + The LANG variable consists of several parts. The first part is + in small letters the language code. The second is optional and is + the country code in capital letters, preceded with an underscore. + There is also an optional third part: character encoding, preceded + with a dot. A few examples for POSIX standard type shells: + + export LANG=nl Dutch + export LANG=nl_NL Dutch, The Netherlands + export LANG=nl_BE Dutch, Belgium + export LANG=es_ES Spanish, Spain + export LANG=es_MX Spanish, Mexico + export LANG=en_US.iso88591 English, USA, Latin-1 encoding + export LANG=en_GB.UTF-8 English, UK, UTF-8 encoding + + For a complete list of language and country codes see the gettext + manual: + <http://www.gnu.org/software/gettext/manual/gettext.html#Language-Co + des> + + On Unix systems you can use the command locale(1) to get locale + specific information. + + LANGUAGE + With the LANGUAGE environment variable you can specify a priority + list of languages, separated by colons. Dos2unix gives preference to + LANGUAGE over LANG. For instance, first Dutch and then German: + "LANGUAGE=nl:de". You have to first enable localization, by setting + LANG (or LC_ALL) to a value other than "C", before you can use a + language priority list through the LANGUAGE variable. See also the + gettext manual: + <http://www.gnu.org/software/gettext/manual/gettext.html#The-LANGUAG + E-variable> + + If you select a language which is not available you will get the + standard English messages. + + DOS2UNIX_LOCALEDIR + With the environment variable DOS2UNIX_LOCALEDIR the LOCALEDIR set + during compilation can be overruled. LOCALEDIR is used to find the + language files. The GNU default value is "/usr/local/share/locale". 
+ Option --version will display the LOCALEDIR that is used. + + Example (POSIX shell): + + export DOS2UNIX_LOCALEDIR=$HOME/share/locale + +RETURN VALUE + On success, zero is returned. When a system error occurs the last system + error will be returned. For other errors 1 is returned. + + The return value is always zero in quiet mode, except when wrong + command-line options are used. + +STANDARDS + <http://en.wikipedia.org/wiki/Text_file> + + <http://en.wikipedia.org/wiki/Carriage_return> + + <http://en.wikipedia.org/wiki/Newline> + + <http://en.wikipedia.org/wiki/Unicode> + +AUTHORS + Benjamin Lin - <blin@socs.uts.edu.au> Bernd Johannes Wuebben (mac2unix + mode) - <wuebben@kde.org>, Christian Wurll (add extra newline) - + <wurll@ira.uka.de>, Erwin Waterlander - <waterlan@xs4all.nl> + (Maintainer) + + Project page: <http://waterlan.home.xs4all.nl/dos2unix.html> + + SourceForge page: <http://sourceforge.net/projects/dos2unix/> + + Freecode: <http://freecode.com/projects/dos2unix> + +SEE ALSO + file(1) find(1) iconv(1) locale(1) xargs(1) + |