summaryrefslogtreecommitdiff
path: root/common.c
diff options
context:
space:
mode:
Diffstat (limited to 'common.c')
-rw-r--r--common.c1178
1 files changed, 940 insertions, 238 deletions
diff --git a/common.c b/common.c
index 2ee160b..5547fbd 100644
--- a/common.c
+++ b/common.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2009-2014 Erwin Waterlander
+ * Copyright (C) 2009-2015 Erwin Waterlander
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -28,14 +28,20 @@
#include "dos2unix.h"
#include "querycp.h"
-#if defined(D2U_UNICODE)
+#include <stdarg.h>
+#ifdef D2U_UNIFILE
+#include <windows.h>
+#elif defined(D2U_UNICODE)
#if defined(_WIN32) || defined(__CYGWIN__)
#include <windows.h>
#endif
#endif
+
+#if defined(D2U_UNICODE)
#if !defined(__MSDOS__) && !defined(_WIN32) && !defined(__OS2__) /* Unix, Cygwin */
# include <langinfo.h>
#endif
+#endif
#if defined(__GLIBC__)
/* on glibc, canonicalize_file_name() broken prior to 2.4 (06-Mar-2006) */
@@ -51,6 +57,269 @@
# endif
#endif
+/* global variable */
+#ifdef D2U_UNIFILE
+int d2u_display_encoding = D2U_DISPLAY_ANSI ;
+#endif
+
+/*
+ * Print last system error on Windows.
+ *
+ */
+#if (defined(_WIN32) && !defined(__CYGWIN__))
+/*
+ * d2u_PrintLastError() : print "<progname>: <system message>" on stderr for
+ * the calling thread's last Windows error code (GetLastError()).
+ *
+ * progname : program name used as message prefix.
+ *
+ * When the system cannot supply a message text for the code, the numeric
+ * error code is printed instead.
+ */
+void d2u_PrintLastError(const char *progname)
+{
+    /* Retrieve the system error message for the last-error code */
+
+    LPVOID lpMsgBuf = NULL;
+    DWORD dw;
+    DWORD msglen;
+
+    /* Read the error code first, before any other call can overwrite it. */
+    dw = GetLastError();
+
+    msglen = FormatMessage(
+        FORMAT_MESSAGE_ALLOCATE_BUFFER |
+        FORMAT_MESSAGE_FROM_SYSTEM |
+        FORMAT_MESSAGE_IGNORE_INSERTS,
+        NULL,
+        dw,
+        MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
+        (LPTSTR) &lpMsgBuf,
+        0, NULL );
+
+    /* Display the error message */
+
+    /* MessageBox(NULL, (LPCTSTR)lpMsgBuf, TEXT("Error"), MB_OK); */
+    fprintf(stderr, "%s: ",progname);
+
+    /* FormatMessage returns 0 on failure and then leaves lpMsgBuf unset.
+     * Never print or free a buffer that was not allocated. */
+    if ((msglen == 0) || (lpMsgBuf == NULL)) {
+        fprintf(stderr, "Windows error code %lu\n", (unsigned long)dw);
+        return;
+    }
+
+#ifdef D2U_UNIFILE
+    fwprintf(stderr, L"%ls\n",(LPCTSTR)lpMsgBuf);
+#else
+    fprintf(stderr, "%s\n",(LPCTSTR)lpMsgBuf);
+#endif
+
+    /* The buffer was allocated by FormatMessage (ALLOCATE_BUFFER flag). */
+    LocalFree(lpMsgBuf);
+}
+
+
+/*
+ * d2u_WideCharToMultiByte() : WideCharToMultiByte() with error reporting.
+ * Takes the same arguments and returns the same value as the Windows
+ * function. A return value of 0 is reported via d2u_PrintLastError().
+ */
+int d2u_WideCharToMultiByte(UINT CodePage, DWORD dwFlags, LPCWSTR lpWideCharStr, int cchWideChar, LPSTR lpMultiByteStr, int cbMultiByte, LPCSTR lpDefaultChar, LPBOOL lpUsedDefaultChar)
+{
+    int converted;
+
+    converted = WideCharToMultiByte(CodePage, dwFlags, lpWideCharStr, cchWideChar,
+                                    lpMultiByteStr, cbMultiByte,
+                                    lpDefaultChar, lpUsedDefaultChar);
+    if (converted == 0) {
+        d2u_PrintLastError("dos2unix");
+    }
+
+    return converted;
+}
+
+/*
+ * d2u_MultiByteToWideChar() : MultiByteToWideChar() with error reporting.
+ * Takes the same arguments and returns the same value as the Windows
+ * function. A return value of 0 is reported via d2u_PrintLastError().
+ */
+int d2u_MultiByteToWideChar(UINT CodePage, DWORD dwFlags, LPCSTR lpMultiByteStr, int cbMultiByte, LPWSTR lpWideCharStr, int cchWideChar)
+{
+    int converted;
+
+    converted = MultiByteToWideChar(CodePage, dwFlags, lpMultiByteStr, cbMultiByte,
+                                    lpWideCharStr, cchWideChar);
+    if (converted == 0) {
+        d2u_PrintLastError("dos2unix");
+    }
+
+    return converted;
+}
+
+#endif
+
+/*
+ * d2u_fprintf() : printf wrapper, print in Windows Command Prompt in Unicode
+ * mode, to have consistent output. Regardless of active code page.
+ *
+ * On Windows the file system always uses Unicode UTF-16 encoding, regardless
+ * of the system default code page. This means that files and directories can
+ * have names that cannot be encoded in the system default Windows ANSI code
+ * page.
+ *
+ * Dos2unix for Windows with Unicode file name support translates all directory
+ * names to UTF-8, to be able to work with char type strings. This is also
+ * done to keep the code portable.
+ *
+ * Dos2unix's messages are encoded in the default Windows ANSI code page, which
+ * can be translated with gettext. Gettext/libintl recodes messages (format) to
+ * the system default ANSI code page.
+ *
+ * d2u_fprintf() on Windows assumes that:
+ * - The format string is encoded in the system default ANSI code page.
+ * - The arguments are encoded in UTF-8.
+ *
+ * There are several methods for printing Unicode in the Windows Console, but
+ * none of them is perfect. There are so many issues that I decided to go back
+ * to ANSI by default.
+ */
+
+/*
+ * stream : output stream.
+ * format : printf-style format string (system default ANSI code page when
+ *          D2U_UNIFILE is defined).
+ * ...    : format arguments (strings in UTF-8 when D2U_UNIFILE is defined).
+ *
+ * With D2U_UNIFILE the formatted message is limited to D2U_MAX_PATH bytes.
+ * When a code page conversion fails nothing is printed; the failure itself
+ * is reported by the d2u_MultiByteToWideChar()/d2u_WideCharToMultiByte()
+ * wrappers.
+ */
+void d2u_fprintf( FILE *stream, const char* format, ... ) {
+    va_list args;
+#ifdef D2U_UNIFILE
+    char buf[D2U_MAX_PATH];
+    char formatmbs[D2U_MAX_PATH];
+    wchar_t formatwcs[D2U_MAX_PATH];
+    UINT outputCP;
+    wchar_t wstr[D2U_MAX_PATH];
+    int prevmode;
+/*  HANDLE out_handle;
+
+    if (stream == stderr)
+        out_handle =GetStdHandle(STD_ERROR_HANDLE);
+    else
+        out_handle =GetStdHandle(STD_OUTPUT_HANDLE);
+*/
+    va_start(args, format);
+
+    /* The format string is encoded in the system default
+     * Windows ANSI code page. May have been translated
+     * by gettext. Convert it to wide characters.
+     * A failed conversion leaves the output buffer undefined, so stop
+     * here instead of using it as a format string. */
+    if (d2u_MultiByteToWideChar(CP_ACP,0, format, -1, formatwcs, D2U_MAX_PATH) == 0) {
+        va_end( args );
+        return;
+    }
+    /* then convert the format string to UTF-8 */
+    if (d2u_WideCharToMultiByte(CP_UTF8, 0, formatwcs, -1, formatmbs, D2U_MAX_PATH, NULL, NULL) == 0) {
+        va_end( args );
+        return;
+    }
+
+    /* The arguments (file names) are in UTF-8 encoding, because
+     * in dos2unix for Windows all file names are in UTF-8 format.
+     * Print to buffer (UTF-8) */
+    vsnprintf(buf, sizeof(buf), formatmbs, args);
+    /* Some C runtimes do not terminate the buffer on truncation. */
+    buf[sizeof(buf)-1] = '\0';
+
+    if (d2u_display_encoding == D2U_DISPLAY_UTF8) {
+
+    /* Using UTF-8 has my preference. The following method works fine when NLS is
+       disabled. But when I enable NLS (ENABLE_NLS=1) all non-ASCII characters are
+       printed as a square with a question mark in it. This will make the screen
+       output of dos2unix for most languages unreadable.
+       When I redirect the output to a file, the output is correct UTF-8. I don't
+       know why NLS causes wrong printed text in the console. I just turn NLS off.
+       A disadvantage of this method is that all non-ASCII characters are printed
+       wrongly when the console uses raster font (which is the default).
+       I tried on a Chinese Windows 7 (code page 936) and then all non-ASCII
+       is printed wrongly, using raster and TrueType font. Only in ConEmu I
+       get correct output. I'm afraid that most people use the default Command Prompt
+       and PowerShell consoles, so for many people the text will be unreadable.
+       On a Chinese Windows there was a lot of flickering during the printing of the
+       lines of text. This is not acceptable, but I'm not sure if this was because the
+       Windows Command Prompt was broken. It sometimes crashes.
+     */
+#ifdef ENABLE_NLS
+        /* temporarily disable NLS */
+        setlocale (LC_ALL, "C");
+#endif
+        /* print UTF-8 buffer to console in UTF-8 mode */
+        outputCP = GetConsoleOutputCP();
+        SetConsoleOutputCP(CP_UTF8);
+        fwprintf(stream,L"%S",buf);
+        SetConsoleOutputCP(outputCP);
+#ifdef ENABLE_NLS
+        /* re-enable NLS */
+        setlocale (LC_ALL, "");
+#endif
+
+        /* The following UTF-8 method does not give correct output. I don't know why. */
+        //prevmode = _setmode(_fileno(stream), _O_U8TEXT);
+        //fwprintf(stream,L"%S",buf);
+        //_setmode(_fileno(stream), prevmode);
+
+    } else if (d2u_display_encoding == D2U_DISPLAY_UNICODE) {
+
+    /* Another method for printing Unicode is using WriteConsoleW().
+       WriteConsoleW always prints output correct in the console. Even when
+       using raster font WriteConsoleW prints correctly when possible.
+       WriteConsoleW has one big disadvantage: The output of WriteConsoleW
+       can't be redirected. The output can't be piped to a log file. */
+        /* Convert UTF-8 buffer to wide characters. */
+        //d2u_MultiByteToWideChar(CP_UTF8,0, buf, -1, wstr, D2U_MAX_PATH);
+        //WriteConsoleW(out_handle, wstr, wcslen(wstr), NULL, NULL);
+
+    /* Printing UTF-16 works correctly like WriteConsoleW, with and without NLS enabled.
+       Also works well with raster fonts. In a Chinese CP936 locale it works correctly
+       in the Windows Command Prompt. The downside is that it is UTF-16. When this is
+       redirected to a file it gives a big mess. It is not compatible with ASCII. So
+       even a simple ASCII grep on the screen output will not work.
+       When the output is redirected in a Windows Command Prompt to a file all line breaks end up as
+       0d0a 00 (instead of 0d00 0a00), which makes it a corrupt UTF-16 file.
+       In PowerShell you get correct line breaks 0d00 0a00 when you redirect to a file, but there are
+       null characters (0000) inserted, as if it is UTF-32 with UTF-16 BOM and UTF-16 line breaks.
+       See also test/testu16.c. */
+        /* Only print when the conversion produced a valid wide string. */
+        if (d2u_MultiByteToWideChar(CP_UTF8,0, buf, -1, wstr, D2U_MAX_PATH) != 0) {
+            prevmode = _setmode(_fileno(stream), _O_U16TEXT);
+            fwprintf(stream,L"%ls",wstr);
+            _setmode(_fileno(stream), prevmode);
+        }
+    } else {  /* ANSI */
+        /* Convert the whole message to ANSI, some Unicode characters may fail to translate to ANSI.
+           They will be displayed as a question mark.
+           Only print when both conversion steps succeeded. */
+        if ((d2u_MultiByteToWideChar(CP_UTF8,0, buf, -1, wstr, D2U_MAX_PATH) != 0) &&
+            (d2u_WideCharToMultiByte(CP_ACP, 0, wstr, -1, buf, D2U_MAX_PATH, NULL, NULL) != 0)) {
+            fprintf(stream,"%s",buf);
+        }
+    }
+
+#else
+    va_start(args, format);
+    vfprintf(stream, format, args);
+#endif
+    va_end( args );
+}
+
+/* d2u_ansi_fprintf()
+   fprintf wrapper for Windows console.
+
+   Format and arguments are in ANSI format.
+   Redirect the printing to d2u_fprintf such that the output
+   format is consistent. To prevent a mix of ANSI/UTF-8/UTF-16
+   encodings in the print output.
+
+   With D2U_UNIFILE the formatted message is limited to D2U_MAX_PATH
+   bytes. When a code page conversion fails nothing is printed; the
+   failure itself is reported by the conversion wrappers.
+ */
+
+void d2u_ansi_fprintf( FILE *stream, const char* format, ... ) {
+    va_list args;
+#ifdef D2U_UNIFILE
+    char buf[D2U_MAX_PATH];        /* ANSI encoded string */
+    char bufmbs[D2U_MAX_PATH];     /* UTF-8 encoded string */
+    wchar_t bufwcs[D2U_MAX_PATH];  /* Wide encoded string */
+
+    va_start(args, format);
+
+    vsnprintf(buf, sizeof(buf), format, args);
+    /* Some C runtimes do not terminate the buffer on truncation. */
+    buf[sizeof(buf)-1] = '\0';
+
+    /* The format string and arguments are encoded in the system default
+     * Windows ANSI code page. May have been translated
+     * by gettext. Convert the message to wide characters and then to
+     * UTF-8. A failed conversion leaves its output buffer undefined,
+     * so only print when both steps succeeded. */
+    if ((d2u_MultiByteToWideChar(CP_ACP,0, buf, -1, bufwcs, D2U_MAX_PATH) != 0) &&
+        (d2u_WideCharToMultiByte(CP_UTF8, 0, bufwcs, -1, bufmbs, D2U_MAX_PATH, NULL, NULL) != 0)) {
+        d2u_fprintf(stream, "%s",bufmbs);
+    }
+
+#else
+    va_start(args, format);
+    vfprintf(stream, format, args);
+#endif
+    va_end( args );
+}
+
+/* d2u_rename
+ * wrapper for rename().
+ * On Windows file names are encoded in UTF-8.
+ *
+ * Returns the result of rename()/_wrename(). With D2U_UNIFILE, returns -1
+ * with errno set to EINVAL when a name cannot be converted to wide
+ * characters (e.g. it does not fit in D2U_MAX_PATH characters).
+ */
+int d2u_rename(const char *oldname, const char *newname)
+{
+#ifdef D2U_UNIFILE
+    wchar_t oldnamew[D2U_MAX_PATH];
+    wchar_t newnamew[D2U_MAX_PATH];
+
+    /* A failed conversion leaves the wide buffer undefined;
+     * never pass such a buffer to _wrename(). */
+    if ((d2u_MultiByteToWideChar(CP_UTF8, 0, oldname, -1, oldnamew, D2U_MAX_PATH) == 0) ||
+        (d2u_MultiByteToWideChar(CP_UTF8, 0, newname, -1, newnamew, D2U_MAX_PATH) == 0)) {
+        errno = EINVAL;
+        return -1;
+    }
+    return _wrename(oldnamew, newnamew);
+#else
+    return rename(oldname, newname);
+#endif
+}
+
+/* d2u_unlink
+ * wrapper for unlink().
+ * On Windows file names are encoded in UTF-8.
+ *
+ * Returns the result of unlink()/_wunlink(). With D2U_UNIFILE, returns -1
+ * with errno set to EINVAL when the name cannot be converted to wide
+ * characters (e.g. it does not fit in D2U_MAX_PATH characters).
+ */
+int d2u_unlink(const char *filename)
+{
+#ifdef D2U_UNIFILE
+    wchar_t filenamew[D2U_MAX_PATH];
+
+    /* A failed conversion leaves the wide buffer undefined;
+     * never pass such a buffer to _wunlink(). */
+    if (d2u_MultiByteToWideChar(CP_UTF8, 0, filename, -1, filenamew, D2U_MAX_PATH) == 0) {
+        errno = EINVAL;
+        return -1;
+    }
+    return _wunlink(filenamew);
+#else
+    return unlink(filename);
+#endif
+}
/******************************************************************
*
@@ -62,7 +331,23 @@
*
******************************************************************/
-#if (defined(_WIN32) && !defined(__CYGWIN__))
+#ifdef D2U_UNIFILE
+
+/*
+ * symbolic_link() : D2U_UNIFILE variant.
+ * path : file name in UTF-8 encoding.
+ * Returns 1 when the file attributes of path contain
+ * FILE_ATTRIBUTE_REPARSE_POINT, otherwise 0. Also returns 0 when the
+ * name cannot be converted or the attributes cannot be read.
+ */
+int symbolic_link(const char *path)
+{
+    DWORD attrs;
+    wchar_t pathw[D2U_MAX_PATH];
+
+    /* A failed conversion leaves pathw undefined; do not query it. */
+    if (d2u_MultiByteToWideChar(CP_UTF8, 0, path, -1, pathw, D2U_MAX_PATH) == 0)
+        return(0);
+
+    attrs = GetFileAttributesW(pathw);
+
+    if (attrs == INVALID_FILE_ATTRIBUTES)
+        return(0);
+
+    return ((attrs & FILE_ATTRIBUTE_REPARSE_POINT) != 0);
+}
+
+#elif(defined(_WIN32) && !defined(__CYGWIN__))
int symbolic_link(const char *path)
{
@@ -103,33 +388,44 @@ int symbolic_link(const char *path)
******************************************************************/
int regfile(char *path, int allowSymlinks, CFlag *ipFlag, const char *progname)
{
+#ifdef D2U_UNIFILE
+ struct _stat buf;
+ wchar_t pathw[D2U_MAX_PATH];
+#else
struct stat buf;
+#endif
char *errstr;
+#ifdef D2U_UNIFILE
+ d2u_MultiByteToWideChar(CP_UTF8, 0, path, -1, pathw, D2U_MAX_PATH);
+ if (_wstat(pathw, &buf) == 0) {
+#else
if (STAT(path, &buf) == 0) {
+#endif
#if DEBUG
- fprintf(stderr, "%s: %s MODE 0%o ", progname, path, buf.st_mode);
+ d2u_fprintf(stderr, "%s: %s", progname, path);
+ d2u_fprintf(stderr, " MODE 0%o ", buf.st_mode);
#ifdef S_ISSOCK
if (S_ISSOCK(buf.st_mode))
- fprintf(stderr, " (socket)");
+ d2u_fprintf(stderr, " (socket)");
#endif
#ifdef S_ISLNK
if (S_ISLNK(buf.st_mode))
- fprintf(stderr, " (symbolic link)");
+ d2u_fprintf(stderr, " (symbolic link)");
#endif
if (S_ISREG(buf.st_mode))
- fprintf(stderr, " (regular file)");
+ d2u_fprintf(stderr, " (regular file)");
#ifdef S_ISBLK
if (S_ISBLK(buf.st_mode))
- fprintf(stderr, " (block device)");
+ d2u_fprintf(stderr, " (block device)");
#endif
if (S_ISDIR(buf.st_mode))
- fprintf(stderr, " (directory)");
+ d2u_fprintf(stderr, " (directory)");
if (S_ISCHR(buf.st_mode))
- fprintf(stderr, " (character device)");
+ d2u_fprintf(stderr, " (character device)");
if (S_ISFIFO(buf.st_mode))
- fprintf(stderr, " (FIFO)");
- fprintf(stderr, "\n");
+ d2u_fprintf(stderr, " (FIFO)");
+ d2u_fprintf(stderr, "\n");
#endif
if ((S_ISREG(buf.st_mode))
#ifdef S_ISLNK
@@ -144,7 +440,8 @@ int regfile(char *path, int allowSymlinks, CFlag *ipFlag, const char *progname)
if (ipFlag->verbose) {
ipFlag->error = errno;
errstr = strerror(errno);
- fprintf(stderr, "%s: %s: %s\n", progname, path, errstr);
+ d2u_fprintf(stderr, "%s: %s:", progname, path);
+ d2u_ansi_fprintf(stderr, " %s\n", errstr);
}
return(-1);
}
@@ -161,10 +458,20 @@ int regfile(char *path, int allowSymlinks, CFlag *ipFlag, const char *progname)
******************************************************************/
int regfile_target(char *path, CFlag *ipFlag, const char *progname)
{
+#ifdef D2U_UNIFILE
+ struct _stat buf;
+ wchar_t pathw[D2U_MAX_PATH];
+#else
struct stat buf;
+#endif
char *errstr;
+#ifdef D2U_UNIFILE
+ d2u_MultiByteToWideChar(CP_UTF8, 0, path, -1, pathw, D2U_MAX_PATH);
+ if (_wstat(pathw, &buf) == 0) {
+#else
if (stat(path, &buf) == 0) {
+#endif
if (S_ISREG(buf.st_mode))
return(0);
else
@@ -174,12 +481,119 @@ int regfile_target(char *path, CFlag *ipFlag, const char *progname)
if (ipFlag->verbose) {
ipFlag->error = errno;
errstr = strerror(errno);
- fprintf(stderr, "%s: %s: %s\n", progname, path, errstr);
+ d2u_fprintf(stderr, "%s: %s:", progname, path);
+ d2u_ansi_fprintf(stderr, " %s\n", errstr);
}
return(-1);
}
}
+/*
+ * glob_warg() expands the wide command line arguments.
+ * Input : wide Unicode arguments.
+ * Output : argv : expanded arguments in UTF-8 format.
+ * Returns: new argc value.
+ * -1 when an error occurred.
+ *
+ */
+
+#ifdef D2U_UNIFILE
+/*
+ * argc     : number of entries in wargv.
+ * wargv    : wide (UTF-16) command line arguments.
+ * argv     : receives a newly allocated array of newly allocated UTF-8
+ *            strings; only written on success.
+ * ipFlag   : ipFlag->error receives errno on failure.
+ * progname : program name used as prefix of the error message.
+ *
+ * Arguments that match one or more files are replaced by the matching file
+ * names (with the directory part of the argument prepended); all other
+ * arguments are copied unchanged.
+ * On failure everything allocated here is released again and -1 is returned.
+ */
+int glob_warg(int argc, wchar_t *wargv[], char ***argv, CFlag *ipFlag, const char *progname)
+{
+    int i;
+    int argc_glob = 0;                  /* number of entries stored in argv_new */
+    wchar_t *warg;
+    wchar_t *path = NULL;
+    wchar_t *path_and_filename = NULL;
+    wchar_t *ptr;
+    char *arg = NULL;                   /* converted argument not yet stored in argv_new */
+    char **argv_new;
+    char **argv_tmp;
+    char *errstr;
+    size_t len;
+    int found, add_path;
+    WIN32_FIND_DATAW FindFileData;      /* wide variant, matches FindFirstFileW() */
+    HANDLE hFind = INVALID_HANDLE_VALUE;
+
+    argv_new = (char **)malloc(sizeof(char *));
+    if (argv_new == NULL) goto glob_failed;
+
+    /* The first argument (program name) is copied without globbing. */
+    len = (size_t)d2u_WideCharToMultiByte(CP_UTF8, 0, wargv[0], -1, NULL, 0, NULL, NULL);
+    arg = (char *)malloc(len);
+    if (arg == NULL) goto glob_failed;
+    d2u_WideCharToMultiByte(CP_UTF8, 0, wargv[0], -1, arg, (int)len, NULL, NULL);
+    argv_new[argc_glob++] = arg;
+    arg = NULL;
+
+    for (i=1; i<argc; ++i)
+    {
+        warg = wargv[i];
+        found = 0;
+        add_path = 0;
+        /* FindFileData.cFileName has the path stripped off. We need to add it again. */
+        path = _wcsdup(warg);
+        if (path == NULL) goto glob_failed;
+        /* replace all back slashes with slashes */
+        while ( (ptr = wcschr(path,L'\\')) != NULL) {
+            *ptr = L'/';
+        }
+        /* Cut off the file name part, keep the directory including the last slash. */
+        if ( (ptr = wcsrchr(path,L'/')) != NULL) {
+            ptr++;
+            *ptr = L'\0';
+            add_path = 1;
+        }
+
+        hFind = FindFirstFileW(warg, &FindFileData);
+        while (hFind != INVALID_HANDLE_VALUE)
+        {
+            len = wcslen(path) + wcslen(FindFileData.cFileName) + 2;
+            path_and_filename = (wchar_t *)malloc(len*sizeof(wchar_t));
+            if (path_and_filename == NULL) goto glob_failed;
+            if (add_path) {
+                wcsncpy_s(path_and_filename, len, path, wcslen(path));
+                wcsncat_s(path_and_filename, len, FindFileData.cFileName, wcslen(FindFileData.cFileName));
+            } else {
+                wcsncpy_s(path_and_filename, len, FindFileData.cFileName, wcslen(FindFileData.cFileName));
+            }
+
+            found = 1;
+            len =(size_t) d2u_WideCharToMultiByte(CP_UTF8, 0, path_and_filename, -1, NULL, 0, NULL, NULL);
+            arg = (char *)malloc(len);
+            if (arg == NULL) goto glob_failed;
+            d2u_WideCharToMultiByte(CP_UTF8, 0, path_and_filename, -1, arg, (int)len, NULL, NULL);
+            free(path_and_filename);
+            path_and_filename = NULL;
+
+            /* Grow via a temporary so the old array is not lost when realloc fails. */
+            argv_tmp = (char **)realloc(argv_new, (size_t)(argc_glob+1)*sizeof(char *));
+            if (argv_tmp == NULL) goto glob_failed;
+            argv_new = argv_tmp;
+            argv_new[argc_glob++] = arg;
+            arg = NULL;
+
+            if (!FindNextFileW(hFind, &FindFileData)) {
+                FindClose(hFind);
+                hFind = INVALID_HANDLE_VALUE;
+            }
+        }
+        free(path);
+        path = NULL;
+        if (found == 0) {
+            /* Not a file. Just copy the argument */
+            len =(size_t) d2u_WideCharToMultiByte(CP_UTF8, 0, warg, -1, NULL, 0, NULL, NULL);
+            arg = (char *)malloc(len);
+            if (arg == NULL) goto glob_failed;
+            d2u_WideCharToMultiByte(CP_UTF8, 0, warg, -1, arg, (int)len, NULL, NULL);
+            argv_tmp = (char **)realloc(argv_new, (size_t)(argc_glob+1)*sizeof(char *));
+            if (argv_tmp == NULL) goto glob_failed;
+            argv_new = argv_tmp;
+            argv_new[argc_glob++] = arg;
+            arg = NULL;
+        }
+    }
+    *argv = argv_new;
+    return argc_glob;
+
+  glob_failed:
+    /* Read errno before the clean-up below can change it. */
+    ipFlag->error = errno;
+    errstr = strerror(errno);
+    d2u_fprintf(stderr, "%s:", progname);
+    d2u_ansi_fprintf(stderr, " %s\n", errstr);
+
+    /* Release everything that was acquired so far. */
+    if (hFind != INVALID_HANDLE_VALUE)
+        FindClose(hFind);
+    free(path_and_filename);
+    free(path);
+    free(arg);
+    if (argv_new != NULL) {
+        for (i=0; i<argc_glob; ++i)
+            free(argv_new[i]);
+        free(argv_new);
+    }
+    return -1;
+}
+#endif
+
void PrintBSDLicense(void)
{
printf("%s", _("\
@@ -233,6 +647,10 @@ void PrintUsage(const char *progname)
printf(_(" -b, --keep-bom keep Byte Order Mark (default)\n"));
printf(_(" -c, --convmode conversion mode\n\
convmode ascii, 7bit, iso, mac, default to ascii\n"));
+#ifdef D2U_UNIFILE
+ printf(_(" -D, --display-enc set encoding of displayed text messages\n\
+ encoding ansi, unicode, utf8, default to ansi\n"));
+#endif
printf(_(" -f, --force force conversion of binary files\n"));
#ifdef D2U_UNICODE
#if (defined(_WIN32) && !defined(__CYGWIN__))
@@ -313,11 +731,21 @@ void PrintVersion(const char *progname, const char *localedir)
#elif defined(__OS)
printf(_("%s version.\n"), __OS);
#endif
+#if defined(_WIN32) && defined(WINVER)
+ printf("WINVER 0x%X\n",WINVER);
+#endif
#ifdef D2U_UNICODE
printf("%s", _("With Unicode UTF-16 support.\n"));
#else
printf("%s", _("Without Unicode UTF-16 support.\n"));
#endif
+#ifdef _WIN32
+#ifdef D2U_UNIFILE
+ printf("%s", _("With Unicode file name support.\n"));
+#else
+ printf("%s", _("Without Unicode file name support.\n"));
+#endif
+#endif
#ifdef ENABLE_NLS
printf("%s", _("With native language support.\n"));
#else
@@ -336,7 +764,14 @@ void PrintVersion(const char *progname, const char *localedir)
*/
FILE* OpenInFile(char *ipFN)
{
+#ifdef D2U_UNIFILE
+    /* ipFN is UTF-8 encoded; open the file through the wide-character API. */
+    wchar_t pathw[D2U_MAX_PATH];
+
+    /* A failed conversion leaves pathw undefined; report failure to the
+     * caller (NULL, errno set) instead of opening an arbitrary name. */
+    if (d2u_MultiByteToWideChar(CP_UTF8, 0, ipFN, -1, pathw, D2U_MAX_PATH) == 0) {
+        errno = EINVAL;
+        return NULL;
+    }
+    return _wfopen(pathw, R_CNTRLW);
+#else
return (fopen(ipFN, R_CNTRL));
+#endif
}
@@ -379,6 +814,10 @@ int MakeTempFileFrom(const char *OutFN, char **fname_ret)
#else
int fd = -1;
#endif
+#ifdef D2U_UNIFILE
+ wchar_t fname_strw[D2U_MAX_PATH];
+ wchar_t *namew;
+#endif
*fname_ret = NULL;
@@ -387,8 +826,12 @@ int MakeTempFileFrom(const char *OutFN, char **fname_ret)
dir = dirname(cpy);
+#ifdef D2U_UNIFILE
+ fname_len = D2U_MAX_PATH;
+#else
fname_len = strlen(dir) + strlen("/d2utmpXXXXXX") + sizeof (char);
- if (!(fname_str = malloc(fname_len)))
+#endif
+ if (!(fname_str = (char *)malloc(fname_len)))
goto make_failed;
sprintf(fname_str, "%s%s", dir, "/d2utmpXXXXXX");
*fname_ret = fname_str;
@@ -396,10 +839,19 @@ int MakeTempFileFrom(const char *OutFN, char **fname_ret)
free(cpy);
#ifdef NO_MKSTEMP
+#ifdef D2U_UNIFILE
+ d2u_MultiByteToWideChar(CP_UTF8, 0, fname_str, -1, fname_strw, D2U_MAX_PATH);
+ namew = _wmktemp(fname_strw);
+ d2u_WideCharToMultiByte(CP_UTF8, 0, namew, -1, fname_str, (int)fname_len, NULL, NULL);
+ *fname_ret = fname_str;
+ if ((fd = _wfopen(fname_strw, W_CNTRLW)) == NULL)
+ goto make_failed;
+#else
name = mktemp(fname_str);
*fname_ret = name;
if ((fd = fopen(fname_str, W_CNTRL)) == NULL)
goto make_failed;
+#endif
#else
if ((fd = mkstemp(fname_str)) == -1)
goto make_failed;
@@ -443,7 +895,8 @@ int ResolveSymbolicLink(char *lFN, char **rFN, CFlag *ipFlag, const char *progna
if (ipFlag->verbose) {
ipFlag->error = errno;
errstr = strerror(errno);
- fprintf(stderr, "%s: %s: %s\n", progname, lFN, errstr);
+ d2u_fprintf(stderr, "%s: %s:", progname, lFN);
+ d2u_ansi_fprintf(stderr, " %s\n", errstr);
}
RetVal = -1;
}
@@ -454,7 +907,8 @@ int ResolveSymbolicLink(char *lFN, char **rFN, CFlag *ipFlag, const char *progna
if (ipFlag->verbose) {
ipFlag->error = errno;
errstr = strerror(errno);
- fprintf(stderr, "%s: %s: %s\n", progname, lFN, errstr);
+ d2u_fprintf(stderr, "%s: %s:", progname, lFN);
+ d2u_ansi_fprintf(stderr, " %s\n", errstr);
}
RetVal = -1;
}
@@ -471,7 +925,8 @@ int ResolveSymbolicLink(char *lFN, char **rFN, CFlag *ipFlag, const char *progna
if (ipFlag->verbose) {
ipFlag->error = errno;
errstr = strerror(errno);
- fprintf(stderr, "%s: %s: %s\n", progname, lFN, errstr);
+ d2u_fprintf(stderr, "%s: %s:", progname, lFN);
+ d2u_ansi_fprintf(stderr, " %s\n", errstr);
}
RetVal = -1;
}
@@ -482,7 +937,8 @@ int ResolveSymbolicLink(char *lFN, char **rFN, CFlag *ipFlag, const char *progna
if (ipFlag->verbose) {
ipFlag->error = errno;
errstr = strerror(errno);
- fprintf(stderr, "%s: %s: %s\n", progname, lFN, errstr);
+ d2u_fprintf(stderr, "%s: %s:", progname, lFN);
+ d2u_ansi_fprintf(stderr, " %s\n", errstr);
}
free(targetFN);
RetVal = -1;
@@ -502,6 +958,9 @@ int ResolveSymbolicLink(char *lFN, char **rFN, CFlag *ipFlag, const char *progna
return RetVal;
}
+/* Read the Byte Order Mark.
+ Returns file pointer or NULL in case of a read error */
+
FILE *read_bom (FILE *f, int *bomtype)
{
int bom[4];
@@ -517,18 +976,23 @@ FILE *read_bom (FILE *f, int *bomtype)
/* Check for BOM */
if (f != NULL) {
if ((bom[0] = fgetc(f)) == EOF) {
- ungetc(bom[0], f);
+ if (ferror(f)) {
+ return NULL;
+ }
*bomtype = FILE_MBS;
return(f);
}
if ((bom[0] != 0xff) && (bom[0] != 0xfe) && (bom[0] != 0xef) && (bom[0] != 0x84)) {
- ungetc(bom[0], f);
+ if (ungetc(bom[0], f) == EOF) return NULL;
*bomtype = FILE_MBS;
return(f);
}
if ((bom[1] = fgetc(f)) == EOF) {
- ungetc(bom[1], f);
- ungetc(bom[0], f);
+ if (ferror(f)) {
+ return NULL;
+ }
+ if (ungetc(bom[1], f) == EOF) return NULL;
+ if (ungetc(bom[0], f) == EOF) return NULL;
*bomtype = FILE_MBS;
return(f);
}
@@ -541,9 +1005,12 @@ FILE *read_bom (FILE *f, int *bomtype)
return(f);
}
if ((bom[2] = fgetc(f)) == EOF) {
- ungetc(bom[2], f);
- ungetc(bom[1], f);
- ungetc(bom[0], f);
+ if (ferror(f)) {
+ return NULL;
+ }
+ if (ungetc(bom[2], f) == EOF) return NULL;
+ if (ungetc(bom[1], f) == EOF) return NULL;
+ if (ungetc(bom[0], f) == EOF) return NULL;
*bomtype = FILE_MBS;
return(f);
}
@@ -552,16 +1019,19 @@ FILE *read_bom (FILE *f, int *bomtype)
return(f);
}
if ((bom[0] == 0x84) && (bom[1] == 0x31) && (bom[2]== 0x95)) {
- bom[3] = fgetc(f);
- if (bom[3]== 0x33) { /* GB18030 */
- *bomtype = FILE_GB18030;
- return(f);
- }
- ungetc(bom[3], f);
+ bom[3] = fgetc(f);
+ if (ferror(f)) {
+ return NULL;
+ }
+ if (bom[3]== 0x33) { /* GB18030 */
+ *bomtype = FILE_GB18030;
+ return(f);
+ }
+ if (ungetc(bom[3], f) == EOF) return NULL;
}
- ungetc(bom[2], f);
- ungetc(bom[1], f);
- ungetc(bom[0], f);
+ if (ungetc(bom[2], f) == EOF) return NULL;
+ if (ungetc(bom[1], f) == EOF) return NULL;
+ if (ungetc(bom[0], f) == EOF) return NULL;
*bomtype = FILE_MBS;
return(f);
}
@@ -579,31 +1049,31 @@ FILE *write_bom (FILE *f, CFlag *ipFlag, const char *progname)
{
switch (bomtype) {
case FILE_UTF16LE: /* UTF-16 Little Endian */
- fprintf(f, "%s", "\xFF\xFE");
+ if (fprintf(f, "%s", "\xFF\xFE") < 0) return NULL;
if (ipFlag->verbose > 1) {
- fprintf(stderr, "%s: ", progname);
- fprintf(stderr, _("Writing %s BOM.\n"), _("UTF-16LE"));
+ d2u_fprintf(stderr, "%s: ", progname);
+ d2u_ansi_fprintf(stderr, _("Writing %s BOM.\n"), _("UTF-16LE"));
}
break;
case FILE_UTF16BE: /* UTF-16 Big Endian */
- fprintf(f, "%s", "\xFE\xFF");
+ if (fprintf(f, "%s", "\xFE\xFF") < 0) return NULL;
if (ipFlag->verbose > 1) {
- fprintf(stderr, "%s: ", progname);
- fprintf(stderr, _("Writing %s BOM.\n"), _("UTF-16BE"));
+ d2u_fprintf(stderr, "%s: ", progname);
+ d2u_ansi_fprintf(stderr, _("Writing %s BOM.\n"), _("UTF-16BE"));
}
break;
case FILE_GB18030: /* GB18030 */
- fprintf(f, "%s", "\x84\x31\x95\x33");
+ if (fprintf(f, "%s", "\x84\x31\x95\x33") < 0) return NULL;
if (ipFlag->verbose > 1) {
- fprintf(stderr, "%s: ", progname);
- fprintf(stderr, _("Writing %s BOM.\n"), _("GB18030"));
+ d2u_fprintf(stderr, "%s: ", progname);
+ d2u_ansi_fprintf(stderr, _("Writing %s BOM.\n"), _("GB18030"));
}
break;
default: /* UTF-8 */
- fprintf(f, "%s", "\xEF\xBB\xBF");
+ if (fprintf(f, "%s", "\xEF\xBB\xBF") < 0) return NULL;
if (ipFlag->verbose > 1) {
- fprintf(stderr, "%s: ", progname);
- fprintf(stderr, _("Writing %s BOM.\n"), _("UTF-8"));
+ d2u_fprintf(stderr, "%s: ", progname);
+ d2u_ansi_fprintf(stderr, _("Writing %s BOM.\n"), _("UTF-8"));
}
;
}
@@ -611,18 +1081,18 @@ FILE *write_bom (FILE *f, CFlag *ipFlag, const char *progname)
if ((bomtype == FILE_GB18030) ||
(((bomtype == FILE_UTF16LE)||(bomtype == FILE_UTF16BE))&&(ipFlag->locale_target == TARGET_GB18030))
) {
- fprintf(f, "%s", "\x84\x31\x95\x33"); /* GB18030 */
+ if (fprintf(f, "%s", "\x84\x31\x95\x33") < 0) return NULL; /* GB18030 */
if (ipFlag->verbose > 1)
{
- fprintf(stderr, "%s: ", progname);
- fprintf(stderr, _("Writing %s BOM.\n"), _("GB18030"));
+ d2u_fprintf(stderr, "%s: ", progname);
+ d2u_ansi_fprintf(stderr, _("Writing %s BOM.\n"), _("GB18030"));
}
} else {
- fprintf(f, "%s", "\xEF\xBB\xBF"); /* UTF-8 */
+ if (fprintf(f, "%s", "\xEF\xBB\xBF") < 0) return NULL; /* UTF-8 */
if (ipFlag->verbose > 1)
{
- fprintf(stderr, "%s: ", progname);
- fprintf(stderr, _("Writing %s BOM.\n"), _("UTF-8"));
+ d2u_fprintf(stderr, "%s: ", progname);
+ d2u_ansi_fprintf(stderr, _("Writing %s BOM.\n"), _("UTF-8"));
}
}
}
@@ -631,26 +1101,45 @@ FILE *write_bom (FILE *f, CFlag *ipFlag, const char *progname)
void print_bom (const int bomtype, const char *filename, const char *progname)
{
+ char informat[64]; /* translated name of the BOM type; converted in place to UTF-8 when D2U_UNIFILE is defined */
+# ifdef D2U_UNIFILE
+ wchar_t informatw[64]; /* wide-character intermediate for the ANSI to UTF-8 conversion */
+#endif
+
switch (bomtype) {
case FILE_UTF16LE: /* UTF-16 Little Endian */
- fprintf(stderr, "%s: ", progname);
- fprintf(stderr, _("Input file %s has %s BOM.\n"), filename, _("UTF-16LE"));
+ strncpy(informat,_("UTF-16LE"),sizeof(informat));
break;
case FILE_UTF16BE: /* UTF-16 Big Endian */
- fprintf(stderr, "%s: ", progname);
- fprintf(stderr, _("Input file %s has %s BOM.\n"), filename, _("UTF-16BE"));
+ strncpy(informat,_("UTF-16BE"),sizeof(informat));
break;
case FILE_UTF8: /* UTF-8 */
- fprintf(stderr, "%s: ", progname);
- fprintf(stderr, _("Input file %s has %s BOM.\n"), filename, _("UTF-8"));
+ strncpy(informat,_("UTF-8"),sizeof(informat));
break;
case FILE_GB18030: /* GB18030 */
- fprintf(stderr, "%s: ", progname);
- fprintf(stderr, _("Input file %s has %s BOM.\n"), filename, _("GB18030"));
+ strncpy(informat,_("GB18030"),sizeof(informat));
break;
default:
;
}
+
+ if (bomtype > 0) { /* print only for a positive bomtype. NOTE(review): assumes every positive value is one of the cases above, otherwise informat is uninitialized here - confirm */
+ informat[sizeof(informat)-1] = '\0'; /* strncpy does not terminate the copy when the source fills the whole buffer */
+
+/* Change informat to UTF-8 for d2u_fprintf. */
+# ifdef D2U_UNIFILE
+ /* The format string is encoded in the system default
+ * Windows ANSI code page. May have been translated
+ * by gettext. Convert it to wide characters. */
+ d2u_MultiByteToWideChar(CP_ACP,0, informat, -1, informatw, sizeof(informat)); /* sizeof(informat) is passed as the wide-character count; it equals the element count of informatw (both are 64) */
+ /* then convert the format string to UTF-8 */
+ d2u_WideCharToMultiByte(CP_UTF8, 0, informatw, -1, informat, sizeof(informat), NULL, NULL); /* return value is not checked here */
+#endif
+
+ d2u_fprintf(stderr, "%s: ", progname);
+ d2u_fprintf(stderr, _("Input file %s has %s BOM.\n"), filename, informat);
+ }
+
}
void print_bom_info (const int bomtype)
@@ -659,19 +1148,19 @@ void print_bom_info (const int bomtype)
that process the output may not work in other than English locales. */
switch (bomtype) {
case FILE_UTF16LE: /* UTF-16 Little Endian */
- printf(" UTF-16LE");
+ d2u_fprintf(stdout, " UTF-16LE");
break;
case FILE_UTF16BE: /* UTF-16 Big Endian */
- printf(" UTF-16BE");
+ d2u_fprintf(stdout, " UTF-16BE");
break;
case FILE_UTF8: /* UTF-8 */
- printf(" UTF-8 ");
+ d2u_fprintf(stdout, " UTF-8 ");
break;
case FILE_GB18030: /* GB18030 */
- printf(" GB18030 ");
+ d2u_fprintf(stdout, " GB18030 ");
break;
default:
- printf(" no_bom ");
+ d2u_fprintf(stdout, " no_bom ");
;
}
}
@@ -683,16 +1172,19 @@ int check_unicode_info(FILE *InF, CFlag *ipFlag, const char *progname, int *bomt
#ifdef D2U_UNICODE
if (ipFlag->verbose > 1) {
if (ipFlag->ConvMode == CONVMODE_UTF16LE) {
- fprintf(stderr, "%s: ", progname);
- fprintf(stderr, _("Assuming UTF-16LE encoding.\n") );
+ d2u_fprintf(stderr, "%s: ", progname);
+ d2u_fprintf(stderr, _("Assuming UTF-16LE encoding.\n") );
}
if (ipFlag->ConvMode == CONVMODE_UTF16BE) {
- fprintf(stderr, "%s: ", progname);
- fprintf(stderr, _("Assuming UTF-16BE encoding.\n") );
+ d2u_fprintf(stderr, "%s: ", progname);
+ d2u_fprintf(stderr, _("Assuming UTF-16BE encoding.\n") );
}
}
#endif
- InF = read_bom(InF, &ipFlag->bomtype);
+ if ((InF = read_bom(InF, &ipFlag->bomtype)) == NULL) {
+ d2u_getc_error(ipFlag,progname);
+ return -1;
+ }
*bomtype_orig = ipFlag->bomtype;
#ifdef D2U_UNICODE
if ((ipFlag->bomtype == FILE_MBS) && (ipFlag->ConvMode == CONVMODE_UTF16LE))
@@ -723,16 +1215,19 @@ int check_unicode(FILE *InF, FILE *TempF, CFlag *ipFlag, const char *ipInFN, co
#ifdef D2U_UNICODE
if (ipFlag->verbose > 1) {
if (ipFlag->ConvMode == CONVMODE_UTF16LE) {
- fprintf(stderr, "%s: ", progname);
- fprintf(stderr, _("Assuming UTF-16LE encoding.\n") );
+ d2u_fprintf(stderr, "%s: ", progname);
+ d2u_fprintf(stderr, _("Assuming UTF-16LE encoding.\n") );
}
if (ipFlag->ConvMode == CONVMODE_UTF16BE) {
- fprintf(stderr, "%s: ", progname);
- fprintf(stderr, _("Assuming UTF-16BE encoding.\n") );
+ d2u_fprintf(stderr, "%s: ", progname);
+ d2u_fprintf(stderr, _("Assuming UTF-16BE encoding.\n") );
}
}
#endif
- InF = read_bom(InF, &ipFlag->bomtype);
+ if ((InF = read_bom(InF, &ipFlag->bomtype)) == NULL) {
+ d2u_getc_error(ipFlag,progname);
+ return -1;
+ }
if (ipFlag->verbose > 1)
print_bom(ipFlag->bomtype, ipInFN, progname);
#ifdef D2U_UNICODE
@@ -752,15 +1247,15 @@ int check_unicode(FILE *InF, FILE *TempF, CFlag *ipFlag, const char *ipInFN, co
}
}
#endif
-#endif
#if !defined(__MSDOS__) && !defined(_WIN32) && !defined(__OS2__) /* Unix, Cygwin */
if (strcmp(nl_langinfo(CODESET), "GB18030") == 0)
ipFlag->locale_target = TARGET_GB18030;
#endif
+#endif
if ((!RetVal) && ((ipFlag->add_bom) || ((ipFlag->keep_bom) && (ipFlag->bomtype > 0))))
- write_bom(TempF, ipFlag, progname);
+ if (write_bom(TempF, ipFlag, progname) == NULL) return -1;
return RetVal;
}
@@ -781,7 +1276,12 @@ int ConvertNewFile(char *ipInFN, char *ipOutFN, CFlag *ipFlag, const char *progn
FILE *TempF = NULL;
char *TempPath;
char *errstr;
+#ifdef D2U_UNIFILE
+ struct _stat StatBuf;
+ wchar_t pathw[D2U_MAX_PATH];
+#else
struct stat StatBuf;
+#endif
struct utimbuf UTimeBuf;
#ifndef NO_CHMOD
mode_t mask;
@@ -826,11 +1326,17 @@ int ConvertNewFile(char *ipInFN, char *ipOutFN, CFlag *ipFlag, const char *progn
}
/* retrieve ipInFN file date stamp */
+#ifdef D2U_UNIFILE
+ d2u_MultiByteToWideChar(CP_UTF8, 0, ipInFN, -1, pathw, D2U_MAX_PATH);
+ if (_wstat(pathw, &StatBuf)) {
+#else
if (stat(ipInFN, &StatBuf)) {
+#endif
if (ipFlag->verbose) {
ipFlag->error = errno;
errstr = strerror(errno);
- fprintf(stderr, "%s: %s: %s\n", progname, ipInFN, errstr);
+ d2u_fprintf(stderr, "%s: %s:", progname, ipInFN);
+ d2u_ansi_fprintf(stderr, " %s\n", errstr);
}
RetVal = -1;
}
@@ -843,15 +1349,15 @@ int ConvertNewFile(char *ipInFN, char *ipOutFN, CFlag *ipFlag, const char *progn
if (ipFlag->verbose) {
ipFlag->error = errno;
errstr = strerror(errno);
- fprintf(stderr, "%s: ", progname);
- fprintf(stderr, _("Failed to open temporary output file: %s\n"), errstr);
+ d2u_fprintf(stderr, "%s: ", progname);
+ d2u_ansi_fprintf(stderr, _("Failed to open temporary output file: %s\n"), errstr);
}
RetVal = -1;
}
#if DEBUG
- fprintf(stderr, "%s: ", progname);
- fprintf(stderr, _("using %s as temporary file\n"), TempPath);
+ d2u_fprintf(stderr, "%s: ", progname);
+ d2u_fprintf(stderr, _("using %s as temporary file\n"), TempPath);
#endif
/* can open in file? */
@@ -860,7 +1366,8 @@ int ConvertNewFile(char *ipInFN, char *ipOutFN, CFlag *ipFlag, const char *progn
if (InF == NULL) {
ipFlag->error = errno;
errstr = strerror(errno);
- fprintf(stderr, "%s: %s: %s\n", progname, ipInFN, errstr);
+ d2u_fprintf(stderr, "%s: %s:", progname, ipInFN);
+ d2u_ansi_fprintf(stderr, " %s\n", errstr);
RetVal = -1;
}
}
@@ -873,7 +1380,8 @@ int ConvertNewFile(char *ipInFN, char *ipOutFN, CFlag *ipFlag, const char *progn
if ((TempF=OpenOutFile(fd)) == NULL) {
ipFlag->error = errno;
errstr = strerror(errno);
- fprintf(stderr, "%s: %s\n", progname, errstr);
+ d2u_fprintf(stderr, "%s:", progname);
+ d2u_ansi_fprintf(stderr, " %s\n", errstr);
#endif
fclose (InF);
InF = NULL;
@@ -913,8 +1421,9 @@ int ConvertNewFile(char *ipInFN, char *ipOutFN, CFlag *ipFlag, const char *progn
if (ipFlag->verbose) {
ipFlag->error = errno;
errstr = strerror(errno);
- fprintf(stderr, "%s: ", progname);
- fprintf(stderr, _("Failed to write to temporary output file %s: %s\n"), TempPath, errstr);
+ d2u_fprintf(stderr, "%s: ", progname);
+ d2u_fprintf(stderr, _("Failed to write to temporary output file %s:"), TempPath);
+ d2u_ansi_fprintf(stderr, " %s\n", errstr);
}
RetVal = -1;
}
@@ -943,8 +1452,9 @@ int ConvertNewFile(char *ipInFN, char *ipOutFN, CFlag *ipFlag, const char *progn
if (ipFlag->verbose) {
ipFlag->error = errno;
errstr = strerror(errno);
- fprintf(stderr, "%s: ", progname);
- fprintf(stderr, _("Failed to change the permissions of temporary output file %s: %s\n"), TempPath, errstr);
+ d2u_fprintf(stderr, "%s: ", progname);
+ d2u_fprintf(stderr, _("Failed to change the permissions of temporary output file %s:"), TempPath);
+ d2u_ansi_fprintf(stderr, " %s\n", errstr);
}
}
}
@@ -959,8 +1469,9 @@ int ConvertNewFile(char *ipInFN, char *ipOutFN, CFlag *ipFlag, const char *progn
if (ipFlag->verbose) {
ipFlag->error = errno;
errstr = strerror(errno);
- fprintf(stderr, "%s: ", progname);
- fprintf(stderr, _("Failed to change the owner and group of temporary output file %s: %s\n"), TempPath, errstr);
+ d2u_fprintf(stderr, "%s: ", progname);
+ d2u_fprintf(stderr, _("Failed to change the owner and group of temporary output file %s:"), TempPath);
+ d2u_ansi_fprintf(stderr, " %s\n", errstr);
}
RetVal = -1;
}
@@ -976,7 +1487,8 @@ int ConvertNewFile(char *ipInFN, char *ipOutFN, CFlag *ipFlag, const char *progn
if (ipFlag->verbose) {
ipFlag->error = errno;
errstr = strerror(errno);
- fprintf(stderr, "%s: %s: %s\n", progname, TempPath, errstr);
+ d2u_fprintf(stderr, "%s: %s:", progname, TempPath);
+ d2u_ansi_fprintf(stderr, " %s\n", errstr);
}
RetVal = -1;
}
@@ -984,11 +1496,12 @@ int ConvertNewFile(char *ipInFN, char *ipOutFN, CFlag *ipFlag, const char *progn
/* any error? cleanup the temp file */
if (RetVal && (TempPath != NULL)) {
- if (unlink(TempPath) && (errno != ENOENT)) {
+ if (d2u_unlink(TempPath) && (errno != ENOENT)) {
if (ipFlag->verbose) {
ipFlag->error = errno;
errstr = strerror(errno);
- fprintf(stderr, "%s: %s: %s\n", progname, TempPath, errstr);
+ d2u_fprintf(stderr, "%s: %s:", progname, TempPath);
+ d2u_ansi_fprintf(stderr, " %s\n", errstr);
}
RetVal = -1;
}
@@ -1003,9 +1516,9 @@ int ConvertNewFile(char *ipInFN, char *ipOutFN, CFlag *ipFlag, const char *progn
ResolveSymlinkResult = ResolveSymbolicLink(ipOutFN, &TargetFN, ipFlag, progname);
if (ResolveSymlinkResult < 0) {
if (ipFlag->verbose) {
- fprintf(stderr, "%s: ", progname);
- fprintf(stderr, _("problems resolving symbolic link '%s'\n"), ipOutFN);
- fprintf(stderr, _(" output file remains in '%s'\n"), TempPath);
+ d2u_fprintf(stderr, "%s: ", progname);
+ d2u_fprintf(stderr, _("problems resolving symbolic link '%s'\n"), ipOutFN);
+ d2u_fprintf(stderr, _(" output file remains in '%s'\n"), TempPath);
}
RetVal = -1;
}
@@ -1015,26 +1528,29 @@ int ConvertNewFile(char *ipInFN, char *ipOutFN, CFlag *ipFlag, const char *progn
/* can rename temporary file to output file? */
if (!RetVal) {
#ifdef NEED_REMOVE
- if (unlink(TargetFN) && (errno != ENOENT)) {
+ if (d2u_unlink(TargetFN) && (errno != ENOENT)) {
if (ipFlag->verbose) {
ipFlag->error = errno;
errstr = strerror(errno);
- fprintf(stderr, "%s: %s: %s\n", progname, TargetFN, errstr);
+ d2u_fprintf(stderr, "%s: %s:", progname, TargetFN);
+ d2u_ansi_fprintf(stderr, " %s\n", errstr);
}
RetVal = -1;
}
#endif
- if (rename(TempPath, TargetFN) == -1) {
+
+ if (d2u_rename(TempPath, TargetFN) != 0) {
if (ipFlag->verbose) {
ipFlag->error = errno;
errstr = strerror(errno);
- fprintf(stderr, "%s: ", progname);
- fprintf(stderr, _("problems renaming '%s' to '%s': %s\n"), TempPath, TargetFN, errstr);
+ d2u_fprintf(stderr, "%s: ", progname);
+ d2u_fprintf(stderr, _("problems renaming '%s' to '%s':"), TempPath, TargetFN);
+ d2u_ansi_fprintf(stderr, " %s\n", errstr);
#ifdef S_ISLNK
if (ResolveSymlinkResult > 0)
- fprintf(stderr, _(" which is the target of symbolic link '%s'\n"), ipOutFN);
+ d2u_fprintf(stderr, _(" which is the target of symbolic link '%s'\n"), ipOutFN);
#endif
- fprintf(stderr, _(" output file remains in '%s'\n"), TempPath);
+ d2u_fprintf(stderr, _(" output file remains in '%s'\n"), TempPath);
}
RetVal = -1;
}
@@ -1094,17 +1610,17 @@ int ConvertStdio(CFlag *ipFlag, const char *progname,
void print_messages_stdio(const CFlag *pFlag, const char *progname)
{
if (pFlag->status & BINARY_FILE) {
- fprintf(stderr,"%s: ",progname);
- fprintf(stderr, _("Skipping binary file %s\n"), "stdin");
+ d2u_fprintf(stderr,"%s: ",progname);
+ d2u_fprintf(stderr, _("Skipping binary file %s\n"), "stdin");
} else if (pFlag->status & WRONG_CODEPAGE) {
- fprintf(stderr,"%s: ",progname);
- fprintf(stderr, _("code page %d is not supported.\n"), pFlag->ConvMode);
+ d2u_fprintf(stderr,"%s: ",progname);
+ d2u_fprintf(stderr, _("code page %d is not supported.\n"), pFlag->ConvMode);
} else if (pFlag->status & WCHAR_T_TOO_SMALL) {
- fprintf(stderr,"%s: ",progname);
- fprintf(stderr, _("Skipping UTF-16 file %s, the size of wchar_t is %d bytes.\n"), "stdin", (int)sizeof(wchar_t));
+ d2u_fprintf(stderr,"%s: ",progname);
+ d2u_fprintf(stderr, _("Skipping UTF-16 file %s, the size of wchar_t is %d bytes.\n"), "stdin", (int)sizeof(wchar_t));
} else if (pFlag->status & UNICODE_CONVERSION_ERROR) {
- fprintf(stderr,"%s: ",progname);
- fprintf(stderr, _("Skipping UTF-16 file %s, an UTF-16 conversion error occurred.\n"), "stdin");
+ d2u_fprintf(stderr,"%s: ",progname);
+ d2u_fprintf(stderr, _("Skipping UTF-16 file %s, an UTF-16 conversion error occurred on line %u.\n"), "stdin", pFlag->line_nr);
}
}
@@ -1146,65 +1662,83 @@ void print_format(const CFlag *pFlag, char *informat, char *outformat, size_t li
void print_messages_newfile(const CFlag *pFlag, const char *infile, const char *outfile, const char *progname, const int RetVal)
{
- char informat[10];
- char outformat[32];
+ char informat[32];
+ char outformat[64];
+# ifdef D2U_UNIFILE
+ wchar_t informatw[32];
+ wchar_t outformatw[64];
+#endif
print_format(pFlag, informat, outformat, sizeof(informat), sizeof(outformat));
+/* Change informat and outformat to UTF-8 for d2u_fprintf. */
+# ifdef D2U_UNIFILE
+ /* The format string is encoded in the system default
+ * Windows ANSI code page. May have been translated
+ * by gettext. Convert it to wide characters. */
+ d2u_MultiByteToWideChar(CP_ACP,0, informat, -1, informatw, sizeof(informat));
+ d2u_MultiByteToWideChar(CP_ACP,0, outformat, -1, outformatw, sizeof(outformat));
+ /* then convert the format string to UTF-8 */
+ d2u_WideCharToMultiByte(CP_UTF8, 0, informatw, -1, informat, sizeof(informat), NULL, NULL);
+ d2u_WideCharToMultiByte(CP_UTF8, 0, outformatw, -1, outformat, sizeof(outformat), NULL, NULL);
+#endif
+
if (pFlag->status & NO_REGFILE) {
- fprintf(stderr,"%s: ",progname);
- fprintf(stderr, _("Skipping %s, not a regular file.\n"), infile);
+ d2u_fprintf(stderr,"%s: ",progname);
+ d2u_fprintf(stderr, _("Skipping %s, not a regular file.\n"), infile);
} else if (pFlag->status & OUTPUTFILE_SYMLINK) {
- fprintf(stderr,"%s: ",progname);
- fprintf(stderr, _("Skipping %s, output file %s is a symbolic link.\n"), infile, outfile);
+ d2u_fprintf(stderr,"%s: ",progname);
+ d2u_fprintf(stderr, _("Skipping %s, output file %s is a symbolic link.\n"), infile, outfile);
} else if (pFlag->status & INPUT_TARGET_NO_REGFILE) {
- fprintf(stderr,"%s: ",progname);
- fprintf(stderr, _("Skipping symbolic link %s, target is not a regular file.\n"), infile);
+ d2u_fprintf(stderr,"%s: ",progname);
+ d2u_fprintf(stderr, _("Skipping symbolic link %s, target is not a regular file.\n"), infile);
} else if (pFlag->status & OUTPUT_TARGET_NO_REGFILE) {
- fprintf(stderr,"%s: ",progname);
- fprintf(stderr, _("Skipping %s, target of symbolic link %s is not a regular file.\n"), infile, outfile);
+ d2u_fprintf(stderr,"%s: ",progname);
+ d2u_fprintf(stderr, _("Skipping %s, target of symbolic link %s is not a regular file.\n"), infile, outfile);
} else if (pFlag->status & BINARY_FILE) {
- fprintf(stderr,"%s: ",progname);
- fprintf(stderr, _("Skipping binary file %s\n"), infile);
+ d2u_fprintf(stderr,"%s: ",progname);
+ d2u_fprintf(stderr, _("Skipping binary file %s\n"), infile);
} else if (pFlag->status & WRONG_CODEPAGE) {
- fprintf(stderr,"%s: ",progname);
- fprintf(stderr, _("code page %d is not supported.\n"), pFlag->ConvMode);
+ d2u_fprintf(stderr,"%s: ",progname);
+ d2u_fprintf(stderr, _("code page %d is not supported.\n"), pFlag->ConvMode);
} else if (pFlag->status & WCHAR_T_TOO_SMALL) {
- fprintf(stderr,"%s: ",progname);
- fprintf(stderr, _("Skipping UTF-16 file %s, the size of wchar_t is %d bytes.\n"), infile, (int)sizeof(wchar_t));
+ d2u_fprintf(stderr,"%s: ",progname);
+ d2u_fprintf(stderr, _("Skipping UTF-16 file %s, the size of wchar_t is %d bytes.\n"), infile, (int)sizeof(wchar_t));
} else if (pFlag->status & UNICODE_CONVERSION_ERROR) {
- fprintf(stderr,"%s: ",progname);
- fprintf(stderr, _("Skipping UTF-16 file %s, an UTF-16 conversion error occurred.\n"), infile);
+ d2u_fprintf(stderr,"%s: ",progname);
+ d2u_fprintf(stderr, _("Skipping UTF-16 file %s, an UTF-16 conversion error occurred on line %u.\n"), infile, pFlag->line_nr);
} else {
- fprintf(stderr,"%s: ",progname);
+ d2u_fprintf(stderr,"%s: ",progname);
if (informat[0] == '\0') {
- if (is_dos2unix(progname))
- fprintf(stderr, _("converting file %s to file %s in Unix format...\n"), infile, outfile);
- else {
- if (pFlag->FromToMode == FROMTO_UNIX2MAC)
- fprintf(stderr, _("converting file %s to file %s in Mac format...\n"), infile, outfile);
- else
- fprintf(stderr, _("converting file %s to file %s in DOS format...\n"), infile, outfile);
+ if (is_dos2unix(progname)) {
+ if (!RetVal) d2u_fprintf(stderr, _("converting file %s to file %s in Unix format...\n"), infile, outfile);
+ } else {
+ if (pFlag->FromToMode == FROMTO_UNIX2MAC) {
+ if (!RetVal) d2u_fprintf(stderr, _("converting file %s to file %s in Mac format...\n"), infile, outfile);
+ } else {
+ if (!RetVal) d2u_fprintf(stderr, _("converting file %s to file %s in DOS format...\n"), infile, outfile);
+ }
}
} else {
- if (is_dos2unix(progname))
+ if (is_dos2unix(progname)) {
/* TRANSLATORS:
1st %s is encoding of input file.
2nd %s is name of input file.
3rd %s is encoding of output file.
4th %s is name of output file.
E.g.: converting UTF-16LE file in.txt to UTF-8 file out.txt in Unix format... */
- fprintf(stderr, _("converting %s file %s to %s file %s in Unix format...\n"), informat, infile, outformat, outfile);
- else {
- if (pFlag->FromToMode == FROMTO_UNIX2MAC)
- fprintf(stderr, _("converting %s file %s to %s file %s in Mac format...\n"), informat, infile, outformat, outfile);
- else
- fprintf(stderr, _("converting %s file %s to %s file %s in DOS format...\n"), informat, infile, outformat, outfile);
+ if (!RetVal) d2u_fprintf(stderr, _("converting %s file %s to %s file %s in Unix format...\n"), informat, infile, outformat, outfile);
+ } else {
+ if (pFlag->FromToMode == FROMTO_UNIX2MAC) {
+ if (!RetVal) d2u_fprintf(stderr, _("converting %s file %s to %s file %s in Mac format...\n"), informat, infile, outformat, outfile);
+ } else {
+ if (!RetVal) d2u_fprintf(stderr, _("converting %s file %s to %s file %s in DOS format...\n"), informat, infile, outformat, outfile);
+ }
}
}
if (RetVal) {
- fprintf(stderr,"%s: ",progname);
- fprintf(stderr, _("problems converting file %s to file %s\n"), infile, outfile);
+ d2u_fprintf(stderr,"%s: ",progname);
+ d2u_fprintf(stderr, _("problems converting file %s to file %s\n"), infile, outfile);
}
}
}
@@ -1213,59 +1747,77 @@ void print_messages_oldfile(const CFlag *pFlag, const char *infile, const char *
{
char informat[10];
char outformat[32];
+# ifdef D2U_UNIFILE
+ wchar_t informatw[32];
+ wchar_t outformatw[64];
+#endif
print_format(pFlag, informat, outformat, sizeof(informat), sizeof(outformat));
+/* Change informat and outformat to UTF-8 for d2u_fprintf. */
+# ifdef D2U_UNIFILE
+ /* The format string is encoded in the system default
+ * Windows ANSI code page. May have been translated
+ * by gettext. Convert it to wide characters. */
+ d2u_MultiByteToWideChar(CP_ACP,0, informat, -1, informatw, sizeof(informat));
+ d2u_MultiByteToWideChar(CP_ACP,0, outformat, -1, outformatw, sizeof(outformat));
+ /* then convert the format string to UTF-8 */
+ d2u_WideCharToMultiByte(CP_UTF8, 0, informatw, -1, informat, sizeof(informat), NULL, NULL);
+ d2u_WideCharToMultiByte(CP_UTF8, 0, outformatw, -1, outformat, sizeof(outformat), NULL, NULL);
+#endif
+
if (pFlag->status & NO_REGFILE) {
- fprintf(stderr,"%s: ",progname);
- fprintf(stderr, _("Skipping %s, not a regular file.\n"), infile);
+ d2u_fprintf(stderr,"%s: ",progname);
+ d2u_fprintf(stderr, _("Skipping %s, not a regular file.\n"), infile);
} else if (pFlag->status & OUTPUTFILE_SYMLINK) {
- fprintf(stderr,"%s: ",progname);
- fprintf(stderr, _("Skipping symbolic link %s.\n"), infile);
+ d2u_fprintf(stderr,"%s: ",progname);
+ d2u_fprintf(stderr, _("Skipping symbolic link %s.\n"), infile);
} else if (pFlag->status & INPUT_TARGET_NO_REGFILE) {
- fprintf(stderr,"%s: ",progname);
- fprintf(stderr, _("Skipping symbolic link %s, target is not a regular file.\n"), infile);
+ d2u_fprintf(stderr,"%s: ",progname);
+ d2u_fprintf(stderr, _("Skipping symbolic link %s, target is not a regular file.\n"), infile);
} else if (pFlag->status & BINARY_FILE) {
- fprintf(stderr,"%s: ",progname);
- fprintf(stderr, _("Skipping binary file %s\n"), infile);
+ d2u_fprintf(stderr,"%s: ",progname);
+ d2u_fprintf(stderr, _("Skipping binary file %s\n"), infile);
} else if (pFlag->status & WRONG_CODEPAGE) {
- fprintf(stderr,"%s: ",progname);
- fprintf(stderr, _("code page %d is not supported.\n"), pFlag->ConvMode);
+ d2u_fprintf(stderr,"%s: ",progname);
+ d2u_fprintf(stderr, _("code page %d is not supported.\n"), pFlag->ConvMode);
} else if (pFlag->status & WCHAR_T_TOO_SMALL) {
- fprintf(stderr,"%s: ",progname);
- fprintf(stderr, _("Skipping UTF-16 file %s, the size of wchar_t is %d bytes.\n"), infile, (int)sizeof(wchar_t));
+ d2u_fprintf(stderr,"%s: ",progname);
+ d2u_fprintf(stderr, _("Skipping UTF-16 file %s, the size of wchar_t is %d bytes.\n"), infile, (int)sizeof(wchar_t));
} else if (pFlag->status & UNICODE_CONVERSION_ERROR) {
- fprintf(stderr,"%s: ",progname);
- fprintf(stderr, _("Skipping UTF-16 file %s, an UTF-16 conversion error occurred.\n"), infile);
+ d2u_fprintf(stderr,"%s: ",progname);
+ d2u_fprintf(stderr, _("Skipping UTF-16 file %s, an UTF-16 conversion error occurred on line %u.\n"), infile, pFlag->line_nr);
} else {
- fprintf(stderr,"%s: ",progname);
+ d2u_fprintf(stderr,"%s: ",progname);
if (informat[0] == '\0') {
- if (is_dos2unix(progname))
- fprintf(stderr, _("converting file %s to Unix format...\n"), infile);
- else {
- if (pFlag->FromToMode == FROMTO_UNIX2MAC)
- fprintf(stderr, _("converting file %s to Mac format...\n"), infile);
- else
- fprintf(stderr, _("converting file %s to DOS format...\n"), infile);
+ if (is_dos2unix(progname)) {
+ if (!RetVal) d2u_fprintf(stderr, _("converting file %s to Unix format...\n"), infile);
+ } else {
+ if (pFlag->FromToMode == FROMTO_UNIX2MAC) {
+ if (!RetVal) d2u_fprintf(stderr, _("converting file %s to Mac format...\n"), infile);
+ } else {
+ if (!RetVal) d2u_fprintf(stderr, _("converting file %s to DOS format...\n"), infile);
+ }
}
} else {
- if (is_dos2unix(progname))
+ if (is_dos2unix(progname)) {
/* TRANSLATORS:
1st %s is encoding of input file.
2nd %s is name of input file.
3rd %s is encoding of output (input file is overwritten).
E.g.: converting UTF-16LE file foo.txt to UTF-8 Unix format... */
- fprintf(stderr, _("converting %s file %s to %s Unix format...\n"), informat, infile, outformat);
- else {
- if (pFlag->FromToMode == FROMTO_UNIX2MAC)
- fprintf(stderr, _("converting %s file %s to %s Mac format...\n"), informat, infile, outformat);
- else
- fprintf(stderr, _("converting %s file %s to %s DOS format...\n"), informat, infile, outformat);
+ if (!RetVal) d2u_fprintf(stderr, _("converting %s file %s to %s Unix format...\n"), informat, infile, outformat);
+ } else {
+ if (pFlag->FromToMode == FROMTO_UNIX2MAC) {
+ if (!RetVal) d2u_fprintf(stderr, _("converting %s file %s to %s Mac format...\n"), informat, infile, outformat);
+ } else {
+ if (!RetVal) d2u_fprintf(stderr, _("converting %s file %s to %s DOS format...\n"), informat, infile, outformat);
+ }
}
}
if (RetVal) {
- fprintf(stderr,"%s: ",progname);
- fprintf(stderr, _("problems converting file %s\n"), infile);
+ d2u_fprintf(stderr,"%s: ",progname);
+ d2u_fprintf(stderr, _("problems converting file %s\n"), infile);
}
}
}
@@ -1274,30 +1826,31 @@ void print_messages_info(const CFlag *pFlag, const char *infile, const char *pro
{
if (pFlag->status & NO_REGFILE) {
if (pFlag->verbose) {
- fprintf(stderr,"%s: ",progname);
- fprintf(stderr, _("Skipping %s, not a regular file.\n"), infile);
+ d2u_fprintf(stderr,"%s: ",progname);
+ d2u_fprintf(stderr, _("Skipping %s, not a regular file.\n"), infile);
}
} else if (pFlag->status & INPUT_TARGET_NO_REGFILE) {
if (pFlag->verbose) {
- fprintf(stderr,"%s: ",progname);
- fprintf(stderr, _("Skipping symbolic link %s, target is not a regular file.\n"), infile);
+ d2u_fprintf(stderr,"%s: ",progname);
+ d2u_fprintf(stderr, _("Skipping symbolic link %s, target is not a regular file.\n"), infile);
}
} else if (pFlag->status & WCHAR_T_TOO_SMALL) {
if (pFlag->verbose) {
- fprintf(stderr,"%s: ",progname);
- fprintf(stderr, _("Skipping UTF-16 file %s, the size of wchar_t is %d bytes.\n"), infile, (int)sizeof(wchar_t));
+ d2u_fprintf(stderr,"%s: ",progname);
+ d2u_fprintf(stderr, _("Skipping UTF-16 file %s, the size of wchar_t is %d bytes.\n"), infile, (int)sizeof(wchar_t));
}
}
}
#ifdef D2U_UNICODE
-void FileInfoW(FILE* ipInF, CFlag *ipFlag, const char *filename)
+void FileInfoW(FILE* ipInF, CFlag *ipFlag, const char *filename, int bomtype, const char *progname)
{
wint_t TempChar;
wint_t PreviousChar = 0;
unsigned int lb_dos = 0;
unsigned int lb_unix = 0;
unsigned int lb_mac = 0;
+ char *errstr;
ipFlag->status = 0;
@@ -1322,8 +1875,18 @@ void FileInfoW(FILE* ipInF, CFlag *ipFlag, const char *filename)
continue;
}
PreviousChar = TempChar;
- lb_unix++; /* Unix line end (LF). Put CR */
+ lb_unix++; /* Unix line end (LF). */
+ }
+ }
+ if ((TempChar == WEOF) && ferror(ipInF)) {
+ ipFlag->error = errno;
+ if (ipFlag->verbose) {
+ errstr = strerror(errno);
+ d2u_fprintf(stderr, "%s: ", progname);
+ d2u_fprintf(stderr, _("can not read from input file %s:"), filename);
+ d2u_ansi_fprintf(stderr, " %s\n", errstr);
}
+ return;
}
if (ipFlag->file_info & INFO_CONVERT) {
@@ -1338,30 +1901,32 @@ void FileInfoW(FILE* ipInF, CFlag *ipFlag, const char *filename)
}
if (ipFlag->file_info & INFO_DOS)
- printf(" %6u", lb_dos);
+ d2u_fprintf(stdout, " %6u", lb_dos);
if (ipFlag->file_info & INFO_UNIX)
- printf(" %6u", lb_unix);
+ d2u_fprintf(stdout, " %6u", lb_unix);
if (ipFlag->file_info & INFO_MAC)
- printf(" %6u", lb_mac);
+ d2u_fprintf(stdout, " %6u", lb_mac);
if (ipFlag->file_info & INFO_BOM)
- print_bom_info(ipFlag->bomtype);
+ print_bom_info(bomtype);
if (ipFlag->file_info & INFO_TEXT) {
if (ipFlag->status & BINARY_FILE)
- printf(" binary");
+ d2u_fprintf(stdout, " binary");
else
- printf(" text ");
+ d2u_fprintf(stdout, " text ");
}
- printf(" %s\n",filename);
+ d2u_fprintf(stdout, " %s",filename);
+ d2u_fprintf(stdout, "\n");
}
#endif
-void FileInfo(FILE* ipInF, CFlag *ipFlag, const char *filename)
+void FileInfo(FILE* ipInF, CFlag *ipFlag, const char *filename, int bomtype, const char *progname)
{
int TempChar;
int PreviousChar = 0;
unsigned int lb_dos = 0;
unsigned int lb_unix = 0;
unsigned int lb_mac = 0;
+ char *errstr;
ipFlag->status = 0;
@@ -1387,9 +1952,19 @@ void FileInfo(FILE* ipInF, CFlag *ipFlag, const char *filename)
continue;
}
PreviousChar = TempChar;
- lb_unix++; /* Unix line end (LF). Put CR */
+ lb_unix++; /* Unix line end (LF). */
}
}
+ if ((TempChar == EOF) && ferror(ipInF)) {
+ ipFlag->error = errno;
+ if (ipFlag->verbose) {
+ errstr = strerror(errno);
+ d2u_fprintf(stderr, "%s: ", progname);
+ d2u_fprintf(stderr, _("can not read from input file %s:"), filename);
+ d2u_ansi_fprintf(stderr, " %s\n", errstr);
+ }
+ return;
+ }
if (ipFlag->file_info & INFO_CONVERT) {
@@ -1404,20 +1979,21 @@ void FileInfo(FILE* ipInF, CFlag *ipFlag, const char *filename)
}
if (ipFlag->file_info & INFO_DOS)
- printf(" %6u", lb_dos);
+ d2u_fprintf(stdout, " %6u", lb_dos);
if (ipFlag->file_info & INFO_UNIX)
- printf(" %6u", lb_unix);
+ d2u_fprintf(stdout, " %6u", lb_unix);
if (ipFlag->file_info & INFO_MAC)
- printf(" %6u", lb_mac);
+ d2u_fprintf(stdout, " %6u", lb_mac);
if (ipFlag->file_info & INFO_BOM)
- print_bom_info(ipFlag->bomtype);
+ print_bom_info(bomtype);
if (ipFlag->file_info & INFO_TEXT) {
if (ipFlag->status & BINARY_FILE)
- printf(" binary");
+ d2u_fprintf(stdout, " binary");
else
- printf(" text ");
+ d2u_fprintf(stdout, " text ");
}
- printf(" %s\n",filename);
+ d2u_fprintf(stdout, " %s",filename);
+ d2u_fprintf(stdout, "\n");
}
int GetFileInfo(char *ipInFN, CFlag *ipFlag, const char *progname)
@@ -1425,7 +2001,7 @@ int GetFileInfo(char *ipInFN, CFlag *ipFlag, const char *progname)
int RetVal = 0;
FILE *InF = NULL;
char *errstr;
- int bomtype_orig = FILE_MBS;
+ int bomtype_orig = FILE_MBS; /* messages must print the real bomtype, not the assumed bomtype */
ipFlag->status = 0 ;
@@ -1449,12 +2025,13 @@ int GetFileInfo(char *ipInFN, CFlag *ipFlag, const char *progname)
if (InF == NULL) {
ipFlag->error = errno;
errstr = strerror(errno);
- fprintf(stderr, "%s: %s: %s\n", progname, ipInFN, errstr);
+ d2u_fprintf(stderr, "%s: %s: ", progname, ipInFN);
+ d2u_ansi_fprintf(stderr, "%s\n", errstr);
RetVal = -1;
}
- if (!RetVal)
+ if (!RetVal)
if (check_unicode_info(InF, ipFlag, progname, &bomtype_orig))
RetVal = -1;
@@ -1462,16 +2039,15 @@ int GetFileInfo(char *ipInFN, CFlag *ipFlag, const char *progname)
#ifdef D2U_UNICODE
if (!RetVal) {
if ((ipFlag->bomtype == FILE_UTF16LE) || (ipFlag->bomtype == FILE_UTF16BE)) {
- FileInfoW(InF, ipFlag, ipInFN);
+ FileInfoW(InF, ipFlag, ipInFN, bomtype_orig, progname);
} else {
- FileInfo(InF, ipFlag, ipInFN);
+ FileInfo(InF, ipFlag, ipInFN, bomtype_orig, progname);
}
}
#else
if (!RetVal)
- FileInfo(InF, ipFlag, ipInFN);
+ FileInfo(InF, ipFlag, ipInFN, bomtype_orig, progname);
#endif
- ipFlag->bomtype = bomtype_orig; /* messages must print the real bomtype, not the assumed bomtype */
/* can close in file? */
if ((InF) && (fclose(InF) == EOF))
@@ -1483,7 +2059,7 @@ int GetFileInfo(char *ipInFN, CFlag *ipFlag, const char *progname)
int GetFileInfoStdio(CFlag *ipFlag, const char *progname)
{
int RetVal = 0;
- int bomtype_orig = FILE_MBS;
+ int bomtype_orig = FILE_MBS; /* messages must print the real bomtype, not the assumed bomtype */
ipFlag->status = 0 ;
@@ -1509,16 +2085,15 @@ int GetFileInfoStdio(CFlag *ipFlag, const char *progname)
#ifdef D2U_UNICODE
if (!RetVal) {
if ((ipFlag->bomtype == FILE_UTF16LE) || (ipFlag->bomtype == FILE_UTF16BE)) {
- FileInfoW(stdin, ipFlag, "");
+ FileInfoW(stdin, ipFlag, "", bomtype_orig, progname);
} else {
- FileInfo(stdin, ipFlag, "");
+ FileInfo(stdin, ipFlag, "", bomtype_orig, progname);
}
}
#else
if (!RetVal)
- FileInfo(stdin, ipFlag, "");
+ FileInfo(stdin, ipFlag, "", bomtype_orig, progname);
#endif
- ipFlag->bomtype = bomtype_orig; /* messages must print the real bomtype, not the assumed bomtype */
return RetVal;
}
@@ -1557,8 +2132,8 @@ void get_info_options(char *option, CFlag *pFlag, const char *progname)
default:
/* Terminate the program on a wrong option. If pFlag->file_info is
zero and the program goes on, it may do unwanted conversions. */
- fprintf(stderr,"%s: ",progname);
- fprintf(stderr,_("wrong flag '%c' for option -i or --info\n"), *ptr);
+ d2u_fprintf(stderr,"%s: ",progname);
+ d2u_fprintf(stderr,_("wrong flag '%c' for option -i or --info\n"), *ptr);
exit(1);
;
}
@@ -1566,7 +2141,8 @@ void get_info_options(char *option, CFlag *pFlag, const char *progname)
}
}
-int parse_options(int argc, char *argv[], CFlag *pFlag, const char *localedir, const char *progname,
+int parse_options(int argc, char *argv[],
+ CFlag *pFlag, const char *localedir, const char *progname,
void (*PrintLicense)(void),
int (*Convert)(FILE*, FILE*, CFlag *, const char *)
#ifdef D2U_UNICODE
@@ -1579,6 +2155,9 @@ int parse_options(int argc, char *argv[], CFlag *pFlag, const char *localedir, c
int CanSwitchFileMode = 1;
int process_options = 1;
int RetVal = 0;
+#ifdef D2U_UNIFILE
+ char *ptr;
+#endif
/* variable initialisations */
pFlag->NewFile = 0;
@@ -1597,6 +2176,18 @@ int parse_options(int argc, char *argv[], CFlag *pFlag, const char *localedir, c
pFlag->file_info = 0;
pFlag->locale_target = TARGET_UTF8;
+#ifdef D2U_UNIFILE
+ ptr = getenv("DOS2UNIX_DISPLAY_ENC");
+ if (ptr != NULL) {
+ if (strncmp(ptr, "ansi", sizeof("ansi")) == 0)
+ d2u_display_encoding = D2U_DISPLAY_ANSI;
+ else if (strncmp(ptr, "unicode", sizeof("unicode")) == 0)
+ d2u_display_encoding = D2U_DISPLAY_UNICODE;
+ else if (strncmp(ptr, "utf8", sizeof("utf8")) == 0)
+ d2u_display_encoding = D2U_DISPLAY_UTF8;
+ }
+#endif
+
while ((++ArgIdx < argc) && (!ShouldExit))
{
/* is it an option? */
@@ -1666,8 +2257,8 @@ int parse_options(int argc, char *argv[], CFlag *pFlag, const char *localedir, c
else if (strcmp(argv[ArgIdx],"-iso") == 0) {
pFlag->ConvMode = (int)query_con_codepage();
if (pFlag->verbose) {
- fprintf(stderr,"%s: ",progname);
- fprintf(stderr,_("active code page: %d\n"), pFlag->ConvMode);
+ d2u_fprintf(stderr,"%s: ",progname);
+ d2u_fprintf(stderr,_("active code page: %d\n"), pFlag->ConvMode);
}
if (pFlag->ConvMode < 2)
pFlag->ConvMode = CONVMODE_437;
@@ -1703,8 +2294,8 @@ int parse_options(int argc, char *argv[], CFlag *pFlag, const char *localedir, c
else if (strcmpi(argv[ArgIdx], "iso") == 0) {
pFlag->ConvMode = (int)query_con_codepage();
if (pFlag->verbose) {
- fprintf(stderr,"%s: ",progname);
- fprintf(stderr,_("active code page: %d\n"), pFlag->ConvMode);
+ d2u_fprintf(stderr,"%s: ",progname);
+ d2u_fprintf(stderr,_("active code page: %d\n"), pFlag->ConvMode);
}
if (pFlag->ConvMode < 2)
pFlag->ConvMode = CONVMODE_437;
@@ -1715,27 +2306,54 @@ int parse_options(int argc, char *argv[], CFlag *pFlag, const char *localedir, c
else
pFlag->FromToMode = FROMTO_UNIX2MAC;
} else {
- fprintf(stderr,"%s: ",progname);
- fprintf(stderr, _("invalid %s conversion mode specified\n"),argv[ArgIdx]);
+ d2u_fprintf(stderr,"%s: ",progname);
+ d2u_fprintf(stderr, _("invalid %s conversion mode specified\n"),argv[ArgIdx]);
+ pFlag->error = 1;
+ ShouldExit = 1;
+ pFlag->stdio_mode = 0;
+ }
+ } else {
+ ArgIdx--;
+ d2u_fprintf(stderr,"%s: ",progname);
+ d2u_fprintf(stderr,_("option '%s' requires an argument\n"),argv[ArgIdx]);
+ pFlag->error = 1;
+ ShouldExit = 1;
+ pFlag->stdio_mode = 0;
+ }
+ }
+
+#ifdef D2U_UNIFILE
+ else if ((strcmp(argv[ArgIdx],"-D") == 0) || (strcmp(argv[ArgIdx],"--display-enc") == 0)) {
+ if (++ArgIdx < argc) {
+ if (strcmpi(argv[ArgIdx],"ansi") == 0)
+ d2u_display_encoding = D2U_DISPLAY_ANSI;
+ else if (strcmpi(argv[ArgIdx], "unicode") == 0)
+ d2u_display_encoding = D2U_DISPLAY_UNICODE;
+ else if (strcmpi(argv[ArgIdx], "utf8") == 0) {
+ d2u_display_encoding = D2U_DISPLAY_UTF8;
+ } else {
+ d2u_fprintf(stderr,"%s: ",progname);
+ d2u_fprintf(stderr, _("invalid %s display encoding specified\n"),argv[ArgIdx]);
pFlag->error = 1;
ShouldExit = 1;
pFlag->stdio_mode = 0;
}
} else {
ArgIdx--;
- fprintf(stderr,"%s: ",progname);
- fprintf(stderr,_("option '%s' requires an argument\n"),argv[ArgIdx]);
+ d2u_fprintf(stderr,"%s: ",progname);
+ d2u_fprintf(stderr,_("option '%s' requires an argument\n"),argv[ArgIdx]);
pFlag->error = 1;
ShouldExit = 1;
pFlag->stdio_mode = 0;
}
}
+#endif
else if ((strcmp(argv[ArgIdx],"-o") == 0) || (strcmp(argv[ArgIdx],"--oldfile") == 0)) {
/* last convert not paired */
if (!CanSwitchFileMode) {
- fprintf(stderr,"%s: ",progname);
- fprintf(stderr, _("target of file %s not specified in new-file mode\n"), argv[ArgIdx-1]);
+ d2u_fprintf(stderr,"%s: ",progname);
+ d2u_fprintf(stderr, _("target of file %s not specified in new-file mode\n"), argv[ArgIdx-1]);
pFlag->error = 1;
ShouldExit = 1;
pFlag->stdio_mode = 0;
@@ -1747,8 +2365,8 @@ int parse_options(int argc, char *argv[], CFlag *pFlag, const char *localedir, c
else if ((strcmp(argv[ArgIdx],"-n") == 0) || (strcmp(argv[ArgIdx],"--newfile") == 0)) {
/* last convert not paired */
if (!CanSwitchFileMode) {
- fprintf(stderr,"%s: ",progname);
- fprintf(stderr, _("target of file %s not specified in new-file mode\n"), argv[ArgIdx-1]);
+ d2u_fprintf(stderr,"%s: ",progname);
+ d2u_fprintf(stderr, _("target of file %s not specified in new-file mode\n"), argv[ArgIdx-1]);
pFlag->error = 1;
ShouldExit = 1;
pFlag->stdio_mode = 0;
@@ -1796,7 +2414,7 @@ int parse_options(int argc, char *argv[], CFlag *pFlag, const char *localedir, c
}
/* no file argument, use stdin and stdout */
- if (pFlag->stdio_mode) {
+ if ( (argc > 0) && pFlag->stdio_mode) {
if (pFlag->file_info) {
RetVal = GetFileInfoStdio(pFlag, progname);
print_messages_info(pFlag, "stdin", progname);
@@ -1813,14 +2431,52 @@ int parse_options(int argc, char *argv[], CFlag *pFlag, const char *localedir, c
}
if (!CanSwitchFileMode) {
- fprintf(stderr,"%s: ",progname);
- fprintf(stderr, _("target of file %s not specified in new-file mode\n"), argv[ArgIdx-1]);
+ d2u_fprintf(stderr,"%s: ",progname);
+ d2u_fprintf(stderr, _("target of file %s not specified in new-file mode\n"), argv[ArgIdx-1]);
pFlag->error = 1;
}
return pFlag->error;
}
+void d2u_getc_error(CFlag *ipFlag, const char *progname)
+{
+ char *errstr;
+
+ ipFlag->error = errno;
+ if (ipFlag->verbose) {
+ errstr = strerror(errno);
+ d2u_fprintf(stderr, "%s: ", progname);
+ d2u_ansi_fprintf(stderr, _("can not read from input file: %s\n"), errstr);
+ }
+}
+
+void d2u_putc_error(CFlag *ipFlag, const char *progname)
+{
+ char *errstr;
+
+ ipFlag->error = errno;
+ if (ipFlag->verbose) {
+ errstr = strerror(errno);
+ d2u_fprintf(stderr, "%s: ", progname);
+ d2u_ansi_fprintf(stderr, _("can not write to output file: %s\n"), errstr);
+ }
+}
+
#ifdef D2U_UNICODE
+void d2u_putwc_error(CFlag *ipFlag, const char *progname)
+{
+ char *errstr;
+
+ if (!(ipFlag->status & UNICODE_CONVERSION_ERROR)) {
+ ipFlag->error = errno;
+ if (ipFlag->verbose) {
+ errstr = strerror(errno);
+ d2u_fprintf(stderr, "%s: ", progname);
+ d2u_ansi_fprintf(stderr, _("can not write to output file: %s\n"), errstr);
+ }
+ }
+}
+
wint_t d2u_getwc(FILE *f, int bomtype)
{
int c_trail, c_lead;
@@ -1860,13 +2516,18 @@ wint_t d2u_ungetwc(wint_t wc, FILE *f, int bomtype)
}
/* Put wide character */
-wint_t d2u_putwc(wint_t wc, FILE *f, CFlag *ipFlag)
+wint_t d2u_putwc(wint_t wc, FILE *f, CFlag *ipFlag, const char *progname)
{
static char mbs[8];
- static wchar_t lead, trail;
+ static wchar_t lead=0x01, trail; /* lead gets an invalid value */
static wchar_t wstr[3];
size_t i,len;
int c_trail, c_lead;
+#if (defined(_WIN32) && !defined(__CYGWIN__))
+ DWORD dwFlags;
+#else
+ char *errstr;
+#endif
if (ipFlag->keep_utf16) {
if (ipFlag->bomtype == FILE_UTF16LE) { /* UTF16 little endian */
@@ -1883,12 +2544,30 @@ wint_t d2u_putwc(wint_t wc, FILE *f, CFlag *ipFlag)
return wc;
}
- if ((wc >= 0xd800) && (wc < 0xdc00)) {
+ /* Note: In the new Unicode standard, lead is named "high", and trail is named "low". */
+
+ /* check for lead without a trail */
+ if ((lead >= 0xd800) && (lead < 0xdc00) && ((wc < 0xdc00) || (wc >= 0xe000))) {
+ d2u_fprintf(stderr, "%s: ", progname);
+ d2u_fprintf(stderr, _("error: Invalid surrogate pair. Missing low surrogate.\n"));
+ ipFlag->status |= UNICODE_CONVERSION_ERROR ;
+ return(WEOF);
+ }
+
+ if ((wc >= 0xd800) && (wc < 0xdc00)) { /* Surrogate lead */
/* fprintf(stderr, "UTF-16 lead %x\n",wc); */
lead = (wchar_t)wc; /* lead (high) surrogate */
return(wc);
}
- if ((wc >= 0xdc00) && (wc < 0xe000)) {
+ if ((wc >= 0xdc00) && (wc < 0xe000)) { /* Surrogate trail */
+
+ /* check for trail without a lead */
+ if ((lead < 0xd800) || (lead >= 0xdc00)) {
+ d2u_fprintf(stderr, "%s: ", progname);
+ d2u_fprintf(stderr, _("error: Invalid surrogate pair. Missing high surrogate.\n"));
+ ipFlag->status |= UNICODE_CONVERSION_ERROR ;
+ return(WEOF);
+ }
/* fprintf(stderr, "UTF-16 trail %x\n",wc); */
trail = (wchar_t)wc; /* trail (low) surrogate */
#if defined(_WIN32) || defined(__CYGWIN__)
@@ -1898,6 +2577,7 @@ wint_t d2u_putwc(wint_t wc, FILE *f, CFlag *ipFlag)
wstr[0] = lead;
wstr[1] = trail;
wstr[2] = L'\0';
+ lead = 0x01; /* make lead invalid */
#else
/* On Unix wchar_t is 32 bit */
/* When we don't decode the UTF-16 surrogate pair, wcstombs() does not
@@ -1924,6 +2604,7 @@ wint_t d2u_putwc(wint_t wc, FILE *f, CFlag *ipFlag)
wstr[0] += (lead & 0x03FF) << 10;
wstr[0] += (trail & 0x03FF);
wstr[1] = L'\0';
+ lead = 0x01; /* make lead invalid */
/* fprintf(stderr, "UTF-32 %x\n",wstr[0]); */
#endif
} else {
@@ -1931,12 +2612,25 @@ wint_t d2u_putwc(wint_t wc, FILE *f, CFlag *ipFlag)
wstr[1] = L'\0';
}
+ if (wc == 0x0000) {
+ if (fputc(0, f) == EOF)
+ return(WEOF);
+ return(wc);
+ }
+
#if (defined(_WIN32) && !defined(__CYGWIN__))
+/* The WC_ERR_INVALID_CHARS flag is available since Windows Vista (0x0600). It enables checking for
+ invalid input characters. */
+#if WINVER >= 0x0600
+ dwFlags = WC_ERR_INVALID_CHARS;
+#else
+ dwFlags = 0;
+#endif
/* On Windows we convert UTF-16 always to UTF-8 or GB18030 */
if (ipFlag->locale_target == TARGET_GB18030) {
- len = (size_t)(WideCharToMultiByte(54936, 0, wstr, -1, mbs, sizeof(mbs), NULL, NULL) -1);
+ len = (size_t)(WideCharToMultiByte(54936, dwFlags, wstr, -1, mbs, sizeof(mbs), NULL, NULL) -1);
} else {
- len = (size_t)(WideCharToMultiByte(CP_UTF8, 0, wstr, -1, mbs, sizeof(mbs), NULL, NULL) -1);
+ len = (size_t)(WideCharToMultiByte(CP_UTF8, dwFlags, wstr, -1, mbs, sizeof(mbs), NULL, NULL) -1);
}
#else
/* On Unix we convert UTF-16 to the locale encoding */
@@ -1946,6 +2640,14 @@ wint_t d2u_putwc(wint_t wc, FILE *f, CFlag *ipFlag)
if ( len == (size_t)(-1) ) {
/* Stop when there is a conversion error */
+ /* On Windows we convert UTF-16 always to UTF-8 or GB18030 */
+#if (defined(_WIN32) && !defined(__CYGWIN__))
+ d2u_PrintLastError(progname);
+#else
+ errstr = strerror(errno);
+ d2u_fprintf(stderr, "%s:", progname);
+ d2u_ansi_fprintf(stderr, " %s\n", errstr);
+#endif
ipFlag->status |= UNICODE_CONVERSION_ERROR ;
return(WEOF);
} else {