diff options
Diffstat (limited to 'common.c')
-rw-r--r-- | common.c | 1178 |
1 files changed, 940 insertions, 238 deletions
@@ -1,5 +1,5 @@ /* - * Copyright (C) 2009-2014 Erwin Waterlander + * Copyright (C) 2009-2015 Erwin Waterlander * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -28,14 +28,20 @@ #include "dos2unix.h" #include "querycp.h" -#if defined(D2U_UNICODE) +#include <stdarg.h> +#ifdef D2U_UNIFILE +#include <windows.h> +#elif defined(D2U_UNICODE) #if defined(_WIN32) || defined(__CYGWIN__) #include <windows.h> #endif #endif + +#if defined(D2U_UNICODE) #if !defined(__MSDOS__) && !defined(_WIN32) && !defined(__OS2__) /* Unix, Cygwin */ # include <langinfo.h> #endif +#endif #if defined(__GLIBC__) /* on glibc, canonicalize_file_name() broken prior to 2.4 (06-Mar-2006) */ @@ -51,6 +57,269 @@ # endif #endif +/* global variable */ +#ifdef D2U_UNIFILE +int d2u_display_encoding = D2U_DISPLAY_ANSI ; +#endif + +/* + * Print last system error on Windows. + * + */ +#if (defined(_WIN32) && !defined(__CYGWIN__)) +void d2u_PrintLastError(const char *progname) +{ + /* Retrieve the system error message for the last-error code */ + + LPVOID lpMsgBuf; + DWORD dw; + + dw = GetLastError(); + + FormatMessage( + FORMAT_MESSAGE_ALLOCATE_BUFFER | + FORMAT_MESSAGE_FROM_SYSTEM | + FORMAT_MESSAGE_IGNORE_INSERTS, + NULL, + dw, + MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), + (LPTSTR) &lpMsgBuf, + 0, NULL ); + + /* Display the error message */ + + /* MessageBox(NULL, (LPCTSTR)lpMsgBuf, TEXT("Error"), MB_OK); */ + fprintf(stderr, "%s: ",progname); +#ifdef D2U_UNIFILE + fwprintf(stderr, L"%ls\n",(LPCTSTR)lpMsgBuf); +#else + fprintf(stderr, "%s\n",(LPCTSTR)lpMsgBuf); +#endif + + LocalFree(lpMsgBuf); +} + + +int d2u_WideCharToMultiByte(UINT CodePage, DWORD dwFlags, LPCWSTR lpWideCharStr, int cchWideChar, LPSTR lpMultiByteStr, int cbMultiByte, LPCSTR lpDefaultChar, LPBOOL lpUsedDefaultChar) +{ + int i; + + if ( (i = WideCharToMultiByte(CodePage, dwFlags, lpWideCharStr, cchWideChar, lpMultiByteStr, cbMultiByte, lpDefaultChar, lpUsedDefaultChar)) == 0) + d2u_PrintLastError("dos2unix"); + + return i; +} + +int d2u_MultiByteToWideChar(UINT CodePage, DWORD dwFlags, LPCSTR lpMultiByteStr, int cbMultiByte, LPWSTR lpWideCharStr, int cchWideChar) +{ + int i; + + if ( (i = MultiByteToWideChar(CodePage, dwFlags, lpMultiByteStr, cbMultiByte, lpWideCharStr, cchWideChar)) == 0) + d2u_PrintLastError("dos2unix"); + return i; +} + +#endif + +/* + * d2u_fprintf() : printf wrapper, print in Windows Command Prompt in Unicode + * mode, to have consistent output. Regardless of active code page. + * + * On Windows the file system uses always Unicode UTF-16 encoding, regardless + * of the system default code page. This means that files and directories can + * have names that can't be encoded in the default system Windows ANSI code + * page. + * + * Dos2unix for Windows with Unicode file name support translates all directory + * names to UTF-8, to be able to work with char type strings. This is also + * done to keep the code portable. + * + * Dos2unix's messages are encoded in the default Windows ANSI code page, which + * can be translated with gettext. Gettext/libintl recodes messages (format) to + * the system default ANSI code page. + * + * d2u_fprintf() on Windows assumes that: + * - The format string is encoded in the system default ANSI code page. + * - The arguments are encoded in UTF-8. + * + * There are several methods for printing Unicode in the Windows Console, but + * none of them is perfect. There are so many issues that I decided to go back + * to ANSI by default. + */ + +void d2u_fprintf( FILE *stream, const char* format, ... ) { + va_list args; +#ifdef D2U_UNIFILE + char buf[D2U_MAX_PATH]; + char formatmbs[D2U_MAX_PATH]; + wchar_t formatwcs[D2U_MAX_PATH]; + UINT outputCP; + wchar_t wstr[D2U_MAX_PATH]; + int prevmode; +/* HANDLE out_handle; + + if (stream == stderr) + out_handle =GetStdHandle(STD_ERROR_HANDLE); + else + out_handle =GetStdHandle(STD_OUTPUT_HANDLE); +*/ + va_start(args, format); + + /* The format string is encoded in the system default + * Windows ANSI code page. May have been translated + * by gettext. Convert it to wide characters. */ + d2u_MultiByteToWideChar(CP_ACP,0, format, -1, formatwcs, D2U_MAX_PATH); + /* then convert the format string to UTF-8 */ + d2u_WideCharToMultiByte(CP_UTF8, 0, formatwcs, -1, formatmbs, D2U_MAX_PATH, NULL, NULL); + + /* The arguments (file names) are in UTF-8 encoding, because + * in dos2unix for Windows all file names are in UTF-8 format. + * Print to buffer (UTF-8) */ + vsnprintf(buf, sizeof(buf), formatmbs, args); + + if (d2u_display_encoding == D2U_DISPLAY_UTF8) { + + /* Using UTF-8 has my preference. The following method works fine when NLS is + disabled. But when I enable NLS (ENABLE_NLS=1) all non-ASCII characters are + printed as a square with a question mark in it. This will make the screen + output of dos2unix for most languages unreadable. + When I redirect the output to a file, the output is correct UTF-8. I don't + know why NLS causes wrong printed text in the console. I just turn NLS off. + A disadvantage of this method is that all non-ASCII characters are printed + wrongly when the console uses raster font (which is the default). + I tried on a Chinese Windows 7 (code page 936) and then all non-ASCII + is printed wrongly, using raster and TrueType font. Only in ConEmu I + get correct output. I'm afraid that most people use the default Command Prompt + and PowerShell consolse, so for many people the text will be unreadable. + On a Chinese Windows there was a lot of flickering during the printing of the + lines of text. This is not acceptable, but I'm not sure it this was because the + Windows Command Prompt was broken. It sometimes crashes. + */ +#ifdef ENABLE_NLS + /* temporarely disable NLS */ + setlocale (LC_ALL, "C"); +#endif + /* print UTF-8 buffer to console in UTF-8 mode */ + outputCP = GetConsoleOutputCP(); + SetConsoleOutputCP(CP_UTF8); + fwprintf(stream,L"%S",buf); + SetConsoleOutputCP(outputCP); +#ifdef ENABLE_NLS + /* re-enable NLS */ + setlocale (LC_ALL, ""); +#endif + + /* The following UTF-8 method does not give correct output. I don't know why. */ + //prevmode = _setmode(_fileno(stream), _O_U8TEXT); + //fwprintf(stream,L"%S",buf); + //_setmode(_fileno(stream), prevmode); + + } else if (d2u_display_encoding == D2U_DISPLAY_UNICODE) { + + /* Another method for printing Unicode is using WriteConsoleW(). + WriteConsoleW always prints output correct in the console. Even when + using raster font WriteConsoleW prints correctly when possible. + WriteConsoleW has one big disadvantage: The output of WriteConsoleW + can't be redirected. The output can't be piped to a log file. */ + /* Convert UTF-8 buffer to wide characters. */ + //d2u_MultiByteToWideChar(CP_UTF8,0, buf, -1, wstr, D2U_MAX_PATH); + //WriteConsoleW(out_handle, wstr, wcslen(wstr), NULL, NULL); + + /* Printing UTF-16 works correctly like WriteConsoleW, with and without NLS enabled. + Works also good with raster fonts. In a Chinese CP936 locale it works correctly + in the Windows Command Prompt. The downside is that it is UTF-16. When this is + redirected to a file it gives a big mess. It is not compatible with ASCII. So + even a simple ASCII grep on the screen output will not work. + When the output is redirected in a Windows Command Prompt to a file all line breaks end up as + 0d0a 00 (instead of 0d00 0a00), which makes it a corrupt UTF-16 file. + In PowerShell you get correct line breaks 0d00 0a00 when you redirect to a file, but there are + null characters (0000) inserted, as if it is UTF-32 with UTF-16 BOM and UTF-16 line breaks. + See also test/testu16.c. */ + d2u_MultiByteToWideChar(CP_UTF8,0, buf, -1, wstr, D2U_MAX_PATH); + prevmode = _setmode(_fileno(stream), _O_U16TEXT); + fwprintf(stream,L"%ls",wstr); + _setmode(_fileno(stream), prevmode); + } else { /* ANSI */ + d2u_MultiByteToWideChar(CP_UTF8,0, buf, -1, wstr, D2U_MAX_PATH); + /* Convert the whole message to ANSI, some Unicode characters may fail to translate to ANSI. + They will be displayed as a question mark. */ + d2u_WideCharToMultiByte(CP_ACP, 0, wstr, -1, buf, D2U_MAX_PATH, NULL, NULL); + fprintf(stream,"%s",buf); + } + +#else + va_start(args, format); + vfprintf(stream, format, args); +#endif + va_end( args ); +} + +/* d2u_ansi_fprintf() + fprintf wrapper for Windows console. + + Format and arguments are in ANSI format. + Redirect the printing to d2u_fprintf such that the output + format is consistent. To prevent a mix of ANSI/UTF-8/UTF-16 + encodings in the print output. + */ + +void d2u_ansi_fprintf( FILE *stream, const char* format, ... ) { + va_list args; +#ifdef D2U_UNIFILE + char buf[D2U_MAX_PATH]; /* ANSI encoded string */ + char bufmbs[D2U_MAX_PATH]; /* UTF-8 encoded string */ + wchar_t bufwcs[D2U_MAX_PATH]; /* Wide encoded string */ + + va_start(args, format); + + vsnprintf(buf, sizeof(buf), format, args); + /* The format string and arguments are encoded in the system default + * Windows ANSI code page. May have been translated + * by gettext. Convert it to wide characters. */ + d2u_MultiByteToWideChar(CP_ACP,0, buf, -1, bufwcs, D2U_MAX_PATH); + /* then convert the format string to UTF-8 */ + d2u_WideCharToMultiByte(CP_UTF8, 0, bufwcs, -1, bufmbs, D2U_MAX_PATH, NULL, NULL); + + d2u_fprintf(stream, "%s",bufmbs); + +#else + va_start(args, format); + vfprintf(stream, format, args); +#endif + va_end( args ); +} + +/* d2u_rename + * wrapper for rename(). + * On Windows file names are encoded in UTF-8. + */ +int d2u_rename(const char *oldname, const char *newname) +{ +#ifdef D2U_UNIFILE + wchar_t oldnamew[D2U_MAX_PATH]; + wchar_t newnamew[D2U_MAX_PATH]; + d2u_MultiByteToWideChar(CP_UTF8, 0, oldname, -1, oldnamew, D2U_MAX_PATH); + d2u_MultiByteToWideChar(CP_UTF8, 0, newname, -1, newnamew, D2U_MAX_PATH); + return _wrename(oldnamew, newnamew); +#else + return rename(oldname, newname); +#endif +} + +/* d2u_unlink + * wrapper for unlink(). + * On Windows file names are encoded in UTF-8. + */ +int d2u_unlink(const char *filename) +{ +#ifdef D2U_UNIFILE + wchar_t filenamew[D2U_MAX_PATH]; + d2u_MultiByteToWideChar(CP_UTF8, 0, filename, -1, filenamew, D2U_MAX_PATH); + return _wunlink(filenamew); +#else + return unlink(filename); +#endif +} /****************************************************************** * @@ -62,7 +331,23 @@ * ******************************************************************/ -#if (defined(_WIN32) && !defined(__CYGWIN__)) +#ifdef D2U_UNIFILE + +int symbolic_link(const char *path) +{ + DWORD attrs; + wchar_t pathw[D2U_MAX_PATH]; + + d2u_MultiByteToWideChar(CP_UTF8, 0, path, -1, pathw, D2U_MAX_PATH); + attrs = GetFileAttributesW(pathw); + + if (attrs == INVALID_FILE_ATTRIBUTES) + return(0); + + return ((attrs & FILE_ATTRIBUTE_REPARSE_POINT) != 0); +} + +#elif(defined(_WIN32) && !defined(__CYGWIN__)) int symbolic_link(const char *path) { @@ -103,33 +388,44 @@ int symbolic_link(const char *path) ******************************************************************/ int regfile(char *path, int allowSymlinks, CFlag *ipFlag, const char *progname) { +#ifdef D2U_UNIFILE + struct _stat buf; + wchar_t pathw[D2U_MAX_PATH]; +#else struct stat buf; +#endif char *errstr; +#ifdef D2U_UNIFILE + d2u_MultiByteToWideChar(CP_UTF8, 0, path, -1, pathw, D2U_MAX_PATH); + if (_wstat(pathw, &buf) == 0) { +#else if (STAT(path, &buf) == 0) { +#endif #if DEBUG - fprintf(stderr, "%s: %s MODE 0%o ", progname, path, buf.st_mode); + d2u_fprintf(stderr, "%s: %s", progname, path); + d2u_fprintf(stderr, " MODE 0%o ", buf.st_mode); #ifdef S_ISSOCK if (S_ISSOCK(buf.st_mode)) - fprintf(stderr, " (socket)"); + d2u_fprintf(stderr, " (socket)"); #endif #ifdef S_ISLNK if (S_ISLNK(buf.st_mode)) - fprintf(stderr, " (symbolic link)"); + d2u_fprintf(stderr, " (symbolic link)"); #endif if (S_ISREG(buf.st_mode)) - fprintf(stderr, " (regular file)"); + d2u_fprintf(stderr, " (regular file)"); #ifdef S_ISBLK if (S_ISBLK(buf.st_mode)) - fprintf(stderr, " (block device)"); + d2u_fprintf(stderr, " (block device)"); #endif if (S_ISDIR(buf.st_mode)) - fprintf(stderr, " (directory)"); + d2u_fprintf(stderr, " (directory)"); if (S_ISCHR(buf.st_mode)) - fprintf(stderr, " (character device)"); + d2u_fprintf(stderr, " (character device)"); if (S_ISFIFO(buf.st_mode)) - fprintf(stderr, " (FIFO)"); - fprintf(stderr, "\n"); + d2u_fprintf(stderr, " (FIFO)"); + d2u_fprintf(stderr, "\n"); #endif if ((S_ISREG(buf.st_mode)) #ifdef S_ISLNK @@ -144,7 +440,8 @@ int regfile(char *path, int allowSymlinks, CFlag *ipFlag, const char *progname) if (ipFlag->verbose) { ipFlag->error = errno; errstr = strerror(errno); - fprintf(stderr, "%s: %s: %s\n", progname, path, errstr); + d2u_fprintf(stderr, "%s: %s:", progname, path); + d2u_ansi_fprintf(stderr, " %s\n", errstr); } return(-1); } @@ -161,10 +458,20 @@ int regfile(char *path, int allowSymlinks, CFlag *ipFlag, const char *progname) ******************************************************************/ int regfile_target(char *path, CFlag *ipFlag, const char *progname) { +#ifdef D2U_UNIFILE + struct _stat buf; + wchar_t pathw[D2U_MAX_PATH]; +#else struct stat buf; +#endif char *errstr; +#ifdef D2U_UNIFILE + d2u_MultiByteToWideChar(CP_UTF8, 0, path, -1, pathw, D2U_MAX_PATH); + if (_wstat(pathw, &buf) == 0) { +#else if (stat(path, &buf) == 0) { +#endif if (S_ISREG(buf.st_mode)) return(0); else @@ -174,12 +481,119 @@ int regfile_target(char *path, CFlag *ipFlag, const char *progname) if (ipFlag->verbose) { ipFlag->error = errno; errstr = strerror(errno); - fprintf(stderr, "%s: %s: %s\n", progname, path, errstr); + d2u_fprintf(stderr, "%s: %s:", progname, path); + d2u_ansi_fprintf(stderr, " %s\n", errstr); } return(-1); } } +/* + * glob_warg() expands the wide command line arguments. + * Input : wide Unicode arguments. + * Output : argv : expanded arguments in UTF-8 format. + * Returns: new argc value. + * -1 when an error occurred. + * + */ + +#ifdef D2U_UNIFILE +int glob_warg(int argc, wchar_t *wargv[], char ***argv, CFlag *ipFlag, const char *progname) +{ + int i; + int argc_glob = 0; + wchar_t *warg; + wchar_t *path; + wchar_t *path_and_filename; + wchar_t *ptr; + char *arg; + char **argv_new; + char *errstr; + size_t len; + int found, add_path; + WIN32_FIND_DATA FindFileData; + HANDLE hFind; + + argv_new = (char **)malloc(sizeof(char**)); + if (argv_new == NULL) goto glob_failed; + + len = (size_t)d2u_WideCharToMultiByte(CP_UTF8, 0, wargv[0], -1, NULL, 0, NULL, NULL); + arg = (char *)malloc(len); + if (argv == NULL) goto glob_failed; + d2u_WideCharToMultiByte(CP_UTF8, 0, wargv[argc_glob], -1, arg, (int)len, NULL, NULL); + argv_new[argc_glob] = arg; + + for (i=1; i<argc; ++i) + { + warg = wargv[i]; + found = 0; + add_path = 0; + /* FindFileData.cFileName has the path stripped off. We need to add it again. */ + path = _wcsdup(warg); + /* replace all back slashes with slashes */ + while ( (ptr = wcschr(path,L'\\')) != NULL) { + *ptr = L'/'; + } + if ( (ptr = wcsrchr(path,L'/')) != NULL) { + ptr++; + *ptr = L'\0'; + add_path = 1; + } + + hFind = FindFirstFileW(warg, &FindFileData); + while (hFind != INVALID_HANDLE_VALUE) + { + len = wcslen(path) + wcslen(FindFileData.cFileName) + 2; + path_and_filename = (wchar_t *)malloc(len*sizeof(wchar_t)); + if (path_and_filename == NULL) goto glob_failed; + if (add_path) { + wcsncpy_s(path_and_filename, len, path, wcslen(path)); + wcsncat_s(path_and_filename, len, FindFileData.cFileName, wcslen(FindFileData.cFileName)); + } else { + wcsncpy_s(path_and_filename, len, FindFileData.cFileName, wcslen(FindFileData.cFileName)); + } + + found = 1; + ++argc_glob; + len =(size_t) d2u_WideCharToMultiByte(CP_UTF8, 0, path_and_filename, -1, NULL, 0, NULL, NULL); + arg = (char *)malloc((size_t)len); + if (argv == NULL) goto glob_failed; + d2u_WideCharToMultiByte(CP_UTF8, 0, path_and_filename, -1, arg, (int)len, NULL, NULL); + free(path_and_filename); + argv_new = (char **)realloc(argv_new, (size_t)(argc_glob+1)*sizeof(char**)); + if (argv_new == NULL) goto glob_failed; + argv_new[argc_glob] = arg; + + if (!FindNextFileW(hFind, &FindFileData)) { + FindClose(hFind); + hFind = INVALID_HANDLE_VALUE; + } + } + free(path); + if (found == 0) { + /* Not a file. Just copy the argument */ + ++argc_glob; + len =(size_t) d2u_WideCharToMultiByte(CP_UTF8, 0, warg, -1, NULL, 0, NULL, NULL); + arg = (char *)malloc((size_t)len); + if (argv == NULL) goto glob_failed; + d2u_WideCharToMultiByte(CP_UTF8, 0, warg, -1, arg, (int)len, NULL, NULL); + argv_new = (char **)realloc(argv_new, (size_t)(argc_glob+1)*sizeof(char**)); + if (argv_new == NULL) goto glob_failed; + argv_new[argc_glob] = arg; + } + } + *argv = argv_new; + return ++argc_glob; + + glob_failed: + ipFlag->error = errno; + errstr = strerror(errno); + d2u_fprintf(stderr, "%s:", progname); + d2u_ansi_fprintf(stderr, " %s\n", errstr); + return -1; +} +#endif + void PrintBSDLicense(void) { printf("%s", _("\ @@ -233,6 +647,10 @@ void PrintUsage(const char *progname) printf(_(" -b, --keep-bom keep Byte Order Mark (default)\n")); printf(_(" -c, --convmode conversion mode\n\ convmode ascii, 7bit, iso, mac, default to ascii\n")); +#ifdef D2U_UNIFILE + printf(_(" -D, --display-enc set encoding of displayed text messages\n\ + encoding ansi, unicode, utf8, default to ansi\n")); +#endif printf(_(" -f, --force force conversion of binary files\n")); #ifdef D2U_UNICODE #if (defined(_WIN32) && !defined(__CYGWIN__)) @@ -313,11 +731,21 @@ void PrintVersion(const char *progname, const char *localedir) #elif defined(__OS) printf(_("%s version.\n"), __OS); #endif +#if defined(_WIN32) && defined(WINVER) + printf("WINVER 0x%X\n",WINVER); +#endif #ifdef D2U_UNICODE printf("%s", _("With Unicode UTF-16 support.\n")); #else printf("%s", _("Without Unicode UTF-16 support.\n")); #endif +#ifdef _WIN32 +#ifdef D2U_UNIFILE + printf("%s", _("With Unicode file name support.\n")); +#else + printf("%s", _("Without Unicode file name support.\n")); +#endif +#endif #ifdef ENABLE_NLS printf("%s", _("With native language support.\n")); #else @@ -336,7 +764,14 @@ void PrintVersion(const char *progname, const char *localedir) */ FILE* OpenInFile(char *ipFN) { +#ifdef D2U_UNIFILE + wchar_t pathw[D2U_MAX_PATH]; + + d2u_MultiByteToWideChar(CP_UTF8, 0, ipFN, -1, pathw, D2U_MAX_PATH); + return _wfopen(pathw, R_CNTRLW); +#else return (fopen(ipFN, R_CNTRL)); +#endif } @@ -379,6 +814,10 @@ int MakeTempFileFrom(const char *OutFN, char **fname_ret) #else int fd = -1; #endif +#ifdef D2U_UNIFILE + wchar_t fname_strw[D2U_MAX_PATH]; + wchar_t *namew; +#endif *fname_ret = NULL; @@ -387,8 +826,12 @@ int MakeTempFileFrom(const char *OutFN, char **fname_ret) dir = dirname(cpy); +#ifdef D2U_UNIFILE + fname_len = D2U_MAX_PATH; +#else fname_len = strlen(dir) + strlen("/d2utmpXXXXXX") + sizeof (char); - if (!(fname_str = malloc(fname_len))) +#endif + if (!(fname_str = (char *)malloc(fname_len))) goto make_failed; sprintf(fname_str, "%s%s", dir, "/d2utmpXXXXXX"); *fname_ret = fname_str; @@ -396,10 +839,19 @@ int MakeTempFileFrom(const char *OutFN, char **fname_ret) free(cpy); #ifdef NO_MKSTEMP +#ifdef D2U_UNIFILE + d2u_MultiByteToWideChar(CP_UTF8, 0, fname_str, -1, fname_strw, D2U_MAX_PATH); + namew = _wmktemp(fname_strw); + d2u_WideCharToMultiByte(CP_UTF8, 0, namew, -1, fname_str, (int)fname_len, NULL, NULL); + *fname_ret = fname_str; + if ((fd = _wfopen(fname_strw, W_CNTRLW)) == NULL) + goto make_failed; +#else name = mktemp(fname_str); *fname_ret = name; if ((fd = fopen(fname_str, W_CNTRL)) == NULL) goto make_failed; +#endif #else if ((fd = mkstemp(fname_str)) == -1) goto make_failed; @@ -443,7 +895,8 @@ int ResolveSymbolicLink(char *lFN, char **rFN, CFlag *ipFlag, const char *progna if (ipFlag->verbose) { ipFlag->error = errno; errstr = strerror(errno); - fprintf(stderr, "%s: %s: %s\n", progname, lFN, errstr); + d2u_fprintf(stderr, "%s: %s:", progname, lFN); + d2u_ansi_fprintf(stderr, " %s\n", errstr); } RetVal = -1; } @@ -454,7 +907,8 @@ int ResolveSymbolicLink(char *lFN, char **rFN, CFlag *ipFlag, const char *progna if (ipFlag->verbose) { ipFlag->error = errno; errstr = strerror(errno); - fprintf(stderr, "%s: %s: %s\n", progname, lFN, errstr); + d2u_fprintf(stderr, "%s: %s:", progname, lFN); + d2u_ansi_fprintf(stderr, " %s\n", errstr); } RetVal = -1; } @@ -471,7 +925,8 @@ int ResolveSymbolicLink(char *lFN, char **rFN, CFlag *ipFlag, const char *progna if (ipFlag->verbose) { ipFlag->error = errno; errstr = strerror(errno); - fprintf(stderr, "%s: %s: %s\n", progname, lFN, errstr); + d2u_fprintf(stderr, "%s: %s:", progname, lFN); + d2u_ansi_fprintf(stderr, " %s\n", errstr); } RetVal = -1; } @@ -482,7 +937,8 @@ int ResolveSymbolicLink(char *lFN, char **rFN, CFlag *ipFlag, const char *progna if (ipFlag->verbose) { ipFlag->error = errno; errstr = strerror(errno); - fprintf(stderr, "%s: %s: %s\n", progname, lFN, errstr); + d2u_fprintf(stderr, "%s: %s:", progname, lFN); + d2u_ansi_fprintf(stderr, " %s\n", errstr); } free(targetFN); RetVal = -1; @@ -502,6 +958,9 @@ int ResolveSymbolicLink(char *lFN, char **rFN, CFlag *ipFlag, const char *progna return RetVal; } +/* Read the Byte Order Mark. + Returns file pointer or NULL in case of a read error */ + FILE *read_bom (FILE *f, int *bomtype) { int bom[4]; @@ -517,18 +976,23 @@ FILE *read_bom (FILE *f, int *bomtype) /* Check for BOM */ if (f != NULL) { if ((bom[0] = fgetc(f)) == EOF) { - ungetc(bom[0], f); + if (ferror(f)) { + return NULL; + } *bomtype = FILE_MBS; return(f); } if ((bom[0] != 0xff) && (bom[0] != 0xfe) && (bom[0] != 0xef) && (bom[0] != 0x84)) { - ungetc(bom[0], f); + if (ungetc(bom[0], f) == EOF) return NULL; *bomtype = FILE_MBS; return(f); } if ((bom[1] = fgetc(f)) == EOF) { - ungetc(bom[1], f); - ungetc(bom[0], f); + if (ferror(f)) { + return NULL; + } + if (ungetc(bom[1], f) == EOF) return NULL; + if (ungetc(bom[0], f) == EOF) return NULL; *bomtype = FILE_MBS; return(f); } @@ -541,9 +1005,12 @@ FILE *read_bom (FILE *f, int *bomtype) return(f); } if ((bom[2] = fgetc(f)) == EOF) { - ungetc(bom[2], f); - ungetc(bom[1], f); - ungetc(bom[0], f); + if (ferror(f)) { + return NULL; + } + if (ungetc(bom[2], f) == EOF) return NULL; + if (ungetc(bom[1], f) == EOF) return NULL; + if (ungetc(bom[0], f) == EOF) return NULL; *bomtype = FILE_MBS; return(f); } @@ -552,16 +1019,19 @@ FILE *read_bom (FILE *f, int *bomtype) return(f); } if ((bom[0] == 0x84) && (bom[1] == 0x31) && (bom[2]== 0x95)) { - bom[3] = fgetc(f); - if (bom[3]== 0x33) { /* GB18030 */ - *bomtype = FILE_GB18030; - return(f); - } - ungetc(bom[3], f); + bom[3] = fgetc(f); + if (ferror(f)) { + return NULL; + } + if (bom[3]== 0x33) { /* GB18030 */ + *bomtype = FILE_GB18030; + return(f); + } + if (ungetc(bom[3], f) == EOF) return NULL; } - ungetc(bom[2], f); - ungetc(bom[1], f); - ungetc(bom[0], f); + if (ungetc(bom[2], f) == EOF) return NULL; + if (ungetc(bom[1], f) == EOF) return NULL; + if (ungetc(bom[0], f) == EOF) return NULL; *bomtype = FILE_MBS; return(f); } @@ -579,31 +1049,31 @@ FILE *write_bom (FILE *f, CFlag *ipFlag, const char *progname) { switch (bomtype) { case FILE_UTF16LE: /* UTF-16 Little Endian */ - fprintf(f, "%s", "\xFF\xFE"); + if (fprintf(f, "%s", "\xFF\xFE") < 0) return NULL; if (ipFlag->verbose > 1) { - fprintf(stderr, "%s: ", progname); - fprintf(stderr, _("Writing %s BOM.\n"), _("UTF-16LE")); + d2u_fprintf(stderr, "%s: ", progname); + d2u_ansi_fprintf(stderr, _("Writing %s BOM.\n"), _("UTF-16LE")); } break; case FILE_UTF16BE: /* UTF-16 Big Endian */ - fprintf(f, "%s", "\xFE\xFF"); + if (fprintf(f, "%s", "\xFE\xFF") < 0) return NULL; if (ipFlag->verbose > 1) { - fprintf(stderr, "%s: ", progname); - fprintf(stderr, _("Writing %s BOM.\n"), _("UTF-16BE")); + d2u_fprintf(stderr, "%s: ", progname); + d2u_ansi_fprintf(stderr, _("Writing %s BOM.\n"), _("UTF-16BE")); } break; case FILE_GB18030: /* GB18030 */ - fprintf(f, "%s", "\x84\x31\x95\x33"); + if (fprintf(f, "%s", "\x84\x31\x95\x33") < 0) return NULL; if (ipFlag->verbose > 1) { - fprintf(stderr, "%s: ", progname); - fprintf(stderr, _("Writing %s BOM.\n"), _("GB18030")); + d2u_fprintf(stderr, "%s: ", progname); + d2u_ansi_fprintf(stderr, _("Writing %s BOM.\n"), _("GB18030")); } break; default: /* UTF-8 */ - fprintf(f, "%s", "\xEF\xBB\xBF"); + if (fprintf(f, "%s", "\xEF\xBB\xBF") < 0) return NULL; if (ipFlag->verbose > 1) { - fprintf(stderr, "%s: ", progname); - fprintf(stderr, _("Writing %s BOM.\n"), _("UTF-8")); + d2u_fprintf(stderr, "%s: ", progname); + d2u_ansi_fprintf(stderr, _("Writing %s BOM.\n"), _("UTF-8")); } ; } @@ -611,18 +1081,18 @@ FILE *write_bom (FILE *f, CFlag *ipFlag, const char *progname) if ((bomtype == FILE_GB18030) || (((bomtype == FILE_UTF16LE)||(bomtype == FILE_UTF16BE))&&(ipFlag->locale_target == TARGET_GB18030)) ) { - fprintf(f, "%s", "\x84\x31\x95\x33"); /* GB18030 */ + if (fprintf(f, "%s", "\x84\x31\x95\x33") < 0) return NULL; /* GB18030 */ if (ipFlag->verbose > 1) { - fprintf(stderr, "%s: ", progname); - fprintf(stderr, _("Writing %s BOM.\n"), _("GB18030")); + d2u_fprintf(stderr, "%s: ", progname); + d2u_ansi_fprintf(stderr, _("Writing %s BOM.\n"), _("GB18030")); } } else { - fprintf(f, "%s", "\xEF\xBB\xBF"); /* UTF-8 */ + if (fprintf(f, "%s", "\xEF\xBB\xBF") < 0) return NULL; /* UTF-8 */ if (ipFlag->verbose > 1) { - fprintf(stderr, "%s: ", progname); - fprintf(stderr, _("Writing %s BOM.\n"), _("UTF-8")); + d2u_fprintf(stderr, "%s: ", progname); + d2u_ansi_fprintf(stderr, _("Writing %s BOM.\n"), _("UTF-8")); } } } @@ -631,26 +1101,45 @@ FILE *write_bom (FILE *f, CFlag *ipFlag, const char *progname) void print_bom (const int bomtype, const char *filename, const char *progname) { + char informat[64]; +# ifdef D2U_UNIFILE + wchar_t informatw[64]; +#endif + switch (bomtype) { case FILE_UTF16LE: /* UTF-16 Little Endian */ - fprintf(stderr, "%s: ", progname); - fprintf(stderr, _("Input file %s has %s BOM.\n"), filename, _("UTF-16LE")); + strncpy(informat,_("UTF-16LE"),sizeof(informat)); break; case FILE_UTF16BE: /* UTF-16 Big Endian */ - fprintf(stderr, "%s: ", progname); - fprintf(stderr, _("Input file %s has %s BOM.\n"), filename, _("UTF-16BE")); + strncpy(informat,_("UTF-16BE"),sizeof(informat)); break; case FILE_UTF8: /* UTF-8 */ - fprintf(stderr, "%s: ", progname); - fprintf(stderr, _("Input file %s has %s BOM.\n"), filename, _("UTF-8")); + strncpy(informat,_("UTF-8"),sizeof(informat)); break; case FILE_GB18030: /* GB18030 */ - fprintf(stderr, "%s: ", progname); - fprintf(stderr, _("Input file %s has %s BOM.\n"), filename, _("GB18030")); + strncpy(informat,_("GB18030"),sizeof(informat)); break; default: ; } + + if (bomtype > 0) { + informat[sizeof(informat)-1] = '\0'; + +/* Change informat to UTF-8 for d2u_fprintf. */ +# ifdef D2U_UNIFILE + /* The format string is encoded in the system default + * Windows ANSI code page. May have been translated + * by gettext. Convert it to wide characters. */ + d2u_MultiByteToWideChar(CP_ACP,0, informat, -1, informatw, sizeof(informat)); + /* then convert the format string to UTF-8 */ + d2u_WideCharToMultiByte(CP_UTF8, 0, informatw, -1, informat, sizeof(informat), NULL, NULL); +#endif + + d2u_fprintf(stderr, "%s: ", progname); + d2u_fprintf(stderr, _("Input file %s has %s BOM.\n"), filename, informat); + } + } void print_bom_info (const int bomtype) @@ -659,19 +1148,19 @@ void print_bom_info (const int bomtype) that process the output may not work in other than English locales. */ switch (bomtype) { case FILE_UTF16LE: /* UTF-16 Little Endian */ - printf(" UTF-16LE"); + d2u_fprintf(stdout, " UTF-16LE"); break; case FILE_UTF16BE: /* UTF-16 Big Endian */ - printf(" UTF-16BE"); + d2u_fprintf(stdout, " UTF-16BE"); break; case FILE_UTF8: /* UTF-8 */ - printf(" UTF-8 "); + d2u_fprintf(stdout, " UTF-8 "); break; case FILE_GB18030: /* GB18030 */ - printf(" GB18030 "); + d2u_fprintf(stdout, " GB18030 "); break; default: - printf(" no_bom "); + d2u_fprintf(stdout, " no_bom "); ; } } @@ -683,16 +1172,19 @@ int check_unicode_info(FILE *InF, CFlag *ipFlag, const char *progname, int *bomt #ifdef D2U_UNICODE if (ipFlag->verbose > 1) { if (ipFlag->ConvMode == CONVMODE_UTF16LE) { - fprintf(stderr, "%s: ", progname); - fprintf(stderr, _("Assuming UTF-16LE encoding.\n") ); + d2u_fprintf(stderr, "%s: ", progname); + d2u_fprintf(stderr, _("Assuming UTF-16LE encoding.\n") ); } if (ipFlag->ConvMode == CONVMODE_UTF16BE) { - fprintf(stderr, "%s: ", progname); - fprintf(stderr, _("Assuming UTF-16BE encoding.\n") ); + d2u_fprintf(stderr, "%s: ", progname); + d2u_fprintf(stderr, _("Assuming UTF-16BE encoding.\n") ); } } #endif - InF = read_bom(InF, &ipFlag->bomtype); + if ((InF = read_bom(InF, &ipFlag->bomtype)) == NULL) { + d2u_getc_error(ipFlag,progname); + return -1; + } *bomtype_orig = ipFlag->bomtype; #ifdef D2U_UNICODE if ((ipFlag->bomtype == FILE_MBS) && (ipFlag->ConvMode == CONVMODE_UTF16LE)) @@ -723,16 +1215,19 @@ int check_unicode(FILE *InF, FILE *TempF, CFlag *ipFlag, const char *ipInFN, co #ifdef D2U_UNICODE if (ipFlag->verbose > 1) { if (ipFlag->ConvMode == CONVMODE_UTF16LE) { - fprintf(stderr, "%s: ", progname); - fprintf(stderr, _("Assuming UTF-16LE encoding.\n") ); + d2u_fprintf(stderr, "%s: ", progname); + d2u_fprintf(stderr, _("Assuming UTF-16LE encoding.\n") ); } if (ipFlag->ConvMode == CONVMODE_UTF16BE) { - fprintf(stderr, "%s: ", progname); - fprintf(stderr, _("Assuming UTF-16BE encoding.\n") ); + d2u_fprintf(stderr, "%s: ", progname); + d2u_fprintf(stderr, _("Assuming UTF-16BE encoding.\n") ); } } #endif - InF = read_bom(InF, &ipFlag->bomtype); + if ((InF = read_bom(InF, &ipFlag->bomtype)) == NULL) { + d2u_getc_error(ipFlag,progname); + return -1; + } if (ipFlag->verbose > 1) print_bom(ipFlag->bomtype, ipInFN, progname); #ifdef D2U_UNICODE @@ -752,15 +1247,15 @@ int check_unicode(FILE *InF, FILE *TempF, CFlag *ipFlag, const char *ipInFN, co } } #endif -#endif #if !defined(__MSDOS__) && !defined(_WIN32) && !defined(__OS2__) /* Unix, Cygwin */ if (strcmp(nl_langinfo(CODESET), "GB18030") == 0) ipFlag->locale_target = TARGET_GB18030; #endif +#endif if ((!RetVal) && ((ipFlag->add_bom) || ((ipFlag->keep_bom) && (ipFlag->bomtype > 0)))) - write_bom(TempF, ipFlag, progname); + if (write_bom(TempF, ipFlag, progname) == NULL) return -1; return RetVal; } @@ -781,7 +1276,12 @@ int ConvertNewFile(char *ipInFN, char *ipOutFN, CFlag *ipFlag, const char *progn FILE *TempF = NULL; char *TempPath; char *errstr; +#ifdef D2U_UNIFILE + struct _stat StatBuf; + wchar_t pathw[D2U_MAX_PATH]; +#else struct stat StatBuf; +#endif struct utimbuf UTimeBuf; #ifndef NO_CHMOD mode_t mask; @@ -826,11 +1326,17 @@ int ConvertNewFile(char *ipInFN, char *ipOutFN, CFlag *ipFlag, const char *progn } /* retrieve ipInFN file date stamp */ +#ifdef D2U_UNIFILE + d2u_MultiByteToWideChar(CP_UTF8, 0, ipInFN, -1, pathw, D2U_MAX_PATH); + if (_wstat(pathw, &StatBuf)) { +#else if (stat(ipInFN, &StatBuf)) { +#endif if (ipFlag->verbose) { ipFlag->error = errno; errstr = strerror(errno); - fprintf(stderr, "%s: %s: %s\n", progname, ipInFN, errstr); + d2u_fprintf(stderr, "%s: %s:", progname, ipInFN); + d2u_ansi_fprintf(stderr, " %s\n", errstr); } RetVal = -1; } @@ -843,15 +1349,15 @@ int ConvertNewFile(char *ipInFN, char *ipOutFN, CFlag *ipFlag, const char *progn if (ipFlag->verbose) { ipFlag->error = errno; errstr = strerror(errno); - fprintf(stderr, "%s: ", progname); - fprintf(stderr, _("Failed to open temporary output file: %s\n"), errstr); + d2u_fprintf(stderr, "%s: ", progname); + d2u_ansi_fprintf(stderr, _("Failed to open temporary output file: %s\n"), errstr); } RetVal = -1; } #if DEBUG - fprintf(stderr, "%s: ", progname); - fprintf(stderr, _("using %s as temporary file\n"), TempPath); + d2u_fprintf(stderr, "%s: ", progname); + d2u_fprintf(stderr, _("using %s as temporary file\n"), TempPath); #endif /* can open in file? */ @@ -860,7 +1366,8 @@ int ConvertNewFile(char *ipInFN, char *ipOutFN, CFlag *ipFlag, const char *progn if (InF == NULL) { ipFlag->error = errno; errstr = strerror(errno); - fprintf(stderr, "%s: %s: %s\n", progname, ipInFN, errstr); + d2u_fprintf(stderr, "%s: %s:", progname, ipInFN); + d2u_ansi_fprintf(stderr, " %s\n", errstr); RetVal = -1; } } @@ -873,7 +1380,8 @@ int ConvertNewFile(char *ipInFN, char *ipOutFN, CFlag *ipFlag, const char *progn if ((TempF=OpenOutFile(fd)) == NULL) { ipFlag->error = errno; errstr = strerror(errno); - fprintf(stderr, "%s: %s\n", progname, errstr); + d2u_fprintf(stderr, "%s:", progname); + d2u_ansi_fprintf(stderr, " %s\n", errstr); #endif fclose (InF); InF = NULL; @@ -913,8 +1421,9 @@ int ConvertNewFile(char *ipInFN, char *ipOutFN, CFlag *ipFlag, const char *progn if (ipFlag->verbose) { ipFlag->error = errno; errstr = strerror(errno); - fprintf(stderr, "%s: ", progname); - fprintf(stderr, _("Failed to write to temporary output file %s: %s\n"), TempPath, errstr); + d2u_fprintf(stderr, "%s: ", progname); + d2u_fprintf(stderr, _("Failed to write to temporary output file %s:"), TempPath); + d2u_ansi_fprintf(stderr, " %s\n", errstr); } RetVal = -1; } @@ -943,8 +1452,9 @@ int ConvertNewFile(char *ipInFN, char *ipOutFN, CFlag *ipFlag, const char *progn if (ipFlag->verbose) { ipFlag->error = errno; errstr = strerror(errno); - fprintf(stderr, "%s: ", progname); - fprintf(stderr, _("Failed to change the permissions of temporary output file %s: %s\n"), TempPath, errstr); + d2u_fprintf(stderr, "%s: ", progname); + d2u_fprintf(stderr, _("Failed to change the permissions of temporary output file %s:"), TempPath); + d2u_ansi_fprintf(stderr, " %s\n", errstr); } } } @@ -959,8 +1469,9 @@ int ConvertNewFile(char *ipInFN, char *ipOutFN, CFlag *ipFlag, const char *progn if (ipFlag->verbose) { ipFlag->error = errno; errstr = strerror(errno); - fprintf(stderr, "%s: ", progname); - fprintf(stderr, _("Failed to change the owner and group of temporary output file %s: %s\n"), TempPath, errstr); + d2u_fprintf(stderr, "%s: ", progname); + d2u_fprintf(stderr, _("Failed to change the owner and group of temporary output file %s:"), TempPath); + d2u_ansi_fprintf(stderr, " %s\n", errstr); } RetVal = -1; } @@ -976,7 +1487,8 @@ int ConvertNewFile(char *ipInFN, char *ipOutFN, CFlag *ipFlag, const char *progn if (ipFlag->verbose) { ipFlag->error = errno; errstr = strerror(errno); - fprintf(stderr, "%s: %s: %s\n", progname, TempPath, errstr); + d2u_fprintf(stderr, "%s: %s:", progname, TempPath); + d2u_ansi_fprintf(stderr, " %s\n", errstr); } RetVal = -1; } @@ -984,11 +1496,12 @@ int ConvertNewFile(char *ipInFN, char *ipOutFN, CFlag *ipFlag, const char *progn /* any error? cleanup the temp file */ if (RetVal && (TempPath != NULL)) { - if (unlink(TempPath) && (errno != ENOENT)) { + if (d2u_unlink(TempPath) && (errno != ENOENT)) { if (ipFlag->verbose) { ipFlag->error = errno; errstr = strerror(errno); - fprintf(stderr, "%s: %s: %s\n", progname, TempPath, errstr); + d2u_fprintf(stderr, "%s: %s:", progname, TempPath); + d2u_ansi_fprintf(stderr, " %s\n", errstr); } RetVal = -1; } @@ -1003,9 +1516,9 @@ int ConvertNewFile(char *ipInFN, char *ipOutFN, CFlag *ipFlag, const char *progn ResolveSymlinkResult = ResolveSymbolicLink(ipOutFN, &TargetFN, ipFlag, progname); if (ResolveSymlinkResult < 0) { if (ipFlag->verbose) { - fprintf(stderr, "%s: ", progname); - fprintf(stderr, _("problems resolving symbolic link '%s'\n"), ipOutFN); - fprintf(stderr, _(" output file remains in '%s'\n"), TempPath); + d2u_fprintf(stderr, "%s: ", progname); + d2u_fprintf(stderr, _("problems resolving symbolic link '%s'\n"), ipOutFN); + d2u_fprintf(stderr, _(" output file remains in '%s'\n"), TempPath); } RetVal = -1; } @@ -1015,26 +1528,29 @@ int ConvertNewFile(char *ipInFN, char *ipOutFN, CFlag *ipFlag, const char *progn /* can rename temporary file to output file? */ if (!RetVal) { #ifdef NEED_REMOVE - if (unlink(TargetFN) && (errno != ENOENT)) { + if (d2u_unlink(TargetFN) && (errno != ENOENT)) { if (ipFlag->verbose) { ipFlag->error = errno; errstr = strerror(errno); - fprintf(stderr, "%s: %s: %s\n", progname, TargetFN, errstr); + d2u_fprintf(stderr, "%s: %s:", progname, TargetFN); + d2u_ansi_fprintf(stderr, " %s\n", errstr); } RetVal = -1; } #endif - if (rename(TempPath, TargetFN) == -1) { + + if (d2u_rename(TempPath, TargetFN) != 0) { if (ipFlag->verbose) { ipFlag->error = errno; errstr = strerror(errno); - fprintf(stderr, "%s: ", progname); - fprintf(stderr, _("problems renaming '%s' to '%s': %s\n"), TempPath, TargetFN, errstr); + d2u_fprintf(stderr, "%s: ", progname); + d2u_fprintf(stderr, _("problems renaming '%s' to '%s':"), TempPath, TargetFN); + d2u_ansi_fprintf(stderr, " %s\n", errstr); #ifdef S_ISLNK if (ResolveSymlinkResult > 0) - fprintf(stderr, _(" which is the target of symbolic link '%s'\n"), ipOutFN); + d2u_fprintf(stderr, _(" which is the target of symbolic link '%s'\n"), ipOutFN); #endif - fprintf(stderr, _(" output file remains in '%s'\n"), TempPath); + d2u_fprintf(stderr, _(" output file remains in '%s'\n"), TempPath); } RetVal = -1; } @@ -1094,17 +1610,17 @@ int ConvertStdio(CFlag *ipFlag, const char *progname, void print_messages_stdio(const CFlag *pFlag, const char *progname) { if (pFlag->status & BINARY_FILE) { - fprintf(stderr,"%s: ",progname); - fprintf(stderr, _("Skipping binary file %s\n"), "stdin"); + d2u_fprintf(stderr,"%s: ",progname); + d2u_fprintf(stderr, _("Skipping binary file %s\n"), "stdin"); } else if (pFlag->status & WRONG_CODEPAGE) { - fprintf(stderr,"%s: ",progname); - fprintf(stderr, _("code page %d is not supported.\n"), pFlag->ConvMode); + d2u_fprintf(stderr,"%s: ",progname); + d2u_fprintf(stderr, _("code page %d is not supported.\n"), pFlag->ConvMode); } else if (pFlag->status & WCHAR_T_TOO_SMALL) { - fprintf(stderr,"%s: ",progname); - fprintf(stderr, _("Skipping UTF-16 file %s, the size of wchar_t is %d bytes.\n"), "stdin", (int)sizeof(wchar_t)); + d2u_fprintf(stderr,"%s: ",progname); + d2u_fprintf(stderr, _("Skipping UTF-16 file %s, the size of wchar_t is %d bytes.\n"), "stdin", (int)sizeof(wchar_t)); } else if (pFlag->status & UNICODE_CONVERSION_ERROR) { - fprintf(stderr,"%s: ",progname); - fprintf(stderr, _("Skipping UTF-16 file %s, an UTF-16 conversion error occurred.\n"), "stdin"); + d2u_fprintf(stderr,"%s: ",progname); + d2u_fprintf(stderr, _("Skipping UTF-16 file %s, an UTF-16 conversion error occurred on line %u.\n"), "stdin", pFlag->line_nr); } } @@ -1146,65 +1662,83 @@ void print_format(const CFlag *pFlag, char *informat, char *outformat, size_t li void print_messages_newfile(const CFlag *pFlag, const char *infile, const char *outfile, const char *progname, const int RetVal) { - char informat[10]; - char outformat[32]; + char informat[32]; + char outformat[64]; +# ifdef D2U_UNIFILE + wchar_t informatw[32]; + wchar_t outformatw[64]; +#endif print_format(pFlag, informat, outformat, sizeof(informat), sizeof(outformat)); +/* Change informat and outformat to UTF-8 for d2u_fprintf. */ +# ifdef D2U_UNIFILE + /* The format string is encoded in the system default + * Windows ANSI code page. May have been translated + * by gettext. Convert it to wide characters. */ + d2u_MultiByteToWideChar(CP_ACP,0, informat, -1, informatw, sizeof(informat)); + d2u_MultiByteToWideChar(CP_ACP,0, outformat, -1, outformatw, sizeof(outformat)); + /* then convert the format string to UTF-8 */ + d2u_WideCharToMultiByte(CP_UTF8, 0, informatw, -1, informat, sizeof(informat), NULL, NULL); + d2u_WideCharToMultiByte(CP_UTF8, 0, outformatw, -1, outformat, sizeof(outformat), NULL, NULL); +#endif + if (pFlag->status & NO_REGFILE) { - fprintf(stderr,"%s: ",progname); - fprintf(stderr, _("Skipping %s, not a regular file.\n"), infile); + d2u_fprintf(stderr,"%s: ",progname); + d2u_fprintf(stderr, _("Skipping %s, not a regular file.\n"), infile); } else if (pFlag->status & OUTPUTFILE_SYMLINK) { - fprintf(stderr,"%s: ",progname); - fprintf(stderr, _("Skipping %s, output file %s is a symbolic link.\n"), infile, outfile); + d2u_fprintf(stderr,"%s: ",progname); + d2u_fprintf(stderr, _("Skipping %s, output file %s is a symbolic link.\n"), infile, outfile); } else if (pFlag->status & INPUT_TARGET_NO_REGFILE) { - fprintf(stderr,"%s: ",progname); - fprintf(stderr, _("Skipping symbolic link %s, target is not a regular file.\n"), infile); + d2u_fprintf(stderr,"%s: ",progname); + d2u_fprintf(stderr, _("Skipping symbolic link %s, target is not a regular file.\n"), infile); } else if (pFlag->status & OUTPUT_TARGET_NO_REGFILE) { - fprintf(stderr,"%s: ",progname); - fprintf(stderr, _("Skipping %s, target of symbolic link %s is not a regular file.\n"), infile, outfile); + d2u_fprintf(stderr,"%s: ",progname); + d2u_fprintf(stderr, _("Skipping %s, target of symbolic link %s is not a regular file.\n"), infile, outfile); } else if (pFlag->status & BINARY_FILE) { - fprintf(stderr,"%s: ",progname); - fprintf(stderr, _("Skipping binary file %s\n"), infile); + d2u_fprintf(stderr,"%s: ",progname); + d2u_fprintf(stderr, _("Skipping binary file %s\n"), infile); } else if (pFlag->status & WRONG_CODEPAGE) { - fprintf(stderr,"%s: ",progname); - fprintf(stderr, _("code page %d is not supported.\n"), pFlag->ConvMode); + d2u_fprintf(stderr,"%s: ",progname); + d2u_fprintf(stderr, _("code page %d is not supported.\n"), pFlag->ConvMode); } else if (pFlag->status & WCHAR_T_TOO_SMALL) { - fprintf(stderr,"%s: ",progname); - fprintf(stderr, _("Skipping UTF-16 file %s, the size of wchar_t is %d bytes.\n"), infile, (int)sizeof(wchar_t)); + d2u_fprintf(stderr,"%s: ",progname); + d2u_fprintf(stderr, _("Skipping UTF-16 file %s, the size of wchar_t is %d bytes.\n"), infile, (int)sizeof(wchar_t)); } else if (pFlag->status & UNICODE_CONVERSION_ERROR) { - fprintf(stderr,"%s: ",progname); - fprintf(stderr, _("Skipping UTF-16 file %s, an UTF-16 conversion error occurred.\n"), infile); + d2u_fprintf(stderr,"%s: ",progname); + d2u_fprintf(stderr, _("Skipping UTF-16 file %s, an UTF-16 conversion error occurred on line %u.\n"), infile, pFlag->line_nr); } else { - fprintf(stderr,"%s: ",progname); + d2u_fprintf(stderr,"%s: ",progname); if (informat[0] == '\0') { - if (is_dos2unix(progname)) - fprintf(stderr, _("converting file %s to file %s in Unix format...\n"), infile, outfile); - else { - if (pFlag->FromToMode == FROMTO_UNIX2MAC) - fprintf(stderr, _("converting file %s to file %s in Mac format...\n"), infile, outfile); - else - fprintf(stderr, _("converting file %s to file %s in DOS format...\n"), infile, outfile); + if (is_dos2unix(progname)) { + if (!RetVal) d2u_fprintf(stderr, _("converting file %s to file %s in Unix format...\n"), infile, outfile); + } else { + if (pFlag->FromToMode == FROMTO_UNIX2MAC) { + if (!RetVal) d2u_fprintf(stderr, _("converting file %s to file %s in Mac format...\n"), infile, outfile); + } else { + if (!RetVal) d2u_fprintf(stderr, _("converting file %s to file %s in DOS format...\n"), infile, outfile); + } } } else { - if (is_dos2unix(progname)) + if (is_dos2unix(progname)) { /* TRANSLATORS: 1st %s is encoding of input file. 2nd %s is name of input file. 3rd %s is encoding of output file. 4th %s is name of output file. E.g.: converting UTF-16LE file in.txt to UTF-8 file out.txt in Unix format... */ - fprintf(stderr, _("converting %s file %s to %s file %s in Unix format...\n"), informat, infile, outformat, outfile); - else { - if (pFlag->FromToMode == FROMTO_UNIX2MAC) - fprintf(stderr, _("converting %s file %s to %s file %s in Mac format...\n"), informat, infile, outformat, outfile); - else - fprintf(stderr, _("converting %s file %s to %s file %s in DOS format...\n"), informat, infile, outformat, outfile); + if (!RetVal) d2u_fprintf(stderr, _("converting %s file %s to %s file %s in Unix format...\n"), informat, infile, outformat, outfile); + } else { + if (pFlag->FromToMode == FROMTO_UNIX2MAC) { + if (!RetVal) d2u_fprintf(stderr, _("converting %s file %s to %s file %s in Mac format...\n"), informat, infile, outformat, outfile); + } else { + if (!RetVal) d2u_fprintf(stderr, _("converting %s file %s to %s file %s in DOS format...\n"), informat, infile, outformat, outfile); + } } } if (RetVal) { - fprintf(stderr,"%s: ",progname); - fprintf(stderr, _("problems converting file %s to file %s\n"), infile, outfile); + d2u_fprintf(stderr,"%s: ",progname); + d2u_fprintf(stderr, _("problems converting file %s to file %s\n"), infile, outfile); } } } @@ -1213,59 +1747,77 @@ void print_messages_oldfile(const CFlag *pFlag, const char *infile, const char * { char informat[10]; char outformat[32]; +# ifdef D2U_UNIFILE + wchar_t informatw[32]; + wchar_t outformatw[64]; +#endif print_format(pFlag, informat, outformat, sizeof(informat), sizeof(outformat)); +/* Change informat and outformat to UTF-8 for d2u_fprintf. */ +# ifdef D2U_UNIFILE + /* The format string is encoded in the system default + * Windows ANSI code page. May have been translated + * by gettext. Convert it to wide characters. */ + d2u_MultiByteToWideChar(CP_ACP,0, informat, -1, informatw, sizeof(informat)); + d2u_MultiByteToWideChar(CP_ACP,0, outformat, -1, outformatw, sizeof(outformat)); + /* then convert the format string to UTF-8 */ + d2u_WideCharToMultiByte(CP_UTF8, 0, informatw, -1, informat, sizeof(informat), NULL, NULL); + d2u_WideCharToMultiByte(CP_UTF8, 0, outformatw, -1, outformat, sizeof(outformat), NULL, NULL); +#endif + if (pFlag->status & NO_REGFILE) { - fprintf(stderr,"%s: ",progname); - fprintf(stderr, _("Skipping %s, not a regular file.\n"), infile); + d2u_fprintf(stderr,"%s: ",progname); + d2u_fprintf(stderr, _("Skipping %s, not a regular file.\n"), infile); } else if (pFlag->status & OUTPUTFILE_SYMLINK) { - fprintf(stderr,"%s: ",progname); - fprintf(stderr, _("Skipping symbolic link %s.\n"), infile); + d2u_fprintf(stderr,"%s: ",progname); + d2u_fprintf(stderr, _("Skipping symbolic link %s.\n"), infile); } else if (pFlag->status & INPUT_TARGET_NO_REGFILE) { - fprintf(stderr,"%s: ",progname); - fprintf(stderr, _("Skipping symbolic link %s, target is not a regular file.\n"), infile); + d2u_fprintf(stderr,"%s: ",progname); + d2u_fprintf(stderr, _("Skipping symbolic link %s, target is not a regular file.\n"), infile); } else if (pFlag->status & BINARY_FILE) { - fprintf(stderr,"%s: ",progname); - fprintf(stderr, _("Skipping binary file %s\n"), infile); + d2u_fprintf(stderr,"%s: ",progname); + d2u_fprintf(stderr, _("Skipping binary file %s\n"), infile); } else if (pFlag->status & WRONG_CODEPAGE) { - fprintf(stderr,"%s: ",progname); - fprintf(stderr, _("code page %d is not supported.\n"), pFlag->ConvMode); + d2u_fprintf(stderr,"%s: ",progname); + d2u_fprintf(stderr, _("code page %d is not supported.\n"), pFlag->ConvMode); } else if (pFlag->status & WCHAR_T_TOO_SMALL) { - fprintf(stderr,"%s: ",progname); - fprintf(stderr, _("Skipping UTF-16 file %s, the size of wchar_t is %d bytes.\n"), infile, (int)sizeof(wchar_t)); + d2u_fprintf(stderr,"%s: ",progname); + d2u_fprintf(stderr, _("Skipping UTF-16 file %s, the size of wchar_t is %d bytes.\n"), infile, (int)sizeof(wchar_t)); } else if (pFlag->status & UNICODE_CONVERSION_ERROR) { - fprintf(stderr,"%s: ",progname); - fprintf(stderr, _("Skipping UTF-16 file %s, an UTF-16 conversion error occurred.\n"), infile); + d2u_fprintf(stderr,"%s: ",progname); + d2u_fprintf(stderr, _("Skipping UTF-16 file %s, an UTF-16 conversion error occurred on line %u.\n"), infile, pFlag->line_nr); } else { - fprintf(stderr,"%s: ",progname); + d2u_fprintf(stderr,"%s: ",progname); if (informat[0] == '\0') { - if (is_dos2unix(progname)) - fprintf(stderr, _("converting file %s to Unix format...\n"), infile); - else { - if (pFlag->FromToMode == FROMTO_UNIX2MAC) - fprintf(stderr, _("converting file %s to Mac format...\n"), infile); - else - fprintf(stderr, _("converting file %s to DOS format...\n"), infile); + if (is_dos2unix(progname)) { + if (!RetVal) d2u_fprintf(stderr, _("converting file %s to Unix format...\n"), infile); + } else { + if (pFlag->FromToMode == FROMTO_UNIX2MAC) { + if (!RetVal) d2u_fprintf(stderr, _("converting file %s to Mac format...\n"), infile); + } else { + if (!RetVal) d2u_fprintf(stderr, _("converting file %s to DOS format...\n"), infile); + } } } else { - if (is_dos2unix(progname)) + if (is_dos2unix(progname)) { /* TRANSLATORS: 1st %s is encoding of input file. 2nd %s is name of input file. 3rd %s is encoding of output (input file is overwritten). E.g.: converting UTF-16LE file foo.txt to UTF-8 Unix format... */ - fprintf(stderr, _("converting %s file %s to %s Unix format...\n"), informat, infile, outformat); - else { - if (pFlag->FromToMode == FROMTO_UNIX2MAC) - fprintf(stderr, _("converting %s file %s to %s Mac format...\n"), informat, infile, outformat); - else - fprintf(stderr, _("converting %s file %s to %s DOS format...\n"), informat, infile, outformat); + if (!RetVal) d2u_fprintf(stderr, _("converting %s file %s to %s Unix format...\n"), informat, infile, outformat); + } else { + if (pFlag->FromToMode == FROMTO_UNIX2MAC) { + if (!RetVal) d2u_fprintf(stderr, _("converting %s file %s to %s Mac format...\n"), informat, infile, outformat); + } else { + if (!RetVal) d2u_fprintf(stderr, _("converting %s file %s to %s DOS format...\n"), informat, infile, outformat); + } } } if (RetVal) { - fprintf(stderr,"%s: ",progname); - fprintf(stderr, _("problems converting file %s\n"), infile); + d2u_fprintf(stderr,"%s: ",progname); + d2u_fprintf(stderr, _("problems converting file %s\n"), infile); } } } @@ -1274,30 +1826,31 @@ void print_messages_info(const CFlag *pFlag, const char *infile, const char *pro { if (pFlag->status & NO_REGFILE) { if (pFlag->verbose) { - fprintf(stderr,"%s: ",progname); - fprintf(stderr, _("Skipping %s, not a regular file.\n"), infile); + d2u_fprintf(stderr,"%s: ",progname); + d2u_fprintf(stderr, _("Skipping %s, not a regular file.\n"), infile); } } else if (pFlag->status & INPUT_TARGET_NO_REGFILE) { if (pFlag->verbose) { - fprintf(stderr,"%s: ",progname); - fprintf(stderr, _("Skipping symbolic link %s, target is not a regular file.\n"), infile); + d2u_fprintf(stderr,"%s: ",progname); + d2u_fprintf(stderr, _("Skipping symbolic link %s, target is not a regular file.\n"), infile); } } else if (pFlag->status & WCHAR_T_TOO_SMALL) { if (pFlag->verbose) { - fprintf(stderr,"%s: ",progname); - fprintf(stderr, _("Skipping UTF-16 file %s, the size of wchar_t is %d bytes.\n"), infile, (int)sizeof(wchar_t)); + d2u_fprintf(stderr,"%s: ",progname); + d2u_fprintf(stderr, _("Skipping UTF-16 file %s, the size of wchar_t is %d bytes.\n"), infile, (int)sizeof(wchar_t)); } } } #ifdef D2U_UNICODE -void FileInfoW(FILE* ipInF, CFlag *ipFlag, const char *filename) +void FileInfoW(FILE* ipInF, CFlag *ipFlag, const char *filename, int bomtype, const char *progname) { wint_t TempChar; wint_t PreviousChar = 0; unsigned int lb_dos = 0; unsigned int lb_unix = 0; unsigned int lb_mac = 0; + char *errstr; ipFlag->status = 0; @@ -1322,8 +1875,18 @@ void FileInfoW(FILE* ipInF, CFlag *ipFlag, const char *filename) continue; } PreviousChar = TempChar; - lb_unix++; /* Unix line end (LF). Put CR */ + lb_unix++; /* Unix line end (LF). */ + } + } + if ((TempChar == WEOF) && ferror(ipInF)) { + ipFlag->error = errno; + if (ipFlag->verbose) { + errstr = strerror(errno); + d2u_fprintf(stderr, "%s: ", progname); + d2u_fprintf(stderr, _("can not read from input file %s:"), filename); + d2u_ansi_fprintf(stderr, " %s\n", errstr); } + return; } if (ipFlag->file_info & INFO_CONVERT) { @@ -1338,30 +1901,32 @@ void FileInfoW(FILE* ipInF, CFlag *ipFlag, const char *filename) } if (ipFlag->file_info & INFO_DOS) - printf(" %6u", lb_dos); + d2u_fprintf(stdout, " %6u", lb_dos); if (ipFlag->file_info & INFO_UNIX) - printf(" %6u", lb_unix); + d2u_fprintf(stdout, " %6u", lb_unix); if (ipFlag->file_info & INFO_MAC) - printf(" %6u", lb_mac); + d2u_fprintf(stdout, " %6u", lb_mac); if (ipFlag->file_info & INFO_BOM) - print_bom_info(ipFlag->bomtype); + print_bom_info(bomtype); if (ipFlag->file_info & INFO_TEXT) { if (ipFlag->status & BINARY_FILE) - printf(" binary"); + d2u_fprintf(stdout, " binary"); else - printf(" text "); + d2u_fprintf(stdout, " text "); } - printf(" %s\n",filename); + d2u_fprintf(stdout, " %s",filename); + d2u_fprintf(stdout, "\n"); } #endif -void FileInfo(FILE* ipInF, CFlag *ipFlag, const char *filename) +void FileInfo(FILE* ipInF, CFlag *ipFlag, const char *filename, int bomtype, const char *progname) { int TempChar; int PreviousChar = 0; unsigned int lb_dos = 0; unsigned int lb_unix = 0; unsigned int lb_mac = 0; + char *errstr; ipFlag->status = 0; @@ -1387,9 +1952,19 @@ void FileInfo(FILE* ipInF, CFlag *ipFlag, const char *filename) continue; } PreviousChar = TempChar; - lb_unix++; /* Unix line end (LF). Put CR */ + lb_unix++; /* Unix line end (LF). */ } } + if ((TempChar == EOF) && ferror(ipInF)) { + ipFlag->error = errno; + if (ipFlag->verbose) { + errstr = strerror(errno); + d2u_fprintf(stderr, "%s: ", progname); + d2u_fprintf(stderr, _("can not read from input file %s:"), filename); + d2u_ansi_fprintf(stderr, " %s\n", errstr); + } + return; + } if (ipFlag->file_info & INFO_CONVERT) { @@ -1404,20 +1979,21 @@ void FileInfo(FILE* ipInF, CFlag *ipFlag, const char *filename) } if (ipFlag->file_info & INFO_DOS) - printf(" %6u", lb_dos); + d2u_fprintf(stdout, " %6u", lb_dos); if (ipFlag->file_info & INFO_UNIX) - printf(" %6u", lb_unix); + d2u_fprintf(stdout, " %6u", lb_unix); if (ipFlag->file_info & INFO_MAC) - printf(" %6u", lb_mac); + d2u_fprintf(stdout, " %6u", lb_mac); if (ipFlag->file_info & INFO_BOM) - print_bom_info(ipFlag->bomtype); + print_bom_info(bomtype); if (ipFlag->file_info & INFO_TEXT) { if (ipFlag->status & BINARY_FILE) - printf(" binary"); + d2u_fprintf(stdout, " binary"); else - printf(" text "); + d2u_fprintf(stdout, " text "); } - printf(" %s\n",filename); + d2u_fprintf(stdout, " %s",filename); + d2u_fprintf(stdout, "\n"); } int GetFileInfo(char *ipInFN, CFlag *ipFlag, const char *progname) @@ -1425,7 +2001,7 @@ int GetFileInfo(char *ipInFN, CFlag *ipFlag, const char *progname) int RetVal = 0; FILE *InF = NULL; char *errstr; - int bomtype_orig = FILE_MBS; + int bomtype_orig = FILE_MBS; /* messages must print the real bomtype, not the assumed bomtype */ ipFlag->status = 0 ; @@ -1449,12 +2025,13 @@ int GetFileInfo(char *ipInFN, CFlag *ipFlag, const char *progname) if (InF == NULL) { ipFlag->error = errno; errstr = strerror(errno); - fprintf(stderr, "%s: %s: %s\n", progname, ipInFN, errstr); + d2u_fprintf(stderr, "%s: %s: ", progname, ipInFN); + d2u_ansi_fprintf(stderr, "%s\n", errstr); RetVal = -1; } - if (!RetVal) + if (!RetVal) if (check_unicode_info(InF, ipFlag, progname, &bomtype_orig)) RetVal = -1; @@ -1462,16 +2039,15 @@ int GetFileInfo(char *ipInFN, CFlag *ipFlag, const char *progname) #ifdef D2U_UNICODE if (!RetVal) { if ((ipFlag->bomtype == FILE_UTF16LE) || (ipFlag->bomtype == FILE_UTF16BE)) { - FileInfoW(InF, ipFlag, ipInFN); + FileInfoW(InF, ipFlag, ipInFN, bomtype_orig, progname); } else { - FileInfo(InF, ipFlag, ipInFN); + FileInfo(InF, ipFlag, ipInFN, bomtype_orig, progname); } } #else if (!RetVal) - FileInfo(InF, ipFlag, ipInFN); + FileInfo(InF, ipFlag, ipInFN, bomtype_orig, progname); #endif - ipFlag->bomtype = bomtype_orig; /* messages must print the real bomtype, not the assumed bomtype */ /* can close in file? */ if ((InF) && (fclose(InF) == EOF)) @@ -1483,7 +2059,7 @@ int GetFileInfo(char *ipInFN, CFlag *ipFlag, const char *progname) int GetFileInfoStdio(CFlag *ipFlag, const char *progname) { int RetVal = 0; - int bomtype_orig = FILE_MBS; + int bomtype_orig = FILE_MBS; /* messages must print the real bomtype, not the assumed bomtype */ ipFlag->status = 0 ; @@ -1509,16 +2085,15 @@ int GetFileInfoStdio(CFlag *ipFlag, const char *progname) #ifdef D2U_UNICODE if (!RetVal) { if ((ipFlag->bomtype == FILE_UTF16LE) || (ipFlag->bomtype == FILE_UTF16BE)) { - FileInfoW(stdin, ipFlag, ""); + FileInfoW(stdin, ipFlag, "", bomtype_orig, progname); } else { - FileInfo(stdin, ipFlag, ""); + FileInfo(stdin, ipFlag, "", bomtype_orig, progname); } } #else if (!RetVal) - FileInfo(stdin, ipFlag, ""); + FileInfo(stdin, ipFlag, "", bomtype_orig, progname); #endif - ipFlag->bomtype = bomtype_orig; /* messages must print the real bomtype, not the assumed bomtype */ return RetVal; } @@ -1557,8 +2132,8 @@ void get_info_options(char *option, CFlag *pFlag, const char *progname) default: /* Terminate the program on a wrong option. If pFlag->file_info is zero and the program goes on, it may do unwanted conversions. */ - fprintf(stderr,"%s: ",progname); - fprintf(stderr,_("wrong flag '%c' for option -i or --info\n"), *ptr); + d2u_fprintf(stderr,"%s: ",progname); + d2u_fprintf(stderr,_("wrong flag '%c' for option -i or --info\n"), *ptr); exit(1); ; } @@ -1566,7 +2141,8 @@ void get_info_options(char *option, CFlag *pFlag, const char *progname) } } -int parse_options(int argc, char *argv[], CFlag *pFlag, const char *localedir, const char *progname, +int parse_options(int argc, char *argv[], + CFlag *pFlag, const char *localedir, const char *progname, void (*PrintLicense)(void), int (*Convert)(FILE*, FILE*, CFlag *, const char *) #ifdef D2U_UNICODE @@ -1579,6 +2155,9 @@ int parse_options(int argc, char *argv[], CFlag *pFlag, const char *localedir, c int CanSwitchFileMode = 1; int process_options = 1; int RetVal = 0; +#ifdef D2U_UNIFILE + char *ptr; +#endif /* variable initialisations */ pFlag->NewFile = 0; @@ -1597,6 +2176,18 @@ int parse_options(int argc, char *argv[], CFlag *pFlag, const char *localedir, c pFlag->file_info = 0; pFlag->locale_target = TARGET_UTF8; +#ifdef D2U_UNIFILE + ptr = getenv("DOS2UNIX_DISPLAY_ENC"); + if (ptr != NULL) { + if (strncmp(ptr, "ansi", sizeof("ansi")) == 0) + d2u_display_encoding = D2U_DISPLAY_ANSI; + else if (strncmp(ptr, "unicode", sizeof("unicode")) == 0) + d2u_display_encoding = D2U_DISPLAY_UNICODE; + else if (strncmp(ptr, "utf8", sizeof("utf8")) == 0) + d2u_display_encoding = D2U_DISPLAY_UTF8; + } +#endif + while ((++ArgIdx < argc) && (!ShouldExit)) { /* is it an option? */ @@ -1666,8 +2257,8 @@ int parse_options(int argc, char *argv[], CFlag *pFlag, const char *localedir, c else if (strcmp(argv[ArgIdx],"-iso") == 0) { pFlag->ConvMode = (int)query_con_codepage(); if (pFlag->verbose) { - fprintf(stderr,"%s: ",progname); - fprintf(stderr,_("active code page: %d\n"), pFlag->ConvMode); + d2u_fprintf(stderr,"%s: ",progname); + d2u_fprintf(stderr,_("active code page: %d\n"), pFlag->ConvMode); } if (pFlag->ConvMode < 2) pFlag->ConvMode = CONVMODE_437; @@ -1703,8 +2294,8 @@ int parse_options(int argc, char *argv[], CFlag *pFlag, const char *localedir, c else if (strcmpi(argv[ArgIdx], "iso") == 0) { pFlag->ConvMode = (int)query_con_codepage(); if (pFlag->verbose) { - fprintf(stderr,"%s: ",progname); - fprintf(stderr,_("active code page: %d\n"), pFlag->ConvMode); + d2u_fprintf(stderr,"%s: ",progname); + d2u_fprintf(stderr,_("active code page: %d\n"), pFlag->ConvMode); } if (pFlag->ConvMode < 2) pFlag->ConvMode = CONVMODE_437; @@ -1715,27 +2306,54 @@ int parse_options(int argc, char *argv[], CFlag *pFlag, const char *localedir, c else pFlag->FromToMode = FROMTO_UNIX2MAC; } else { - fprintf(stderr,"%s: ",progname); - fprintf(stderr, _("invalid %s conversion mode specified\n"),argv[ArgIdx]); + d2u_fprintf(stderr,"%s: ",progname); + d2u_fprintf(stderr, _("invalid %s conversion mode specified\n"),argv[ArgIdx]); + pFlag->error = 1; + ShouldExit = 1; + pFlag->stdio_mode = 0; + } + } else { + ArgIdx--; + d2u_fprintf(stderr,"%s: ",progname); + d2u_fprintf(stderr,_("option '%s' requires an argument\n"),argv[ArgIdx]); + pFlag->error = 1; + ShouldExit = 1; + pFlag->stdio_mode = 0; + } + } + +#ifdef D2U_UNIFILE + else if ((strcmp(argv[ArgIdx],"-D") == 0) || (strcmp(argv[ArgIdx],"--display-enc") == 0)) { + if (++ArgIdx < argc) { + if (strcmpi(argv[ArgIdx],"ansi") == 0) + d2u_display_encoding = D2U_DISPLAY_ANSI; + else if (strcmpi(argv[ArgIdx], "unicode") == 0) + d2u_display_encoding = D2U_DISPLAY_UNICODE; + else if (strcmpi(argv[ArgIdx], "utf8") == 0) { + d2u_display_encoding = D2U_DISPLAY_UTF8; + } else { + d2u_fprintf(stderr,"%s: ",progname); + d2u_fprintf(stderr, _("invalid %s display encoding specified\n"),argv[ArgIdx]); pFlag->error = 1; ShouldExit = 1; pFlag->stdio_mode = 0; } } else { ArgIdx--; - fprintf(stderr,"%s: ",progname); - fprintf(stderr,_("option '%s' requires an argument\n"),argv[ArgIdx]); + d2u_fprintf(stderr,"%s: ",progname); + d2u_fprintf(stderr,_("option '%s' requires an argument\n"),argv[ArgIdx]); pFlag->error = 1; ShouldExit = 1; pFlag->stdio_mode = 0; } } +#endif else if ((strcmp(argv[ArgIdx],"-o") == 0) || (strcmp(argv[ArgIdx],"--oldfile") == 0)) { /* last convert not paired */ if (!CanSwitchFileMode) { - fprintf(stderr,"%s: ",progname); - fprintf(stderr, _("target of file %s not specified in new-file mode\n"), argv[ArgIdx-1]); + d2u_fprintf(stderr,"%s: ",progname); + d2u_fprintf(stderr, _("target of file %s not specified in new-file mode\n"), argv[ArgIdx-1]); pFlag->error = 1; ShouldExit = 1; pFlag->stdio_mode = 0; @@ -1747,8 +2365,8 @@ int parse_options(int argc, char *argv[], CFlag *pFlag, const char *localedir, c else if ((strcmp(argv[ArgIdx],"-n") == 0) || (strcmp(argv[ArgIdx],"--newfile") == 0)) { /* last convert not paired */ if (!CanSwitchFileMode) { - fprintf(stderr,"%s: ",progname); - fprintf(stderr, _("target of file %s not specified in new-file mode\n"), argv[ArgIdx-1]); + d2u_fprintf(stderr,"%s: ",progname); + d2u_fprintf(stderr, _("target of file %s not specified in new-file mode\n"), argv[ArgIdx-1]); pFlag->error = 1; ShouldExit = 1; pFlag->stdio_mode = 0; @@ -1796,7 +2414,7 @@ int parse_options(int argc, char *argv[], CFlag *pFlag, const char *localedir, c } /* no file argument, use stdin and stdout */ - if (pFlag->stdio_mode) { + if ( (argc > 0) && pFlag->stdio_mode) { if (pFlag->file_info) { RetVal = GetFileInfoStdio(pFlag, progname); print_messages_info(pFlag, "stdin", progname); @@ -1813,14 +2431,52 @@ int parse_options(int argc, char *argv[], CFlag *pFlag, const char *localedir, c } if (!CanSwitchFileMode) { - fprintf(stderr,"%s: ",progname); - fprintf(stderr, _("target of file %s not specified in new-file mode\n"), argv[ArgIdx-1]); + d2u_fprintf(stderr,"%s: ",progname); + d2u_fprintf(stderr, _("target of file %s not specified in new-file mode\n"), argv[ArgIdx-1]); pFlag->error = 1; } return pFlag->error; } +void d2u_getc_error(CFlag *ipFlag, const char *progname) +{ + char *errstr; + + ipFlag->error = errno; + if (ipFlag->verbose) { + errstr = strerror(errno); + d2u_fprintf(stderr, "%s: ", progname); + d2u_ansi_fprintf(stderr, _("can not read from input file: %s\n"), errstr); + } +} + +void d2u_putc_error(CFlag *ipFlag, const char *progname) +{ + char *errstr; + + ipFlag->error = errno; + if (ipFlag->verbose) { + errstr = strerror(errno); + d2u_fprintf(stderr, "%s: ", progname); + d2u_ansi_fprintf(stderr, _("can not write to output file: %s\n"), errstr); + } +} + #ifdef D2U_UNICODE +void d2u_putwc_error(CFlag *ipFlag, const char *progname) +{ + char *errstr; + + if (!(ipFlag->status & UNICODE_CONVERSION_ERROR)) { + ipFlag->error = errno; + if (ipFlag->verbose) { + errstr = strerror(errno); + d2u_fprintf(stderr, "%s: ", progname); + d2u_ansi_fprintf(stderr, _("can not write to output file: %s\n"), errstr); + } + } +} + wint_t d2u_getwc(FILE *f, int bomtype) { int c_trail, c_lead; @@ -1860,13 +2516,18 @@ wint_t d2u_ungetwc(wint_t wc, FILE *f, int bomtype) } /* Put wide character */ -wint_t d2u_putwc(wint_t wc, FILE *f, CFlag *ipFlag) +wint_t d2u_putwc(wint_t wc, FILE *f, CFlag *ipFlag, const char *progname) { static char mbs[8]; - static wchar_t lead, trail; + static wchar_t lead=0x01, trail; /* lead get's invalid value */ static wchar_t wstr[3]; size_t i,len; int c_trail, c_lead; +#if (defined(_WIN32) && !defined(__CYGWIN__)) + DWORD dwFlags; +#else + char *errstr; +#endif if (ipFlag->keep_utf16) { if (ipFlag->bomtype == FILE_UTF16LE) { /* UTF16 little endian */ @@ -1883,12 +2544,30 @@ wint_t d2u_putwc(wint_t wc, FILE *f, CFlag *ipFlag) return wc; } - if ((wc >= 0xd800) && (wc < 0xdc00)) { + /* Note: In the new Unicode standard lead is named "high", and trail is name "low". */ + + /* check for lead without a trail */ + if ((lead >= 0xd800) && (lead < 0xdc00) && ((wc < 0xdc00) || (wc >= 0xe000))) { + d2u_fprintf(stderr, "%s: ", progname); + d2u_fprintf(stderr, _("error: Invalid surrogate pair. Missing low surrogate.\n")); + ipFlag->status |= UNICODE_CONVERSION_ERROR ; + return(WEOF); + } + + if ((wc >= 0xd800) && (wc < 0xdc00)) { /* Surrogate lead */ /* fprintf(stderr, "UTF-16 lead %x\n",wc); */ lead = (wchar_t)wc; /* lead (high) surrogate */ return(wc); } - if ((wc >= 0xdc00) && (wc < 0xe000)) { + if ((wc >= 0xdc00) && (wc < 0xe000)) { /* Surrogate trail */ + + /* check for trail without a lead */ + if ((lead < 0xd800) || (lead >= 0xdc00)) { + d2u_fprintf(stderr, "%s: ", progname); + d2u_fprintf(stderr, _("error: Invalid surrogate pair. Missing high surrogate.\n")); + ipFlag->status |= UNICODE_CONVERSION_ERROR ; + return(WEOF); + } /* fprintf(stderr, "UTF-16 trail %x\n",wc); */ trail = (wchar_t)wc; /* trail (low) surrogate */ #if defined(_WIN32) || defined(__CYGWIN__) @@ -1898,6 +2577,7 @@ wint_t d2u_putwc(wint_t wc, FILE *f, CFlag *ipFlag) wstr[0] = lead; wstr[1] = trail; wstr[2] = L'\0'; + lead = 0x01; /* make lead invalid */ #else /* On Unix wchar_t is 32 bit */ /* When we don't decode the UTF-16 surrogate pair, wcstombs() does not @@ -1924,6 +2604,7 @@ wint_t d2u_putwc(wint_t wc, FILE *f, CFlag *ipFlag) wstr[0] += (lead & 0x03FF) << 10; wstr[0] += (trail & 0x03FF); wstr[1] = L'\0'; + lead = 0x01; /* make lead invalid */ /* fprintf(stderr, "UTF-32 %x\n",wstr[0]); */ #endif } else { @@ -1931,12 +2612,25 @@ wint_t d2u_putwc(wint_t wc, FILE *f, CFlag *ipFlag) wstr[1] = L'\0'; } + if (wc == 0x0000) { + if (fputc(0, f) == EOF) + return(WEOF); + return(wc); + } + #if (defined(_WIN32) && !defined(__CYGWIN__)) +/* The WC_ERR_INVALID_CHARS flag is available since Windows Vista (0x0600). It enables checking for + invalid input characters. */ +#if WINVER >= 0x0600 + dwFlags = WC_ERR_INVALID_CHARS; +#else + dwFlags = 0; +#endif /* On Windows we convert UTF-16 always to UTF-8 or GB18030 */ if (ipFlag->locale_target == TARGET_GB18030) { - len = (size_t)(WideCharToMultiByte(54936, 0, wstr, -1, mbs, sizeof(mbs), NULL, NULL) -1); + len = (size_t)(WideCharToMultiByte(54936, dwFlags, wstr, -1, mbs, sizeof(mbs), NULL, NULL) -1); } else { - len = (size_t)(WideCharToMultiByte(CP_UTF8, 0, wstr, -1, mbs, sizeof(mbs), NULL, NULL) -1); + len = (size_t)(WideCharToMultiByte(CP_UTF8, dwFlags, wstr, -1, mbs, sizeof(mbs), NULL, NULL) -1); } #else /* On Unix we convert UTF-16 to the locale encoding */ @@ -1946,6 +2640,14 @@ wint_t d2u_putwc(wint_t wc, FILE *f, CFlag *ipFlag) if ( len == (size_t)(-1) ) { /* Stop when there is a conversion error */ + /* On Windows we convert UTF-16 always to UTF-8 or GB18030 */ +#if (defined(_WIN32) && !defined(__CYGWIN__)) + d2u_PrintLastError(progname); +#else + errstr = strerror(errno); + d2u_fprintf(stderr, "%s:", progname); + d2u_ansi_fprintf(stderr, " %s\n", errstr); +#endif ipFlag->status |= UNICODE_CONVERSION_ERROR ; return(WEOF); } else { |