summary refs log tree commit diff
path: root/common.c
diff options
context:
space:
mode:
Diffstat (limited to 'common.c')
-rw-r--r-- common.c 406
1 files changed, 390 insertions, 16 deletions
diff --git a/common.c b/common.c
index 5378bcb..7f5020a 100644
--- a/common.c
+++ b/common.c
@@ -235,6 +235,8 @@ void PrintUsage(const char *progname)
convmode ascii, 7bit, iso, mac, default to ascii\n"));
printf(_(" -f, --force force conversion of binary files\n"));
printf(_(" -h, --help display this help text\n"));
+ printf(_(" -i, --info[=FLAGS] display file information\n\
+ file ... files to analyze\n"));
printf(_(" -k, --keepdate keep output file date\n"));
printf(_(" -L, --license display software license\n"));
printf(_(" -l, --newline add additional newline\n"));
@@ -303,6 +305,8 @@ void PrintVersion(const char *progname, const char *localedir)
printf("%s", _("OS/2 version (WATCOMC).\n"));
#elif defined (__OS2__) && defined(__EMX__) /* OS/2 Warp */
printf("%s", _("OS/2 version (EMX).\n"));
+#elif defined(__OS)
+ printf(_("%s version.\n"), __OS);
#endif
#ifdef D2U_UNICODE
printf("%s", _("With Unicode UTF-16 support.\n"));
@@ -608,6 +612,64 @@ void print_bom (const int bomtype, const char *filename, const char *progname)
}
}
+/* Print a short label naming the byte order mark type to stdout.
+ * Any bomtype other than FILE_UTF16LE, FILE_UTF16BE or FILE_UTF8 is
+ * reported as "no_bom". No newline is written. */
+void print_bom_info (const int bomtype)
+{
+  const char *label;
+
+  if (bomtype == FILE_UTF16LE)        /* UTF-16 Little Endian */
+    label = " UTF-16LE";
+  else if (bomtype == FILE_UTF16BE)   /* UTF-16 Big Endian */
+    label = " UTF-16BE";
+  else if (bomtype == FILE_UTF8)      /* UTF-8 */
+    label = " UTF-8 ";
+  else
+    label = " no_bom ";
+
+  printf("%s", label);
+}
+
+/* Read the byte order mark of InF and decide which encoding to analyze.
+ *
+ * The BOM type found by read_bom() is stored in ipFlag->bomtype and copied
+ * to *bomtype_orig. With Unicode support, a file without BOM is treated as
+ * UTF-16LE or UTF-16BE when ipFlag->ConvMode asks for that encoding.
+ *
+ * Returns 0 on success. Returns -1, with WCHAR_T_TOO_SMALL set in
+ * ipFlag->status, when a UTF-16 file cannot be decoded because wchar_t is
+ * smaller than 4 bytes (checked on non-Windows, non-Cygwin builds only).
+ */
+int check_unicode_info(FILE *InF, CFlag *ipFlag, const char *progname, int *bomtype_orig)
+{
+  int result = 0;
+
+#ifdef D2U_UNICODE
+  if ((ipFlag->verbose > 1) && (ipFlag->ConvMode == CONVMODE_UTF16LE)) {
+    fprintf(stderr, "%s: ", progname);
+    fprintf(stderr, _("Assuming UTF-16LE encoding.\n") );
+  }
+  if ((ipFlag->verbose > 1) && (ipFlag->ConvMode == CONVMODE_UTF16BE)) {
+    fprintf(stderr, "%s: ", progname);
+    fprintf(stderr, _("Assuming UTF-16BE encoding.\n") );
+  }
+#endif
+
+  InF = read_bom(InF, &ipFlag->bomtype);
+  /* Remember what was really found, before any assumption is applied. */
+  *bomtype_orig = ipFlag->bomtype;
+
+#ifdef D2U_UNICODE
+  /* No BOM: fall back to the encoding requested by the conversion mode. */
+  if (ipFlag->bomtype == FILE_MBS) {
+    if (ipFlag->ConvMode == CONVMODE_UTF16LE)
+      ipFlag->bomtype = FILE_UTF16LE;
+    else if (ipFlag->ConvMode == CONVMODE_UTF16BE)
+      ipFlag->bomtype = FILE_UTF16BE;
+  }
+
+#if !defined(_WIN32) && !defined(__CYGWIN__) /* Not Windows or Cygwin */
+  /* A decoded UTF-16 surrogate pair must fit in a wchar_t */
+  if (!ipFlag->keep_utf16 && (sizeof(wchar_t) < 4) &&
+      ((ipFlag->bomtype == FILE_UTF16LE) || (ipFlag->bomtype == FILE_UTF16BE))) {
+    ipFlag->status |= WCHAR_T_TOO_SMALL ;
+    if (!ipFlag->error)
+      ipFlag->error = 1;
+    result = -1;
+  }
+#endif
+#endif
+
+  return result;
+}
+
int check_unicode(FILE *InF, FILE *TempF, CFlag *ipFlag, const char *ipInFN, const char *progname)
{
int RetVal = 0;
@@ -632,9 +694,8 @@ int check_unicode(FILE *InF, FILE *TempF, CFlag *ipFlag, const char *ipInFN, co
ipFlag->bomtype = FILE_UTF16LE;
if ((ipFlag->bomtype == FILE_MBS) && (ipFlag->ConvMode == CONVMODE_UTF16BE))
ipFlag->bomtype = FILE_UTF16BE;
-#endif
-#ifdef D2U_UNICODE
+
#if !defined(__MSDOS__) && !defined(_WIN32) && !defined(__OS2__) /* Unix, Cygwin */
if (!ipFlag->keep_utf16 && ((ipFlag->bomtype == FILE_UTF16LE) || (ipFlag->bomtype == FILE_UTF16BE))) {
if (strcmp(nl_langinfo(CODESET), "UTF-8") != 0) {
@@ -977,7 +1038,7 @@ int ConvertStdio(CFlag *ipFlag, const char *progname,
#endif
if (check_unicode(stdin, stdout, ipFlag, "stdin", progname))
- return 1;
+ return -1;
#ifdef D2U_UNICODE
if ((ipFlag->bomtype == FILE_UTF16LE) || (ipFlag->bomtype == FILE_UTF16BE)) {
@@ -1099,6 +1160,302 @@ void print_messages_oldfile(const CFlag *pFlag, const char *infile, const char *
}
}
+/* Report on stderr why a file was skipped in info mode.
+ * Nothing is printed unless pFlag->verbose is set. Only the first matching
+ * status bit (NO_REGFILE, INPUT_TARGET_NO_REGFILE, WCHAR_T_TOO_SMALL) is
+ * reported. */
+void print_messages_info(const CFlag *pFlag, const char *infile, const char *progname)
+{
+  if (!pFlag->verbose)
+    return;
+
+  if (pFlag->status & NO_REGFILE) {
+    fprintf(stderr,"%s: ",progname);
+    fprintf(stderr, _("Skipping %s, not a regular file.\n"), infile);
+    return;
+  }
+
+  if (pFlag->status & INPUT_TARGET_NO_REGFILE) {
+    fprintf(stderr,"%s: ",progname);
+    fprintf(stderr, _("Skipping symbolic link %s, target is not a regular file.\n"), infile);
+    return;
+  }
+
+  if (pFlag->status & WCHAR_T_TOO_SMALL) {
+    fprintf(stderr,"%s: ",progname);
+    fprintf(stderr, _("Skipping UTF-16 file %s, the size of wchar_t is %d bytes.\n"), infile, (int)sizeof(wchar_t));
+  }
+}
+
+#ifdef D2U_UNICODE
+/* Scan a UTF-16 stream and print line break statistics for it.
+ *
+ * Reads wide characters with d2u_getwc() until WEOF, counting DOS (CR LF),
+ * Unix (lone LF) and Mac (lone CR) line breaks. BINARY_FILE is set in
+ * ipFlag->status when a character below 32 other than LF, CR, TAB or form
+ * feed is seen. The columns selected in ipFlag->file_info are printed to
+ * stdout, followed by the file name.
+ *
+ * With INFO_CONVERT set, nothing is printed for a file that has no line
+ * breaks of the kind ipFlag->FromToMode converts from, or that is binary
+ * while ipFlag->Force is zero.
+ *
+ * ipInF     input stream, read up to end of file
+ * ipFlag    flags; status is reset to 0 on entry
+ * filename  name printed at the end of the output line
+ */
+void FileInfoW(FILE* ipInF, CFlag *ipFlag, const char *filename)
+{
+  wint_t TempChar;
+  wint_t PreviousChar = 0;
+  unsigned int lb_dos = 0;
+  unsigned int lb_unix = 0;
+  unsigned int lb_mac = 0;
+
+  ipFlag->status = 0;
+
+  while ((TempChar = d2u_getwc(ipInF, ipFlag->bomtype)) != WEOF) {
+    if ( (TempChar < 32) &&
+        (TempChar != 0x0a) &&  /* Not an LF */
+        (TempChar != 0x0d) &&  /* Not a CR */
+        (TempChar != 0x09) &&  /* Not a TAB */
+        (TempChar != 0x0c)) {  /* Not a form feed */
+      ipFlag->status |= BINARY_FILE ;
+    }
+
+    if (TempChar == 0x0d) {
+      /* CR: counted as a Mac line break until an LF follows it. */
+      lb_mac++;
+    } else if (TempChar == 0x0a) {
+      if (PreviousChar == 0x0d) {
+        /* CR,LF pair: a DOS line break, so take back the Mac count. */
+        lb_dos++;
+        lb_mac--;
+      } else {
+        /* Lone LF: a Unix line break. */
+        lb_unix++;
+      }
+    }
+    PreviousChar = TempChar;
+  }
+
+  /* Only list files that a conversion in the current mode would change. */
+  if (ipFlag->file_info & INFO_CONVERT) {
+    if ((ipFlag->FromToMode == FROMTO_DOS2UNIX) && (lb_dos == 0))
+      return;
+    if (((ipFlag->FromToMode == FROMTO_UNIX2DOS)||(ipFlag->FromToMode == FROMTO_UNIX2MAC)) && (lb_unix == 0))
+      return;
+    if ((ipFlag->FromToMode == FROMTO_MAC2UNIX) && (lb_mac == 0))
+      return;
+    if ((ipFlag->Force == 0) && (ipFlag->status & BINARY_FILE))
+      return;
+  }
+
+  /* The counters are unsigned int, so they are printed with %u. */
+  if (ipFlag->file_info & INFO_DOS)
+    printf(" %6u", lb_dos);
+  if (ipFlag->file_info & INFO_UNIX)
+    printf(" %6u", lb_unix);
+  if (ipFlag->file_info & INFO_MAC)
+    printf(" %6u", lb_mac);
+  if (ipFlag->file_info & INFO_BOM)
+    print_bom_info(ipFlag->bomtype);
+  if (ipFlag->file_info & INFO_TEXT) {
+    if (ipFlag->status & BINARY_FILE)
+      printf(" binary");
+    else
+      printf(" text ");
+  }
+  printf(" %s\n",filename);
+}
+#endif
+
+/* Scan a byte stream and print line break statistics for it.
+ *
+ * Reads bytes with fgetc() until EOF, counting DOS (CR LF), Unix (lone LF)
+ * and Mac (lone CR) line breaks. BINARY_FILE is set in ipFlag->status when
+ * a byte below 32 other than LF, CR, TAB or form feed is seen. The columns
+ * selected in ipFlag->file_info are printed to stdout, followed by the
+ * file name.
+ *
+ * With INFO_CONVERT set, nothing is printed for a file that has no line
+ * breaks of the kind ipFlag->FromToMode converts from, or that is binary
+ * while ipFlag->Force is zero.
+ *
+ * ipInF     input stream, read up to end of file
+ * ipFlag    flags; status is reset to 0 on entry
+ * filename  name printed at the end of the output line
+ */
+void FileInfo(FILE* ipInF, CFlag *ipFlag, const char *filename)
+{
+  int TempChar;
+  int PreviousChar = 0;
+  unsigned int lb_dos = 0;
+  unsigned int lb_unix = 0;
+  unsigned int lb_mac = 0;
+
+  ipFlag->status = 0;
+
+  while ((TempChar = fgetc(ipInF)) != EOF) {
+    if ( (TempChar < 32) &&
+        (TempChar != '\x0a') &&  /* Not an LF */
+        (TempChar != '\x0d') &&  /* Not a CR */
+        (TempChar != '\x09') &&  /* Not a TAB */
+        (TempChar != '\x0c')) {  /* Not a form feed */
+      ipFlag->status |= BINARY_FILE ;
+    }
+
+    if (TempChar == '\x0d') {
+      /* CR: counted as a Mac line break until an LF follows it. */
+      lb_mac++;
+    } else if (TempChar == '\x0a') {
+      if (PreviousChar == '\x0d') {
+        /* CR,LF pair: a DOS line break, so take back the Mac count. */
+        lb_dos++;
+        lb_mac--;
+      } else {
+        /* Lone LF: a Unix line break. */
+        lb_unix++;
+      }
+    }
+    PreviousChar = TempChar;
+  }
+
+  /* Only list files that a conversion in the current mode would change. */
+  if (ipFlag->file_info & INFO_CONVERT) {
+    if ((ipFlag->FromToMode == FROMTO_DOS2UNIX) && (lb_dos == 0))
+      return;
+    if (((ipFlag->FromToMode == FROMTO_UNIX2DOS)||(ipFlag->FromToMode == FROMTO_UNIX2MAC)) && (lb_unix == 0))
+      return;
+    if ((ipFlag->FromToMode == FROMTO_MAC2UNIX) && (lb_mac == 0))
+      return;
+    if ((ipFlag->Force == 0) && (ipFlag->status & BINARY_FILE))
+      return;
+  }
+
+  /* The counters are unsigned int, so they are printed with %u. */
+  if (ipFlag->file_info & INFO_DOS)
+    printf(" %6u", lb_dos);
+  if (ipFlag->file_info & INFO_UNIX)
+    printf(" %6u", lb_unix);
+  if (ipFlag->file_info & INFO_MAC)
+    printf(" %6u", lb_mac);
+  if (ipFlag->file_info & INFO_BOM)
+    print_bom_info(ipFlag->bomtype);
+  if (ipFlag->file_info & INFO_TEXT) {
+    if (ipFlag->status & BINARY_FILE)
+      printf(" binary");
+    else
+      printf(" text ");
+  }
+  printf(" %s\n",filename);
+}
+
+/* Print file information for the file named ipInFN.
+ *
+ * Files that are not regular files, and symbolic links whose target is not
+ * a regular file, are skipped: the matching bit is set in ipFlag->status
+ * and -1 is returned without opening anything. An open failure stores
+ * errno in ipFlag->error and prints the system error text on stderr.
+ *
+ * Returns 0 on success and -1 when the file was skipped, could not be
+ * opened, could not be analyzed, or could not be closed.
+ */
+int GetFileInfo(char *ipInFN, CFlag *ipFlag, const char *progname)
+{
+  int result = 0;
+  FILE *in_file;
+  int found_bomtype = FILE_MBS;
+
+  ipFlag->status = 0 ;
+
+  /* The input must be a regular file or a symbolic link. */
+  if (regfile(ipInFN, 1, ipFlag, progname)) {
+    /* Skipping a non-regular input file is not a failure. */
+    ipFlag->status |= NO_REGFILE ;
+    return -1;
+  }
+
+  /* For a symbolic link, the target must be a regular file. */
+  if (symbolic_link(ipInFN) && regfile_target(ipInFN, ipFlag,progname)) {
+    /* Skipping a non-regular link target is not a failure. */
+    ipFlag->status |= INPUT_TARGET_NO_REGFILE ;
+    return -1;
+  }
+
+  in_file = OpenInFile(ipInFN);
+  if (in_file == NULL) {
+    const int open_errno = errno;
+
+    ipFlag->error = open_errno;
+    fprintf(stderr, "%s: %s: %s\n", progname, ipInFN, strerror(open_errno));
+    result = -1;
+  } else if (check_unicode_info(in_file, ipFlag, progname, &found_bomtype)) {
+    result = -1;
+  } else {
+#ifdef D2U_UNICODE
+    if ((ipFlag->bomtype == FILE_UTF16LE) || (ipFlag->bomtype == FILE_UTF16BE))
+      FileInfoW(in_file, ipFlag, ipInFN);
+    else
+      FileInfo(in_file, ipFlag, ipInFN);
+#else
+    FileInfo(in_file, ipFlag, ipInFN);
+#endif
+  }
+
+  /* Messages must print the real bomtype, not the assumed bomtype. */
+  ipFlag->bomtype = found_bomtype;
+
+  if ((in_file != NULL) && (fclose(in_file) == EOF))
+    result = -1;
+
+  return result;
+}
+
+/* Print file information for the data read from stdin.
+ * The file name column of the output line is empty.
+ * Returns 0 on success, -1 when check_unicode_info() rejects the input. */
+int GetFileInfoStdio(CFlag *ipFlag, const char *progname)
+{
+  int result = 0;
+  int found_bomtype = FILE_MBS;
+
+  ipFlag->status = 0 ;
+
+#if defined(_WIN32) && !defined(__CYGWIN__)
+
+  /* stdin is a text stream by default and must be switched to binary
+   * mode, otherwise line breaks are translated on DOS/Windows before
+   * they can be counted.
+   *
+   * POSIX 'setmode' has been deprecated by Microsoft since
+   * Visual C++ 2005. Use the ISO C++ conformant '_setmode' instead. */
+
+  _setmode(_fileno(stdin), _O_BINARY);
+#elif defined(__MSDOS__) || defined(__CYGWIN__) || defined(__OS2__)
+  setmode(fileno(stdin), O_BINARY);
+#endif
+
+  if (check_unicode_info(stdin, ipFlag, progname, &found_bomtype)) {
+    result = -1;
+  } else {
+#ifdef D2U_UNICODE
+    if ((ipFlag->bomtype == FILE_UTF16LE) || (ipFlag->bomtype == FILE_UTF16BE))
+      FileInfoW(stdin, ipFlag, "");
+    else
+      FileInfo(stdin, ipFlag, "");
+#else
+    FileInfo(stdin, ipFlag, "");
+#endif
+  }
+
+  /* Messages must print the real bomtype, not the assumed bomtype. */
+  ipFlag->bomtype = found_bomtype;
+
+  return result;
+}
+
+/* Translate the flag letters given to -i / --info into bits of
+ * pFlag->file_info.
+ *
+ * An empty string selects INFO_DEFAULT. Otherwise each letter adds one bit:
+ * d (DOS), u (Unix), m (Mac), b (BOM), t (text/binary), c (only files that
+ * would be converted). Any other letter prints an error on stderr and
+ * terminates the program with exit status 1.
+ */
+void get_info_options(char *option, CFlag *pFlag, const char *progname)
+{
+  const char *flag;
+
+  if (option[0] == '\0') { /* no flags */
+    pFlag->file_info |= INFO_DEFAULT;
+    return;
+  }
+
+  for (flag = option; *flag != '\0'; flag++) {
+    if (*flag == 'd') {          /* Print nr of DOS line breaks. */
+      pFlag->file_info |= INFO_DOS;
+    } else if (*flag == 'u') {   /* Print nr of Unix line breaks. */
+      pFlag->file_info |= INFO_UNIX;
+    } else if (*flag == 'm') {   /* Print nr of Mac line breaks. */
+      pFlag->file_info |= INFO_MAC;
+    } else if (*flag == 'b') {   /* Print BOM. */
+      pFlag->file_info |= INFO_BOM;
+    } else if (*flag == 't') {   /* Text or binary. */
+      pFlag->file_info |= INFO_TEXT;
+    } else if (*flag == 'c') {   /* Print only files that would be converted. */
+      pFlag->file_info |= INFO_CONVERT;
+    } else {
+      /* Terminate the program on a wrong flag. If pFlag->file_info stayed
+         zero and the program went on, it might do unwanted conversions. */
+      fprintf(stderr,"%s: ",progname);
+      fprintf(stderr,_("wrong flag '%c' for option -i or --info\n"), *flag);
+      exit(1);
+    }
+  }
+}
+
int parse_options(int argc, char *argv[], CFlag *pFlag, const char *localedir, const char *progname,
void (*PrintLicense)(void),
int (*Convert)(FILE*, FILE*, CFlag *, const char *)
@@ -1127,6 +1484,7 @@ int parse_options(int argc, char *argv[], CFlag *pFlag, const char *localedir, c
pFlag->bomtype = FILE_MBS;
pFlag->add_bom = 0;
pFlag->keep_utf16 = 0;
+ pFlag->file_info = 0;
while ((++ArgIdx < argc) && (!ShouldExit))
{
@@ -1155,7 +1513,13 @@ int parse_options(int argc, char *argv[], CFlag *pFlag, const char *localedir, c
pFlag->verbose = 2;
else if ((strcmp(argv[ArgIdx],"-l") == 0) || (strcmp(argv[ArgIdx],"--newline") == 0))
pFlag->NewLine = 1;
- else if ((strcmp(argv[ArgIdx],"-m") == 0) || (strcmp(argv[ArgIdx],"--add-bom") == 0))
+ else if (strcmp(argv[ArgIdx],"--info") == 0)
+ pFlag->file_info |= INFO_DEFAULT;
+ else if (strncmp(argv[ArgIdx],"--info=", (size_t)7) == 0) {
+ get_info_options(argv[ArgIdx]+7, pFlag, progname);
+ } else if (strncmp(argv[ArgIdx],"-i", (size_t)2) == 0) {
+ get_info_options(argv[ArgIdx]+2, pFlag, progname);
+ } else if ((strcmp(argv[ArgIdx],"-m") == 0) || (strcmp(argv[ArgIdx],"--add-bom") == 0))
pFlag->add_bom = 1;
else if ((strcmp(argv[ArgIdx],"-r") == 0) || (strcmp(argv[ArgIdx],"--remove-bom") == 0)) {
pFlag->keep_bom = 0;
@@ -1259,6 +1623,7 @@ int parse_options(int argc, char *argv[], CFlag *pFlag, const char *localedir, c
pFlag->stdio_mode = 0;
}
pFlag->NewFile = 0;
+ pFlag->file_info = 0;
}
else if ((strcmp(argv[ArgIdx],"-n") == 0) || (strcmp(argv[ArgIdx],"--newfile") == 0)) {
@@ -1271,6 +1636,7 @@ int parse_options(int argc, char *argv[], CFlag *pFlag, const char *localedir, c
pFlag->stdio_mode = 0;
}
pFlag->NewFile = 1;
+ pFlag->file_info = 0;
}
else { /* wrong option */
PrintUsage(progname);
@@ -1278,8 +1644,7 @@ int parse_options(int argc, char *argv[], CFlag *pFlag, const char *localedir, c
pFlag->error = 1;
pFlag->stdio_mode = 0;
}
- }
- else {
+ } else {
pFlag->stdio_mode = 0;
/* not an option */
if (pFlag->NewFile) {
@@ -1295,28 +1660,37 @@ int parse_options(int argc, char *argv[], CFlag *pFlag, const char *localedir, c
print_messages_newfile(pFlag, argv[ArgIdx-1], argv[ArgIdx], progname, RetVal);
CanSwitchFileMode = 1;
}
- }
- else {
+ } else {
+ if (pFlag->file_info) {
+ RetVal = GetFileInfo(argv[ArgIdx], pFlag, progname);
+ print_messages_info(pFlag, argv[ArgIdx], progname);
+ } else {
#ifdef D2U_UNICODE
- RetVal = ConvertNewFile(argv[ArgIdx], argv[ArgIdx], pFlag, progname, Convert, ConvertW);
+ RetVal = ConvertNewFile(argv[ArgIdx], argv[ArgIdx], pFlag, progname, Convert, ConvertW);
#else
- RetVal = ConvertNewFile(argv[ArgIdx], argv[ArgIdx], pFlag, progname, Convert);
+ RetVal = ConvertNewFile(argv[ArgIdx], argv[ArgIdx], pFlag, progname, Convert);
#endif
- if (pFlag->verbose)
- print_messages_oldfile(pFlag, argv[ArgIdx], progname, RetVal);
+ if (pFlag->verbose)
+ print_messages_oldfile(pFlag, argv[ArgIdx], progname, RetVal);
+ }
}
}
}
/* no file argument, use stdin and stdout */
if (pFlag->stdio_mode) {
+ if (pFlag->file_info) {
+ RetVal = GetFileInfoStdio(pFlag, progname);
+ print_messages_info(pFlag, "stdin", progname);
+ } else {
#ifdef D2U_UNICODE
- ConvertStdio(pFlag, progname, Convert, ConvertW);
+ ConvertStdio(pFlag, progname, Convert, ConvertW);
#else
- ConvertStdio(pFlag, progname, Convert);
+ ConvertStdio(pFlag, progname, Convert);
#endif
- if (pFlag->verbose)
- print_messages_stdio(pFlag, progname);
+ if (pFlag->verbose)
+ print_messages_stdio(pFlag, progname);
+ }
return pFlag->error;
}