diff options
Diffstat (limited to 'src/chars.c')
-rw-r--r-- | src/chars.c | 963 |
1 files changed, 963 insertions, 0 deletions
diff --git a/src/chars.c b/src/chars.c new file mode 100644 index 0000000..e4309b7 --- /dev/null +++ b/src/chars.c @@ -0,0 +1,963 @@ +/* $Id: chars.c 4534 2011-02-24 02:47:25Z astyanax $ */ +/************************************************************************** + * chars.c * + * * + * Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 * + * Free Software Foundation, Inc. * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 3, or (at your option) * + * any later version. * + * * + * This program is distributed in the hope that it will be useful, but * + * WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * + * General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the Free Software * + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA * + * 02110-1301, USA. * + * * + **************************************************************************/ + +#include "proto.h" + +#include <string.h> +#include <ctype.h> + +#ifdef ENABLE_UTF8 +#ifdef HAVE_WCHAR_H +#include <wchar.h> +#endif +#ifdef HAVE_WCTYPE_H +#include <wctype.h> +#endif + +static bool use_utf8 = FALSE; + /* Whether we've enabled UTF-8 support. */ +static const wchar_t bad_wchar = 0xFFFD; + /* If we get an invalid multibyte sequence, we treat it as + * Unicode FFFD (Replacement Character), unless we're searching + * for a match to it. */ +static const char *const bad_mbchar = "\xEF\xBF\xBD"; /* The UTF-8 byte sequence for bad_wchar (Unicode FFFD). */ +static const int bad_mbchar_len = 3; /* The number of bytes in bad_mbchar. */ + +/* Enable UTF-8 support by setting use_utf8, the flag that the multibyte routines in this file test before taking their wide-character paths. */ +void utf8_init(void) +{ + use_utf8 = TRUE; +} + +/* Is UTF-8 support enabled? 
*/ +bool using_utf8(void) +{ + return use_utf8; +} +#endif + +#ifndef HAVE_ISBLANK +/* This function is equivalent to isblank(). */ +bool nisblank(int c) +{ + return isspace(c) && (c == '\t' || !is_cntrl_char(c)); +} +#endif + +#if !defined(HAVE_ISWBLANK) && defined(ENABLE_UTF8) +/* This function is equivalent to iswblank(). */ +bool niswblank(wchar_t wc) +{ + return iswspace(wc) && (wc == '\t' || !is_cntrl_wchar(wc)); +} +#endif + +/* Return TRUE if the value of c is in byte range, and FALSE + * otherwise. */ +bool is_byte(int c) +{ + return ((unsigned int)c == (unsigned char)c); +} + +static void mbtowc_reset(void) +{ + IGNORE_CALL_RESULT(mbtowc(NULL, NULL, 0)); +} + +static void wctomb_reset(void) +{ + IGNORE_CALL_RESULT(wctomb(NULL, 0)); +} + +/* This function is equivalent to isalnum() for multibyte characters. */ +bool is_alnum_mbchar(const char *c) +{ + assert(c != NULL); + +#ifdef ENABLE_UTF8 + if (use_utf8) { + wchar_t wc; + + if (mbtowc(&wc, c, MB_CUR_MAX) < 0) { + mbtowc_reset(); + wc = bad_wchar; + } + + return iswalnum(wc); + } else +#endif + return isalnum((unsigned char)*c); +} + +/* This function is equivalent to isblank() for multibyte characters. */ +bool is_blank_mbchar(const char *c) +{ + assert(c != NULL); + +#ifdef ENABLE_UTF8 + if (use_utf8) { + wchar_t wc; + + if (mbtowc(&wc, c, MB_CUR_MAX) < 0) { + mbtowc_reset(); + wc = bad_wchar; + } + + return iswblank(wc); + } else +#endif + return isblank((unsigned char)*c); +} + +/* This function is equivalent to iscntrl(), except in that it only + * handles non-high-bit control characters. */ +bool is_ascii_cntrl_char(int c) +{ + return (0 <= c && c < 32); +} + +/* This function is equivalent to iscntrl(), except in that it also + * handles high-bit control characters. 
*/ +bool is_cntrl_char(int c) +{ + return (-128 <= c && c < -96) || (0 <= c && c < 32) || + (127 <= c && c < 160); +} + +#ifdef ENABLE_UTF8 +/* This function is equivalent to iscntrl() for wide characters, except + * in that it also handles wide control characters with their high bits + * set. */ +bool is_cntrl_wchar(wchar_t wc) +{ + return (0 <= wc && wc < 32) || (127 <= wc && wc < 160); +} +#endif + +/* This function is equivalent to iscntrl() for multibyte characters, + * except in that it also handles multibyte control characters with + * their high bits set. */ +bool is_cntrl_mbchar(const char *c) +{ + assert(c != NULL); + +#ifdef ENABLE_UTF8 + if (use_utf8) { + wchar_t wc; + + if (mbtowc(&wc, c, MB_CUR_MAX) < 0) { + mbtowc_reset(); + wc = bad_wchar; + } + + return is_cntrl_wchar(wc); + } else +#endif + return is_cntrl_char((unsigned char)*c); +} + +/* This function is equivalent to ispunct() for multibyte characters. */ +bool is_punct_mbchar(const char *c) +{ + assert(c != NULL); + +#ifdef ENABLE_UTF8 + if (use_utf8) { + wchar_t wc; + int c_mb_len = mbtowc(&wc, c, MB_CUR_MAX); + + if (c_mb_len < 0) { + mbtowc_reset(); + wc = bad_wchar; + } + + return iswpunct(wc); + } else +#endif + return ispunct((unsigned char)*c); +} + +/* Return TRUE for a multibyte character found in a word (currently only + * an alphanumeric or punctuation character, and only the latter if + * allow_punct is TRUE) and FALSE otherwise. */ +bool is_word_mbchar(const char *c, bool allow_punct) +{ + assert(c != NULL); + + return is_alnum_mbchar(c) || (allow_punct ? is_punct_mbchar(c) : + FALSE); +} + +/* c is a control character. It displays as ^@, ^?, or ^[ch], where ch + * is (c + 64). We return that character. */ +char control_rep(char c) +{ + assert(is_cntrl_char(c)); + + /* Treat newlines embedded in a line as encoded nulls. */ + if (c == '\n') + return '@'; + else if (c == NANO_CONTROL_8) + return '?'; + else + return c + 64; +} + +#ifdef ENABLE_UTF8 +/* c is a wide control character. 
It displays as ^@, ^?, or ^[ch], + * where ch is (c + 64). We return that wide character. */ +wchar_t control_wrep(wchar_t wc) +{ + assert(is_cntrl_wchar(wc)); + + /* Treat newlines embedded in a line as encoded nulls. */ + if (wc == '\n') + return '@'; + else if (wc == NANO_CONTROL_8) + return '?'; + else + return wc + 64; +} +#endif + +/* c is a multibyte control character. It displays as ^@, ^?, or ^[ch], + * where ch is (c + 64). We return that multibyte character. If crep + * is an invalid multibyte sequence, it will be replaced with Unicode + * 0xFFFD (Replacement Character). */ +char *control_mbrep(const char *c, char *crep, int *crep_len) +{ + assert(c != NULL && crep != NULL && crep_len != NULL); + +#ifdef ENABLE_UTF8 + if (use_utf8) { + wchar_t wc; + + if (mbtowc(&wc, c, MB_CUR_MAX) < 0) { + mbtowc_reset(); + *crep_len = bad_mbchar_len; + strncpy(crep, bad_mbchar, *crep_len); + } else { + *crep_len = wctomb(crep, control_wrep(wc)); + + if (*crep_len < 0) { + wctomb_reset(); + *crep_len = 0; + } + } + } else { +#endif + *crep_len = 1; + *crep = control_rep(*c); +#ifdef ENABLE_UTF8 + } +#endif + + return crep; +} + +/* c is a multibyte non-control character. We return that multibyte + * character. If crep is an invalid multibyte sequence, it will be + * replaced with Unicode 0xFFFD (Replacement Character). */ +char *mbrep(const char *c, char *crep, int *crep_len) +{ + assert(c != NULL && crep != NULL && crep_len != NULL); + +#ifdef ENABLE_UTF8 + if (use_utf8) { + wchar_t wc; + + /* Reject invalid Unicode characters. */ + if (mbtowc(&wc, c, MB_CUR_MAX) < 0 || !is_valid_unicode(wc)) { + mbtowc_reset(); + *crep_len = bad_mbchar_len; + strncpy(crep, bad_mbchar, *crep_len); + } else { + *crep_len = wctomb(crep, wc); + + if (*crep_len < 0) { + wctomb_reset(); + *crep_len = 0; + } + } + } else { +#endif + *crep_len = 1; + *crep = *c; +#ifdef ENABLE_UTF8 + } +#endif + + return crep; +} + +/* This function is equivalent to wcwidth() for multibyte characters. 
*/ +int mbwidth(const char *c) +{ + assert(c != NULL); + +#ifdef ENABLE_UTF8 + if (use_utf8) { + wchar_t wc; + int width; + + if (mbtowc(&wc, c, MB_CUR_MAX) < 0) { + mbtowc_reset(); + wc = bad_wchar; + } + + width = wcwidth(wc); + + if (width == -1) { + wc = bad_wchar; + width = wcwidth(wc); + } + + return width; + } else +#endif + return 1; +} + +/* Return the maximum width in bytes of a multibyte character. */ +int mb_cur_max(void) +{ + return +#ifdef ENABLE_UTF8 + use_utf8 ? MB_CUR_MAX : +#endif + 1; +} + +/* Convert the Unicode value in chr to a multibyte character with the + * same wide character value as chr, if possible. If the conversion + * succeeds, return the (dynamically allocated) multibyte character and + * its length. Otherwise, return an undefined (dynamically allocated) + * multibyte character and a length of zero. */ +char *make_mbchar(long chr, int *chr_mb_len) +{ + char *chr_mb; + + assert(chr_mb_len != NULL); + +#ifdef ENABLE_UTF8 + if (use_utf8) { + chr_mb = charalloc(MB_CUR_MAX); + *chr_mb_len = wctomb(chr_mb, (wchar_t)chr); + + /* Reject invalid Unicode characters. */ + if (*chr_mb_len < 0 || !is_valid_unicode((wchar_t)chr)) { + wctomb_reset(); + *chr_mb_len = 0; + } + } else { +#endif + *chr_mb_len = 1; + chr_mb = mallocstrncpy(NULL, (char *)&chr, 1); +#ifdef ENABLE_UTF8 + } +#endif + + return chr_mb; +} + +/* Parse a multibyte character from buf. Return the number of bytes + * used. If chr isn't NULL, store the multibyte character in it. If + * col isn't NULL, store the new display width in it. If *buf is '\t', + * we expect col to have the current display width. */ +int parse_mbchar(const char *buf, char *chr, size_t *col) +{ + int buf_mb_len; + + assert(buf != NULL); + +#ifdef ENABLE_UTF8 + if (use_utf8) { + /* Get the number of bytes in the multibyte character. */ + buf_mb_len = mblen(buf, MB_CUR_MAX); + + /* If buf contains an invalid multibyte character, only + * interpret buf's first byte. 
*/ + if (buf_mb_len < 0) { + IGNORE_CALL_RESULT(mblen(NULL, 0)); + buf_mb_len = 1; + } else if (buf_mb_len == 0) + buf_mb_len++; + + /* Save the multibyte character in chr. */ + if (chr != NULL) { + int i; + + for (i = 0; i < buf_mb_len; i++) + chr[i] = buf[i]; + } + + /* Save the column width of the wide character in col. */ + if (col != NULL) { + /* If we have a tab, get its width in columns using the + * current value of col. */ + if (*buf == '\t') + *col += tabsize - *col % tabsize; + /* If we have a control character, get its width using one + * column for the "^" that will be displayed in front of it, + * and the width in columns of its visible equivalent as + * returned by control_mbrep(). */ + else if (is_cntrl_mbchar(buf)) { + char *ctrl_buf_mb = charalloc(MB_CUR_MAX); + int ctrl_buf_mb_len; + + (*col)++; + + ctrl_buf_mb = control_mbrep(buf, ctrl_buf_mb, + &ctrl_buf_mb_len); + + *col += mbwidth(ctrl_buf_mb); + + free(ctrl_buf_mb); + /* If we have a normal character, get its width in columns + * normally. */ + } else + *col += mbwidth(buf); + } + } else { +#endif + /* Get the number of bytes in the byte character. */ + buf_mb_len = 1; + + /* Save the byte character in chr. */ + if (chr != NULL) + *chr = *buf; + + if (col != NULL) { + /* If we have a tab, get its width in columns using the + * current value of col. */ + if (*buf == '\t') + *col += tabsize - *col % tabsize; + /* If we have a control character, it's two columns wide: + * one column for the "^" that will be displayed in front of + * it, and one column for its visible equivalent as returned + * by control_mbrep(). */ + else if (is_cntrl_char((unsigned char)*buf)) + *col += 2; + /* If we have a normal character, it's one column wide. */ + else + (*col)++; + } +#ifdef ENABLE_UTF8 + } +#endif + + return buf_mb_len; +} + +/* Return the index in buf of the beginning of the multibyte character + * before the one at pos. 
*/ +size_t move_mbleft(const char *buf, size_t pos) +{ + size_t pos_prev = pos; + + assert(buf != NULL && pos <= strlen(buf)); + + /* There is no library function to move backward one multibyte + * character. Here is the naive, O(pos) way to do it. */ + while (TRUE) { + int buf_mb_len = parse_mbchar(buf + pos - pos_prev, NULL, NULL); + + if (pos_prev <= buf_mb_len) + break; + + pos_prev -= buf_mb_len; + } + + return pos - pos_prev; +} + +/* Return the index in buf of the beginning of the multibyte character + * after the one at pos. */ +size_t move_mbright(const char *buf, size_t pos) +{ + return pos + parse_mbchar(buf + pos, NULL, NULL); +} + +#ifndef HAVE_STRCASECMP +/* This function is equivalent to strcasecmp(). */ +int nstrcasecmp(const char *s1, const char *s2) +{ + return strncasecmp(s1, s2, (size_t)-1); +} +#endif + +/* This function is equivalent to strcasecmp() for multibyte strings. */ +int mbstrcasecmp(const char *s1, const char *s2) +{ + return mbstrncasecmp(s1, s2, (size_t)-1); +} + +#ifndef HAVE_STRNCASECMP +/* This function is equivalent to strncasecmp(). */ +int nstrncasecmp(const char *s1, const char *s2, size_t n) +{ + if (s1 == s2) + return 0; + + assert(s1 != NULL && s2 != NULL); + + for (; *s1 != '\0' && *s2 != '\0' && n > 0; s1++, s2++, n--) { + if (tolower(*s1) != tolower(*s2)) + break; + } + + return (n > 0) ? tolower(*s1) - tolower(*s2) : 0; +} +#endif + +/* This function is equivalent to strncasecmp() for multibyte + * strings. 
*/ +int mbstrncasecmp(const char *s1, const char *s2, size_t n) +{ +#ifdef ENABLE_UTF8 + if (use_utf8) { + char *s1_mb, *s2_mb; + wchar_t ws1, ws2; + + if (s1 == s2) + return 0; + + assert(s1 != NULL && s2 != NULL); + + s1_mb = charalloc(MB_CUR_MAX); + s2_mb = charalloc(MB_CUR_MAX); + + for (; *s1 != '\0' && *s2 != '\0' && n > 0; s1 += + move_mbright(s1, 0), s2 += move_mbright(s2, 0), n--) { + bool bad_s1_mb = FALSE, bad_s2_mb = FALSE; + int s1_mb_len, s2_mb_len; + + s1_mb_len = parse_mbchar(s1, s1_mb, NULL); + + if (mbtowc(&ws1, s1_mb, s1_mb_len) < 0) { + mbtowc_reset(); + ws1 = (unsigned char)*s1_mb; + bad_s1_mb = TRUE; + } + + s2_mb_len = parse_mbchar(s2, s2_mb, NULL); + + if (mbtowc(&ws2, s2_mb, s2_mb_len) < 0) { + mbtowc_reset(); + ws2 = (unsigned char)*s2_mb; + bad_s2_mb = TRUE; + } + + if (bad_s1_mb != bad_s2_mb || towlower(ws1) != + towlower(ws2)) + break; + } + + free(s1_mb); + free(s2_mb); + + return (n > 0) ? towlower(ws1) - towlower(ws2) : 0; + } else +#endif + return strncasecmp(s1, s2, n); +} + +#ifndef HAVE_STRCASESTR +/* This function is equivalent to strcasestr(). */ +char *nstrcasestr(const char *haystack, const char *needle) +{ + size_t haystack_len, needle_len; + + assert(haystack != NULL && needle != NULL); + + if (*needle == '\0') + return (char *)haystack; + + haystack_len = strlen(haystack); + needle_len = strlen(needle); + + for (; *haystack != '\0' && haystack_len >= needle_len; haystack++, + haystack_len--) { + if (strncasecmp(haystack, needle, needle_len) == 0) + return (char *)haystack; + } + + return NULL; +} +#endif + +/* This function is equivalent to strcasestr() for multibyte strings. 
*/ +char *mbstrcasestr(const char *haystack, const char *needle) +{ +#ifdef ENABLE_UTF8 + if (use_utf8) { + size_t haystack_len, needle_len; + + assert(haystack != NULL && needle != NULL); + + if (*needle == '\0') + return (char *)haystack; + + haystack_len = mbstrlen(haystack); + needle_len = mbstrlen(needle); + + for (; *haystack != '\0' && haystack_len >= needle_len; + haystack += move_mbright(haystack, 0), haystack_len--) { + if (mbstrncasecmp(haystack, needle, needle_len) == 0) + return (char *)haystack; + } + + return NULL; + } else +#endif + return (char *) strcasestr(haystack, needle); +} + +#if !defined(NANO_TINY) || !defined(DISABLE_TABCOMP) +/* This function is equivalent to strstr(), except in that it scans the + * string in reverse, starting at rev_start. */ +char *revstrstr(const char *haystack, const char *needle, const char + *rev_start) +{ + size_t rev_start_len, needle_len; + + assert(haystack != NULL && needle != NULL && rev_start != NULL); + + if (*needle == '\0') + return (char *)rev_start; + + needle_len = strlen(needle); + + if (strlen(haystack) < needle_len) + return NULL; + + rev_start_len = strlen(rev_start); + + for (; rev_start >= haystack; rev_start--, rev_start_len++) { + if (rev_start_len >= needle_len && strncmp(rev_start, needle, + needle_len) == 0) + return (char *)rev_start; + } + + return NULL; +} +#endif /* !NANO_TINY || !DISABLE_TABCOMP */ + +#ifndef NANO_TINY +/* This function is equivalent to strcasestr(), except in that it scans + * the string in reverse, starting at rev_start. 
*/ +char *revstrcasestr(const char *haystack, const char *needle, const char + *rev_start) +{ + size_t rev_start_len, needle_len; + + assert(haystack != NULL && needle != NULL && rev_start != NULL); + + if (*needle == '\0') + return (char *)rev_start; + + needle_len = strlen(needle); + + if (strlen(haystack) < needle_len) + return NULL; + + rev_start_len = strlen(rev_start); + + for (; rev_start >= haystack; rev_start--, rev_start_len++) { + if (rev_start_len >= needle_len && strncasecmp(rev_start, + needle, needle_len) == 0) + return (char *)rev_start; + } + + return NULL; +} + +/* This function is equivalent to strcasestr() for multibyte strings, + * except in that it scans the string in reverse, starting at + * rev_start. */ +char *mbrevstrcasestr(const char *haystack, const char *needle, const + char *rev_start) +{ +#ifdef ENABLE_UTF8 + if (use_utf8) { + bool begin_line = FALSE; + size_t rev_start_len, needle_len; + + assert(haystack != NULL && needle != NULL && rev_start != NULL); + + if (*needle == '\0') + return (char *)rev_start; + + needle_len = mbstrlen(needle); + + if (mbstrlen(haystack) < needle_len) + return NULL; + + rev_start_len = mbstrlen(rev_start); + + while (!begin_line) { + if (rev_start_len >= needle_len && mbstrncasecmp(rev_start, + needle, needle_len) == 0) + return (char *)rev_start; + + if (rev_start == haystack) + begin_line = TRUE; + else { + rev_start = haystack + move_mbleft(haystack, rev_start - + haystack); + rev_start_len++; + } + } + + return NULL; + } else +#endif + return revstrcasestr(haystack, needle, rev_start); +} +#endif /* !NANO_TINY */ + +/* This function is equivalent to strlen() for multibyte strings. */ +size_t mbstrlen(const char *s) +{ + return mbstrnlen(s, (size_t)-1); +} + +#ifndef HAVE_STRNLEN +/* This function is equivalent to strnlen(). 
*/ +size_t nstrnlen(const char *s, size_t maxlen) +{ + size_t n = 0; + + assert(s != NULL); + + for (; *s != '\0' && maxlen > 0; s++, maxlen--, n++) + ; + + return n; +} +#endif + +/* This function is equivalent to strnlen() for multibyte strings. */ +size_t mbstrnlen(const char *s, size_t maxlen) +{ + assert(s != NULL); + +#ifdef ENABLE_UTF8 + if (use_utf8) { + size_t n = 0; + + for (; *s != '\0' && maxlen > 0; s += move_mbright(s, 0), + maxlen--, n++) + ; + + return n; + } else +#endif + return strnlen(s, maxlen); +} + +#if !defined(NANO_TINY) || !defined(DISABLE_JUSTIFY) +/* This function is equivalent to strchr() for multibyte strings. */ +char *mbstrchr(const char *s, const char *c) +{ + assert(s != NULL && c != NULL); + +#ifdef ENABLE_UTF8 + if (use_utf8) { + bool bad_s_mb = FALSE, bad_c_mb = FALSE; + char *s_mb = charalloc(MB_CUR_MAX); + const char *q = s; + wchar_t ws, wc; + int c_mb_len = mbtowc(&wc, c, MB_CUR_MAX); + + if (c_mb_len < 0) { + mbtowc_reset(); + wc = (unsigned char)*c; + bad_c_mb = TRUE; + } + + while (*s != '\0') { + int s_mb_len = parse_mbchar(s, s_mb, NULL); + + if (mbtowc(&ws, s_mb, s_mb_len) < 0) { + mbtowc_reset(); + ws = (unsigned char)*s; + bad_s_mb = TRUE; + } + + if (bad_s_mb == bad_c_mb && ws == wc) + break; + + s += s_mb_len; + q += s_mb_len; + } + + free(s_mb); + + if (*s == '\0') + q = NULL; + + return (char *)q; + } else +#endif + return (char *) strchr(s, *c); +} +#endif /* !NANO_TINY || !DISABLE_JUSTIFY */ + +#ifndef NANO_TINY +/* This function is equivalent to strpbrk() for multibyte strings. */ +char *mbstrpbrk(const char *s, const char *accept) +{ + assert(s != NULL && accept != NULL); + +#ifdef ENABLE_UTF8 + if (use_utf8) { + for (; *s != '\0'; s += move_mbright(s, 0)) { + if (mbstrchr(accept, s) != NULL) + return (char *)s; + } + + return NULL; + } else +#endif + return (char *) strpbrk(s, accept); +} + +/* This function is equivalent to strpbrk(), except in that it scans the + * string in reverse, starting at rev_start. 
*/ +char *revstrpbrk(const char *s, const char *accept, const char + *rev_start) +{ + assert(s != NULL && accept != NULL && rev_start != NULL); + + for (; rev_start >= s; rev_start--) { + const char *q = (*rev_start == '\0') ? NULL : strchr(accept, + *rev_start); + + if (q != NULL) + return (char *)rev_start; + } + + return NULL; +} + +/* This function is equivalent to strpbrk() for multibyte strings, + * except in that it scans the string in reverse, starting at + * rev_start. */ +char *mbrevstrpbrk(const char *s, const char *accept, const char + *rev_start) +{ + assert(s != NULL && accept != NULL && rev_start != NULL); + +#ifdef ENABLE_UTF8 + if (use_utf8) { + bool begin_line = FALSE; + + while (!begin_line) { + const char *q = (*rev_start == '\0') ? NULL : + mbstrchr(accept, rev_start); + + if (q != NULL) + return (char *)rev_start; + + if (rev_start == s) + begin_line = TRUE; + else + rev_start = s + move_mbleft(s, rev_start - s); + } + + return NULL; + } else +#endif + return revstrpbrk(s, accept, rev_start); +} +#endif /* !NANO_TINY */ + +#if defined(ENABLE_NANORC) && (!defined(NANO_TINY) || !defined(DISABLE_JUSTIFY)) +/* Return TRUE if the string s contains one or more blank characters, + * and FALSE otherwise. */ +bool has_blank_chars(const char *s) +{ + assert(s != NULL); + + for (; *s != '\0'; s++) { + if (isblank(*s)) + return TRUE; + } + + return FALSE; +} + +/* Return TRUE if the multibyte string s contains one or more blank + * multibyte characters, and FALSE otherwise. 
*/ +bool has_blank_mbchars(const char *s) +{ + assert(s != NULL); + +#ifdef ENABLE_UTF8 + if (use_utf8) { + bool retval = FALSE; + char *chr_mb = charalloc(MB_CUR_MAX); + + for (; *s != '\0'; s += move_mbright(s, 0)) { + parse_mbchar(s, chr_mb, NULL); + + if (is_blank_mbchar(chr_mb)) { + retval = TRUE; + break; + } + } + + free(chr_mb); + + return retval; + } else +#endif + return has_blank_chars(s); +} +#endif /* ENABLE_NANORC && (!NANO_TINY || !DISABLE_JUSTIFY) */ + +#ifdef ENABLE_UTF8 +/* Return TRUE if wc is valid Unicode, and FALSE otherwise. */ +bool is_valid_unicode(wchar_t wc) +{ + return ((0 <= wc && wc <= 0x10FFFF) && (wc <= 0xD7FF || 0xE000 <= + wc) && (wc <= 0xFDCF || 0xFDF0 <= wc) && ((wc & 0xFFFF) <= + 0xFFFD)); +} +#endif + +#ifdef ENABLE_NANORC +/* Check if the string s is a valid multibyte string. Return TRUE if it + * is, and FALSE otherwise. */ +bool is_valid_mbstring(const char *s) +{ + assert(s != NULL); + + return +#ifdef ENABLE_UTF8 + use_utf8 ? (mbstowcs(NULL, s, 0) != (size_t)-1) : +#endif + TRUE; +} +#endif /* ENABLE_NANORC */ |