summaryrefslogtreecommitdiff
path: root/src/thwchar/thwchar.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/thwchar/thwchar.c')
-rw-r--r--src/thwchar/thwchar.c258
1 files changed, 258 insertions, 0 deletions
diff --git a/src/thwchar/thwchar.c b/src/thwchar/thwchar.c
new file mode 100644
index 0000000..b29b425
--- /dev/null
+++ b/src/thwchar/thwchar.c
@@ -0,0 +1,258 @@
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * libthai - Thai Language Support Library
+ * Copyright (C) 2001 Theppitak Karoonboonyanan <thep@linux.thai.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/*
+ * thwchar.c - wide char support for Thai
+ * Created: 2001-07-27
+ * Author: Pattara Kiatisevi <ott@linux.thai.net>,
+ * Theppitak Karoonboonyanan <thep@linux.thai.net>
+ */
+
+#include <thai/thwchar.h>
+
+/*
+ * Short local aliases for the wide-char and TIS-620 error markers
+ * (THWCHAR_ERR / THCHAR_ERR, presumably supplied by <thai/thwchar.h>).
+ * WC_ERR fills the unassigned slots of the byte-to-Unicode tables below;
+ * TH_ERR fills the unassigned slots of the Unicode-to-byte table and is
+ * returned by the reverse conversions when no mapping exists.
+ */
+#define WC_ERR THWCHAR_ERR
+#define TH_ERR THCHAR_ERR
+
+/*
+ * Plain TIS-620 to Unicode table for the upper half of the code page.
+ * Entry i holds the Unicode value of byte 0x80 + i; unassigned bytes
+ * hold WC_ERR.  The table is read-only, hence const.
+ */
+static const thwchar_t tis620_0_uni_map_[128] = {
+    WC_ERR, WC_ERR, WC_ERR, WC_ERR, WC_ERR, WC_ERR, WC_ERR, WC_ERR,
+    WC_ERR, WC_ERR, WC_ERR, WC_ERR, WC_ERR, WC_ERR, WC_ERR, WC_ERR,
+    WC_ERR, WC_ERR, WC_ERR, WC_ERR, WC_ERR, WC_ERR, WC_ERR, WC_ERR,
+    WC_ERR, WC_ERR, WC_ERR, WC_ERR, WC_ERR, WC_ERR, WC_ERR, WC_ERR,
+    WC_ERR, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
+    0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
+    0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
+    0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
+    0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
+    0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
+    0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
+    0x0e38, 0x0e39, 0x0e3a, WC_ERR, WC_ERR, WC_ERR, WC_ERR, 0x0e3f,
+    0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
+    0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
+    0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
+    0x0e58, 0x0e59, 0x0e5a, 0x0e5b, WC_ERR, WC_ERR, WC_ERR, WC_ERR
+};
+
+/*
+ * Unicode to plain TIS-620 table for the range U+0E00..U+0E5F.
+ * Entry i holds the TIS-620 byte for code point 0x0e00 + i; code points
+ * without a TIS-620 equivalent hold TH_ERR.  The table is read-only,
+ * hence const.
+ */
+static const thchar_t uni_tis620_0_map_[96] = {
+    TH_ERR, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
+    0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
+    0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
+    0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
+    0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
+    0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
+    0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
+    0xd8, 0xd9, 0xda, TH_ERR, TH_ERR, TH_ERR, TH_ERR, 0xdf,
+    0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
+    0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
+    0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
+    0xf8, 0xf9, 0xfa, 0xfb, TH_ERR, TH_ERR, TH_ERR, TH_ERR
+};
+
+/*
+ * Mac Thai extended code to Unicode table for the upper half of the
+ * code page.  Entry i holds the Unicode value of byte 0x80 + i;
+ * unassigned bytes hold WC_ERR.  Note that bytes 0xee, 0xfa and 0xfb
+ * carry U+2122, U+00AE and U+00A9 here, unlike in plain TIS-620.
+ * The table is read-only, hence const.
+ */
+static const thwchar_t tis620_1_uni_map_[128] = {
+    0x00ab, 0x00bb, 0x2026, 0xf88c, 0xf88f, 0xf892, 0xf895, 0xf898,
+    0xf88b, 0xf88e, 0xf891, 0xf894, 0xf897, 0x201c, 0x201d, 0xf899,
+    WC_ERR, 0x2022, 0xf884, 0xf889, 0xf885, 0xf886, 0xf887, 0xf888,
+    0xf88a, 0xf88d, 0xf890, 0xf893, 0xf896, 0x2018, 0x2019, WC_ERR,
+    0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
+    0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
+    0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
+    0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
+    0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
+    0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
+    0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
+    0x0e38, 0x0e39, 0x0e3a, 0xfeff, 0x200b, 0x2013, 0x2014, 0x0e3f,
+    0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
+    0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x2122, 0x0e4f,
+    0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
+    0x0e58, 0x0e59, 0x00ae, 0x00a9, WC_ERR, WC_ERR, WC_ERR, WC_ERR
+};
+
+/*
+ * Thai Windows extended code to Unicode table for the upper half of the
+ * code page.  Entry i holds the Unicode value of byte 0x80 + i;
+ * unassigned bytes hold WC_ERR.  The table is read-only, hence const.
+ */
+static const thwchar_t tis620_2_uni_map_[128] = {
+    0xf700, 0xf701, 0xf702, 0xf703, 0xf704, 0x2026, 0xf705, 0xf706,
+    0xf707, 0xf708, 0xf709, 0xf70a, 0xf70b, 0xf70c, 0xf70d, 0xf70e,
+    0xf70f, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
+    0xf710, 0xf711, 0xf712, 0xf713, 0xf714, 0xf715, 0xf716, 0xf717,
+    0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
+    0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
+    0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
+    0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
+    0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
+    0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
+    0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
+    0x0e38, 0x0e39, 0x0e3a, WC_ERR, WC_ERR, WC_ERR, WC_ERR, 0x0e3f,
+    0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
+    0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
+    0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
+    0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0xf718, 0xf719, 0xf71a, WC_ERR
+};
+
+/**
+ * @brief Map a single TIS-620 character to its Unicode code
+ *
+ * @param c : TIS-620 character to convert
+ *
+ * @return @a c itself when it is below 0x80; otherwise the entry for
+ *         @a c in the plain TIS-620 table, which is @c WC_ERR for
+ *         unassigned codes
+ */
+thwchar_t
+th_tis2uni (thchar_t c)
+{
+    /* Upper half of the code page is resolved through the table. */
+    if (c >= 0x80)
+        return tis620_0_uni_map_[c - 0x80];
+
+    /* Lower half is passed through unchanged. */
+    return (thwchar_t) c;
+}
+
+/**
+ * @brief Convert string from TIS-620 to Unicode
+ *
+ * @param s : source TIS-620 string (null-terminated)
+ * @param result : buffer for storing resulting Unicode string
+ * @param n : size of @a result buffer (as number of elements)
+ *
+ * @return the length of the output Unicode string, not counting the
+ *         terminator
+ *
+ * At most @a n - 1 characters are converted, and the output is always
+ * null-terminated as long as @a n is at least 1.  If @a n is 0, nothing
+ * is written to @a result and 0 is returned.
+ */
+int
+th_tis2uni_line (const thchar_t *s, thwchar_t *result, size_t n)
+{
+    size_t len = 0;
+
+    /* A zero-sized buffer has no room even for the terminator. */
+    if (n == 0)
+        return 0;
+
+    /* Count in size_t so that large buffer sizes are not truncated. */
+    while (s[len] && len < n - 1) {
+        result[len] = th_tis2uni (s[len]);
+        ++len;
+    }
+    result[len] = 0;
+
+    return (int) len;
+}
+
+/**
+ * @brief Map a single Thai Windows extended code to its Unicode code
+ *
+ * @param c : Thai Windows character/glyph to convert
+ *
+ * @return @a c itself when it is below 0x80; otherwise the entry for
+ *         @a c in the Thai Windows table, which is @c WC_ERR for
+ *         unassigned codes
+ */
+thwchar_t
+th_winthai2uni (thchar_t c)
+{
+    /* Upper half of the code page is resolved through the table. */
+    if (c >= 0x80)
+        return tis620_2_uni_map_[c - 0x80];
+
+    /* Lower half is passed through unchanged. */
+    return (thwchar_t) c;
+}
+
+/**
+ * @brief Map a single Mac Thai extended code to its Unicode code
+ *
+ * @param c : Mac Thai character/glyph to convert
+ *
+ * @return @a c itself when it is below 0x80; otherwise the entry for
+ *         @a c in the Mac Thai table, which is @c WC_ERR for
+ *         unassigned codes
+ */
+thwchar_t
+th_macthai2uni (thchar_t c)
+{
+    /* Upper half of the code page is resolved through the table. */
+    if (c >= 0x80)
+        return tis620_1_uni_map_[c - 0x80];
+
+    /* Lower half is passed through unchanged. */
+    return (thwchar_t) c;
+}
+
+
+/**
+ * @brief Convert character code from Unicode to TIS-620
+ *
+ * @param wc : Unicode character to convert
+ *
+ * @return Corresponding TIS-620 code,
+ *         or @c TH_ERR if conversion is impossible
+ *
+ * Codes 0x00..0x7f are returned unchanged, codes U+0E00..U+0E5F are
+ * looked up in the Thai table, and everything else yields @c TH_ERR.
+ */
+thchar_t
+th_uni2tis (thwchar_t wc)
+{
+    /*
+     * BASIC_LATIN range.  Test the high bits instead of "wc < 0x80":
+     * should thwchar_t be a signed type on this platform (its typedef
+     * is not visible here), a negative value must not pass as ASCII
+     * and get truncated into an arbitrary byte.
+     */
+    if ((wc & ~(thwchar_t) 0x7f) == 0)
+        return (thchar_t) wc;
+
+    /* THAI range */
+    if (0x0e00 <= wc && wc <= 0x0e5f)
+        return uni_tis620_0_map_[wc - 0x0e00];
+
+    /* out of range */
+    return TH_ERR;
+}
+
+/**
+ * @brief Convert string from Unicode to TIS-620
+ *
+ * @param s : source Unicode string (null-terminated)
+ * @param result : buffer for storing resulting TIS-620 string
+ * @param n : size of @a result buffer (as number of elements)
+ *
+ * @return the length of the output TIS-620 string, not counting the
+ *         terminator
+ *
+ * At most @a n - 1 characters are converted, and the output is always
+ * null-terminated as long as @a n is at least 1.  If @a n is 0, nothing
+ * is written to @a result and 0 is returned.
+ *
+ * Note that, since the conversion is lossy, some characters in the
+ * conversion result may be @c TH_ERR, indicating conversion error.
+ */
+int
+th_uni2tis_line (const thwchar_t *s, thchar_t *result, size_t n)
+{
+    size_t len = 0;
+
+    /* A zero-sized buffer has no room even for the terminator. */
+    if (n == 0)
+        return 0;
+
+    /* Count in size_t so that large buffer sizes are not truncated. */
+    while (s[len] && len < n - 1) {
+        result[len] = th_uni2tis (s[len]);
+        ++len;
+    }
+    result[len] = 0;
+
+    return (int) len;
+}
+
+
+/*
+ * Reverse lookup of a Unicode code in a 128-entry byte-to-Unicode table
+ * (entry i describes byte 0x80 + i).
+ *
+ * wc      : Unicode code to look for; assumed out of TIS range
+ * rev_map : table to search
+ *
+ * Returns the lowest byte code whose table entry equals wc,
+ * or TH_ERR if there is none.
+ */
+static thchar_t
+uni2thai_ext_ (thwchar_t wc, const thwchar_t rev_map[])
+{
+    int i;
+
+    /*
+     * Unassigned table slots are filled with WC_ERR, so the error
+     * marker itself must never be searched for: it would "match" the
+     * first hole and come back as a valid-looking byte code.
+     */
+    if (wc == WC_ERR)
+        return TH_ERR;
+
+    for (i = 0; i < 128; ++i) {
+        if (rev_map[i] == wc)
+            return (thchar_t) (0x80 + i);
+    }
+
+    return TH_ERR;
+}
+
+/**
+ * @brief Convert character code from Unicode to Thai Windows extended code
+ *
+ * @param wc : Unicode code to convert
+ *
+ * @return Corresponding Thai Windows extended code,
+ *         or @c TH_ERR if conversion is impossible
+ *
+ * The plain TIS-620 conversion is tried first; only when it fails is
+ * the Thai Windows table searched for @a wc.
+ */
+thchar_t
+th_uni2winthai (thwchar_t wc)
+{
+    thchar_t tis = th_uni2tis (wc);
+
+    if (tis != TH_ERR)
+        return tis;
+
+    return uni2thai_ext_ (wc, tis620_2_uni_map_);
+}
+
+/**
+ * @brief Convert character code from Unicode to Mac Thai extended code
+ *
+ * @param wc : Unicode code to convert
+ *
+ * @return Corresponding Mac Thai extended code,
+ *         or @c TH_ERR if conversion is impossible
+ *
+ * The plain TIS-620 conversion is tried first, but its result is only
+ * accepted when the Mac Thai table assigns the same character to that
+ * code.  Otherwise the Mac Thai table is searched for @a wc.
+ */
+thchar_t
+th_uni2macthai (thwchar_t wc)
+{
+    thchar_t c = th_uni2tis (wc);
+
+    /*
+     * Mac Thai reuses some TIS-620 codes for other characters (0xee,
+     * 0xfa and 0xfb hold U+2122, U+00AE and U+00A9 instead of U+0E4E,
+     * U+0E5A and U+0E5B), so a TIS-620 hit in the upper half must be
+     * confirmed against the Mac Thai table before it is returned.
+     */
+    if (c != TH_ERR && (c < 0x80 || tis620_1_uni_map_[c - 0x80] == wc))
+        return c;
+
+    return uni2thai_ext_ (wc, tis620_1_uni_map_);
+}
+
+/*
+vi:ts=4:ai:expandtab
+*/