diff options
Diffstat (limited to 'src/collation.c')
-rw-r--r-- | src/collation.c | 561 |
1 files changed, 561 insertions, 0 deletions
diff --git a/src/collation.c b/src/collation.c new file mode 100644 index 0000000..e3358d9 --- /dev/null +++ b/src/collation.c @@ -0,0 +1,561 @@ +/* + * libslp-db-util + * + * Copyright (c) 2000 - 2011 Samsung Electronics Co., Ltd. All rights reserved. + * + * Contact: Hakjoo Ko <hakjoo.ko@samsung.com> + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +#include <string.h> +#include <math.h> +#include <wchar.h> +#include <wctype.h> + +#include <unistd.h> +#include <glib.h> + +#include <dlfcn.h> + +#include <unicode/utypes.h> +#include <unicode/ucol.h> +#include <unicode/uiter.h> +#include <unicode/ustring.h> + +#include "collation.h" + +#include "db-util-debug.h" +#include <vconf.h> + +#define DB_UTIL_RETV_IF(cond, ret) \ + do { \ + if (cond) { \ + DB_UTIL_TRACE_WARNING("\x1b[33m[%s()][RET_IF]\x1b[0m", __FUNCTION__); \ + return ret; \ + } \ + } while (0) + +#define DB_UTIL_RET_IF(cond) \ + do { \ + if (cond) { \ + DB_UTIL_TRACE_WARNING("\x1b[33m[%s()][RET_IF]\x1b[0m", __FUNCTION__); \ + return; \ + } \ + } while (0) + +#define DB_UTIL_ERR_COL_FUNC_RET DB_UTIL_ERROR + +#ifdef DB_UTIL_ARCH_64 +#define ICUI18N_LIBPATH "/usr/lib64/libicui18n.so" +#else +#define ICUI18N_LIBPATH "/usr/lib/libicui18n.so" +#endif + +enum { + DB_UTIL_ERR_DLOPEN = -10, + DB_UTIL_ERR_DLSYM, + DB_UTIL_ERR_ENV, + DB_UTIL_ERR_ICU, + DB_UTIL_ERR_PARAM +}; + +enum { + DB_UTIL_CMP_SC, + /* Loop comparison */ + DB_UTIL_CMP_LC +}; + +#define ICU_FUNC_CNT 8 + +typedef UCollator *(*ICU_UCOL_OPEN)(const char *, UErrorCode *); +typedef void (*ICU_UCOL_CLOSE)(UCollator *); +typedef void (*ICU_UCOL_SETSTRENGTH)(UCollator *, UCollationStrength); +typedef UCollationResult (*ICU_UCOL_STRCOLL)(const UCollator *, const UChar *, int32_t, const UChar *, int32_t); +typedef UCollationResult (*ICU_UCOL_STRCOLLITER)(const UCollator *, UCharIterator *, UCharIterator *, UErrorCode *); +typedef void (*ICU_UITER_SETUTF8)(UCharIterator *, const char *, int32_t); +typedef void (*ICU_ULOC_SETDEFAULT)(const char* localeID, UErrorCode* status); +typedef const char* (*ICU_ULOC_GETDEFAULT)(void); + +typedef struct { + ICU_UCOL_OPEN icu_ucol_open; + ICU_UCOL_CLOSE icu_ucol_close; + ICU_UCOL_STRCOLL icu_ucol_strcoll; + ICU_UCOL_STRCOLLITER icu_ucol_strcollIter; + ICU_UCOL_SETSTRENGTH icu_ucol_setStrength; + ICU_UITER_SETUTF8 icu_uiter_setUTF8; + ICU_ULOC_SETDEFAULT icu_uloc_setDefault; + ICU_ULOC_GETDEFAULT icu_uloc_getDefault; +} db_util_icu_func_t; + +db_util_icu_func_t icu_symbol; + +void *g_dl_icu_handle = NULL; + +#ifdef DB_UTIL_ENABLE_DEVDEBUG +static char *strtoutf8(const UChar * unichars, int len); +#endif + +static int __db_util_dl_load_icu() +{ + void *handle = NULL; + void *icu_handle[ICU_FUNC_CNT] = { 0 }; + char *dl_error; + int i = 0; + + const char *ICU_API[] = { + "ucol_open", + "ucol_close", + "ucol_strcoll", + "ucol_strcollIter", + "ucol_setStrength", + "uiter_setUTF8", + "uloc_setDefault", + "uloc_getDefault" + }; + + if (g_dl_icu_handle == NULL) { + g_dl_icu_handle = dlopen(ICUI18N_LIBPATH, RTLD_LAZY | RTLD_GLOBAL); + if (g_dl_icu_handle == NULL) { + DB_UTIL_TRACE_WARNING("dlopen icu so fail"); + return DB_UTIL_ERR_DLOPEN; + } + } + + for (i = 0; i < ICU_FUNC_CNT; i++) { + handle = dlsym(g_dl_icu_handle, ICU_API[i]); + if ((dl_error = dlerror()) != NULL) { + DB_UTIL_TRACE_WARNING("dlsym(%s) is failed for %s", + dl_error, ICU_API[i]); + return DB_UTIL_ERR_DLSYM; + } + icu_handle[i] = handle; + } + + memcpy((void*)&icu_symbol, (const void*)icu_handle, sizeof(icu_handle)); + + return DB_UTIL_OK; +} + +/* The collating function must return an integer that is negative, zero or positive */ +static int __db_util_collate_icu_16(void *ucol, int str1_len, const void *str1, int str2_len, const void *str2) +{ +#ifdef DB_UTIL_ENABLE_DEVDEBUG + DB_UTIL_TRACE_DEBUG("__db_util_collate_icu_16 func start \n"); + + UChar* tmp_v1 = (UChar *)str1; + UChar* tmp_v2 = (UChar *)str2; + char* utf8_v1 = strtoutf8(tmp_v1, str1_len); + char* utf8_v2 = strtoutf8(tmp_v2, str2_len); + + DB_UTIL_TRACE_DEBUG("v1(%d) : %s\n", str1_len, utf8_v1); + DB_UTIL_TRACE_DEBUG("v2(%d) : %s\n", str2_len, utf8_v2); +#endif + + DB_UTIL_RETV_IF(icu_symbol.icu_ucol_strcoll == NULL, DB_UTIL_ERR_COL_FUNC_RET); + + UCollationResult result = icu_symbol.icu_ucol_strcoll( + (UCollator *) ucol, + (const UChar *) str1, str1_len, + (const UChar *) str2, str2_len); + +#ifdef DB_UTIL_ENABLE_DEVDEBUG + if (utf8_v1) + free(utf8_v1); + if (utf8_v2) + free(utf8_v2); + + if (result == UCOL_LESS) { + DB_UTIL_TRACE_DEBUG("less \n"); + } else if (result == UCOL_GREATER) { + DB_UTIL_TRACE_DEBUG("greater \n"); + } else { + DB_UTIL_TRACE_DEBUG("equal \n"); + } +#endif + + return result; +} + +/* The collating function must return an integer that is negative, zero or positive */ +static int __db_util_collate_icu_16_lc(void *ucol, int str1_len, const void *str1, int str2_len, const void *str2) +{ +#ifdef DB_UTIL_ENABLE_DEVDEBUG + DB_UTIL_TRACE_DEBUG("__db_util_collate_icu_16_lc func start \n"); + + UChar* tmp_v1 = (UChar *)str1; + UChar* tmp_v2 = (UChar *)str2; + + char* utf8_v1 = strtoutf8(tmp_v1, str1_len); + char* utf8_v2 = strtoutf8(tmp_v2, str2_len); + + DB_UTIL_TRACE_DEBUG("v1(%d) : %s\n", str1_len, utf8_v1); + DB_UTIL_TRACE_DEBUG("v2(%d) : %s\n", str2_len, utf8_v2); + + if (utf8_v1) + free(utf8_v1); + if (utf8_v2) + free(utf8_v2); +#endif + + UCollationResult result = 0; + + UChar* str_to = (UChar *)str1; + UChar* str_from = (UChar *)str1; + + int i; + + DB_UTIL_RETV_IF(icu_symbol.icu_ucol_strcoll == NULL, DB_UTIL_ERR_COL_FUNC_RET); + + if (str1_len > str2_len) { + for (i = 0; i < str2_len; i = i+2) { + str_to++; + } + + while ((int)(str_to-(UChar*)str1) <= str1_len) { + result = icu_symbol.icu_ucol_strcoll( + (UCollator *) ucol, + (UChar *) str_from, str_to-str_from, + (const UChar *) str2, str2_len); + + if (result == UCOL_EQUAL) { +#ifdef DB_UTIL_ENABLE_DEVDEBUG + DB_UTIL_TRACE_DEBUG("equal \n"); +#endif + return UCOL_EQUAL; + } + + str_to++; + str_from++; + } + } else { + result = icu_symbol.icu_ucol_strcoll( + (UCollator *) ucol, + (const UChar *) str1, str1_len, + (const UChar *) str2, str2_len); + } + +#ifdef DB_UTIL_ENABLE_DEVDEBUG + if (result == UCOL_LESS) { + DB_UTIL_TRACE_DEBUG("less \n"); + } else if (result == UCOL_GREATER) { + DB_UTIL_TRACE_DEBUG("greater \n"); + } else if (result == UCOL_EQUAL) { + DB_UTIL_TRACE_DEBUG("equal \n"); + } else { + DB_UTIL_TRACE_DEBUG("compare error : %d \n", result); + } +#endif + + return result; +} + +/* The collating function must return an integer that is negative, zero or positive */ +static int __db_util_collate_icu_8(void *ucol, int str1_len, const void *str1, int str2_len, const void *str2) +{ + UCharIterator uiter1, uiter2; + UErrorCode error = U_ZERO_ERROR; + +#ifdef DB_UTIL_ENABLE_DEVDEBUG + DB_UTIL_TRACE_DEBUG("__db_util_collate_icu_8 func start \n"); + DB_UTIL_TRACE_DEBUG("v1(%d) : %s, v2(%d) : %s \n", str1_len, (char*)str1, str2_len, (char*)str2); +#endif + + DB_UTIL_RETV_IF(icu_symbol.icu_uiter_setUTF8 == NULL, DB_UTIL_ERR_COL_FUNC_RET); + DB_UTIL_RETV_IF(icu_symbol.icu_ucol_strcollIter == NULL, DB_UTIL_ERR_COL_FUNC_RET); + + icu_symbol.icu_uiter_setUTF8(&uiter1, (const char *) str1, str1_len); + icu_symbol.icu_uiter_setUTF8(&uiter2, (const char *) str2, str2_len); + + UCollationResult result = icu_symbol.icu_ucol_strcollIter( + (UCollator *) ucol, + &uiter1, + &uiter2, + &error); + if (U_FAILURE(error)) { + DB_UTIL_TRACE_ERROR("__db_util_collate_icu_8 ucol_strcollIter error: %d\n", error); + return DB_UTIL_ERR_COL_FUNC_RET; + } + +#ifdef DB_UTIL_ENABLE_DEVDEBUG + if (result == UCOL_LESS) { + DB_UTIL_TRACE_DEBUG("less \n"); + } else if (result == UCOL_GREATER) { + DB_UTIL_TRACE_DEBUG("greater \n"); + } else { + DB_UTIL_TRACE_DEBUG("equal \n"); + } +#endif + + return result; +} + +static int __db_util_collate_icu_8_lc(void *ucol, int str1_len, const void *str1, int str2_len, const void *str2) +{ + UCharIterator uiter1, uiter2; + UErrorCode error = U_ZERO_ERROR; + UCollationResult result = 0; + char* str_from = (char*)str1; + char* str_to = (char*)str1; + glong v1_char_len, v2_char_len; + int i; + +#ifdef DB_UTIL_ENABLE_DEVDEBUG + DB_UTIL_TRACE_DEBUG("__db_util_collate_icu_8_lc func start \n"); + DB_UTIL_TRACE_DEBUG("v1(%d) : %s, v2(%d) : %s \n", str1_len, (char*)str1, str2_len, (char*)str2); +#endif + + DB_UTIL_RETV_IF(icu_symbol.icu_uiter_setUTF8 == NULL, DB_UTIL_ERR_COL_FUNC_RET); + DB_UTIL_RETV_IF(icu_symbol.icu_ucol_strcollIter == NULL, DB_UTIL_ERR_COL_FUNC_RET); + + icu_symbol.icu_uiter_setUTF8(&uiter2, (const char *) str2, str2_len); + + v1_char_len = g_utf8_strlen((gchar *)str1, -1); + v2_char_len = g_utf8_strlen((gchar *)str2, -1); + + if (v1_char_len > v2_char_len) { + for (i = 0; i < v2_char_len; i++) { + str_to = g_utf8_next_char(str_to); + } + + while ((int)(str_to-(char*)str1) <= str1_len) { + icu_symbol.icu_uiter_setUTF8( + &uiter1, + (const char *) str_from, + str_to - str_from); + + result = icu_symbol.icu_ucol_strcollIter( + (UCollator *) ucol, + &uiter1, + &uiter2, + &error); + if (U_FAILURE(error)) { + DB_UTIL_TRACE_ERROR("__db_util_collate_icu_8_lc ucol_strcollIter error: %d\n", error); + return DB_UTIL_ERR_COL_FUNC_RET; + } + + if (result == UCOL_EQUAL) +#ifdef DB_UTIL_ENABLE_DEVDEBUG + { + DB_UTIL_TRACE_DEBUG("equal \n"); + return UCOL_EQUAL; + } +#else + return UCOL_EQUAL; +#endif + + str_to = g_utf8_next_char(str_to); + str_from = g_utf8_next_char(str_from); + } + } else { + icu_symbol.icu_uiter_setUTF8( + &uiter1, + (const char *) str1, + str1_len); + + result = icu_symbol.icu_ucol_strcollIter( + (UCollator *) ucol, + &uiter1, + &uiter2, + &error); + if (U_FAILURE(error)) { + DB_UTIL_TRACE_ERROR("__db_util_collate_icu_8_lc ucol_strcollIter error: %d\n", error); + return DB_UTIL_ERR_COL_FUNC_RET; + } + } + +#ifdef DB_UTIL_ENABLE_DEVDEBUG + if (result == UCOL_LESS) { + DB_UTIL_TRACE_DEBUG("less \n"); + } else if (result == UCOL_GREATER) { + DB_UTIL_TRACE_DEBUG("greater \n"); + } else if (result == UCOL_EQUAL) { + DB_UTIL_TRACE_DEBUG("equal \n"); + } else { + DB_UTIL_TRACE_DEBUG("compare error : %d\n", result); + } +#endif + + return result; +} + +static void __db_util_collate_icu_close(void* ucol) +{ + DB_UTIL_TRACE_DEBUG("close icu collator\n"); + DB_UTIL_RET_IF(icu_symbol.icu_ucol_close == NULL); + icu_symbol.icu_ucol_close((UCollator *) ucol); +} + +static int __db_util_collation_create(sqlite3* db_handle, char* locale, char* collator_name, UCollationStrength ucol_strength_value, int utf_type, int cmp_type) +{ + int err; + UErrorCode status = U_ZERO_ERROR; + + DB_UTIL_RETV_IF(icu_symbol.icu_ucol_open == NULL, DB_UTIL_ERR_DLSYM); + DB_UTIL_RETV_IF(icu_symbol.icu_ucol_setStrength == NULL, DB_UTIL_ERR_DLSYM); + + UCollator* ucol = icu_symbol.icu_ucol_open(locale, &status); + if (status == U_USING_DEFAULT_WARNING) { + DB_UTIL_TRACE_ERROR("ucol_open success with default collate option\n"); + } else if (U_FAILURE(status)) { + DB_UTIL_TRACE_ERROR("ucol_open fail : %d \n", status); + return DB_UTIL_ERR_ICU; + } +#ifdef DB_UTIL_ENABLE_DEVDEBUG + else { + DB_UTIL_TRACE_DEBUG("ucol_open success : %d \n", status); + } +#endif + + icu_symbol.icu_ucol_setStrength(ucol, ucol_strength_value); + if (U_FAILURE(status)) { + DB_UTIL_TRACE_ERROR("ucol_setStrength fail : %d \n", status); + return DB_UTIL_ERR_ICU; + } else { + DB_UTIL_TRACE_DEBUG("ucol_setStrength success \n"); + } + + if (utf_type == DB_UTIL_COL_UTF8) { + if (cmp_type == DB_UTIL_CMP_LC) { + err = sqlite3_create_collation_v2(db_handle, collator_name, SQLITE_UTF8, ucol, + __db_util_collate_icu_8_lc, (void(*)(void*))__db_util_collate_icu_close); + } else { + err = sqlite3_create_collation_v2(db_handle, collator_name, SQLITE_UTF8, ucol, + __db_util_collate_icu_8, (void(*)(void*))__db_util_collate_icu_close); + } + } else if (utf_type == DB_UTIL_COL_UTF16) { + if (cmp_type == DB_UTIL_CMP_LC) { + err = sqlite3_create_collation_v2(db_handle, collator_name, SQLITE_UTF16, ucol, + __db_util_collate_icu_16_lc, (void(*)(void*))__db_util_collate_icu_close); + } else { + err = sqlite3_create_collation_v2(db_handle, collator_name, SQLITE_UTF16, ucol, + __db_util_collate_icu_16, (void(*)(void*))__db_util_collate_icu_close); + } + } else { + DB_UTIL_TRACE_ERROR("wrong utf_type param value : %d\n", utf_type); + return DB_UTIL_ERR_PARAM; + } + + if (err != SQLITE_OK) { + DB_UTIL_TRACE_ERROR("sqlite3_create_collation_v2 fail : %d \n", err); + __db_util_collate_icu_close((void*)ucol); + return err; + } else { + DB_UTIL_TRACE_DEBUG("sqlite3_create_collation_v2 success \n"); + return DB_UTIL_OK; + } +} + +int db_util_create_collation( + PARAM_IN sqlite3 *db_handle, + PARAM_IN db_util_collate_type type, + PARAM_IN db_util_collate_textrep text_type, + PARAM_IN char* col_name) +{ + int ret = DB_UTIL_OK; + UErrorCode status = U_ZERO_ERROR; + const char* locale = NULL; + + if ((!db_handle) || (!col_name)) { + DB_UTIL_TRACE_ERROR("wrong input param"); + return DB_UTIL_ERROR; + } + + DB_UTIL_TRACE_DEBUG("db_util_create_collation start"); + + ret = __db_util_dl_load_icu(); + DB_UTIL_RETV_IF(ret != DB_UTIL_OK, DB_UTIL_ERROR); + + char *lang = vconf_get_str(VCONFKEY_LANGSET); + /* get current locale */ + if (lang) { + icu_symbol.icu_uloc_setDefault(lang, &status); + free(lang); + } else { + DB_UTIL_TRACE_WARNING("Fail to get current language vconf : %d", DB_UTIL_ERR_ENV); + return DB_UTIL_ERROR; + } + locale = icu_symbol.icu_uloc_getDefault(); + if (locale == NULL) { + DB_UTIL_TRACE_WARNING("Fail to get current locale : %d", DB_UTIL_ERR_ENV); + return DB_UTIL_ERROR; + } + DB_UTIL_TRACE_DEBUG("locale : %s", locale); + +#ifdef ENABLE_COL_KO_IC + if ((db_util_collate_type == DB_UTIL_COL_KO_IC) || + (db_util_collate_type == DB_UTIL_COL_KO_IC_LC)) { + if (strncmp(locale, "ko", 2) != 0) { + DB_UTIL_TRACE_WARNING("collate type is not match with current locale : %d", DB_UTIL_ERR_ENV); + return DB_UTIL_ERROR; + } + } +#endif + + switch (type) { + case DB_UTIL_COL_UCA: + ret = __db_util_collation_create(db_handle, NULL, col_name, UCOL_SECONDARY, text_type, DB_UTIL_CMP_SC); + break; + case DB_UTIL_COL_LS_AS_CI: + ret = __db_util_collation_create(db_handle, (char*)locale, col_name, UCOL_SECONDARY, text_type, DB_UTIL_CMP_SC); + break; + case DB_UTIL_COL_LS_AI_CI: + ret = __db_util_collation_create(db_handle, (char*)locale, col_name, UCOL_PRIMARY, text_type, DB_UTIL_CMP_SC); + break; + case DB_UTIL_COL_LS_AI_CI_LC: + ret = __db_util_collation_create(db_handle, (char*)locale, col_name, UCOL_PRIMARY, text_type, DB_UTIL_CMP_LC); + break; +#ifdef ENABLE_COL_KO_IC + case DB_UTIL_COL_KO_IC: + ret = __db_util_collation_create(db_handle, (char*)locale, col_name, UCOL_PRIMARY, text_type, DB_UTIL_CMP_SC); + break; + case DB_UTIL_COL_KO_IC_LC: + ret = __db_util_collation_create(db_handle, (char*)locale, col_name, UCOL_PRIMARY, text_type, DB_UTIL_CMP_LC); + break; +#endif + default: + DB_UTIL_TRACE_WARNING("wrong collate input type"); + ret = DB_UTIL_ERROR; + } + + if (ret != DB_UTIL_OK) + ret = DB_UTIL_ERROR; + + return ret; +} + +#ifdef DB_UTIL_ENABLE_DEVDEBUG + +static char *strtoutf8(const UChar * unichars, int len) +{ + DB_UTIL_TRACE_WARNING("strtoutf8 start"); + + int lenstr, lenutf8; + char *pstr = NULL; + UErrorCode status = U_ZERO_ERROR; + + lenstr = lenutf8 = 0; + lenstr = sizeof(char) * 4 * (len + 1); + pstr = (char *)malloc(lenstr); + if (!pstr)return NULL; + u_strToUTF8(pstr, lenstr, &lenutf8, unichars, len, &status); + if (U_FAILURE(status)) { + DB_UTIL_TRACE_WARNING("u_strToUTF8 failed in strtoutf8 :%s\n", + u_errorName(status)); + return NULL; + } + DB_UTIL_TRACE_WARNING("strtoutf8 out : %s", pstr); + return pstr; +} + +#endif |