summaryrefslogtreecommitdiff
path: root/src/hb-unicode.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/hb-unicode.h')
-rw-r--r--src/hb-unicode.h272
1 files changed, 260 insertions, 12 deletions
diff --git a/src/hb-unicode.h b/src/hb-unicode.h
index b26168f..1c4e097 100644
--- a/src/hb-unicode.h
+++ b/src/hb-unicode.h
@@ -1,7 +1,7 @@
/*
* Copyright © 2009 Red Hat, Inc.
* Copyright © 2011 Codethink Limited
- * Copyright © 2011 Google, Inc.
+ * Copyright © 2011,2012 Google, Inc.
*
* This is part of HarfBuzz, a text shaping library.
*
@@ -40,11 +40,135 @@
HB_BEGIN_DECLS
+/* hb_unicode_general_category_t */
+
+/* Unicode Character Database property: General_Category (gc) */
+typedef enum
+{
+ HB_UNICODE_GENERAL_CATEGORY_CONTROL, /* Cc */
+ HB_UNICODE_GENERAL_CATEGORY_FORMAT, /* Cf */
+ HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED, /* Cn */
+ HB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE, /* Co */
+ HB_UNICODE_GENERAL_CATEGORY_SURROGATE, /* Cs */
+ HB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER, /* Ll */
+ HB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER, /* Lm */
+ HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER, /* Lo */
+ HB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER, /* Lt */
+ HB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER, /* Lu */
+ HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK, /* Mc */
+ HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK, /* Me */
+ HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK, /* Mn */
+ HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER, /* Nd */
+ HB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER, /* Nl */
+ HB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER, /* No */
+ HB_UNICODE_GENERAL_CATEGORY_CONNECT_PUNCTUATION, /* Pc */
+ HB_UNICODE_GENERAL_CATEGORY_DASH_PUNCTUATION, /* Pd */
+ HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION, /* Pe */
+ HB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION, /* Pf */
+ HB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION, /* Pi */
+ HB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION, /* Po */
+ HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION, /* Ps */
+ HB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL, /* Sc */
+ HB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL, /* Sk */
+ HB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL, /* Sm */
+ HB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL, /* So */
+ HB_UNICODE_GENERAL_CATEGORY_LINE_SEPARATOR, /* Zl */
+ HB_UNICODE_GENERAL_CATEGORY_PARAGRAPH_SEPARATOR, /* Zp */
+ HB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR /* Zs */
+} hb_unicode_general_category_t;
+
+/* hb_unicode_combining_class_t */
+
+/* Note: newer versions of Unicode may add new values. Clients should be ready to handle
+ * any value in the 0..254 range being returned from hb_unicode_combining_class().
+ */
+
+/* Unicode Character Database property: Canonical_Combining_Class (ccc) */
+typedef enum
+{
+ HB_UNICODE_COMBINING_CLASS_NOT_REORDERED = 0,
+ HB_UNICODE_COMBINING_CLASS_OVERLAY = 1,
+ HB_UNICODE_COMBINING_CLASS_NUKTA = 7,
+ HB_UNICODE_COMBINING_CLASS_KANA_VOICING = 8,
+ HB_UNICODE_COMBINING_CLASS_VIRAMA = 9,
+
+ /* Hebrew */
+ HB_UNICODE_COMBINING_CLASS_CCC10 = 10,
+ HB_UNICODE_COMBINING_CLASS_CCC11 = 11,
+ HB_UNICODE_COMBINING_CLASS_CCC12 = 12,
+ HB_UNICODE_COMBINING_CLASS_CCC13 = 13,
+ HB_UNICODE_COMBINING_CLASS_CCC14 = 14,
+ HB_UNICODE_COMBINING_CLASS_CCC15 = 15,
+ HB_UNICODE_COMBINING_CLASS_CCC16 = 16,
+ HB_UNICODE_COMBINING_CLASS_CCC17 = 17,
+ HB_UNICODE_COMBINING_CLASS_CCC18 = 18,
+ HB_UNICODE_COMBINING_CLASS_CCC19 = 19,
+ HB_UNICODE_COMBINING_CLASS_CCC20 = 20,
+ HB_UNICODE_COMBINING_CLASS_CCC21 = 21,
+ HB_UNICODE_COMBINING_CLASS_CCC22 = 22,
+ HB_UNICODE_COMBINING_CLASS_CCC23 = 23,
+ HB_UNICODE_COMBINING_CLASS_CCC24 = 24,
+ HB_UNICODE_COMBINING_CLASS_CCC25 = 25,
+ HB_UNICODE_COMBINING_CLASS_CCC26 = 26,
+
+ /* Arabic */
+ HB_UNICODE_COMBINING_CLASS_CCC27 = 27,
+ HB_UNICODE_COMBINING_CLASS_CCC28 = 28,
+ HB_UNICODE_COMBINING_CLASS_CCC29 = 29,
+ HB_UNICODE_COMBINING_CLASS_CCC30 = 30,
+ HB_UNICODE_COMBINING_CLASS_CCC31 = 31,
+ HB_UNICODE_COMBINING_CLASS_CCC32 = 32,
+ HB_UNICODE_COMBINING_CLASS_CCC33 = 33,
+ HB_UNICODE_COMBINING_CLASS_CCC34 = 34,
+ HB_UNICODE_COMBINING_CLASS_CCC35 = 35,
+
+ /* Syriac */
+ HB_UNICODE_COMBINING_CLASS_CCC36 = 36,
+
+ /* Telugu */
+ HB_UNICODE_COMBINING_CLASS_CCC84 = 84,
+ HB_UNICODE_COMBINING_CLASS_CCC91 = 91,
+
+ /* Thai */
+ HB_UNICODE_COMBINING_CLASS_CCC103 = 103,
+ HB_UNICODE_COMBINING_CLASS_CCC107 = 107,
+
+ /* Lao */
+ HB_UNICODE_COMBINING_CLASS_CCC118 = 118,
+ HB_UNICODE_COMBINING_CLASS_CCC122 = 122,
+
+ /* Tibetan */
+ HB_UNICODE_COMBINING_CLASS_CCC129 = 129,
+ HB_UNICODE_COMBINING_CLASS_CCC130 = 130,
+ HB_UNICODE_COMBINING_CLASS_CCC133 = 132,
+
+
+ HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW_LEFT = 200,
+ HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW = 202,
+ HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE = 214,
+ HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE_RIGHT = 216,
+ HB_UNICODE_COMBINING_CLASS_BELOW_LEFT = 218,
+ HB_UNICODE_COMBINING_CLASS_BELOW = 220,
+ HB_UNICODE_COMBINING_CLASS_BELOW_RIGHT = 222,
+ HB_UNICODE_COMBINING_CLASS_LEFT = 224,
+ HB_UNICODE_COMBINING_CLASS_RIGHT = 226,
+ HB_UNICODE_COMBINING_CLASS_ABOVE_LEFT = 228,
+ HB_UNICODE_COMBINING_CLASS_ABOVE = 230,
+ HB_UNICODE_COMBINING_CLASS_ABOVE_RIGHT = 232,
+ HB_UNICODE_COMBINING_CLASS_DOUBLE_BELOW = 233,
+ HB_UNICODE_COMBINING_CLASS_DOUBLE_ABOVE = 234,
+
+ HB_UNICODE_COMBINING_CLASS_IOTA_SUBSCRIPT = 240,
+
+ HB_UNICODE_COMBINING_CLASS_INVALID = 255
+} hb_unicode_combining_class_t;
+
+
/*
* hb_unicode_funcs_t
*/
-typedef struct _hb_unicode_funcs_t hb_unicode_funcs_t;
+typedef struct hb_unicode_funcs_t hb_unicode_funcs_t;
/*
@@ -71,7 +195,8 @@ hb_unicode_funcs_set_user_data (hb_unicode_funcs_t *ufuncs,
hb_user_data_key_t *key,
void * data,
hb_destroy_func_t destroy,
- hb_bool_t replace);
+ hb_bool_t replace);
+
void *
hb_unicode_funcs_get_user_data (hb_unicode_funcs_t *ufuncs,
@@ -94,7 +219,7 @@ hb_unicode_funcs_get_parent (hb_unicode_funcs_t *ufuncs);
/* typedefs */
-typedef unsigned int (*hb_unicode_combining_class_func_t) (hb_unicode_funcs_t *ufuncs,
+typedef hb_unicode_combining_class_t (*hb_unicode_combining_class_func_t) (hb_unicode_funcs_t *ufuncs,
hb_codepoint_t unicode,
void *user_data);
typedef unsigned int (*hb_unicode_eastasian_width_func_t) (hb_unicode_funcs_t *ufuncs,
@@ -121,47 +246,165 @@ typedef hb_bool_t (*hb_unicode_decompose_func_t) (hb_unicode_funcs_t *ufuncs,
hb_codepoint_t *b,
void *user_data);
+/**
+ * hb_unicode_decompose_compatibility_func_t:
+ * @ufuncs: a Unicode function structure
+ * @u: codepoint to decompose
+ * @decomposed: address of codepoint array (of length %HB_UNICODE_MAX_DECOMPOSITION_LEN) to write decomposition into
+ * @user_data: user data pointer as passed to hb_unicode_funcs_set_decompose_compatibility_func()
+ *
+ * Fully decompose @u to its Unicode compatibility decomposition. The codepoints of the decomposition will be written to @decomposed.
+ * The complete length of the decomposition will be returned.
+ *
+ * If @u has no compatibility decomposition, zero should be returned.
+ *
+ * The Unicode standard guarantees that a buffer of length %HB_UNICODE_MAX_DECOMPOSITION_LEN codepoints will always be sufficient for any
+ * compatibility decomposition plus an terminating value of 0. Consequently, @decompose must be allocated by the caller to be at least this length. Implementations
+ * of this function type must ensure that they do not write past the provided array.
+ *
+ * Return value: number of codepoints in the full compatibility decomposition of @u, or 0 if no decomposition available.
+ */
+typedef unsigned int (*hb_unicode_decompose_compatibility_func_t) (hb_unicode_funcs_t *ufuncs,
+ hb_codepoint_t u,
+ hb_codepoint_t *decomposed,
+ void *user_data);
+
+/* See Unicode 6.1 for details on the maximum decomposition length. */
+#define HB_UNICODE_MAX_DECOMPOSITION_LEN (18+1) /* codepoints */
+
/* setters */
+/**
+ * hb_unicode_funcs_set_combining_class_func:
+ * @ufuncs: a Unicode function structure
+ * @func: (closure user_data) (destroy destroy) (scope notified):
+ * @user_data:
+ * @destroy:
+ *
+ *
+ *
+ * Since: 1.0
+ **/
void
hb_unicode_funcs_set_combining_class_func (hb_unicode_funcs_t *ufuncs,
- hb_unicode_combining_class_func_t combining_class_func,
+ hb_unicode_combining_class_func_t func,
void *user_data, hb_destroy_func_t destroy);
+/**
+ * hb_unicode_funcs_set_eastasian_width_func:
+ * @ufuncs: a Unicode function structure
+ * @func: (closure user_data) (destroy destroy) (scope notified):
+ * @user_data:
+ * @destroy:
+ *
+ *
+ *
+ * Since: 1.0
+ **/
void
hb_unicode_funcs_set_eastasian_width_func (hb_unicode_funcs_t *ufuncs,
- hb_unicode_eastasian_width_func_t eastasian_width_func,
+ hb_unicode_eastasian_width_func_t func,
void *user_data, hb_destroy_func_t destroy);
+/**
+ * hb_unicode_funcs_set_general_category_func:
+ * @ufuncs: a Unicode function structure
+ * @func: (closure user_data) (destroy destroy) (scope notified):
+ * @user_data:
+ * @destroy:
+ *
+ *
+ *
+ * Since: 1.0
+ **/
void
hb_unicode_funcs_set_general_category_func (hb_unicode_funcs_t *ufuncs,
- hb_unicode_general_category_func_t general_category_func,
+ hb_unicode_general_category_func_t func,
void *user_data, hb_destroy_func_t destroy);
+/**
+ * hb_unicode_funcs_set_mirroring_func:
+ * @ufuncs: a Unicode function structure
+ * @func: (closure user_data) (destroy destroy) (scope notified):
+ * @user_data:
+ * @destroy:
+ *
+ *
+ *
+ * Since: 1.0
+ **/
void
hb_unicode_funcs_set_mirroring_func (hb_unicode_funcs_t *ufuncs,
- hb_unicode_mirroring_func_t mirroring_func,
+ hb_unicode_mirroring_func_t func,
void *user_data, hb_destroy_func_t destroy);
+/**
+ * hb_unicode_funcs_set_script_func:
+ * @ufuncs: a Unicode function structure
+ * @func: (closure user_data) (destroy destroy) (scope notified):
+ * @user_data:
+ * @destroy:
+ *
+ *
+ *
+ * Since: 1.0
+ **/
void
hb_unicode_funcs_set_script_func (hb_unicode_funcs_t *ufuncs,
- hb_unicode_script_func_t script_func,
+ hb_unicode_script_func_t func,
void *user_data, hb_destroy_func_t destroy);
+/**
+ * hb_unicode_funcs_set_compose_func:
+ * @ufuncs: a Unicode function structure
+ * @func: (closure user_data) (destroy destroy) (scope notified):
+ * @user_data:
+ * @destroy:
+ *
+ *
+ *
+ * Since: 1.0
+ **/
void
hb_unicode_funcs_set_compose_func (hb_unicode_funcs_t *ufuncs,
- hb_unicode_compose_func_t compose_func,
+ hb_unicode_compose_func_t func,
void *user_data, hb_destroy_func_t destroy);
+/**
+ * hb_unicode_funcs_set_decompose_func:
+ * @ufuncs: a Unicode function structure
+ * @func: (closure user_data) (destroy destroy) (scope notified):
+ * @user_data:
+ * @destroy:
+ *
+ *
+ *
+ * Since: 1.0
+ **/
void
hb_unicode_funcs_set_decompose_func (hb_unicode_funcs_t *ufuncs,
- hb_unicode_decompose_func_t decompose_func,
+ hb_unicode_decompose_func_t func,
void *user_data, hb_destroy_func_t destroy);
+/**
+ * hb_unicode_funcs_set_decompose_compatibility_func:
+ * @ufuncs: a Unicode function structure
+ * @func: (closure user_data) (destroy destroy) (scope notified):
+ * @user_data:
+ * @destroy:
+ *
+ *
+ *
+ * Since: 1.0
+ **/
+void
+hb_unicode_funcs_set_decompose_compatibility_func (hb_unicode_funcs_t *ufuncs,
+ hb_unicode_decompose_compatibility_func_t func,
+ void *user_data, hb_destroy_func_t destroy);
/* accessors */
-unsigned int
+hb_unicode_combining_class_t
hb_unicode_combining_class (hb_unicode_funcs_t *ufuncs,
hb_codepoint_t unicode);
@@ -192,6 +435,11 @@ hb_unicode_decompose (hb_unicode_funcs_t *ufuncs,
hb_codepoint_t *a,
hb_codepoint_t *b);
+unsigned int
+hb_unicode_decompose_compatibility (hb_unicode_funcs_t *ufuncs,
+ hb_codepoint_t u,
+ hb_codepoint_t *decomposed);
+
HB_END_DECLS
#endif /* HB_UNICODE_H */