From 0809aadd4bbd5d0f256407def7cc10b79772a824 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Wed, 20 Apr 2011 02:44:29 -0400 Subject: [PATCH] [glib] Add two-way script conversion functions Also optimizes the common-direction script lookup to be an array lookup. --- src/hb-glib.cc | 270 +++++++++++++++++++++++++------------------------ src/hb-glib.h | 8 ++ 2 files changed, 148 insertions(+), 130 deletions(-) diff --git a/src/hb-glib.cc b/src/hb-glib.cc index a87031b1..158a2104 100644 --- a/src/hb-glib.cc +++ b/src/hb-glib.cc @@ -34,6 +34,145 @@ HB_BEGIN_DECLS +static const hb_script_t +glib_script_to_script[] = +{ + HB_SCRIPT_COMMON, + HB_SCRIPT_INHERITED, + HB_SCRIPT_ARABIC, + HB_SCRIPT_ARMENIAN, + HB_SCRIPT_BENGALI, + HB_SCRIPT_BOPOMOFO, + HB_SCRIPT_CHEROKEE, + HB_SCRIPT_COPTIC, + HB_SCRIPT_CYRILLIC, + HB_SCRIPT_DESERET, + HB_SCRIPT_DEVANAGARI, + HB_SCRIPT_ETHIOPIC, + HB_SCRIPT_GEORGIAN, + HB_SCRIPT_GOTHIC, + HB_SCRIPT_GREEK, + HB_SCRIPT_GUJARATI, + HB_SCRIPT_GURMUKHI, + HB_SCRIPT_HAN, + HB_SCRIPT_HANGUL, + HB_SCRIPT_HEBREW, + HB_SCRIPT_HIRAGANA, + HB_SCRIPT_KANNADA, + HB_SCRIPT_KATAKANA, + HB_SCRIPT_KHMER, + HB_SCRIPT_LAO, + HB_SCRIPT_LATIN, + HB_SCRIPT_MALAYALAM, + HB_SCRIPT_MONGOLIAN, + HB_SCRIPT_MYANMAR, + HB_SCRIPT_OGHAM, + HB_SCRIPT_OLD_ITALIC, + HB_SCRIPT_ORIYA, + HB_SCRIPT_RUNIC, + HB_SCRIPT_SINHALA, + HB_SCRIPT_SYRIAC, + HB_SCRIPT_TAMIL, + HB_SCRIPT_TELUGU, + HB_SCRIPT_THAANA, + HB_SCRIPT_THAI, + HB_SCRIPT_TIBETAN, + HB_SCRIPT_CANADIAN_ABORIGINAL, + HB_SCRIPT_YI, + HB_SCRIPT_TAGALOG, + HB_SCRIPT_HANUNOO, + HB_SCRIPT_BUHID, + HB_SCRIPT_TAGBANWA, + + /* Unicode-4.0 additions */ + HB_SCRIPT_BRAILLE, + HB_SCRIPT_CYPRIOT, + HB_SCRIPT_LIMBU, + HB_SCRIPT_OSMANYA, + HB_SCRIPT_SHAVIAN, + HB_SCRIPT_LINEAR_B, + HB_SCRIPT_TAI_LE, + HB_SCRIPT_UGARITIC, + + /* Unicode-4.1 additions */ + HB_SCRIPT_NEW_TAI_LUE, + HB_SCRIPT_BUGINESE, + HB_SCRIPT_GLAGOLITIC, + HB_SCRIPT_TIFINAGH, + HB_SCRIPT_SYLOTI_NAGRI, + HB_SCRIPT_OLD_PERSIAN, + HB_SCRIPT_KHAROSHTHI, + + /* 
Unicode-5.0 additions */ + HB_SCRIPT_UNKNOWN, + HB_SCRIPT_BALINESE, + HB_SCRIPT_CUNEIFORM, + HB_SCRIPT_PHOENICIAN, + HB_SCRIPT_PHAGS_PA, + HB_SCRIPT_NKO, + + /* Unicode-5.1 additions */ + HB_SCRIPT_KAYAH_LI, + HB_SCRIPT_LEPCHA, + HB_SCRIPT_REJANG, + HB_SCRIPT_SUNDANESE, + HB_SCRIPT_SAURASHTRA, + HB_SCRIPT_CHAM, + HB_SCRIPT_OL_CHIKI, + HB_SCRIPT_VAI, + HB_SCRIPT_CARIAN, + HB_SCRIPT_LYCIAN, + HB_SCRIPT_LYDIAN, + + /* Unicode-5.2 additions */ + HB_SCRIPT_AVESTAN, + HB_SCRIPT_BAMUM, + HB_SCRIPT_EGYPTIAN_HIEROGLYPHS, + HB_SCRIPT_IMPERIAL_ARAMAIC, + HB_SCRIPT_INSCRIPTIONAL_PAHLAVI, + HB_SCRIPT_INSCRIPTIONAL_PARTHIAN, + HB_SCRIPT_JAVANESE, + HB_SCRIPT_KAITHI, + HB_SCRIPT_TAI_THAM, + HB_SCRIPT_LISU, + HB_SCRIPT_MEETEI_MAYEK, + HB_SCRIPT_OLD_SOUTH_ARABIAN, + HB_SCRIPT_OLD_TURKIC, + HB_SCRIPT_SAMARITAN, + HB_SCRIPT_TAI_VIET, + + /* Unicode-6.0 additions */ + HB_SCRIPT_BATAK, + HB_SCRIPT_BRAHMI, + HB_SCRIPT_MANDAIC +}; + +hb_script_t +hb_glib_script_to_script (GUnicodeScript script) +{ + if (likely ((unsigned int) script < ARRAY_LENGTH (glib_script_to_script))) + return glib_script_to_script[script]; + + if (unlikely (script == G_UNICODE_SCRIPT_INVALID_CODE)) + return HB_SCRIPT_INVALID; + + return HB_SCRIPT_UNKNOWN; +} + +GUnicodeScript +hb_glib_script_from_script (hb_script_t script) +{ + unsigned int count = ARRAY_LENGTH (glib_script_to_script); + for (unsigned int i = 0; i < count; i++) + if (glib_script_to_script[i] == script) + return (GUnicodeScript) i; + + if (unlikely (script == HB_SCRIPT_INVALID)) + return G_UNICODE_SCRIPT_INVALID_CODE; + + return G_UNICODE_SCRIPT_UNKNOWN; +} + static unsigned int hb_glib_get_combining_class (hb_unicode_funcs_t *ufuncs, @@ -76,136 +215,7 @@ hb_glib_get_script (hb_unicode_funcs_t *ufuncs, hb_codepoint_t unicode, void *user_data) { - GUnicodeScript script = g_unichar_get_script (unicode); - switch (script) - { -#define MATCH_SCRIPT(C) case G_UNICODE_SCRIPT_##C: return HB_SCRIPT_##C -#define MATCH_SCRIPT2(C1, C2) case 
G_UNICODE_SCRIPT_##C1: return HB_SCRIPT_##C2 - - MATCH_SCRIPT2(INVALID_CODE, INVALID); - - MATCH_SCRIPT (COMMON); - MATCH_SCRIPT (INHERITED); - MATCH_SCRIPT (ARABIC); - MATCH_SCRIPT (ARMENIAN); - MATCH_SCRIPT (BENGALI); - MATCH_SCRIPT (BOPOMOFO); - MATCH_SCRIPT (CHEROKEE); - MATCH_SCRIPT (COPTIC); - MATCH_SCRIPT (CYRILLIC); - MATCH_SCRIPT (DESERET); - MATCH_SCRIPT (DEVANAGARI); - MATCH_SCRIPT (ETHIOPIC); - MATCH_SCRIPT (GEORGIAN); - MATCH_SCRIPT (GOTHIC); - MATCH_SCRIPT (GREEK); - MATCH_SCRIPT (GUJARATI); - MATCH_SCRIPT (GURMUKHI); - MATCH_SCRIPT (HAN); - MATCH_SCRIPT (HANGUL); - MATCH_SCRIPT (HEBREW); - MATCH_SCRIPT (HIRAGANA); - MATCH_SCRIPT (KANNADA); - MATCH_SCRIPT (KATAKANA); - MATCH_SCRIPT (KHMER); - MATCH_SCRIPT (LAO); - MATCH_SCRIPT (LATIN); - MATCH_SCRIPT (MALAYALAM); - MATCH_SCRIPT (MONGOLIAN); - MATCH_SCRIPT (MYANMAR); - MATCH_SCRIPT (OGHAM); - MATCH_SCRIPT (OLD_ITALIC); - MATCH_SCRIPT (ORIYA); - MATCH_SCRIPT (RUNIC); - MATCH_SCRIPT (SINHALA); - MATCH_SCRIPT (SYRIAC); - MATCH_SCRIPT (TAMIL); - MATCH_SCRIPT (TELUGU); - MATCH_SCRIPT (THAANA); - MATCH_SCRIPT (THAI); - MATCH_SCRIPT (TIBETAN); - MATCH_SCRIPT (CANADIAN_ABORIGINAL); - MATCH_SCRIPT (YI); - MATCH_SCRIPT (TAGALOG); - MATCH_SCRIPT (HANUNOO); - MATCH_SCRIPT (BUHID); - MATCH_SCRIPT (TAGBANWA); - - /* Unicode-4.0 additions */ - MATCH_SCRIPT (BRAILLE); - MATCH_SCRIPT (CYPRIOT); - MATCH_SCRIPT (LIMBU); - MATCH_SCRIPT (OSMANYA); - MATCH_SCRIPT (SHAVIAN); - MATCH_SCRIPT (LINEAR_B); - MATCH_SCRIPT (TAI_LE); - MATCH_SCRIPT (UGARITIC); - - /* Unicode-4.1 additions */ - MATCH_SCRIPT (NEW_TAI_LUE); - MATCH_SCRIPT (BUGINESE); - MATCH_SCRIPT (GLAGOLITIC); - MATCH_SCRIPT (TIFINAGH); - MATCH_SCRIPT (SYLOTI_NAGRI); - MATCH_SCRIPT (OLD_PERSIAN); - MATCH_SCRIPT (KHAROSHTHI); - - /* Unicode-5.0 additions */ - MATCH_SCRIPT (UNKNOWN); - MATCH_SCRIPT (BALINESE); - MATCH_SCRIPT (CUNEIFORM); - MATCH_SCRIPT (PHOENICIAN); - MATCH_SCRIPT (PHAGS_PA); - MATCH_SCRIPT (NKO); - - /* Unicode-5.1 additions */ - MATCH_SCRIPT 
(KAYAH_LI); - MATCH_SCRIPT (LEPCHA); - MATCH_SCRIPT (REJANG); - MATCH_SCRIPT (SUNDANESE); - MATCH_SCRIPT (SAURASHTRA); - MATCH_SCRIPT (CHAM); - MATCH_SCRIPT (OL_CHIKI); - MATCH_SCRIPT (VAI); - MATCH_SCRIPT (CARIAN); - MATCH_SCRIPT (LYCIAN); - MATCH_SCRIPT (LYDIAN); - - /* Unicode-5.2 additions */ -#if GLIB_CHECK_VERSION(2,26,0) - MATCH_SCRIPT (AVESTAN); - MATCH_SCRIPT (BAMUM); - MATCH_SCRIPT (EGYPTIAN_HIEROGLYPHS); - MATCH_SCRIPT (IMPERIAL_ARAMAIC); - MATCH_SCRIPT (INSCRIPTIONAL_PAHLAVI); - MATCH_SCRIPT (INSCRIPTIONAL_PARTHIAN); - MATCH_SCRIPT (JAVANESE); - MATCH_SCRIPT (KAITHI); - MATCH_SCRIPT (TAI_THAM); - MATCH_SCRIPT (LISU); - MATCH_SCRIPT (MEETEI_MAYEK); - MATCH_SCRIPT (OLD_SOUTH_ARABIAN); -#if GLIB_CHECK_VERSION(2,27,92) - MATCH_SCRIPT (OLD_TURKIC); -#else - MATCH_SCRIPT2(OLD_TURKISH, OLD_TURKIC); -#endif - MATCH_SCRIPT (SAMARITAN); - MATCH_SCRIPT (TAI_VIET); -#endif - - /* Unicode-6.0 additions */ -#if GLIB_CHECK_VERSION(2,28,0) - MATCH_SCRIPT (BATAK); - MATCH_SCRIPT (BRAHMI); - MATCH_SCRIPT (MANDAIC); -#endif - -#undef MATCH_SCRIPT -#undef MATCH_SCRIPT2 - } - - return HB_SCRIPT_UNKNOWN; + return hb_glib_script_to_script (g_unichar_get_script (unicode)); } static hb_unicode_funcs_t glib_ufuncs = { diff --git a/src/hb-glib.h b/src/hb-glib.h index 81ab15d0..abec2d2d 100644 --- a/src/hb-glib.h +++ b/src/hb-glib.h @@ -28,10 +28,18 @@ #define HB_GLIB_H #include "hb.h" +#include <glib.h> HB_BEGIN_DECLS +hb_script_t +hb_glib_script_to_script (GUnicodeScript script); + +GUnicodeScript +hb_glib_script_from_script (hb_script_t script); + + hb_unicode_funcs_t * hb_glib_get_unicode_funcs (void); -- GitLab