/*
 * Copyright © 2009  Red Hat, Inc.
 * Copyright © 2011 Codethink Limited
 * Copyright © 2010,2011  Google, Inc.
 *
 *  This is part of HarfBuzz, a text shaping library.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and its documentation for any purpose, provided that the
 * above copyright notice and the following two paragraphs appear in
 * all copies of this software.
 *
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 *
 * Red Hat Author(s): Behdad Esfahbod
 * Codethink Author(s): Ryan Lortie
 * Google Author(s): Behdad Esfahbod
 */

31
#include "hb-private.hh"
B
Behdad Esfahbod 已提交
32

33
#include "hb-unicode-private.hh"
B
Behdad Esfahbod 已提交
34

B
Behdad Esfahbod 已提交
35 36


B
Behdad Esfahbod 已提交
37 38 39 40
/*
 * hb_unicode_funcs_t
 */

41
/* Stub combining-class callback: ignores all of its arguments and reports
 * HB_UNICODE_COMBINING_CLASS_NOT_REORDERED for every codepoint. */
static hb_unicode_combining_class_t
hb_unicode_combining_class_nil (hb_unicode_funcs_t *ufuncs    HB_UNUSED,
				hb_codepoint_t      unicode   HB_UNUSED,
				void               *user_data HB_UNUSED)
{
  const hb_unicode_combining_class_t fallback = HB_UNICODE_COMBINING_CLASS_NOT_REORDERED;
  return fallback;
}

B
Behdad Esfahbod 已提交
49
static unsigned int
50 51 52
hb_unicode_eastasian_width_nil (hb_unicode_funcs_t *ufuncs    HB_UNUSED,
				hb_codepoint_t      unicode   HB_UNUSED,
				void               *user_data HB_UNUSED)
B
Behdad Esfahbod 已提交
53 54 55 56 57
{
  return 1;
}

/* Stub general-category callback: ignores all of its arguments and reports
 * HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER for every codepoint. */
static hb_unicode_general_category_t
hb_unicode_general_category_nil (hb_unicode_funcs_t *ufuncs    HB_UNUSED,
				 hb_codepoint_t      unicode   HB_UNUSED,
				 void               *user_data HB_UNUSED)
{
  const hb_unicode_general_category_t fallback = HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER;
  return fallback;
}

/* Stub mirroring callback: performs no mirroring and hands the input
 * codepoint back unchanged.
 *
 * ufuncs and user_data are ignored.  unicode is the return value, so it is
 * deliberately NOT annotated HB_UNUSED. */
static hb_codepoint_t
hb_unicode_mirroring_nil (hb_unicode_funcs_t *ufuncs    HB_UNUSED,
			  hb_codepoint_t      unicode,
			  void               *user_data HB_UNUSED)
{
  return unicode;
}

/* Stub script callback: ignores all of its arguments and reports
 * HB_SCRIPT_UNKNOWN for every codepoint. */
static hb_script_t
hb_unicode_script_nil (hb_unicode_funcs_t *ufuncs    HB_UNUSED,
		       hb_codepoint_t      unicode   HB_UNUSED,
		       void               *user_data HB_UNUSED)
{
  const hb_script_t fallback = HB_SCRIPT_UNKNOWN;
  return fallback;
}
80

81
/* Stub composition callback: never composes.  Always returns false and
 * does not write through ab. */
static hb_bool_t
hb_unicode_compose_nil (hb_unicode_funcs_t *ufuncs    HB_UNUSED,
			hb_codepoint_t      a         HB_UNUSED,
			hb_codepoint_t      b         HB_UNUSED,
			hb_codepoint_t     *ab        HB_UNUSED,
			void               *user_data HB_UNUSED)
{
  const hb_bool_t composed = false;
  return composed;
}

/* Stub decomposition callback: never decomposes.  Always returns false and
 * does not write through a or b. */
static hb_bool_t
hb_unicode_decompose_nil (hb_unicode_funcs_t *ufuncs    HB_UNUSED,
			  hb_codepoint_t      ab        HB_UNUSED,
			  hb_codepoint_t     *a         HB_UNUSED,
			  hb_codepoint_t     *b         HB_UNUSED,
			  void               *user_data HB_UNUSED)
{
  const hb_bool_t decomposed_ok = false;
  return decomposed_ok;
}

101

102 103 104 105 106 107 108 109 110
/* Stub compatibility-decomposition callback: always returns 0 and does not
 * write to the decomposed buffer. */
static unsigned int
hb_unicode_decompose_compatibility_nil (hb_unicode_funcs_t *ufuncs     HB_UNUSED,
					hb_codepoint_t      u          HB_UNUSED,
					hb_codepoint_t     *decomposed HB_UNUSED,
					void               *user_data  HB_UNUSED)
{
  const unsigned int written = 0;
  return written;
}

111

112 113 114
/* Returns a pointer to the _hb_unicode_funcs_default object (declared
 * elsewhere), with its const qualifier cast away so that it fits the
 * non-const public pointer type. */
hb_unicode_funcs_t *
hb_unicode_funcs_get_default (void)
{
  const hb_unicode_funcs_t *default_funcs = &_hb_unicode_funcs_default;
  return const_cast<hb_unicode_funcs_t *> (default_funcs);
}

B
Behdad Esfahbod 已提交
118
/* Creates a new hb_unicode_funcs_t that starts out with the callbacks of
 * `parent`.
 *
 * parent: object to inherit from; when NULL the empty (nil) funcs object
 *         is used instead.  The parent is made immutable and a reference
 *         to it is stored in the new object.
 *
 * Returns the new object, or the empty funcs object when
 * hb_object_create() fails. */
hb_unicode_funcs_t *
hb_unicode_funcs_create (hb_unicode_funcs_t *parent)
{
  hb_unicode_funcs_t *ufuncs = hb_object_create<hb_unicode_funcs_t> ();
  if (!ufuncs)
    return hb_unicode_funcs_get_empty ();

  /* Fall back to the nil object so that ->parent is never NULL. */
  hb_unicode_funcs_t *base = parent ? parent : hb_unicode_funcs_get_empty ();

  hb_unicode_funcs_make_immutable (base);
  ufuncs->parent = hb_unicode_funcs_reference (base);

  /* Start with the parent's callback table. */
  ufuncs->func = base->func;

  /* Copying the parent's user_data is safe: we hold a reference to the
   * parent and it is immutable.  The destroy notifiers are intentionally
   * not copied. */
  ufuncs->user_data = base->user_data;

  return ufuncs;
}

142

143 144 145 146 147
/* The shared "empty" funcs object returned by hb_unicode_funcs_get_empty().
 * It has no parent, is flagged immutable, and its callback table is filled
 * with one hb_unicode_<name>_nil stub per callback listed in
 * HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS. */
extern HB_INTERNAL const hb_unicode_funcs_t _hb_unicode_funcs_nil;
const hb_unicode_funcs_t _hb_unicode_funcs_nil = {
  HB_OBJECT_HEADER_STATIC,

  NULL, /* parent */
  true, /* immutable */
  {
    /* Each callback name expands to "hb_unicode_<name>_nil,". */
#define HB_UNICODE_FUNC_IMPLEMENT(name) hb_unicode_##name##_nil,
    HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
#undef HB_UNICODE_FUNC_IMPLEMENT
  }
};
155

156 157 158
/* Returns a pointer to the static _hb_unicode_funcs_nil object, with its
 * const qualifier cast away so that it fits the non-const public pointer
 * type. */
hb_unicode_funcs_t *
hb_unicode_funcs_get_empty (void)
{
  const hb_unicode_funcs_t *nil_funcs = &_hb_unicode_funcs_nil;
  return const_cast<hb_unicode_funcs_t *> (nil_funcs);
}

B
Behdad Esfahbod 已提交
162 163 164
/* Takes a reference on ufuncs by delegating to hb_object_reference() and
 * returns whatever that helper returns. */
hb_unicode_funcs_t *
hb_unicode_funcs_reference (hb_unicode_funcs_t *ufuncs)
{
  hb_unicode_funcs_t *referenced = hb_object_reference (ufuncs);
  return referenced;
}

/* Drops a reference on ufuncs.  When hb_object_destroy() returns true the
 * object is torn down: every registered per-callback destroy notifier is
 * invoked with its user_data, the parent is destroyed in turn, and the
 * object's memory is freed. */
void
hb_unicode_funcs_destroy (hb_unicode_funcs_t *ufuncs)
{
  /* hb_object_destroy() decides whether teardown should happen now. */
  if (!hb_object_destroy (ufuncs))
    return;

  /* Expanded once per callback: run its destroy notifier if one is set. */
#define HB_UNICODE_FUNC_IMPLEMENT(name) \
  if (ufuncs->destroy.name) \
    ufuncs->destroy.name (ufuncs->user_data.name);
  HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
#undef HB_UNICODE_FUNC_IMPLEMENT

  /* Release the reference taken on the parent at creation time. */
  hb_unicode_funcs_destroy (ufuncs->parent);

  free (ufuncs);
}

183 184 185 186
/* Attaches `data` to ufuncs under `key`; a thin wrapper that forwards all
 * arguments to hb_object_set_user_data() and returns its result. */
hb_bool_t
hb_unicode_funcs_set_user_data (hb_unicode_funcs_t *ufuncs,
				hb_user_data_key_t *key,
				void *              data,
				hb_destroy_func_t   destroy,
				hb_bool_t           replace)
{
  hb_bool_t stored = hb_object_set_user_data (ufuncs, key, data, destroy, replace);
  return stored;
}

/* Looks up the data attached to ufuncs under `key`; a thin wrapper that
 * forwards to hb_object_get_user_data() and returns its result. */
void *
hb_unicode_funcs_get_user_data (hb_unicode_funcs_t *ufuncs,
				hb_user_data_key_t *key)
{
  void *found = hb_object_get_user_data (ufuncs, key);
  return found;
}


201 202 203
/* Sets the immutable flag on ufuncs.  Objects for which
 * hb_object_is_inert() returns true are left untouched. */
void
hb_unicode_funcs_make_immutable (hb_unicode_funcs_t *ufuncs)
{
  if (!hb_object_is_inert (ufuncs))
    ufuncs->immutable = true;
}

B
Behdad Esfahbod 已提交
210 211 212 213 214 215
/* Returns the current value of the object's immutable flag. */
hb_bool_t
hb_unicode_funcs_is_immutable (hb_unicode_funcs_t *ufuncs)
{
  hb_bool_t frozen = ufuncs->immutable;
  return frozen;
}

216 217 218
/* Returns the parent of ufuncs, or the empty funcs object when no parent
 * is set.  No new reference is taken here. */
hb_unicode_funcs_t *
hb_unicode_funcs_get_parent (hb_unicode_funcs_t *ufuncs)
{
  if (ufuncs->parent)
    return ufuncs->parent;

  return hb_unicode_funcs_get_empty ();
}

B
Behdad Esfahbod 已提交
222

B
Behdad Esfahbod 已提交
223
#define HB_UNICODE_FUNC_IMPLEMENT(name)						\
B
Behdad Esfahbod 已提交
224 225 226
										\
void										\
hb_unicode_funcs_set_##name##_func (hb_unicode_funcs_t		   *ufuncs,	\
227
				    hb_unicode_##name##_func_t	    func,	\
B
Behdad Esfahbod 已提交
228 229 230 231 232 233 234 235 236 237
				    void			   *user_data,	\
				    hb_destroy_func_t		    destroy)	\
{										\
  if (ufuncs->immutable)							\
    return;									\
										\
  if (ufuncs->destroy.name)							\
    ufuncs->destroy.name (ufuncs->user_data.name);				\
										\
  if (func) {									\
238
    ufuncs->func.name = func;							\
B
Behdad Esfahbod 已提交
239 240 241
    ufuncs->user_data.name = user_data;						\
    ufuncs->destroy.name = destroy;						\
  } else {									\
242
    ufuncs->func.name = ufuncs->parent->func.name;				\
B
Behdad Esfahbod 已提交
243 244 245
    ufuncs->user_data.name = ufuncs->parent->user_data.name;			\
    ufuncs->destroy.name = NULL;						\
  }										\
B
Behdad Esfahbod 已提交
246 247
}

B
Behdad Esfahbod 已提交
248
HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
B
Behdad Esfahbod 已提交
249 250 251 252
#undef HB_UNICODE_FUNC_IMPLEMENT


#define HB_UNICODE_FUNC_IMPLEMENT(return_type, name)				\
B
Behdad Esfahbod 已提交
253 254
										\
return_type									\
255 256
hb_unicode_##name (hb_unicode_funcs_t *ufuncs,					\
		   hb_codepoint_t      unicode)					\
B
Behdad Esfahbod 已提交
257
{										\
B
Behdad Esfahbod 已提交
258
  return ufuncs->name (unicode);						\
B
Behdad Esfahbod 已提交
259
}
B
Behdad Esfahbod 已提交
260
HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS_SIMPLE
B
Behdad Esfahbod 已提交
261
#undef HB_UNICODE_FUNC_IMPLEMENT
B
Behdad Esfahbod 已提交
262

263 264 265 266 267 268
/* Public composition entry point: forwards (a, b, ab) to the compose()
 * member of ufuncs and returns its result. */
hb_bool_t
hb_unicode_compose (hb_unicode_funcs_t *ufuncs,
		    hb_codepoint_t      a,
		    hb_codepoint_t      b,
		    hb_codepoint_t     *ab)
{
  hb_bool_t composed = ufuncs->compose (a, b, ab);
  return composed;
}

/* Public decomposition entry point: forwards (ab, a, b) to the decompose()
 * member of ufuncs and returns its result. */
hb_bool_t
hb_unicode_decompose (hb_unicode_funcs_t *ufuncs,
		      hb_codepoint_t      ab,
		      hb_codepoint_t     *a,
		      hb_codepoint_t     *b)
{
  hb_bool_t decomposed_ok = ufuncs->decompose (ab, a, b);
  return decomposed_ok;
}
B
Behdad Esfahbod 已提交
280

281 282 283 284 285
/* Public compatibility-decomposition entry point: forwards (u, decomposed)
 * to the decompose_compatibility() member of ufuncs and returns its
 * result. */
unsigned int
hb_unicode_decompose_compatibility (hb_unicode_funcs_t *ufuncs,
				    hb_codepoint_t      u,
				    hb_codepoint_t     *decomposed)
{
  unsigned int count = ufuncs->decompose_compatibility (u, decomposed);
  return count;
}
B
Behdad Esfahbod 已提交
288 289 290


unsigned int
291
hb_unicode_funcs_t::modified_combining_class (hb_codepoint_t unicode)
B
Behdad Esfahbod 已提交
292
{
293
  int c = combining_class (unicode);
B
Behdad Esfahbod 已提交
294 295 296 297 298 299 300 301 302

  if (unlikely (hb_in_range<int> (c, 27, 33)))
  {
    /* Modify the combining-class to suit Arabic better.  See:
     * http://unicode.org/faq/normalization.html#8
     * http://unicode.org/faq/normalization.html#9
     */
    c = c == 33 ? 27 : c + 1;
  }
303
  else if (unlikely (hb_in_range<int> (c, 10, 26)))
B
Behdad Esfahbod 已提交
304 305 306
  {
    /* The equivalent fix for Hebrew is more complex.
     *
307
     * We permute the "fixed-position" classes 10-26 into the order
B
Behdad Esfahbod 已提交
308 309 310 311 312 313 314 315 316 317
     * described in the SBL Hebrew manual:
     *
     * http://www.sbl-site.org/Fonts/SBLHebrewUserManual1.5x.pdf
     *
     * (as recommended by:
     *  http://forum.fontlab.com/archive-old-microsoft-volt-group/vista-and-diacritic-ordering-t6751.0.html)
     *
     * More details here:
     * https://bugzilla.mozilla.org/show_bug.cgi?id=662055
     */
318
    static const int permuted_hebrew_classes[26 - 10 + 1] = {
B
Behdad Esfahbod 已提交
319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334
      /* 10 sheva */        22,
      /* 11 hataf segol */  15,
      /* 12 hataf patah */  16,
      /* 13 hataf qamats */ 17,
      /* 14 hiriq */        23,
      /* 15 tsere */        18,
      /* 16 segol */        19,
      /* 17 patah */        20,
      /* 18 qamats */       21,
      /* 19 holam */        14,
      /* 20 qubuts */       24,
      /* 21 dagesh */       12,
      /* 22 meteg */        25,
      /* 23 rafe */         13,
      /* 24 shin dot */     10,
      /* 25 sin dot */      11,
335
      /* 26 point varika */ 26,
B
Behdad Esfahbod 已提交
336 337 338
    };
    c = permuted_hebrew_classes[c - 10];
  }
339 340 341 342 343 344
  else if (unlikely (unicode == 0x0E3A)) /* THAI VOWEL SIGN PHINTHU */
  {
    /* Assign 104, so it reorders after the THAI ccc=103 marks.
     * Uniscribe does this. */
    c = 104;
  }
345 346 347 348 349 350 351 352
  else if (unlikely (hb_in_range<hb_codepoint_t> (unicode, 0x0C55, 0x0C56)))
  {
    /* Telugu length marks.
     * These are the only matras in the main Indic script range that have
     * a non-zero ccc.  That makes them reorder with the Halant that is
     * ccc=9.  Just zero them, we don't need them in our Indic shaper. */
    c = 0;
  }
B
Behdad Esfahbod 已提交
353 354 355

  return c;
}