hb-unicode.cc 10.2 KB
Newer Older
B
Behdad Esfahbod 已提交
1
/*
B
Behdad Esfahbod 已提交
2
 * Copyright © 2009  Red Hat, Inc.
3
 * Copyright © 2011 Codethink Limited
B
Behdad Esfahbod 已提交
4
 * Copyright © 2010,2011  Google, Inc.
B
Behdad Esfahbod 已提交
5
 *
B
Behdad Esfahbod 已提交
6
 *  This is part of HarfBuzz, a text shaping library.
B
Behdad Esfahbod 已提交
7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and its documentation for any purpose, provided that the
 * above copyright notice and the following two paragraphs appear in
 * all copies of this software.
 *
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 *
 * Red Hat Author(s): Behdad Esfahbod
27
 * Codethink Author(s): Ryan Lortie
28
 * Google Author(s): Behdad Esfahbod
B
Behdad Esfahbod 已提交
29 30
 */

31
#include "hb-private.hh"
B
Behdad Esfahbod 已提交
32

33
#include "hb-unicode-private.hh"
B
Behdad Esfahbod 已提交
34

B
Behdad Esfahbod 已提交
35 36


B
Behdad Esfahbod 已提交
37 38 39 40
/*
 * hb_unicode_funcs_t
 */

B
Behdad Esfahbod 已提交
41
static unsigned int
42 43 44
hb_unicode_combining_class_nil (hb_unicode_funcs_t *ufuncs    HB_UNUSED,
				hb_codepoint_t      unicode   HB_UNUSED,
				void               *user_data HB_UNUSED)
B
Behdad Esfahbod 已提交
45 46
{
  return 0;
47 48
}

B
Behdad Esfahbod 已提交
49
static unsigned int
50 51 52
hb_unicode_eastasian_width_nil (hb_unicode_funcs_t *ufuncs    HB_UNUSED,
				hb_codepoint_t      unicode   HB_UNUSED,
				void               *user_data HB_UNUSED)
B
Behdad Esfahbod 已提交
53 54 55 56 57
{
  return 1;
}

/* No-op general-category callback (the "_nil" implementation).
 * Ignores all of its arguments and classifies every code point as
 * HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER. */
static hb_unicode_general_category_t
hb_unicode_general_category_nil (hb_unicode_funcs_t *ufuncs    HB_UNUSED,
                                 hb_codepoint_t      unicode   HB_UNUSED,
                                 void               *user_data HB_UNUSED)
{
  return HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER;
}

/* No-op mirroring callback (the "_nil" implementation).
 * Treats every code point as its own mirror image: the input is returned
 * unchanged.  `unicode` is therefore genuinely used and must not carry
 * the HB_UNUSED annotation; `ufuncs` and `user_data` are ignored. */
static hb_codepoint_t
hb_unicode_mirroring_nil (hb_unicode_funcs_t *ufuncs    HB_UNUSED,
                          hb_codepoint_t      unicode,
                          void               *user_data HB_UNUSED)
{
  return unicode;
}

/* No-op script callback (the "_nil" implementation).
 * Ignores all of its arguments and reports HB_SCRIPT_UNKNOWN for every
 * code point. */
static hb_script_t
hb_unicode_script_nil (hb_unicode_funcs_t *ufuncs    HB_UNUSED,
                       hb_codepoint_t      unicode   HB_UNUSED,
                       void               *user_data HB_UNUSED)
{
  return HB_SCRIPT_UNKNOWN;
}
80

81
/* No-op composition callback (the "_nil" implementation).
 * Never composes anything: returns false and leaves *ab untouched. */
static hb_bool_t
hb_unicode_compose_nil (hb_unicode_funcs_t *ufuncs    HB_UNUSED,
                        hb_codepoint_t      a         HB_UNUSED,
                        hb_codepoint_t      b         HB_UNUSED,
                        hb_codepoint_t     *ab        HB_UNUSED,
                        void               *user_data HB_UNUSED)
{
  return false;
}

/* No-op decomposition callback (the "_nil" implementation).
 * Never decomposes anything: returns false and leaves *a and *b
 * untouched. */
static hb_bool_t
hb_unicode_decompose_nil (hb_unicode_funcs_t *ufuncs    HB_UNUSED,
                          hb_codepoint_t      ab        HB_UNUSED,
                          hb_codepoint_t     *a         HB_UNUSED,
                          hb_codepoint_t     *b         HB_UNUSED,
                          void               *user_data HB_UNUSED)
{
  return false;
}

101

102

103 104 105
/* Returns a pointer to the _hb_unicode_funcs_default object.
 * That object is declared const; constness is cast away only so the
 * pointer fits the public, non-const API type. */
hb_unicode_funcs_t *
hb_unicode_funcs_get_default (void)
{
  const hb_unicode_funcs_t *default_funcs = &_hb_unicode_funcs_default;
  return const_cast<hb_unicode_funcs_t *> (default_funcs);
}

B
Behdad Esfahbod 已提交
109
/* Creates a new unicode-funcs object that starts out with the callbacks
 * of `parent`.
 *
 * parent: object to inherit from; NULL selects the empty (nil) object.
 *         The parent is made immutable and a reference to it is taken.
 *
 * Returns the new object, or the empty object when hb_object_create()
 * does not deliver one. */
hb_unicode_funcs_t *
hb_unicode_funcs_create (hb_unicode_funcs_t *parent)
{
  hb_unicode_funcs_t *ufuncs = hb_object_create<hb_unicode_funcs_t> ();
  if (!ufuncs)
    return hb_unicode_funcs_get_empty ();

  if (!parent)
    parent = hb_unicode_funcs_get_empty ();

  /* Freeze the parent before borrowing from it, then keep it alive for
   * as long as the child exists. */
  hb_unicode_funcs_make_immutable (parent);
  ufuncs->parent = hb_unicode_funcs_reference (parent);

  /* Inherit every callback together with its user_data.  Sharing the
   * user_data is safe because we hold a reference on the (now immutable)
   * parent.  The destroy notifiers are deliberately NOT copied: they
   * remain owned by the parent. */
  ufuncs->func = parent->func;
  ufuncs->user_data = parent->user_data;

  return ufuncs;
}

133

134 135 136 137 138
/* The statically allocated "empty" unicode-funcs object.
 *
 * It has no parent, is flagged immutable from the start, and its callback
 * table is filled with the hb_unicode_<name>_nil stubs, one per entry of
 * HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS.  Members that follow the callback
 * table are not listed here and are therefore zero-initialised. */
extern HB_INTERNAL const hb_unicode_funcs_t _hb_unicode_funcs_nil;
const hb_unicode_funcs_t _hb_unicode_funcs_nil = {
  HB_OBJECT_HEADER_STATIC,

  NULL, /* parent */
  true, /* immutable */
  {
    /* One "hb_unicode_<name>_nil," initializer per callback. */
#define HB_UNICODE_FUNC_IMPLEMENT(name) hb_unicode_##name##_nil,
    HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
#undef HB_UNICODE_FUNC_IMPLEMENT
  }
};
146

147 148 149
/* Returns a pointer to the static _hb_unicode_funcs_nil object.
 * That object is declared const; constness is cast away only so the
 * pointer fits the public, non-const API type. */
hb_unicode_funcs_t *
hb_unicode_funcs_get_empty (void)
{
  const hb_unicode_funcs_t *nil_funcs = &_hb_unicode_funcs_nil;
  return const_cast<hb_unicode_funcs_t *> (nil_funcs);
}

B
Behdad Esfahbod 已提交
153 154 155
/* Takes a reference on `ufuncs` by delegating to hb_object_reference()
 * and returns whatever that helper returns. */
hb_unicode_funcs_t *
hb_unicode_funcs_reference (hb_unicode_funcs_t *ufuncs)
{
  return hb_object_reference (ufuncs);
}

/* Drops a reference on `ufuncs` and tears the object down when
 * hb_object_destroy() says so.
 *
 * Teardown order: run every installed per-callback destroy notifier on
 * its user_data, release the reference held on the parent, then free the
 * object itself. */
void
hb_unicode_funcs_destroy (hb_unicode_funcs_t *ufuncs)
{
  /* NOTE(review): presumably hb_object_destroy() returns false while other
   * references remain (or for static/NULL objects) -- confirm. */
  if (!hb_object_destroy (ufuncs))
    return;

  /* Expands to one notifier call per callback slot. */
#define HB_UNICODE_FUNC_IMPLEMENT(name) \
  if (ufuncs->destroy.name) \
    ufuncs->destroy.name (ufuncs->user_data.name);
    HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
#undef HB_UNICODE_FUNC_IMPLEMENT

  hb_unicode_funcs_destroy (ufuncs->parent);

  free (ufuncs);
}

174 175 176 177
/* Attaches `data` to `ufuncs` under `key`.
 * Thin wrapper: all arguments are forwarded unchanged to
 * hb_object_set_user_data(), whose result is returned. */
hb_bool_t
hb_unicode_funcs_set_user_data (hb_unicode_funcs_t *ufuncs,
                                hb_user_data_key_t *key,
                                void *              data,
                                hb_destroy_func_t   destroy,
                                hb_bool_t           replace)
{
  return hb_object_set_user_data (ufuncs, key, data, destroy, replace);
}

/* Looks up the data stored on `ufuncs` under `key`.
 * Thin wrapper: forwards to hb_object_get_user_data() and returns its
 * result. */
void *
hb_unicode_funcs_get_user_data (hb_unicode_funcs_t *ufuncs,
                                hb_user_data_key_t *key)
{
  return hb_object_get_user_data (ufuncs, key);
}


192 193 194
/* Marks `ufuncs` as immutable so that the per-callback setters refuse to
 * modify it.  Objects that hb_object_is_inert() reports as inert are left
 * untouched. */
void
hb_unicode_funcs_make_immutable (hb_unicode_funcs_t *ufuncs)
{
  if (!hb_object_is_inert (ufuncs))
    ufuncs->immutable = true;
}

B
Behdad Esfahbod 已提交
201 202 203 204 205 206
/* Reports the current value of the `immutable` flag of `ufuncs`. */
hb_bool_t
hb_unicode_funcs_is_immutable (hb_unicode_funcs_t *ufuncs)
{
  return ufuncs->immutable;
}

207 208 209
/* Returns the parent of `ufuncs`.  When no parent is recorded the empty
 * (nil) object is returned instead, so the result is never NULL.  No
 * reference is taken on the returned object. */
hb_unicode_funcs_t *
hb_unicode_funcs_get_parent (hb_unicode_funcs_t *ufuncs)
{
  if (ufuncs->parent)
    return ufuncs->parent;

  return hb_unicode_funcs_get_empty ();
}

B
Behdad Esfahbod 已提交
213

B
Behdad Esfahbod 已提交
214
/* Generates one setter, hb_unicode_funcs_set_<name>_func(), per callback
 * listed in HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS.
 *
 * Each generated setter:
 *   - does nothing at all when `ufuncs` is immutable (in that case the
 *     supplied `destroy` notifier is not invoked either);
 *   - otherwise first runs the previously installed destroy notifier, if
 *     any, on the previously installed user_data;
 *   - with a non-NULL `func`, installs func / user_data / destroy;
 *   - with a NULL `func`, falls back to the parent's callback and
 *     user_data and clears the destroy notifier, since that user_data
 *     stays owned by the parent. */
#define HB_UNICODE_FUNC_IMPLEMENT(name)						\
										\
void										\
hb_unicode_funcs_set_##name##_func (hb_unicode_funcs_t		   *ufuncs,	\
				    hb_unicode_##name##_func_t	    func,	\
				    void			   *user_data,	\
				    hb_destroy_func_t		    destroy)	\
{										\
  if (ufuncs->immutable)							\
    return;									\
										\
  /* Release whatever was installed in this slot before. */			\
  if (ufuncs->destroy.name)							\
    ufuncs->destroy.name (ufuncs->user_data.name);				\
										\
  if (!func) {									\
    /* Reset: inherit the slot from the parent. */				\
    ufuncs->func.name = ufuncs->parent->func.name;				\
    ufuncs->user_data.name = ufuncs->parent->user_data.name;			\
    ufuncs->destroy.name = NULL;						\
    return;									\
  }										\
										\
  ufuncs->func.name = func;							\
  ufuncs->user_data.name = user_data;						\
  ufuncs->destroy.name = destroy;						\
}

    HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
#undef HB_UNICODE_FUNC_IMPLEMENT


#define HB_UNICODE_FUNC_IMPLEMENT(return_type, name)				\
B
Behdad Esfahbod 已提交
244 245
										\
return_type									\
246 247
hb_unicode_##name (hb_unicode_funcs_t *ufuncs,					\
		   hb_codepoint_t      unicode)					\
B
Behdad Esfahbod 已提交
248
{										\
249
  return ufuncs->func.name (ufuncs, unicode, ufuncs->user_data.name);		\
B
Behdad Esfahbod 已提交
250
}
B
Behdad Esfahbod 已提交
251
    HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS_SIMPLE
B
Behdad Esfahbod 已提交
252
#undef HB_UNICODE_FUNC_IMPLEMENT
B
Behdad Esfahbod 已提交
253

254 255 256 257 258 259
/* Asks the installed compose callback to combine `a` and `b`.
 *
 * *ab is reset to 0 before the callback runs, so it holds 0 unless the
 * callback writes something else.  Returns the callback's result. */
hb_bool_t
hb_unicode_compose (hb_unicode_funcs_t *ufuncs,
                    hb_codepoint_t      a,
                    hb_codepoint_t      b,
                    hb_codepoint_t     *ab)
{
  *ab = 0;

  return ufuncs->func.compose (ufuncs, a, b, ab,
                               ufuncs->user_data.compose);
}

/* Decomposes `ab` into the pair (*a, *b).
 *
 * A table of hard-coded overrides is consulted first:
 *   - entries that assign *a and *b return true immediately;
 *   - U+0931 and U+0B94 return false immediately, without writing to
 *     *a or *b and without consulting the callback.
 * Any other code point is handed to the installed decompose callback,
 * with *a preset to `ab` and *b preset to 0; the callback's result is
 * returned. */
hb_bool_t
hb_unicode_decompose (hb_unicode_funcs_t *ufuncs,
                      hb_codepoint_t      ab,
                      hb_codepoint_t     *a,
                      hb_codepoint_t     *b)
{
  /* XXX FIXME, move these to complex shapers and propagate to normalizer. */
  switch (ab) {
    case 0x0AC9:  *a = 0x0AC5;  *b = 0x0ABE;  return true;

    /* Explicitly never decomposed. */
    case 0x0931:  return false;
    case 0x0B94:  return false;

    /* These ones have Unicode decompositions, but we do it
     * this way to be close to what Uniscribe does. */
    case 0x0DDA:  *a = 0x0DD9;  *b = 0x0DDA;  return true;
    case 0x0DDC:  *a = 0x0DD9;  *b = 0x0DDC;  return true;
    case 0x0DDD:  *a = 0x0DD9;  *b = 0x0DDD;  return true;
    case 0x0DDE:  *a = 0x0DD9;  *b = 0x0DDE;  return true;

    case 0x0F77:  *a = 0x0FB2;  *b = 0x0F81;  return true;
    case 0x0F79:  *a = 0x0FB3;  *b = 0x0F81;  return true;
    case 0x17BE:  *a = 0x17C1;  *b = 0x17BE;  return true;
    case 0x17BF:  *a = 0x17C1;  *b = 0x17BF;  return true;
    case 0x17C0:  *a = 0x17C1;  *b = 0x17C0;  return true;
    case 0x17C4:  *a = 0x17C1;  *b = 0x17C4;  return true;
    case 0x17C5:  *a = 0x17C1;  *b = 0x17C5;  return true;
    case 0x1925:  *a = 0x1920;  *b = 0x1923;  return true;
    case 0x1926:  *a = 0x1920;  *b = 0x1924;  return true;
    case 0x1B3C:  *a = 0x1B42;  *b = 0x1B3C;  return true;
    case 0x1112E: *a = 0x11127; *b = 0x11131; return true;
    case 0x1112F: *a = 0x11127; *b = 0x11132; return true;

    /* Not handled here -- noted as having no decomposition, together
     * with a position hint:
     *   0x0B57  -> RIGHT
     *   0x1C29  -> LEFT
     *   0xA9C0  -> RIGHT
     *   0x111BF -> ABOVE
     */
  }

  /* No override matched: preset the outputs and defer to the callback. */
  *a = ab;
  *b = 0;
  return ufuncs->func.decompose (ufuncs, ab, a, b,
                                 ufuncs->user_data.decompose);
}
B
Behdad Esfahbod 已提交
306

B
Behdad Esfahbod 已提交
307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361


/* Returns the combining class of `unicode` as reported by
 * hb_unicode_combining_class(), adjusted for two ranges of
 * "fixed-position" classes:
 *   - classes in the 27..33 range are rotated (each moves up by one and
 *     33 becomes 27);
 *   - classes in the 10..25 range are permuted through a lookup table.
 * Every other class is returned unchanged. */
unsigned int
_hb_unicode_modified_combining_class (hb_unicode_funcs_t *ufuncs,
                                      hb_codepoint_t      unicode)
{
  int klass = hb_unicode_combining_class (ufuncs, unicode);

  if (unlikely (hb_in_range<int> (klass, 27, 33)))
  {
    /* Modify the combining-class to suit Arabic better.  See:
     * http://unicode.org/faq/normalization.html#8
     * http://unicode.org/faq/normalization.html#9
     */
    return klass == 33 ? 27 : klass + 1;
  }

  if (unlikely (hb_in_range<int> (klass, 10, 25)))
  {
    /* The equivalent fix for Hebrew is more complex.
     *
     * We permute the "fixed-position" classes 10-25 into the order
     * described in the SBL Hebrew manual:
     *
     * http://www.sbl-site.org/Fonts/SBLHebrewUserManual1.5x.pdf
     *
     * (as recommended by:
     *  http://forum.fontlab.com/archive-old-microsoft-volt-group/vista-and-diacritic-ordering-t6751.0.html)
     *
     * More details here:
     * https://bugzilla.mozilla.org/show_bug.cgi?id=662055
     */
    /* Indexed by (original class - 10). */
    static const int permuted_hebrew_classes[25 - 10 + 1] = {
      /* 10 sheva */        22,
      /* 11 hataf segol */  15,
      /* 12 hataf patah */  16,
      /* 13 hataf qamats */ 17,
      /* 14 hiriq */        23,
      /* 15 tsere */        18,
      /* 16 segol */        19,
      /* 17 patah */        20,
      /* 18 qamats */       21,
      /* 19 holam */        14,
      /* 20 qubuts */       24,
      /* 21 dagesh */       12,
      /* 22 meteg */        25,
      /* 23 rafe */         13,
      /* 24 shin dot */     10,
      /* 25 sin dot */      11,
    };
    return permuted_hebrew_classes[klass - 10];
  }

  return klass;
}