hb-ot-shape-complex-arabic-fallback.hh 12.2 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29
/*
 * Copyright © 2012  Google, Inc.
 *
 *  This is part of HarfBuzz, a text shaping library.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and its documentation for any purpose, provided that the
 * above copyright notice and the following two paragraphs appear in
 * all copies of this software.
 *
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 *
 * Google Author(s): Behdad Esfahbod
 */

#ifndef HB_OT_SHAPE_COMPLEX_ARABIC_FALLBACK_HH
#define HB_OT_SHAPE_COMPLEX_ARABIC_FALLBACK_HH

30
#include "hb.hh"
31

32
#include "hb-ot-shape.hh"
33 34 35
#include "hb-ot-layout-gsub-table.hh"


36 37
/* Features ordered the same as the entries in shaping_table rows,
 * followed by rlig.  Don't change. */
38 39 40 41 42 43 44 45 46 47
static const hb_tag_t arabic_fallback_features[] =
{
  HB_TAG('i','n','i','t'),
  HB_TAG('m','e','d','i'),
  HB_TAG('f','i','n','a'),
  HB_TAG('i','s','o','l'),
  HB_TAG('r','l','i','g'),
};

static OT::SubstLookup *
B
Behdad Esfahbod 已提交
48
arabic_fallback_synthesize_lookup_single (const hb_ot_shape_plan_t *plan HB_UNUSED,
49 50 51 52 53 54 55 56 57 58 59 60 61
					  hb_font_t *font,
					  unsigned int feature_index)
{
  OT::GlyphID glyphs[SHAPING_TABLE_LAST - SHAPING_TABLE_FIRST + 1];
  OT::GlyphID substitutes[SHAPING_TABLE_LAST - SHAPING_TABLE_FIRST + 1];
  unsigned int num_glyphs = 0;

  /* Populate arrays */
  for (hb_codepoint_t u = SHAPING_TABLE_FIRST; u < SHAPING_TABLE_LAST + 1; u++)
  {
    hb_codepoint_t s = shaping_table[u - SHAPING_TABLE_FIRST][feature_index];
    hb_codepoint_t u_glyph, s_glyph;

62
    if (!s ||
63 64 65
	!hb_font_get_glyph (font, u, 0, &u_glyph) ||
	!hb_font_get_glyph (font, s, 0, &s_glyph) ||
	u_glyph == s_glyph ||
66
	u_glyph > 0xFFFFu || s_glyph > 0xFFFFu)
67 68 69 70 71 72 73 74
      continue;

    glyphs[num_glyphs].set (u_glyph);
    substitutes[num_glyphs].set (s_glyph);

    num_glyphs++;
  }

75
  if (!num_glyphs)
B
Behdad Esfahbod 已提交
76
    return nullptr;
77

78
  /* Bubble-sort or something equally good!
79
   * May not be good-enough for presidential candidate interviews, but good-enough for us... */
80
  hb_stable_sort (&glyphs[0], num_glyphs, (int(*)(const OT::GlyphID*, const OT::GlyphID *)) OT::GlyphID::cmp, &substitutes[0]);
81

82 83
  hb_supplier_t<OT::GlyphID> glyphs_supplier      (glyphs, num_glyphs);
  hb_supplier_t<OT::GlyphID> substitutes_supplier (substitutes, num_glyphs);
84 85 86

  /* Each glyph takes four bytes max, and there's some overhead. */
  char buf[(SHAPING_TABLE_LAST - SHAPING_TABLE_FIRST + 1) * 4 + 128];
87
  hb_serialize_context_t c (buf, sizeof (buf));
88 89 90 91 92 93 94 95 96
  OT::SubstLookup *lookup = c.start_serialize<OT::SubstLookup> ();
  bool ret = lookup->serialize_single (&c,
				       OT::LookupFlag::IgnoreMarks,
				       glyphs_supplier,
				       substitutes_supplier,
				       num_glyphs);
  c.end_serialize ();
  /* TODO sanitize the results? */

B
Behdad Esfahbod 已提交
97
  return ret ? c.copy<OT::SubstLookup> () : nullptr;
98 99 100
}

static OT::SubstLookup *
B
Behdad Esfahbod 已提交
101
arabic_fallback_synthesize_lookup_ligature (const hb_ot_shape_plan_t *plan HB_UNUSED,
102 103 104 105 106 107 108 109
					    hb_font_t *font)
{
  OT::GlyphID first_glyphs[ARRAY_LENGTH_CONST (ligature_table)];
  unsigned int first_glyphs_indirection[ARRAY_LENGTH_CONST (ligature_table)];
  unsigned int ligature_per_first_glyph_count_list[ARRAY_LENGTH_CONST (first_glyphs)];
  unsigned int num_first_glyphs = 0;

  /* We know that all our ligatures are 2-component */
B
Minor  
Behdad Esfahbod 已提交
110 111 112
  OT::GlyphID ligature_list[ARRAY_LENGTH_CONST (first_glyphs) * ARRAY_LENGTH_CONST(ligature_table[0].ligatures)];
  unsigned int component_count_list[ARRAY_LENGTH_CONST (ligature_list)];
  OT::GlyphID component_list[ARRAY_LENGTH_CONST (ligature_list) * 1/* One extra component per ligature */];
113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128
  unsigned int num_ligatures = 0;

  /* Populate arrays */

  /* Sort out the first-glyphs */
  for (unsigned int first_glyph_idx = 0; first_glyph_idx < ARRAY_LENGTH (first_glyphs); first_glyph_idx++)
  {
    hb_codepoint_t first_u = ligature_table[first_glyph_idx].first;
    hb_codepoint_t first_glyph;
    if (!hb_font_get_glyph (font, first_u, 0, &first_glyph))
      continue;
    first_glyphs[num_first_glyphs].set (first_glyph);
    ligature_per_first_glyph_count_list[num_first_glyphs] = 0;
    first_glyphs_indirection[num_first_glyphs] = first_glyph_idx;
    num_first_glyphs++;
  }
129
  hb_stable_sort (&first_glyphs[0], num_first_glyphs, (int(*)(const OT::GlyphID*, const OT::GlyphID *)) OT::GlyphID::cmp, &first_glyphs_indirection[0]);
130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147

  /* Now that the first-glyphs are sorted, walk again, populate ligatures. */
  for (unsigned int i = 0; i < num_first_glyphs; i++)
  {
    unsigned int first_glyph_idx = first_glyphs_indirection[i];

    for (unsigned int second_glyph_idx = 0; second_glyph_idx < ARRAY_LENGTH (ligature_table[0].ligatures); second_glyph_idx++)
    {
      hb_codepoint_t second_u   = ligature_table[first_glyph_idx].ligatures[second_glyph_idx].second;
      hb_codepoint_t ligature_u = ligature_table[first_glyph_idx].ligatures[second_glyph_idx].ligature;
      hb_codepoint_t second_glyph, ligature_glyph;
      if (!second_u ||
	  !hb_font_get_glyph (font, second_u,   0, &second_glyph) ||
	  !hb_font_get_glyph (font, ligature_u, 0, &ligature_glyph))
	continue;

      ligature_per_first_glyph_count_list[i]++;

B
Minor  
Behdad Esfahbod 已提交
148
      ligature_list[num_ligatures].set (ligature_glyph);
149 150 151 152 153 154
      component_count_list[num_ligatures] = 2;
      component_list[num_ligatures].set (second_glyph);
      num_ligatures++;
    }
  }

155
  if (!num_ligatures)
B
Behdad Esfahbod 已提交
156
    return nullptr;
157

158 159 160 161 162
  hb_supplier_t<OT::GlyphID>  first_glyphs_supplier                      (first_glyphs, num_first_glyphs);
  hb_supplier_t<unsigned int> ligature_per_first_glyph_count_supplier    (ligature_per_first_glyph_count_list, num_first_glyphs);
  hb_supplier_t<OT::GlyphID>  ligatures_supplier                         (ligature_list, num_ligatures);
  hb_supplier_t<unsigned int> component_count_supplier                   (component_count_list, num_ligatures);
  hb_supplier_t<OT::GlyphID>  component_supplier                         (component_list, num_ligatures);
163

B
Minor  
Behdad Esfahbod 已提交
164 165
  /* 16 bytes per ligature ought to be enough... */
  char buf[ARRAY_LENGTH_CONST (ligature_list) * 16 + 128];
166
  hb_serialize_context_t c (buf, sizeof (buf));
167 168 169 170 171 172 173 174 175 176 177 178
  OT::SubstLookup *lookup = c.start_serialize<OT::SubstLookup> ();
  bool ret = lookup->serialize_ligature (&c,
					 OT::LookupFlag::IgnoreMarks,
					 first_glyphs_supplier,
					 ligature_per_first_glyph_count_supplier,
					 num_first_glyphs,
					 ligatures_supplier,
					 component_count_supplier,
					 component_supplier);
  c.end_serialize ();
  /* TODO sanitize the results? */

B
Behdad Esfahbod 已提交
179
  return ret ? c.copy<OT::SubstLookup> () : nullptr;
180 181 182 183 184 185 186 187 188 189 190 191 192
}

static OT::SubstLookup *
arabic_fallback_synthesize_lookup (const hb_ot_shape_plan_t *plan,
				   hb_font_t *font,
				   unsigned int feature_index)
{
  if (feature_index < 4)
    return arabic_fallback_synthesize_lookup_single (plan, font, feature_index);
  else
    return arabic_fallback_synthesize_lookup_ligature (plan, font);
}

193
#define ARABIC_FALLBACK_MAX_LOOKUPS 5
194

195 196
struct arabic_fallback_plan_t
{
197 198 199
  unsigned int num_lookups;
  bool free_lookups;

200 201
  hb_mask_t mask_array[ARABIC_FALLBACK_MAX_LOOKUPS];
  OT::SubstLookup *lookup_array[ARABIC_FALLBACK_MAX_LOOKUPS];
B
Behdad Esfahbod 已提交
202
  OT::hb_ot_layout_lookup_accelerator_t accel_array[ARABIC_FALLBACK_MAX_LOOKUPS];
203 204
};

K
Ken Brown 已提交
205
#if defined(_WIN32) && !defined(HB_NO_WIN1256)
206 207 208 209 210 211 212
#define HB_WITH_WIN1256
#endif

#ifdef HB_WITH_WIN1256
#include "hb-ot-shape-complex-arabic-win1256.hh"
#endif

213 214
struct ManifestLookup
{
B
Minor  
Behdad Esfahbod 已提交
215
  public:
216
  OT::Tag tag;
217
  OT::OffsetTo<OT::SubstLookup> lookupOffset;
B
Minor  
Behdad Esfahbod 已提交
218 219
  public:
  DEFINE_SIZE_STATIC (6);
220 221 222 223
};
typedef OT::ArrayOf<ManifestLookup> Manifest;

static bool
224 225 226
arabic_fallback_plan_init_win1256 (arabic_fallback_plan_t *fallback_plan HB_UNUSED,
				   const hb_ot_shape_plan_t *plan HB_UNUSED,
				   hb_font_t *font HB_UNUSED)
227 228 229 230 231 232 233 234 235 236 237 238
{
#ifdef HB_WITH_WIN1256
  /* Does this font look like it's Windows-1256-encoded? */
  hb_codepoint_t g;
  if (!(hb_font_get_glyph (font, 0x0627u, 0, &g) && g == 199 /* ALEF */ &&
	hb_font_get_glyph (font, 0x0644u, 0, &g) && g == 225 /* LAM */ &&
	hb_font_get_glyph (font, 0x0649u, 0, &g) && g == 236 /* ALEF MAKSURA */ &&
	hb_font_get_glyph (font, 0x064Au, 0, &g) && g == 237 /* YEH */ &&
	hb_font_get_glyph (font, 0x0652u, 0, &g) && g == 250 /* SUKUN */))
    return false;

  const Manifest &manifest = reinterpret_cast<const Manifest&> (arabic_win1256_gsub_lookups.manifest);
239 240
  static_assert (sizeof (arabic_win1256_gsub_lookups.manifestData) / sizeof (ManifestLookup)
		 <= ARABIC_FALLBACK_MAX_LOOKUPS, "");
241 242 243 244 245 246
  /* TODO sanitize the table? */

  unsigned j = 0;
  unsigned int count = manifest.len;
  for (unsigned int i = 0; i < count; i++)
  {
247
    fallback_plan->mask_array[j] = plan->map.get_1_mask (manifest[i].tag);
248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272
    if (fallback_plan->mask_array[j])
    {
      fallback_plan->lookup_array[j] = const_cast<OT::SubstLookup*> (&(&manifest+manifest[i].lookupOffset));
      if (fallback_plan->lookup_array[j])
      {
	fallback_plan->accel_array[j].init (*fallback_plan->lookup_array[j]);
	j++;
      }
    }
  }

  fallback_plan->num_lookups = j;
  fallback_plan->free_lookups = false;

  return j > 0;
#else
  return false;
#endif
}

static bool
arabic_fallback_plan_init_unicode (arabic_fallback_plan_t *fallback_plan,
				   const hb_ot_shape_plan_t *plan,
				   hb_font_t *font)
{
273
  static_assert ((ARRAY_LENGTH_CONST(arabic_fallback_features) <= ARABIC_FALLBACK_MAX_LOOKUPS), "");
274
  unsigned int j = 0;
275
  for (unsigned int i = 0; i < ARRAY_LENGTH(arabic_fallback_features) ; i++)
276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294
  {
    fallback_plan->mask_array[j] = plan->map.get_1_mask (arabic_fallback_features[i]);
    if (fallback_plan->mask_array[j])
    {
      fallback_plan->lookup_array[j] = arabic_fallback_synthesize_lookup (plan, font, i);
      if (fallback_plan->lookup_array[j])
      {
	fallback_plan->accel_array[j].init (*fallback_plan->lookup_array[j]);
	j++;
      }
    }
  }

  fallback_plan->num_lookups = j;
  fallback_plan->free_lookups = true;

  return j > 0;
}

295 296 297 298 299 300
static arabic_fallback_plan_t *
arabic_fallback_plan_create (const hb_ot_shape_plan_t *plan,
			     hb_font_t *font)
{
  arabic_fallback_plan_t *fallback_plan = (arabic_fallback_plan_t *) calloc (1, sizeof (arabic_fallback_plan_t));
  if (unlikely (!fallback_plan))
301
    return const_cast<arabic_fallback_plan_t *> (&Null(arabic_fallback_plan_t));
302

303 304 305 306 307 308 309
  fallback_plan->num_lookups = 0;
  fallback_plan->free_lookups = false;

  /* Try synthesizing GSUB table using Unicode Arabic Presentation Forms,
   * in case the font has cmap entries for the presentation-forms characters. */
  if (arabic_fallback_plan_init_unicode (fallback_plan, plan, font))
    return fallback_plan;
310

311 312 313 314 315
  /* See if this looks like a Windows-1256-encoded font.  If it does, use a
   * hand-coded GSUB table. */
  if (arabic_fallback_plan_init_win1256 (fallback_plan, plan, font))
    return fallback_plan;

316
  assert (fallback_plan->num_lookups == 0);
317
  free (fallback_plan);
318
  return const_cast<arabic_fallback_plan_t *> (&Null(arabic_fallback_plan_t));
319 320 321 322 323
}

static void
arabic_fallback_plan_destroy (arabic_fallback_plan_t *fallback_plan)
{
324
  if (!fallback_plan || fallback_plan->num_lookups == 0)
325 326
    return;

327
  for (unsigned int i = 0; i < fallback_plan->num_lookups; i++)
328
    if (fallback_plan->lookup_array[i])
329
    {
330
      fallback_plan->accel_array[i].fini ();
331 332
      if (fallback_plan->free_lookups)
	free (fallback_plan->lookup_array[i]);
333
    }
334 335 336 337 338 339 340 341 342

  free (fallback_plan);
}

static void
arabic_fallback_plan_shape (arabic_fallback_plan_t *fallback_plan,
			    hb_font_t *font,
			    hb_buffer_t *buffer)
{
343
  OT::hb_ot_apply_context_t c (0, font, buffer);
344
  for (unsigned int i = 0; i < fallback_plan->num_lookups; i++)
345
    if (fallback_plan->lookup_array[i]) {
B
Behdad Esfahbod 已提交
346 347 348 349
      c.set_lookup_mask (fallback_plan->mask_array[i]);
      hb_ot_layout_substitute_lookup (&c,
				      *fallback_plan->lookup_array[i],
				      fallback_plan->accel_array[i]);
350 351 352 353 354
    }
}


#endif /* HB_OT_SHAPE_COMPLEX_ARABIC_FALLBACK_HH */