/*
 * Copyright © 2010  Google, Inc.
 *
 *  This is part of HarfBuzz, a text shaping library.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and its documentation for any purpose, provided that the
 * above copyright notice and the following two paragraphs appear in
 * all copies of this software.
 *
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 *
 * Google Author(s): Behdad Esfahbod
 */

B
Behdad Esfahbod 已提交
27
#include "hb-ot-shape-complex-private.hh"
28
#include "hb-ot-shape-private.hh"
29 30


31 32

/* buffer var allocations */
33
/* Per-glyph scratch slot used as buffer->info[i].arabic_shaping_action().
 * It stores the shaping action (INIT, MEDI, ..., NONE) picked for the glyph by
 * the joining state machine and is read back when masks are applied or when
 * fallback shaping runs.
 * NOTE(review): presumably an 8-bit temporary var, judging by the name of the
 * accessor it aliases -- confirm against the buffer var definitions. */
#define arabic_shaping_action() complex_var_temporary_u8() /* arabic shaping action */
34 35


36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
/*
 * Bits used in the joining tables
 */
enum {
  /* Column indices of arabic_state_table[][]. */
  JOINING_TYPE_U		= 0,
  JOINING_TYPE_R		= 1,
  JOINING_TYPE_D		= 2,
  JOINING_TYPE_C		= JOINING_TYPE_D,
  JOINING_GROUP_ALAPH		= 3,
  JOINING_GROUP_DALATH_RISH	= 4,
  NUM_STATE_MACHINE_COLS	= 5,

  /* We deliberately don't have a JOINING_TYPE_L since that's unused in Unicode. */

  /* Not state-machine columns: T glyphs are skipped by the state machine and
   * X is resolved to another type by get_joining_type(). */
  JOINING_TYPE_T = 6,
  JOINING_TYPE_X = 7  /* means: use general-category to choose between U or T. */
};

/*
 * Joining types:
 */

58
#include "hb-ot-shape-complex-arabic-table.hh"
59

60
/* Returns the joining type of codepoint `u`.  `gen_cat` is the general
 * category of `u`; it is only consulted when the joining table has no
 * definite answer for the codepoint.
 *
 * Lookup order:
 *  1. joining_table[], unless the codepoint is outside the table or the
 *     entry is JOINING_TYPE_X;
 *  2. U+1800..U+18AF (Mongolian): other-letters, U+1807 and U+180A are D;
 *  3. U+200C is U and U+200D is C;
 *  4. otherwise non-spacing marks, enclosing marks and format characters
 *     are T, and everything else is U.
 */
static unsigned int get_joining_type (hb_codepoint_t u, hb_unicode_general_category_t gen_cat)
{
  if (likely (hb_in_range<hb_codepoint_t> (u, JOINING_TABLE_FIRST, JOINING_TABLE_LAST))) {
    unsigned int j_type = joining_table[u - JOINING_TABLE_FIRST];
    if (likely (j_type != JOINING_TYPE_X))
      return j_type;
  }

  /* Mongolian joining data is not in ArabicJoining.txt yet */
  if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x1800, 0x18AF)))
  {
    /* All letters, SIBE SYLLABLE BOUNDARY MARKER, and NIRUGU are D */
    if (gen_cat == HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER || u == 0x1807 || u == 0x180A)
      return JOINING_TYPE_D;
    /* Anything else in the block falls through to the general-category test. */
  }

  /* U+200C breaks joining (U); U+200D joins on both sides (C). */
  if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x200C, 0x200D))) {
    return u == 0x200C ? JOINING_TYPE_U : JOINING_TYPE_C;
  }

  return (FLAG(gen_cat) & (FLAG(HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK) | FLAG(HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) | FLAG(HB_UNICODE_GENERAL_CATEGORY_FORMAT))) ?
	 JOINING_TYPE_T : JOINING_TYPE_U;
}

B
Behdad Esfahbod 已提交
84 85 86 87 88 89
/* Maps codepoint `u` to its entry in shaping_table[] for the positional form
 * `shape`.  Only shapes 0..3 are looked up; for any other shape value, or a
 * codepoint outside the table's range, `u` is returned unchanged. */
static hb_codepoint_t get_arabic_shape (hb_codepoint_t u, unsigned int shape)
{
  bool in_table = likely (hb_in_range<hb_codepoint_t> (u, SHAPING_TABLE_FIRST, SHAPING_TABLE_LAST));

  if (!in_table)
    return u;
  if (shape >= 4)
    return u;

  return shaping_table[u - SHAPING_TABLE_FIRST][shape];
}
90

91 92 93 94 95 96 97 98 99 100
/* Looks up the ligature for the pair (`first`, `second`) in ligature_table[].
 * Returns the ligature value of the first matching entry, or 0 when `second`
 * is zero or no entry matches. */
static uint16_t get_ligature (hb_codepoint_t first, hb_codepoint_t second)
{
  if (unlikely (!second))
    return 0;

  for (unsigned row = 0; row < ARRAY_LENGTH (ligature_table); row++)
  {
    if (ligature_table[row].first != first)
      continue;

    for (unsigned col = 0; col < ARRAY_LENGTH (ligature_table[row].ligatures); col++)
    {
      if (ligature_table[row].ligatures[col].second == second)
	return ligature_table[row].ligatures[col].ligature;
    }
  }

  return 0;
}
101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137

/* Feature tags for the positional forms.  The array is indexed by shaping
 * action (INIT, MEDI, ...), so its order must match the action enum.  The
 * first COMMON_NUM_FEATURES entries are used for every script; all
 * SYRIAC_NUM_FEATURES entries are used when the script is Syriac.  The
 * trailing HB_TAG_NONE sits at index NONE. */
static const hb_tag_t arabic_syriac_features[] =
{
  HB_TAG('i','n','i','t'),
  HB_TAG('m','e','d','i'),
  HB_TAG('f','i','n','a'),
  HB_TAG('i','s','o','l'),
  /* Syriac */
  HB_TAG('m','e','d','2'),
  HB_TAG('f','i','n','2'),
  HB_TAG('f','i','n','3'),
  HB_TAG_NONE
};


/* Same order as the feature array */
/* Shaping actions.  Values 0..6 double as indices into
 * arabic_syriac_features[], so the numbering must mirror that array. */
enum {
  /* Forms shared by every script handled here. */
  INIT = 0,
  MEDI = 1,
  FINA = 2,
  ISOL = 3,

  /* Syriac */
  MED2 = 4,
  FIN2 = 5,
  FIN3 = 6,

  /* No positional feature for this glyph. */
  NONE = 7,

  /* Feature counts per script, and the overall count. */
  COMMON_NUM_FEATURES = ISOL + 1,
  SYRIAC_NUM_FEATURES = FIN3 + 1,
  TOTAL_NUM_FEATURES = NONE
};

/* Joining state machine.  Rows are states, columns are the joining type of
 * the current glyph (JOINING_TYPE_U .. JOINING_GROUP_DALATH_RISH).  Each
 * entry gives:
 *   prev_action - action to store on the previous non-transparent glyph,
 *                 or NONE to leave that glyph's action untouched;
 *   curr_action - action to store on the current glyph;
 *   next_state  - row to use for the following glyph.
 */
static const struct arabic_state_table_entry {
	uint8_t prev_action;
	uint8_t curr_action;
	uint16_t next_state;
} arabic_state_table[][NUM_STATE_MACHINE_COLS] =
{
  /*   jt_U,          jt_R,          jt_D,          jg_ALAPH,      jg_DALATH_RISH */

  /* State 0: prev was U, not willing to join. */
  { {NONE,NONE,0}, {NONE,ISOL,1}, {NONE,ISOL,2}, {NONE,ISOL,1}, {NONE,ISOL,6}, },

  /* State 1: prev was R or ISOL/ALAPH, not willing to join. */
  { {NONE,NONE,0}, {NONE,ISOL,1}, {NONE,ISOL,2}, {NONE,FIN2,5}, {NONE,ISOL,6}, },

  /* State 2: prev was D/ISOL, willing to join. */
  { {NONE,NONE,0}, {INIT,FINA,1}, {INIT,FINA,3}, {INIT,FINA,4}, {INIT,FINA,6}, },

  /* State 3: prev was D/FINA, willing to join. */
  { {NONE,NONE,0}, {MEDI,FINA,1}, {MEDI,FINA,3}, {MEDI,FINA,4}, {MEDI,FINA,6}, },

  /* State 4: prev was FINA ALAPH, not willing to join. */
  { {NONE,NONE,0}, {MED2,ISOL,1}, {MED2,ISOL,2}, {MED2,FIN2,5}, {MED2,ISOL,6}, },

  /* State 5: prev was FIN2/FIN3 ALAPH, not willing to join. */
  { {NONE,NONE,0}, {ISOL,ISOL,1}, {ISOL,ISOL,2}, {ISOL,FIN2,5}, {ISOL,ISOL,6}, },

  /* State 6: prev was DALATH/RISH, not willing to join. */
  { {NONE,NONE,0}, {NONE,ISOL,1}, {NONE,ISOL,2}, {NONE,FIN3,5}, {NONE,ISOL,6}, }
};


166 167

void
168
_hb_ot_shape_complex_collect_features_arabic (hb_ot_map_builder_t *map, const hb_segment_properties_t  *props)
169
{
170 171 172 173 174 175 176 177 178 179
  /* For Language forms (in ArabicOT speak), we do the iso/fina/medi/init together,
   * then rlig and calt each in their own stage.  This makes IranNastaliq's ALLAH
   * ligature work correctly. It's unfortunate though...
   *
   * This also makes Arial Bold in Windows7 work.  See:
   * https://bugzilla.mozilla.org/show_bug.cgi?id=644184
   *
   * TODO: Add test cases for these two.
   */

180
  map->add_bool_feature (HB_TAG('c','c','m','p'));
181
  map->add_bool_feature (HB_TAG('l','o','c','l'));
182

183
  map->add_gsub_pause (NULL, NULL);
184

185 186
  unsigned int num_features = props->script == HB_SCRIPT_SYRIAC ? SYRIAC_NUM_FEATURES : COMMON_NUM_FEATURES;
  for (unsigned int i = 0; i < num_features; i++)
187
    map->add_bool_feature (arabic_syriac_features[i], false);
188

189
  map->add_gsub_pause (NULL, NULL);
190

191 192
  map->add_bool_feature (HB_TAG('r','l','i','g'));
  map->add_gsub_pause (NULL, NULL);
193

194 195
  map->add_bool_feature (HB_TAG('c','a','l','t'));
  map->add_gsub_pause (NULL, NULL);
196 197

  /* ArabicOT spec enables 'cswh' for Arabic where as for basic shaper it's disabled by default. */
198
  map->add_bool_feature (HB_TAG('c','s','w','h'));
199 200
}

B
Behdad Esfahbod 已提交
201 202
/* Reports the normalization mode requested by the Arabic shaper; it is
 * always HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS. */
hb_ot_shape_normalization_mode_t
_hb_ot_shape_complex_normalization_preference_arabic (void)
{
  return HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS;
}

B
Behdad Esfahbod 已提交
207 208

static void
209
arabic_fallback_shape (hb_font_t *font, hb_buffer_t *buffer)
B
Behdad Esfahbod 已提交
210 211
{
  unsigned int count = buffer->len;
212
  hb_codepoint_t glyph;
213 214

  /* Shape to presentation forms */
215 216 217 218 219 220
  for (unsigned int i = 0; i < count; i++) {
    hb_codepoint_t u = buffer->info[i].codepoint;
    hb_codepoint_t shaped = get_arabic_shape (u, buffer->info[i].arabic_shaping_action());
    if (shaped != u && hb_font_get_glyph (font, shaped, 0, &glyph))
      buffer->info[i].codepoint = shaped;
  }
221 222 223 224

  /* Mandatory ligatures */
  buffer->clear_output ();
  for (buffer->idx = 0; buffer->idx + 1 < count;) {
B
Minor  
Behdad Esfahbod 已提交
225 226
    hb_codepoint_t ligature = get_ligature (buffer->info[buffer->idx].codepoint,
					    buffer->info[buffer->idx + 1].codepoint);
227
    if (likely (!ligature) || !(hb_font_get_glyph (font, ligature, 0, &glyph))) {
228 229 230 231 232 233 234 235 236 237 238 239
      buffer->next_glyph ();
      continue;
    }

    buffer->replace_glyphs (2, 1, &ligature);

    /* Technically speaking we can skip marks and stuff, like the GSUB path does.
     * But who cares, we're in fallback! */
  }
  for (; buffer->idx < count;)
      buffer->next_glyph ();
  buffer->swap_buffers ();
B
Behdad Esfahbod 已提交
240 241
}

242
void
243
_hb_ot_shape_complex_setup_masks_arabic (hb_ot_map_t *map, hb_buffer_t *buffer, hb_font_t *font)
244
{
245
  unsigned int count = buffer->len;
246 247
  unsigned int prev = 0, state = 0;

248 249
  HB_BUFFER_ALLOCATE_VAR (buffer, arabic_shaping_action);

B
Behdad Esfahbod 已提交
250 251
  for (unsigned int i = 0; i < count; i++)
  {
252
    unsigned int this_type = get_joining_type (buffer->info[i].codepoint, _hb_glyph_info_get_general_category (&buffer->info[i]));
253

254
    if (unlikely (this_type == JOINING_TYPE_T)) {
255
      buffer->info[i].arabic_shaping_action() = NONE;
256
      continue;
257
    }
258

B
Behdad Esfahbod 已提交
259
    const arabic_state_table_entry *entry = &arabic_state_table[state][this_type];
260 261

    if (entry->prev_action != NONE)
262
      buffer->info[prev].arabic_shaping_action() = entry->prev_action;
263

264
    buffer->info[i].arabic_shaping_action() = entry->curr_action;
265 266 267 268 269

    prev = i;
    state = entry->next_state;
  }

B
Behdad Esfahbod 已提交
270
  hb_mask_t mask_array[TOTAL_NUM_FEATURES + 1] = {0};
B
Behdad Esfahbod 已提交
271
  hb_mask_t total_masks = 0;
272
  unsigned int num_masks = buffer->props.script == HB_SCRIPT_SYRIAC ? SYRIAC_NUM_FEATURES : COMMON_NUM_FEATURES;
B
Behdad Esfahbod 已提交
273
  for (unsigned int i = 0; i < num_masks; i++) {
274
    mask_array[i] = map->get_1_mask (arabic_syriac_features[i]);
B
Behdad Esfahbod 已提交
275 276
    total_masks |= mask_array[i];
  }
277

B
Behdad Esfahbod 已提交
278 279 280 281 282 283 284 285 286 287 288 289
  if (total_masks) {
    /* Has OpenType tables */
    for (unsigned int i = 0; i < count; i++)
      buffer->info[i].mask |= mask_array[buffer->info[i].arabic_shaping_action()];
  } else if (buffer->props.script == HB_SCRIPT_ARABIC) {
    /* Fallback Arabic shaping to Presentation Forms */
    /* Pitfalls:
     * - This path fires if user force-set init/medi/fina/isol off,
     * - If font does not declare script 'arab', well, what to do?
     *   Most probably it's safe to assume that init/medi/fina/isol
     *   still mean Arabic shaping, although they do not have to.
     */
290
    arabic_fallback_shape (font, buffer);
B
Behdad Esfahbod 已提交
291
  }
292 293

  HB_BUFFER_DEALLOCATE_VAR (buffer, arabic_shaping_action);
294 295 296
}