hb-ot-cmap-table.hh 31.7 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29
/*
 * Copyright © 2014  Google, Inc.
 *
 *  This is part of HarfBuzz, a text shaping library.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and its documentation for any purpose, provided that the
 * above copyright notice and the following two paragraphs appear in
 * all copies of this software.
 *
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 *
 * Google Author(s): Behdad Esfahbod
 */

#ifndef HB_OT_CMAP_TABLE_HH
#define HB_OT_CMAP_TABLE_HH

B
Behdad Esfahbod 已提交
30
#include "hb-open-type-private.hh"
31
#include "hb-set-private.hh"
32
#include "hb-subset-plan.hh"
33 34

/*
 * cmap -- Character to Glyph Index Mapping
 * https://docs.microsoft.com/en-us/typography/opentype/spec/cmap
 */
#define HB_OT_TAG_cmap HB_TAG('c','m','a','p')


41 42 43
namespace OT {


44 45 46 47 48 49 50 51 52 53 54
struct CmapSubtableFormat0
{
  inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
  {
    hb_codepoint_t gid = codepoint < 256 ? glyphIdArray[codepoint] : 0;
    if (!gid)
      return false;
    *glyph = gid;
    return true;
  }

B
Behdad Esfahbod 已提交
55 56
  inline bool sanitize (hb_sanitize_context_t *c) const
  {
57
    TRACE_SANITIZE (this);
B
Behdad Esfahbod 已提交
58
    return_trace (c->check_struct (this));
59 60 61
  }

  protected:
B
Behdad Esfahbod 已提交
62
  HBUINT16	format;		/* Format number is set to 0. */
63 64
  HBUINT16	length;		/* Byte length of this subtable. */
  HBUINT16	language;	/* Ignore. */
B
Minor  
Behdad Esfahbod 已提交
65
  HBUINT8	glyphIdArray[256];/* An array that maps character
66 67 68 69 70
				 * code to glyph index values. */
  public:
  DEFINE_SIZE_STATIC (6 + 256);
};

71 72
struct CmapSubtableFormat4
{
73 74 75 76 77 78 79 80 81 82 83
  /* One planned format 4 segment, produced by create_sub_table_plan ()
   * and consumed by serialize () / get_sub_table_size (). */
  struct segment_plan
  {
    /* First character code covered by the segment. */
    HBUINT16 start_code;
    /* Last character code covered by the segment (inclusive). */
    HBUINT16 end_code;
    /* True when the segment is written with an idDelta only; false when
     * it needs per-codepoint entries in the glyph id array. */
    bool use_delta;
  };

  /* Writes a format 4 subtable for the planned `segments` into the
   * serializer `c`, resolving new glyph ids through `plan`.
   *
   * The parallel arrays are allocated in on-disk order (endCount,
   * reservedPad, startCount, idDelta, idRangeOffset), followed by one
   * glyph id array per non-delta segment.  Returns false if the
   * serializer cannot provide the space or a codepoint has no new gid. */
  bool serialize (hb_serialize_context_t *c,
                  const hb_subset_plan_t *plan,
                  const hb_vector_t<segment_plan> &segments)
  {
    TRACE_SERIALIZE (this);

    if (unlikely (!c->extend_min (*this))) return_trace (false);

    const unsigned int seg_count = segments.len;
    const unsigned int array_bytes = HBUINT16::static_size * seg_count;

    /* Fixed header. */
    this->format.set (4);
    this->length.set (get_sub_table_size (segments));

    /* Binary-search helper fields. */
    this->segCountX2.set (seg_count * 2);
    this->entrySelector.set (MAX (1u, hb_bit_storage (seg_count)) - 1);
    this->searchRange.set (2 * (1u << this->entrySelector));
    unsigned int range_shift = 0;
    if (seg_count * 2 > this->searchRange)
      range_shift = 2 * seg_count - this->searchRange;
    this->rangeShift.set (range_shift);

    /* Parallel arrays; the order of these allocations is the table layout. */
    HBUINT16 *end_codes = c->allocate_size<HBUINT16> (array_bytes);
    c->allocate_size<HBUINT16> (HBUINT16::static_size); // 2 bytes of padding.
    HBUINT16 *start_codes = c->allocate_size<HBUINT16> (array_bytes);
    HBINT16 *deltas = c->allocate_size<HBINT16> (array_bytes);
    HBUINT16 *range_offsets = c->allocate_size<HBUINT16> (array_bytes);

    /* Only the last allocation is tested, as in the original code.
     * NOTE(review): this presumably relies on allocation failure being
     * sticky in the serializer -- confirm. */
    if (range_offsets == nullptr)
      return_trace (false);

    for (unsigned int i = 0; i < seg_count; i++)
    {
      const segment_plan &seg = segments[i];

      end_codes[i].set (seg.end_code);
      start_codes[i].set (seg.start_code);

      if (seg.use_delta)
      {
        /* Consecutive gids: a single delta describes the whole segment.
         * A failed lookup is tolerated only for the 0xFFFF code. */
        hb_codepoint_t first_cp = seg.start_code;
        hb_codepoint_t first_gid = 0;
        if (unlikely (!plan->new_gid_for_codepoint (first_cp, &first_gid) && first_cp != 0xFFFF))
          return_trace (false);
        deltas[i].set (first_gid - seg.start_code);
        continue;
      }

      /* Non-consecutive gids: emit an explicit glyph id per codepoint. */
      deltas[i].set (0);
      unsigned int cp_count = seg.end_code - seg.start_code + 1;
      HBUINT16 *gids = c->allocate_size<HBUINT16> (HBUINT16::static_size * cp_count);
      if (gids == nullptr)
        return_trace (false);

      // From the cmap spec, the entry for codepoint cp lives at
      //
      //   range_offsets[i]/2 + (cp - start_code) + (range_offsets + i)
      //
      // and it has to equal
      //
      //   gids + (cp - start_code)
      //
      // Solving for range_offsets[i] gives
      //
      //   2 * (gids - range_offsets - i)
      range_offsets[i].set (2 * (
          gids - range_offsets - i));

      for (unsigned int k = 0; k < cp_count; k++)
      {
        hb_codepoint_t cp = seg.start_code + k;
        hb_codepoint_t mapped_gid;
        if (unlikely (!plan->new_gid_for_codepoint (cp, &mapped_gid)))
          return_trace (false);
        gids[k].set (mapped_gid);
      }
    }

    return_trace (true);
  }

  static inline size_t get_sub_table_size (const hb_vector_t<segment_plan> &segments)
  {
    size_t segment_size = 0;
    for (unsigned int i = 0; i < segments.len; i++)
    {
      // Parallel array entries
      segment_size +=
            2  // end count
          + 2  // start count
          + 2  // delta
          + 2; // range offset

      if (!segments[i].use_delta)
        // Add bytes for the glyph index array entries for this segment.
        segment_size += (segments[i].end_code - segments[i].start_code + 1) * 2;
    }

    return min_size
        + 2 // Padding
        + segment_size;
  }

  static inline bool create_sub_table_plan (const hb_subset_plan_t *plan,
                                            hb_vector_t<segment_plan> *segments)
  {
178 179
    segment_plan *segment = nullptr;
    hb_codepoint_t last_gid = 0;
180 181 182

    hb_codepoint_t cp = HB_SET_VALUE_INVALID;
    while (plan->unicodes->next (&cp)) {
183
      hb_codepoint_t new_gid;
184
      if (unlikely (!plan->new_gid_for_codepoint (cp, &new_gid)))
185 186 187 188 189 190 191 192 193 194 195
      {
	DEBUG_MSG(SUBSET, nullptr, "Unable to find new gid for %04x", cp);
	return false;
      }

      if (cp > 0xFFFF) {
        // We are now outside of unicode BMP, stop adding to this cmap.
        break;
      }

      if (!segment
B
Behdad Esfahbod 已提交
196
          || cp != segment->end_code + 1u)
197 198 199 200 201 202 203
      {
        segment = segments->push ();
        segment->start_code.set (cp);
        segment->end_code.set (cp);
        segment->use_delta = true;
      } else {
        segment->end_code.set (cp);
B
Behdad Esfahbod 已提交
204
        if (last_gid + 1u != new_gid)
205 206 207 208 209 210 211
          // gid's are not consecutive in this segment so delta
          // cannot be used.
          segment->use_delta = false;
      }

      last_gid = new_gid;
    }
212 213

    // There must be a final entry with end_code == 0xFFFF. Check if we need to add one.
214 215
    if (segment == nullptr || segment->end_code != 0xFFFF)
    {
216 217 218 219 220 221
      segment = segments->push ();
      segment->start_code.set (0xFFFF);
      segment->end_code.set (0xFFFF);
      segment->use_delta = true;
    }

222
    return true;
223 224
  }

225
  struct accelerator_t
226
  {
227 228 229 230 231 232 233 234 235 236
    inline void init (const CmapSubtableFormat4 *subtable)
    {
      segCount = subtable->segCountX2 / 2;
      endCount = subtable->values;
      startCount = endCount + segCount + 1;
      idDelta = startCount + segCount;
      idRangeOffset = idDelta + segCount;
      glyphIdArray = idRangeOffset + segCount;
      glyphIdArrayLength = (subtable->length - 16 - 8 * segCount) / 2;
    }
237

238
    static inline bool get_glyph_func (const void *obj, hb_codepoint_t codepoint, hb_codepoint_t *glyph)
239
    {
240 241 242 243
      const accelerator_t *thiz = (const accelerator_t *) obj;

      /* Custom two-array bsearch. */
      int min = 0, max = (int) thiz->segCount - 1;
B
Behdad Esfahbod 已提交
244 245
      const HBUINT16 *startCount = thiz->startCount;
      const HBUINT16 *endCount = thiz->endCount;
246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266
      unsigned int i;
      while (min <= max)
      {
	int mid = (min + max) / 2;
	if (codepoint < startCount[mid])
	  max = mid - 1;
	else if (codepoint > endCount[mid])
	  min = mid + 1;
	else
	{
	  i = mid;
	  goto found;
	}
      }
      return false;

    found:
      hb_codepoint_t gid;
      unsigned int rangeOffset = thiz->idRangeOffset[i];
      if (rangeOffset == 0)
	gid = codepoint + thiz->idDelta[i];
267 268
      else
      {
269 270 271 272 273 274 275 276
	/* Somebody has been smoking... */
	unsigned int index = rangeOffset / 2 + (codepoint - thiz->startCount[i]) + i - thiz->segCount;
	if (unlikely (index >= thiz->glyphIdArrayLength))
	  return false;
	gid = thiz->glyphIdArray[index];
	if (unlikely (!gid))
	  return false;
	gid += thiz->idDelta[i];
277
      }
278 279 280

      *glyph = gid & 0xFFFFu;
      return true;
281 282
    }

283 284 285
    static inline void get_all_codepoints_func (const void *obj, hb_set_t *out)
    {
      const accelerator_t *thiz = (const accelerator_t *) obj;
286 287 288 289 290 291
      for (unsigned int i = 0; i < thiz->segCount; i++)
      {
	if (thiz->startCount[i] != 0xFFFFu
	    || thiz->endCount[i] != 0xFFFFu) // Skip the last segment (0xFFFF)
	  hb_set_add_range (out, thiz->startCount[i], thiz->endCount[i]);
      }
292 293
    }

B
Behdad Esfahbod 已提交
294 295 296 297 298
    const HBUINT16 *endCount;
    const HBUINT16 *startCount;
    const HBUINT16 *idDelta;
    const HBUINT16 *idRangeOffset;
    const HBUINT16 *glyphIdArray;
299 300 301 302 303 304 305 306 307
    unsigned int segCount;
    unsigned int glyphIdArrayLength;
  };

  /* Unaccelerated lookup: sets up a temporary accelerator for this
   * subtable and performs a single query through it. */
  inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
  {
    accelerator_t one_shot;
    one_shot.init (this);
    return accelerator_t::get_glyph_func (&one_shot, codepoint, glyph);
  }

B
Behdad Esfahbod 已提交
310
  /* Validates the subtable: the fixed header must be in bounds, `length`
   * must fit in the blob (it is clamped when it does not), and the four
   * per-segment arrays plus the header must fit inside `length`. */
  inline bool sanitize (hb_sanitize_context_t *c) const
  {
    TRACE_SANITIZE (this);

    if (unlikely (!c->check_struct (this)))
      return_trace (false);

    if (unlikely (!c->check_range (this, length)))
    {
      /* Some broken fonts have too long of a "length" value.
       * In that case, rewrite the value so the subtable is truncated
       * at the end of the blob. */
      const uintptr_t bytes_to_end = (uintptr_t) (c->end -
                                                  (char *) this);
      uint16_t new_length = (uint16_t) MIN ((uintptr_t) 65535, bytes_to_end);
      if (!c->try_set (&length, new_length))
        return_trace (false);
    }

    /* 16 bytes of header and pad, then 4 arrays of segCountX2 bytes each. */
    const unsigned int min_needed = 16 + 4 * (unsigned int) segCountX2;
    return_trace (min_needed <= length);
  }

331 332


333
  protected:
B
Behdad Esfahbod 已提交
334 335
  HBUINT16	format;		/* Format number is set to 4. */
  HBUINT16	length;		/* This is the length in bytes of the
336
				 * subtable. */
337
  HBUINT16	language;	/* Ignore. */
B
Behdad Esfahbod 已提交
338
  HBUINT16	segCountX2;	/* 2 x segCount. */
339 340 341
  HBUINT16	searchRange;	/* 2 * (2**floor(log2(segCount))) */
  HBUINT16	entrySelector;	/* log2(searchRange/2) */
  HBUINT16	rangeShift;	/* 2 x segCount - searchRange */
342

B
Behdad Esfahbod 已提交
343
  HBUINT16	values[VAR];
344
#if 0
B
Behdad Esfahbod 已提交
345
  HBUINT16	endCount[segCount];	/* End characterCode for each segment,
346
					 * last=0xFFFFu. */
B
Behdad Esfahbod 已提交
347 348 349 350 351
  HBUINT16	reservedPad;		/* Set to 0. */
  HBUINT16	startCount[segCount];	/* Start character code for each segment. */
  HBINT16		idDelta[segCount];	/* Delta for all character codes in segment. */
  HBUINT16	idRangeOffset[segCount];/* Offsets into glyphIdArray or 0 */
  HBUINT16	glyphIdArray[VAR];	/* Glyph index array (arbitrary length) */
352 353 354 355 356 357
#endif

  public:
  DEFINE_SIZE_ARRAY (14, values);
};

358
struct CmapSubtableLongGroup
359 360
{
  friend struct CmapSubtableFormat12;
361
  friend struct CmapSubtableFormat13;
362 363
  template<typename U>
  friend struct CmapSubtableLongSegmented;
364
  friend struct cmap;
365 366 367 368 369 370 371 372

  int cmp (hb_codepoint_t codepoint) const
  {
    if (codepoint < startCharCode) return -1;
    if (codepoint > endCharCode)   return +1;
    return 0;
  }

B
Behdad Esfahbod 已提交
373 374
  inline bool sanitize (hb_sanitize_context_t *c) const
  {
375
    TRACE_SANITIZE (this);
B
Behdad Esfahbod 已提交
376
    return_trace (c->check_struct (this));
377 378 379
  }

  private:
B
Behdad Esfahbod 已提交
380 381 382
  HBUINT32		startCharCode;	/* First character code in this group. */
  HBUINT32		endCharCode;	/* Last character code in this group. */
  HBUINT32		glyphID;	/* Glyph index; interpretation depends on
383
				 * subtable format. */
384 385 386 387
  public:
  DEFINE_SIZE_STATIC (12);
};

388 389
template <typename UINT>
struct CmapSubtableTrimmed
390 391 392 393 394 395 396 397 398 399 400
{
  inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
  {
    /* Rely on our implicit array bound-checking. */
    hb_codepoint_t gid = glyphIdArray[codepoint - startCharCode];
    if (!gid)
      return false;
    *glyph = gid;
    return true;
  }

B
Behdad Esfahbod 已提交
401 402
  inline bool sanitize (hb_sanitize_context_t *c) const
  {
403
    TRACE_SANITIZE (this);
B
Behdad Esfahbod 已提交
404
    return_trace (c->check_struct (this) && glyphIdArray.sanitize (c));
405 406 407
  }

  protected:
408
  UINT		formatReserved;	/* Subtable format and (maybe) padding. */
409 410
  UINT		length;		/* Byte length of this subtable. */
  UINT		language;	/* Ignore. */
411
  UINT		startCharCode;	/* First character code covered. */
412
  ArrayOf<GlyphID, UINT>
413 414 415
		glyphIdArray;	/* Array of glyph index values for character
				 * codes in the range. */
  public:
416
  DEFINE_SIZE_ARRAY (5 * sizeof (UINT), glyphIdArray);
417 418
};

B
Behdad Esfahbod 已提交
419 420
/* Format 6: the trimmed subtable instantiated with 16-bit fields. */
struct CmapSubtableFormat6 : CmapSubtableTrimmed<HBUINT16>
{
};
/* Format 10: the trimmed subtable instantiated with 32-bit fields. */
struct CmapSubtableFormat10 : CmapSubtableTrimmed<HBUINT32>
{
};
421

422 423
template <typename T>
struct CmapSubtableLongSegmented
424
{
R
Rod Sheeter 已提交
425 426
  friend struct cmap;

427 428
  inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
  {
429
    int i = groups.bsearch (codepoint);
430 431
    if (i == -1)
      return false;
432
    *glyph = T::group_get_glyph (groups[i], codepoint);
433 434 435
    return true;
  }

436 437 438 439 440 441 442 443 444
  inline void get_all_codepoints (hb_set_t *out) const
  {
    for (unsigned int i = 0; i < this->groups.len; i++) {
      hb_set_add_range (out,
			this->groups[i].startCharCode,
			this->groups[i].endCharCode);
    }
  }

B
Behdad Esfahbod 已提交
445 446
  inline bool sanitize (hb_sanitize_context_t *c) const
  {
447
    TRACE_SANITIZE (this);
B
Behdad Esfahbod 已提交
448
    return_trace (c->check_struct (this) && groups.sanitize (c));
449 450
  }

B
Behdad Esfahbod 已提交
451
  inline bool serialize (hb_serialize_context_t *c,
452
                         const hb_vector_t<CmapSubtableLongGroup> &group_data)
R
Rod Sheeter 已提交
453 454
  {
    TRACE_SERIALIZE (this);
B
Behdad Esfahbod 已提交
455
    if (unlikely (!c->extend_min (*this))) return_trace (false);
456
    Supplier<CmapSubtableLongGroup> supplier (group_data.arrayZ, group_data.len);
B
Behdad Esfahbod 已提交
457
    if (unlikely (!groups.serialize (c, supplier, group_data.len))) return_trace (false);
R
Rod Sheeter 已提交
458 459 460
    return true;
  }

461
  protected:
B
Behdad Esfahbod 已提交
462
  HBUINT16	format;		/* Subtable format; set to 12. */
463 464 465
  HBUINT16	reserved;	/* Reserved; set to 0. */
  HBUINT32	length;		/* Byte length of this subtable. */
  HBUINT32	language;	/* Ignore. */
B
Behdad Esfahbod 已提交
466
  SortedArrayOf<CmapSubtableLongGroup, HBUINT32>
467 468 469 470 471
		groups;		/* Groupings. */
  public:
  DEFINE_SIZE_ARRAY (16, groups);
};

472
/* Format 12: segmented coverage.  Glyph ids increase by one per
 * codepoint across each group. */
struct CmapSubtableFormat12 : CmapSubtableLongSegmented<CmapSubtableFormat12>
{
  /* Glyph for `u` inside `group`: the group's first glyph id plus the
   * distance of `u` from the group's first character code. */
  static inline hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group,
						hb_codepoint_t u)
  {
    const hb_codepoint_t distance = u - group.startCharCode;
    return group.glyphID + distance;
  }


  /* Writes the format 12 header followed by `groups`.  Returns false if
   * the serializer cannot provide the space. */
  bool serialize (hb_serialize_context_t *c,
                  const hb_vector_t<CmapSubtableLongGroup> &groups)
  {
    if (unlikely (!c->extend_min (*this)))
      return false;

    this->format.set (12);
    this->reserved.set (0);
    this->length.set (get_sub_table_size (groups));

    typedef CmapSubtableLongSegmented<CmapSubtableFormat12> base_t;
    return base_t::serialize (c, groups);
  }

  /* Serialized size in bytes: 16-byte header plus 12 bytes per group. */
  static inline size_t get_sub_table_size (const hb_vector_t<CmapSubtableLongGroup> &groups)
  {
    return 16 + 12 * groups.len;
  }

  /* Builds the group list for the codepoints in plan->unicodes, appending
   * to `groups`.  A codepoint joins the current group when both it and its
   * new glyph id continue the group's run; otherwise a new group is
   * started.  Returns false if a codepoint has no new glyph id. */
  static inline bool create_sub_table_plan (const hb_subset_plan_t *plan,
                                            hb_vector_t<CmapSubtableLongGroup> *groups)
  {
    CmapSubtableLongGroup *current = nullptr;

    hb_codepoint_t cp = HB_SET_VALUE_INVALID;
    while (plan->unicodes->next (&cp))
    {
      hb_codepoint_t new_gid;
      if (unlikely (!plan->new_gid_for_codepoint (cp, &new_gid)))
      {
        DEBUG_MSG(SUBSET, nullptr, "Unable to find new gid for %04x", cp);
        return false;
      }

      if (current && _is_gid_consecutive (current, cp, new_gid))
      {
        /* Same run: just stretch the current group. */
        current->endCharCode.set (cp);
        continue;
      }

      current = groups->push ();
      current->startCharCode.set (cp);
      current->endCharCode.set (cp);
      current->glyphID.set (new_gid);
    }

    DEBUG_MSG(SUBSET, nullptr, "cmap");
    for (unsigned int i = 0; i < groups->len; i++)
    {
      CmapSubtableLongGroup& g = (*groups)[i];
      DEBUG_MSG(SUBSET, nullptr, "  %d: U+%04X-U+%04X, gid %d-%d", i, (uint32_t) g.startCharCode, (uint32_t) g.endCharCode, (uint32_t) g.glyphID, (uint32_t) g.glyphID + ((uint32_t) g.endCharCode - (uint32_t) g.startCharCode));
    }

    return true;
  }

 private:
  /* True when `cp` directly follows the group's last character code and
   * `new_gid` is the glyph id the group would already assign to `cp`. */
  static inline bool _is_gid_consecutive (CmapSubtableLongGroup *group,
					  hb_codepoint_t cp,
					  hb_codepoint_t new_gid)
  {
    if (cp - 1 != group->endCharCode)
      return false;
    return new_gid == group->glyphID + (cp - group->startCharCode);
  }

};
541

542 543 544 545 546
/* Format 13: every codepoint in a group maps to the group's one glyph id. */
struct CmapSubtableFormat13 : CmapSubtableLongSegmented<CmapSubtableFormat13>
{
  /* The codepoint is irrelevant here: the whole group shares one glyph. */
  static inline hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group,
						hb_codepoint_t u HB_UNUSED)
  {
    hb_codepoint_t shared_gid = group.glyphID;
    return shared_gid;
  }
};

549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564
/* Outcome of a variation-sequence lookup. */
enum glyph_variant_t
{
  GLYPH_VARIANT_NOT_FOUND   = 0, /* Sequence is not listed. */
  GLYPH_VARIANT_FOUND       = 1, /* A specific glyph was stored. */
  GLYPH_VARIANT_USE_DEFAULT = 2  /* Listed in the default UVS table. */
};

struct UnicodeValueRange
{
  inline int cmp (const hb_codepoint_t &codepoint) const
  {
    if (codepoint < startUnicodeValue) return -1;
    if (codepoint > startUnicodeValue + additionalCount) return +1;
    return 0;
  }

B
Behdad Esfahbod 已提交
565 566
  inline bool sanitize (hb_sanitize_context_t *c) const
  {
567
    TRACE_SANITIZE (this);
B
Behdad Esfahbod 已提交
568
    return_trace (c->check_struct (this));
569 570
  }

571
  HBUINT24	startUnicodeValue;	/* First value in this range. */
B
Behdad Esfahbod 已提交
572
  HBUINT8		additionalCount;	/* Number of additional values in this
573 574 575 576 577
					 * range. */
  public:
  DEFINE_SIZE_STATIC (4);
};

B
Behdad Esfahbod 已提交
578
/* Default UVS table: sorted array of Unicode value ranges, with a 32-bit count. */
typedef SortedArrayOf<UnicodeValueRange, HBUINT32> DefaultUVS;
579 580 581 582 583 584 585 586

struct UVSMapping
{
  inline int cmp (const hb_codepoint_t &codepoint) const
  {
    return unicodeValue.cmp (codepoint);
  }

B
Behdad Esfahbod 已提交
587 588
  inline bool sanitize (hb_sanitize_context_t *c) const
  {
589
    TRACE_SANITIZE (this);
B
Behdad Esfahbod 已提交
590
    return_trace (c->check_struct (this));
591 592
  }

593
  HBUINT24	unicodeValue;	/* Base Unicode value of the UVS */
594 595 596 597 598
  GlyphID	glyphID;	/* Glyph ID of the UVS */
  public:
  DEFINE_SIZE_STATIC (5);
};

B
Behdad Esfahbod 已提交
599
/* Non-default UVS table: sorted array of UVS mappings, with a 32-bit count. */
typedef SortedArrayOf<UVSMapping, HBUINT32> NonDefaultUVS;
600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626

struct VariationSelectorRecord
{
  inline glyph_variant_t get_glyph (hb_codepoint_t codepoint,
				    hb_codepoint_t *glyph,
				    const void *base) const
  {
    int i;
    const DefaultUVS &defaults = base+defaultUVS;
    i = defaults.bsearch (codepoint);
    if (i != -1)
      return GLYPH_VARIANT_USE_DEFAULT;
    const NonDefaultUVS &nonDefaults = base+nonDefaultUVS;
    i = nonDefaults.bsearch (codepoint);
    if (i != -1)
    {
      *glyph = nonDefaults[i].glyphID;
       return GLYPH_VARIANT_FOUND;
    }
    return GLYPH_VARIANT_NOT_FOUND;
  }

  inline int cmp (const hb_codepoint_t &variation_selector) const
  {
    return varSelector.cmp (variation_selector);
  }

B
Behdad Esfahbod 已提交
627 628
  inline bool sanitize (hb_sanitize_context_t *c, const void *base) const
  {
629
    TRACE_SANITIZE (this);
B
Behdad Esfahbod 已提交
630 631 632
    return_trace (c->check_struct (this) &&
		  defaultUVS.sanitize (c, base) &&
		  nonDefaultUVS.sanitize (c, base));
633 634
  }

635
  HBUINT24	varSelector;	/* Variation selector. */
B
Behdad Esfahbod 已提交
636
  LOffsetTo<DefaultUVS>
637
		defaultUVS;	/* Offset to Default UVS Table. May be 0. */
B
Behdad Esfahbod 已提交
638
  LOffsetTo<NonDefaultUVS>
639 640 641 642 643 644 645 646 647 648 649 650 651 652
		nonDefaultUVS;	/* Offset to Non-Default UVS Table. May be 0. */
  public:
  DEFINE_SIZE_STATIC (11);
};

struct CmapSubtableFormat14
{
  inline glyph_variant_t get_glyph_variant (hb_codepoint_t codepoint,
					    hb_codepoint_t variation_selector,
					    hb_codepoint_t *glyph) const
  {
    return record[record.bsearch(variation_selector)].get_glyph (codepoint, glyph, this);
  }

B
Behdad Esfahbod 已提交
653 654
  inline bool sanitize (hb_sanitize_context_t *c) const
  {
655
    TRACE_SANITIZE (this);
B
Behdad Esfahbod 已提交
656 657
    return_trace (c->check_struct (this) &&
		  record.sanitize (c, this));
658 659 660
  }

  protected:
B
Behdad Esfahbod 已提交
661
  HBUINT16	format;		/* Format number is set to 14. */
662
  HBUINT32	length;		/* Byte length of this subtable. */
B
Behdad Esfahbod 已提交
663
  SortedArrayOf<VariationSelectorRecord, HBUINT32>
664 665 666 667 668 669
		record;		/* Variation selector records; sorted
				 * in increasing order of `varSelector'. */
  public:
  DEFINE_SIZE_ARRAY (10, record);
};

670 671
struct CmapSubtable
{
672 673
  /* Note: We intentionally do NOT implement subtable formats 2 and 8. */

674 675
  inline bool get_glyph (hb_codepoint_t codepoint,
			 hb_codepoint_t *glyph) const
676 677
  {
    switch (u.format) {
R
Rod Sheeter 已提交
678 679 680 681 682 683
    case  0: return u.format0 .get_glyph (codepoint, glyph);
    case  4: return u.format4 .get_glyph (codepoint, glyph);
    case  6: return u.format6 .get_glyph (codepoint, glyph);
    case 10: return u.format10.get_glyph (codepoint, glyph);
    case 12: return u.format12.get_glyph (codepoint, glyph);
    case 13: return u.format13.get_glyph (codepoint, glyph);
684 685 686 687 688
    case 14:
    default: return false;
    }
  }

B
Behdad Esfahbod 已提交
689 690
  inline bool sanitize (hb_sanitize_context_t *c) const
  {
691
    TRACE_SANITIZE (this);
B
Behdad Esfahbod 已提交
692
    if (!u.format.sanitize (c)) return_trace (false);
693
    switch (u.format) {
B
Behdad Esfahbod 已提交
694 695 696 697 698 699 700 701
    case  0: return_trace (u.format0 .sanitize (c));
    case  4: return_trace (u.format4 .sanitize (c));
    case  6: return_trace (u.format6 .sanitize (c));
    case 10: return_trace (u.format10.sanitize (c));
    case 12: return_trace (u.format12.sanitize (c));
    case 13: return_trace (u.format13.sanitize (c));
    case 14: return_trace (u.format14.sanitize (c));
    default:return_trace (true);
702 703 704
    }
  }

705
  public:
706
  union {
B
Behdad Esfahbod 已提交
707
  HBUINT16		format;		/* Format identifier */
708
  CmapSubtableFormat0	format0;
709
  CmapSubtableFormat4	format4;
710 711
  CmapSubtableFormat6	format6;
  CmapSubtableFormat10	format10;
712
  CmapSubtableFormat12	format12;
713
  CmapSubtableFormat13	format13;
714
  CmapSubtableFormat14	format14;
715 716 717 718 719 720 721 722
  } u;
  public:
  DEFINE_SIZE_UNION (2, format);
};


struct EncodingRecord
{
723
  inline int cmp (const EncodingRecord &other) const
724 725
  {
    int ret;
726
    ret = platformID.cmp (other.platformID);
727
    if (ret) return ret;
728
    ret = encodingID.cmp (other.encodingID);
729 730 731 732
    if (ret) return ret;
    return 0;
  }

B
Behdad Esfahbod 已提交
733 734
  inline bool sanitize (hb_sanitize_context_t *c, const void *base) const
  {
735
    TRACE_SANITIZE (this);
B
Behdad Esfahbod 已提交
736 737
    return_trace (c->check_struct (this) &&
		  subtable.sanitize (c, base));
738 739
  }

B
Behdad Esfahbod 已提交
740 741
  HBUINT16	platformID;	/* Platform ID. */
  HBUINT16	encodingID;	/* Platform-specific encoding ID. */
B
Behdad Esfahbod 已提交
742
  LOffsetTo<CmapSubtable>
743 744 745 746 747 748 749 750 751
		subtable;	/* Byte offset from beginning of table to the subtable for this encoding. */
  public:
  DEFINE_SIZE_STATIC (8);
};

struct cmap
{
  static const hb_tag_t tableTag	= HB_OT_TAG_cmap;

752 753 754
  struct subset_plan {
    subset_plan(void)
    {
755 756
      format4_segments.init();
      format12_groups.init();
757 758 759 760
    }

    ~subset_plan(void)
    {
761 762
      format4_segments.fini();
      format12_groups.fini();
763 764 765 766
    }

    inline size_t final_size() const
    {
767
      return 4 // header
768
          +  8 * 3 // 3 EncodingRecord
769 770
          +  CmapSubtableFormat4::get_sub_table_size (this->format4_segments)
          +  CmapSubtableFormat12::get_sub_table_size (this->format12_groups);
771 772
    }

773 774
    // Format 4
    hb_vector_t<CmapSubtableFormat4::segment_plan> format4_segments;
775
    // Format 12
776
    hb_vector_t<CmapSubtableLongGroup> format12_groups;
777 778
  };

779 780 781 782 783 784 785 786
  /* Validates the table header, requires version 0, then validates every
   * encoding record with this table as the offset base. */
  inline bool sanitize (hb_sanitize_context_t *c) const
  {
    TRACE_SANITIZE (this);
    if (!c->check_struct (this))
      return_trace (false);
    if (unlikely (version != 0))
      return_trace (false);
    return_trace (encodingRecord.sanitize (c, this));
  }

787 788
  inline bool _create_plan (const hb_subset_plan_t *plan,
                            subset_plan *cmap_plan) const
789
  {
790 791
    if (unlikely( !CmapSubtableFormat4::create_sub_table_plan (plan, &cmap_plan->format4_segments)))
      return false;
792

793
    return CmapSubtableFormat12::create_sub_table_plan (plan, &cmap_plan->format12_groups);
R
Rod Sheeter 已提交
794 795
  }

796 797
  inline bool _subset (const hb_subset_plan_t *plan,
                       const subset_plan &cmap_subset_plan,
B
Behdad Esfahbod 已提交
798 799
		       size_t dest_sz,
		       void *dest) const
R
Rod Sheeter 已提交
800
  {
B
Behdad Esfahbod 已提交
801
    hb_serialize_context_t c (dest, dest_sz);
R
Rod Sheeter 已提交
802

B
Behdad Esfahbod 已提交
803 804
    OT::cmap *cmap = c.start_serialize<OT::cmap> ();
    if (unlikely (!c.extend_min (*cmap)))
R
Rod Sheeter 已提交
805 806 807 808
    {
      return false;
    }

R
Rod Sheeter 已提交
809
    cmap->version.set (0);
R
Rod Sheeter 已提交
810

811
    if (unlikely (!cmap->encodingRecord.serialize (&c, /* numTables */ 3)))
812
      return false;
813 814

    // TODO(grieger): Convert the below to a for loop
R
Rod Sheeter 已提交
815

816 817 818 819
    // Format 4, Plat 0 Encoding Record
    EncodingRecord &format4_plat0_rec = cmap->encodingRecord[0];
    format4_plat0_rec.platformID.set (0); // Unicode
    format4_plat0_rec.encodingID.set (3);
R
Rod Sheeter 已提交
820

821 822 823 824
    // Format 4, Plat 3 Encoding Record
    EncodingRecord &format4_plat3_rec = cmap->encodingRecord[1];
    format4_plat3_rec.platformID.set (3); // Windows
    format4_plat3_rec.encodingID.set (1); // Unicode BMP
R
Rod Sheeter 已提交
825

826
    // Format 12 Encoding Record
827
    EncodingRecord &format12_rec = cmap->encodingRecord[2];
828 829
    format12_rec.platformID.set (3); // Windows
    format12_rec.encodingID.set (10); // Unicode UCS-4
R
Rod Sheeter 已提交
830

831
    // Write out format 4 sub table
832
    {
833 834
      CmapSubtable &subtable = format4_plat0_rec.subtable.serialize (&c, cmap);
      format4_plat3_rec.subtable.set (format4_plat0_rec.subtable);
835 836 837
      subtable.u.format.set (4);

      CmapSubtableFormat4 &format4 = subtable.u.format4;
838 839
      if (unlikely (!format4.serialize (&c, plan, cmap_subset_plan.format4_segments)))
        return false;
840
    }
R
Rod Sheeter 已提交
841

842
    // Write out format 12 sub table.
843 844 845
    {
      CmapSubtable &subtable = format12_rec.subtable.serialize (&c, cmap);
      subtable.u.format.set (12);
R
Rod Sheeter 已提交
846

847
      CmapSubtableFormat12 &format12 = subtable.u.format12;
848 849
      if (unlikely (!format12.serialize (&c, cmap_subset_plan.format12_groups)))
        return false;
850
    }
R
Rod Sheeter 已提交
851

B
Behdad Esfahbod 已提交
852
    c.end_serialize ();
R
Rod Sheeter 已提交
853

854 855 856
    return true;
  }

857
  /* Produces the subsetted cmap table and adds it to `plan`.
   *
   * Steps: plan the subtables, malloc a buffer of the planned size,
   * serialize into it, wrap it in a blob that frees the buffer, and hand
   * the blob to plan->add_table ().  Returns false if planning,
   * allocation or serialization fails; otherwise returns the result of
   * add_table (). */
  inline bool subset (hb_subset_plan_t *plan) const
  {
    subset_plan cmap_subset_plan;

    const bool planned = _create_plan (plan, &cmap_subset_plan);
    if (unlikely (!planned))
    {
      DEBUG_MSG(SUBSET, nullptr, "Failed to generate a cmap subsetting plan.");
      return false;
    }

    // The plan tells us exactly how large the output has to be.
    size_t dest_sz = cmap_subset_plan.final_size();
    void *dest = malloc (dest_sz);
    if (unlikely (!dest))
    {
      DEBUG_MSG(SUBSET, nullptr, "Unable to alloc %lu for cmap subset output", (unsigned long) dest_sz);
      return false;
    }

    const bool serialized = _subset (plan, cmap_subset_plan, dest_sz, dest);
    if (unlikely (!serialized))
    {
      DEBUG_MSG(SUBSET, nullptr, "Failed to perform subsetting of cmap.");
      free (dest); // No blob was created for the buffer yet; release it here.
      return false;
    }

    // Wrap the buffer in a blob; `free` is registered as its destroy callback.
    hb_blob_t *cmap_prime = hb_blob_create ((const char *)dest,
                                            dest_sz,
                                            HB_MEMORY_MODE_READONLY,
                                            dest,
                                            free);
    bool added = plan->add_table (HB_OT_TAG_cmap, cmap_prime);
    hb_blob_destroy (cmap_prime);
    return added;
  }

893 894 895 896
  struct accelerator_t
  {
    inline void init (hb_face_t *face)
    {
B
Behdad Esfahbod 已提交
897
      this->blob = OT::hb_sanitize_context_t().reference_table<OT::cmap> (face);
898
      const OT::cmap *cmap = this->blob->as<OT::cmap> ();
899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918
      const OT::CmapSubtable *subtable = nullptr;
      const OT::CmapSubtableFormat14 *subtable_uvs = nullptr;

      bool symbol = false;
      /* 32-bit subtables. */
      if (!subtable) subtable = cmap->find_subtable (3, 10);
      if (!subtable) subtable = cmap->find_subtable (0, 6);
      if (!subtable) subtable = cmap->find_subtable (0, 4);
      /* 16-bit subtables. */
      if (!subtable) subtable = cmap->find_subtable (3, 1);
      if (!subtable) subtable = cmap->find_subtable (0, 3);
      if (!subtable) subtable = cmap->find_subtable (0, 2);
      if (!subtable) subtable = cmap->find_subtable (0, 1);
      if (!subtable) subtable = cmap->find_subtable (0, 0);
      if (!subtable)
      {
	subtable = cmap->find_subtable (3, 0);
	if (subtable) symbol = true;
      }
      /* Meh. */
B
Behdad Esfahbod 已提交
919
      if (!subtable) subtable = &Null(OT::CmapSubtable);
920 921 922 923 924 925 926 927 928

      /* UVS subtable. */
      if (!subtable_uvs)
      {
	const OT::CmapSubtable *st = cmap->find_subtable (0, 5);
	if (st && st->u.format == 14)
	  subtable_uvs = &st->u.format14;
      }
      /* Meh. */
B
Behdad Esfahbod 已提交
929
      if (!subtable_uvs) subtable_uvs = &Null(OT::CmapSubtableFormat14);
930 931 932 933 934

      this->uvs_table = subtable_uvs;

      this->get_glyph_data = subtable;
      if (unlikely (symbol))
935
      {
936
	this->get_glyph_func = get_glyph_from_symbol<OT::CmapSubtable>;
937 938
	this->get_all_codepoints_func = null_get_all_codepoints_func;
      } else {
939 940
	switch (subtable->u.format) {
	/* Accelerate format 4 and format 12. */
941 942 943 944 945 946
	default:
	  this->get_glyph_func = get_glyph_from<OT::CmapSubtable>;
	  this->get_all_codepoints_func = null_get_all_codepoints_func;
	  break;
	case 12:
	  this->get_glyph_func = get_glyph_from<OT::CmapSubtableFormat12>;
947
	  this->get_all_codepoints_func = get_all_codepoints_from<OT::CmapSubtableFormat12>;
948
	  break;
949 950 951 952 953
	case  4:
	  {
	    this->format4_accel.init (&subtable->u.format4);
	    this->get_glyph_data = &this->format4_accel;
	    this->get_glyph_func = this->format4_accel.get_glyph_func;
954
	    this->get_all_codepoints_func = this->format4_accel.get_all_codepoints_func;
955 956 957
	  }
	  break;
	}
958
      }
959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987
    }

    inline void fini (void)
    {
      hb_blob_destroy (this->blob);
    }

    inline bool get_nominal_glyph (hb_codepoint_t  unicode,
				   hb_codepoint_t *glyph) const
    {
      return this->get_glyph_func (this->get_glyph_data, unicode, glyph);
    }

    inline bool get_variation_glyph (hb_codepoint_t  unicode,
				     hb_codepoint_t  variation_selector,
				     hb_codepoint_t *glyph) const
    {
      switch (this->uvs_table->get_glyph_variant (unicode,
						  variation_selector,
						  glyph))
      {
	case OT::GLYPH_VARIANT_NOT_FOUND:		return false;
	case OT::GLYPH_VARIANT_FOUND:		return true;
	case OT::GLYPH_VARIANT_USE_DEFAULT:	break;
      }

      return get_nominal_glyph (unicode, glyph);
    }

988 989 990 991 992
    inline void get_all_codepoints (hb_set_t *out) const
    {
      this->get_all_codepoints_func (get_glyph_data, out);
    }

993 994 995 996
    protected:
    typedef bool (*hb_cmap_get_glyph_func_t) (const void *obj,
					      hb_codepoint_t codepoint,
					      hb_codepoint_t *glyph);
997 998 999 1000 1001 1002 1003
    typedef void (*hb_cmap_get_all_codepoints_func_t) (const void *obj,
						       hb_set_t *out);

    static inline void null_get_all_codepoints_func (const void *obj, hb_set_t *out)
    {
      // NOOP
    }
1004 1005 1006 1007 1008 1009 1010 1011 1012 1013

    template <typename Type>
    static inline bool get_glyph_from (const void *obj,
				       hb_codepoint_t codepoint,
				       hb_codepoint_t *glyph)
    {
      const Type *typed_obj = (const Type *) obj;
      return typed_obj->get_glyph (codepoint, glyph);
    }

1014 1015 1016 1017 1018 1019 1020 1021
    template <typename Type>
    static inline void get_all_codepoints_from (const void *obj,
						hb_set_t *out)
    {
      const Type *typed_obj = (const Type *) obj;
      typed_obj->get_all_codepoints (out);
    }

1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035
    template <typename Type>
    static inline bool get_glyph_from_symbol (const void *obj,
					      hb_codepoint_t codepoint,
					      hb_codepoint_t *glyph)
    {
      const Type *typed_obj = (const Type *) obj;
      if (likely (typed_obj->get_glyph (codepoint, glyph)))
	return true;

      if (codepoint <= 0x00FFu)
      {
	/* For symbol-encoded OpenType fonts, we duplicate the
	 * U+F000..F0FF range at U+0000..U+00FF.  That's what
	 * Windows seems to do, and that's hinted about at:
1036
	 * https://docs.microsoft.com/en-us/typography/opentype/spec/recom
1037 1038 1039 1040 1041 1042 1043 1044 1045 1046
	 * under "Non-Standard (Symbol) Fonts". */
	return typed_obj->get_glyph (0xF000u + codepoint, glyph);
      }

      return false;
    }

    private:
    hb_cmap_get_glyph_func_t get_glyph_func;
    const void *get_glyph_data;
1047 1048
    hb_cmap_get_all_codepoints_func_t get_all_codepoints_func;

1049 1050 1051 1052 1053 1054 1055 1056
    OT::CmapSubtableFormat4::accelerator_t format4_accel;

    const OT::CmapSubtableFormat14 *uvs_table;
    hb_blob_t *blob;
  };

  protected:

1057 1058
  /* Returns the subtable registered for (platform_id, encoding_id), or
   * nullptr when there is no such encoding record or its subtable offset
   * is null. */
  inline const CmapSubtable *find_subtable (unsigned int platform_id,
					    unsigned int encoding_id) const
  {
    EncodingRecord wanted;
    wanted.platformID.set (platform_id);
    wanted.encodingID.set (encoding_id);

    /* Note: We can use bsearch, but since it has no performance
     * implications, we use lsearch and as such accept fonts with
     * unsorted subtable list. */
    const int index = encodingRecord./*bsearch*/lsearch (wanted);
    if (index == -1)
      return nullptr;

    const EncodingRecord &record = encodingRecord[index];
    if (!record.subtable)
      return nullptr;

    return &(this+record.subtable);
  }

1074
  protected:
B
Behdad Esfahbod 已提交
1075
  HBUINT16		version;	/* Table version number (0). */
1076
  SortedArrayOf<EncodingRecord>
1077
			encodingRecord;	/* Encoding tables. */
1078 1079 1080 1081 1082 1083 1084 1085 1086
  public:
  DEFINE_SIZE_ARRAY (4, encodingRecord);
};


} /* namespace OT */


#endif /* HB_OT_CMAP_TABLE_HH */