hb-set-private.hh 12.8 KB
Newer Older
B
Behdad Esfahbod 已提交
1
/*
B
Behdad Esfahbod 已提交
2
 * Copyright © 2012,2017  Google, Inc.
B
Behdad Esfahbod 已提交
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
 *
 *  This is part of HarfBuzz, a text shaping library.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and its documentation for any purpose, provided that the
 * above copyright notice and the following two paragraphs appear in
 * all copies of this software.
 *
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 *
 * Google Author(s): Behdad Esfahbod
 */

#ifndef HB_SET_PRIVATE_HH
#define HB_SET_PRIVATE_HH

#include "hb-private.hh"
#include "hb-object-private.hh"


B
Behdad Esfahbod 已提交
34 35 36
/*
 * hb_set_t
 */
B
Behdad Esfahbod 已提交
37

38
struct hb_set_t
B
Behdad Esfahbod 已提交
39
{
B
Behdad Esfahbod 已提交
40 41 42 43 44 45 46 47 48 49
  struct page_map_t
  {
    inline int cmp (const page_map_t *o) const { return (int) o->major - (int) major; }

    uint32_t major;
    uint32_t index;
  };

  struct page_t
  {
B
Behdad Esfahbod 已提交
50 51
    inline void init0 (void) { memset (&v, 0, sizeof (v)); }
    inline void init1 (void) { memset (&v, 0xff, sizeof (v)); }
B
Behdad Esfahbod 已提交
52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67

    inline unsigned int len (void) const
    { return ARRAY_LENGTH_CONST (v); }

    inline bool is_empty (void) const
    {
      for (unsigned int i = 0; i < len (); i++)
        if (v[i])
	  return false;
      return true;
    }

    inline void add (hb_codepoint_t g) { elt (g) |= mask (g); }
    inline void del (hb_codepoint_t g) { elt (g) &= ~mask (g); }
    inline bool has (hb_codepoint_t g) const { return !!(elt (g) & mask (g)); }

B
Behdad Esfahbod 已提交
68 69
    inline void add_range (hb_codepoint_t a, hb_codepoint_t b)
    {
70 71 72 73 74 75 76 77 78 79 80 81 82
     elt_t *la = &elt (a);
     elt_t *lb = &elt (b);
     if (la == lb)
       *la |= (mask (b) << 1) - mask(a);
     else
     {
       *la |= ~(mask (a) - 1);

       memset (la, 0xff, (char *) lb - (char *) la);

       *lb |= ((mask (b) << 1) - 1);

     }
B
Behdad Esfahbod 已提交
83 84
    }

B
Behdad Esfahbod 已提交
85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113
    inline bool is_equal (const page_t *other) const
    {
      return 0 == memcmp (&v, &other->v, sizeof (v));
    }

    inline unsigned int get_population (void) const
    {
      unsigned int pop = 0;
      for (unsigned int i = 0; i < len (); i++)
        pop += _hb_popcount (v[i]);
      return pop;
    }

    inline bool next (hb_codepoint_t *codepoint) const
    {
      unsigned int m = (*codepoint + 1) & MASK;
      if (!m)
      {
	*codepoint = INVALID;
	return false;
      }
      unsigned int i = m / ELT_BITS;
      unsigned int j = m & ELT_MASK;

      for (; j < ELT_BITS; j++)
        if (v[i] & (elt_t (1) << j))
	  goto found;
      for (i++; i < len (); i++)
        if (v[i])
J
Jonathan Kew 已提交
114
	  for (j = 0; j < ELT_BITS; j++)
B
Behdad Esfahbod 已提交
115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152
	    if (v[i] & (elt_t (1) << j))
	      goto found;

      *codepoint = INVALID;
      return false;

    found:
      *codepoint = i * ELT_BITS + j;
      return true;
    }
    inline hb_codepoint_t get_min (void) const
    {
      for (unsigned int i = 0; i < len (); i++)
        if (v[i])
	{
	  elt_t e = v[i];
	  for (unsigned int j = 0; j < ELT_BITS; j++)
	    if (e & (elt_t (1) << j))
	      return i * ELT_BITS + j;
	}
      return INVALID;
    }
    inline hb_codepoint_t get_max (void) const
    {
      for (int i = len () - 1; i >= 0; i--)
        if (v[i])
	{
	  elt_t e = v[i];
	  for (int j = ELT_BITS - 1; j >= 0; j--)
	    if (e & (elt_t (1) << j))
	      return i * ELT_BITS + j;
	}
      return 0;
    }

    static const unsigned int PAGE_BITS = 512; /* Use to tune. */
    static_assert ((PAGE_BITS & ((PAGE_BITS) - 1)) == 0, "");

153 154
    typedef uint64_t elt_t;

B
Behdad Esfahbod 已提交
155 156
#if 0 && HAVE_VECTOR_SIZE
    /* The vectorized version does not work with clang as non-const
B
Ouch.  
Behdad Esfahbod 已提交
157
     * elt() errs "non-const reference cannot bind to vector element". */
B
Behdad Esfahbod 已提交
158 159
    typedef elt_t vector_t __attribute__((vector_size (PAGE_BITS / 8)));
#else
160
    typedef hb_vector_size_t<elt_t, PAGE_BITS / 8> vector_t;
B
Behdad Esfahbod 已提交
161 162 163 164 165 166
#endif

    vector_t v;

    static const unsigned int ELT_BITS = sizeof (elt_t) * 8;
    static const unsigned int ELT_MASK = ELT_BITS - 1;
B
Behdad Esfahbod 已提交
167
    static const unsigned int BITS = sizeof (vector_t) * 8;
B
Behdad Esfahbod 已提交
168 169 170 171 172 173 174
    static const unsigned int MASK = BITS - 1;
    static_assert (PAGE_BITS == BITS, "");

    elt_t &elt (hb_codepoint_t g) { return v[(g & MASK) / ELT_BITS]; }
    elt_t const &elt (hb_codepoint_t g) const { return v[(g & MASK) / ELT_BITS]; }
    elt_t mask (hb_codepoint_t g) const { return elt_t (1) << (g & ELT_MASK); }
  };
B
Behdad Esfahbod 已提交
175
  static_assert (page_t::PAGE_BITS == sizeof (page_t) * 8, "");
B
Behdad Esfahbod 已提交
176

177 178
  hb_object_header_t header;
  ASSERT_POD ();
179
  bool in_error;
B
Behdad Esfahbod 已提交
180 181 182 183 184 185 186 187 188 189 190 191 192 193
  hb_prealloced_array_t<page_map_t, 8> page_map;
  hb_prealloced_array_t<page_t, 8> pages;

  inline bool resize (unsigned int count)
  {
    if (unlikely (in_error)) return false;
    if (!pages.resize (count) || !page_map.resize (count))
    {
      pages.resize (page_map.len);
      in_error = true;
      return false;
    }
    return true;
  }
194

B
Behdad Esfahbod 已提交
195
  inline void clear (void) {
196 197 198
    if (unlikely (hb_object_is_inert (this)))
      return;
    in_error = false;
B
Behdad Esfahbod 已提交
199 200
    page_map.resize (0);
    pages.resize (0);
B
Behdad Esfahbod 已提交
201
  }
B
Behdad Esfahbod 已提交
202
  inline bool is_empty (void) const {
B
Behdad Esfahbod 已提交
203 204 205
    unsigned int count = pages.len;
    for (unsigned int i = 0; i < count; i++)
      if (!pages[i].is_empty ())
B
Behdad Esfahbod 已提交
206 207 208
        return false;
    return true;
  }
B
Behdad Esfahbod 已提交
209

B
Behdad Esfahbod 已提交
210
  inline void add (hb_codepoint_t g)
B
Behdad Esfahbod 已提交
211
  {
212
    if (unlikely (in_error)) return;
213
    if (unlikely (g == INVALID)) return;
B
Behdad Esfahbod 已提交
214
    page_t *page = page_for_insert (g);
B
Behdad Esfahbod 已提交
215
    if (unlikely (!page)) return;
B
Behdad Esfahbod 已提交
216
    page->add (g);
B
Behdad Esfahbod 已提交
217
  }
218 219
  inline void add_range (hb_codepoint_t a, hb_codepoint_t b)
  {
220
    if (unlikely (in_error)) return;
B
Behdad Esfahbod 已提交
221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243
    unsigned int ma = get_major (a);
    unsigned int mb = get_major (b);
    if (ma == mb)
    {
      page_t *page = page_for_insert (a);
      if (unlikely (!page)) return;
      page->add_range (a, b);
    }
    else
    {
      page_t *page = page_for_insert (a);
      if (unlikely (!page)) return;
      page->add_range (a, major_start (ma + 1) - 1);

      for (unsigned int m = ma + 1; m < mb; m++)
      {
	page = page_for_insert (major_start (m));
	if (unlikely (!page)) return;
	page->init1 ();
      }

      page->add_range (major_start (mb), b);
    }
244
  }
B
Behdad Esfahbod 已提交
245
  inline void del (hb_codepoint_t g)
B
Behdad Esfahbod 已提交
246
  {
247
    if (unlikely (in_error)) return;
B
Behdad Esfahbod 已提交
248 249 250 251
    page_t *p = page_for (g);
    if (!p)
      return;
    p->del (g);
B
Behdad Esfahbod 已提交
252
  }
B
Behdad Esfahbod 已提交
253 254
  inline void del_range (hb_codepoint_t a, hb_codepoint_t b)
  {
255
    if (unlikely (in_error)) return;
B
Behdad Esfahbod 已提交
256 257 258
    for (unsigned int i = a; i < b + 1; i++)
      del (i);
  }
B
Behdad Esfahbod 已提交
259 260
  inline bool has (hb_codepoint_t g) const
  {
B
Behdad Esfahbod 已提交
261 262 263 264
    const page_t *p = page_for (g);
    if (!p)
      return false;
    return p->has (g);
B
Behdad Esfahbod 已提交
265 266 267 268
  }
  inline bool intersects (hb_codepoint_t first,
			  hb_codepoint_t last) const
  {
B
Behdad Esfahbod 已提交
269 270
    hb_codepoint_t c = first - 1;
    return next (&c) && c <= last;
B
Behdad Esfahbod 已提交
271
  }
B
Behdad Esfahbod 已提交
272 273 274 275 276 277 278 279 280 281 282
  inline void set (const hb_set_t *other)
  {
    if (unlikely (in_error)) return;
    unsigned int count = other->pages.len;
    if (!resize (count))
      return;

    memcpy (pages.array, other->pages.array, count * sizeof (pages.array[0]));
    memcpy (page_map.array, other->page_map.array, count * sizeof (page_map.array[0]));
  }

B
Behdad Esfahbod 已提交
283
  inline bool is_equal (const hb_set_t *other) const
B
Behdad Esfahbod 已提交
284
  {
B
Behdad Esfahbod 已提交
285 286 287 288 289 290 291 292 293 294
    unsigned int na = pages.len;
    unsigned int nb = other->pages.len;

    unsigned int a = 0, b = 0;
    for (; a < na && b < nb; )
    {
      if (page_at (a).is_empty ()) { a++; continue; }
      if (other->page_at (b).is_empty ()) { b++; continue; }
      if (page_map[a].major != other->page_map[b].major ||
	  !page_at (a).is_equal (&other->page_at (b)))
B
Behdad Esfahbod 已提交
295
        return false;
B
Behdad Esfahbod 已提交
296 297 298 299 300 301 302 303
      a++;
      b++;
    }
    for (; a < na; a++)
      if (!page_at (a).is_empty ()) { return false; }
    for (; b < nb; b++)
      if (!other->page_at (b).is_empty ()) { return false; }

B
Behdad Esfahbod 已提交
304 305
    return true;
  }
B
Behdad Esfahbod 已提交
306 307 308

  template <class Op>
  inline void process (const hb_set_t *other)
B
Behdad Esfahbod 已提交
309
  {
310
    if (unlikely (in_error)) return;
B
Behdad Esfahbod 已提交
311

B
Behdad Esfahbod 已提交
312 313
    unsigned int na = pages.len;
    unsigned int nb = other->pages.len;
B
Behdad Esfahbod 已提交
314 315

    unsigned int count = 0;
B
Behdad Esfahbod 已提交
316
    unsigned int a = 0, b = 0;
B
Behdad Esfahbod 已提交
317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346
    for (; a < na && b < nb; )
    {
      if (page_map[a].major == other->page_map[b].major)
      {
        count++;
	a++;
	b++;
      }
      else if (page_map[a].major < other->page_map[b].major)
      {
        if (Op::passthru_left)
	  count++;
        a++;
      }
      else
      {
        if (Op::passthru_right)
	  count++;
        b++;
      }
    }
    if (Op::passthru_left)
      count += na - a;
    if (Op::passthru_right)
      count += nb - b;

    if (!resize (count))
      return;

    /* Process in-place backward. */
B
Behdad Esfahbod 已提交
347 348 349
    a = na;
    b = nb;
    for (; a && b; )
B
Behdad Esfahbod 已提交
350
    {
351
      if (page_map[a - 1].major == other->page_map[b - 1].major)
B
Behdad Esfahbod 已提交
352 353 354
      {
	a--;
	b--;
B
Behdad Esfahbod 已提交
355
        Op::process (page_at (--count).v, page_at (a).v, other->page_at (b).v);
B
Behdad Esfahbod 已提交
356
      }
357
      else if (page_map[a - 1].major > other->page_map[b - 1].major)
B
Behdad Esfahbod 已提交
358
      {
B
Behdad Esfahbod 已提交
359
        a--;
B
Behdad Esfahbod 已提交
360 361 362 363 364
        if (Op::passthru_left)
	  page_at (--count).v = page_at (a).v;
      }
      else
      {
B
Behdad Esfahbod 已提交
365
        b--;
B
Behdad Esfahbod 已提交
366 367 368 369
        if (Op::passthru_right)
	  page_at (--count).v = other->page_at (b).v;
      }
    }
B
Behdad Esfahbod 已提交
370
    if (Op::passthru_left)
B
Behdad Esfahbod 已提交
371 372
      while (a)
	page_at (--count).v = page_at (--a).v;
B
Behdad Esfahbod 已提交
373
    if (Op::passthru_right)
B
Behdad Esfahbod 已提交
374 375
      while (b)
	page_at (--count).v = other->page_at (--b).v;
B
Behdad Esfahbod 已提交
376
    assert (!count);
B
Behdad Esfahbod 已提交
377
  }
B
Behdad Esfahbod 已提交
378

B
Behdad Esfahbod 已提交
379 380
  inline void union_ (const hb_set_t *other)
  {
381
    process<HbOpOr> (other);
B
Behdad Esfahbod 已提交
382 383 384
  }
  inline void intersect (const hb_set_t *other)
  {
385
    process<HbOpAnd> (other);
B
Behdad Esfahbod 已提交
386 387 388
  }
  inline void subtract (const hb_set_t *other)
  {
389
    process<HbOpMinus> (other);
B
Behdad Esfahbod 已提交
390
  }
B
Behdad Esfahbod 已提交
391 392
  inline void symmetric_difference (const hb_set_t *other)
  {
393
    process<HbOpXor> (other);
394
  }
B
Behdad Esfahbod 已提交
395
  inline bool next (hb_codepoint_t *codepoint) const
B
Behdad Esfahbod 已提交
396
  {
397
    if (unlikely (*codepoint == INVALID)) {
B
Behdad Esfahbod 已提交
398 399 400 401
      *codepoint = get_min ();
      return *codepoint != INVALID;
    }

B
Behdad Esfahbod 已提交
402
    page_map_t map = {get_major (*codepoint), 0};
B
Behdad Esfahbod 已提交
403 404 405 406 407 408
    unsigned int i;
    page_map.bfind (&map, &i);
    if (i < page_map.len)
    {
      if (pages[page_map[i].index].next (codepoint))
      {
B
Behdad Esfahbod 已提交
409
	*codepoint += page_map[i].major * page_t::PAGE_BITS;
B
Behdad Esfahbod 已提交
410
        return true;
411
      }
B
Behdad Esfahbod 已提交
412
      i++;
B
Behdad Esfahbod 已提交
413
    }
B
Behdad Esfahbod 已提交
414 415 416 417 418
    for (; i < page_map.len; i++)
    {
      hb_codepoint_t m = pages[page_map[i].index].get_min ();
      if (m != INVALID)
      {
B
Behdad Esfahbod 已提交
419
	*codepoint = page_map[i].major * page_t::PAGE_BITS + m;
B
Behdad Esfahbod 已提交
420 421
	return true;
      }
B
Behdad Esfahbod 已提交
422
    }
423
    *codepoint = INVALID;
B
Behdad Esfahbod 已提交
424 425
    return false;
  }
B
Behdad Esfahbod 已提交
426 427 428 429 430 431
  inline bool next_range (hb_codepoint_t *first, hb_codepoint_t *last) const
  {
    hb_codepoint_t i;

    i = *last;
    if (!next (&i))
432 433
    {
      *last = *first = INVALID;
B
Behdad Esfahbod 已提交
434
      return false;
435
    }
B
Behdad Esfahbod 已提交
436 437 438 439 440 441 442 443 444 445

    *last = *first = i;
    while (next (&i) && i == *last + 1)
      (*last)++;

    return true;
  }

  inline unsigned int get_population (void) const
  {
B
Behdad Esfahbod 已提交
446 447 448 449 450
    unsigned int pop = 0;
    unsigned int count = pages.len;
    for (unsigned int i = 0; i < count; i++)
      pop += pages[i].get_population ();
    return pop;
B
Behdad Esfahbod 已提交
451
  }
452
  inline hb_codepoint_t get_min (void) const
B
Behdad Esfahbod 已提交
453
  {
B
Behdad Esfahbod 已提交
454 455 456
    unsigned int count = pages.len;
    for (unsigned int i = 0; i < count; i++)
      if (!page_at (i).is_empty ())
B
Behdad Esfahbod 已提交
457
        return page_map[i].major * page_t::PAGE_BITS + page_at (i).get_min ();
458
    return INVALID;
B
Behdad Esfahbod 已提交
459
  }
460
  inline hb_codepoint_t get_max (void) const
B
Behdad Esfahbod 已提交
461
  {
B
Behdad Esfahbod 已提交
462 463 464
    unsigned int count = pages.len;
    for (int i = count - 1; i >= 0; i++)
      if (!page_at (i).is_empty ())
B
Behdad Esfahbod 已提交
465
        return page_map[i].major * page_t::PAGE_BITS + page_at (i).get_max ();
466
    return INVALID;
B
Behdad Esfahbod 已提交
467
  }
B
Behdad Esfahbod 已提交
468

469
  static  const hb_codepoint_t INVALID = HB_SET_VALUE_INVALID;
B
Behdad Esfahbod 已提交
470

B
Behdad Esfahbod 已提交
471
  inline page_t *page_for_insert (hb_codepoint_t g)
B
Behdad Esfahbod 已提交
472
  {
B
Behdad Esfahbod 已提交
473
    page_map_t map = {get_major (g), pages.len};
B
Behdad Esfahbod 已提交
474 475 476 477 478
    unsigned int i;
    if (!page_map.bfind (&map, &i))
    {
      if (!resize (pages.len + 1))
	return nullptr;
B
Behdad Esfahbod 已提交
479

B
Behdad Esfahbod 已提交
480
      pages[map.index].init0 ();
B
Behdad Esfahbod 已提交
481 482 483 484 485
      memmove (&page_map[i + 1], &page_map[i], (page_map.len - 1 - i) * sizeof (page_map[0]));
      page_map[i] = map;
    }
    return &pages[page_map[i].index];
  }
B
Behdad Esfahbod 已提交
486
  inline page_t *page_for (hb_codepoint_t g)
B
Behdad Esfahbod 已提交
487
  {
B
Behdad Esfahbod 已提交
488
    page_map_t key = {get_major (g)};
B
Behdad Esfahbod 已提交
489 490 491 492 493
    const page_map_t *found = page_map.bsearch (&key);
    if (found)
      return &pages[found->index];
    return nullptr;
  }
B
Behdad Esfahbod 已提交
494
  inline const page_t *page_for (hb_codepoint_t g) const
B
Behdad Esfahbod 已提交
495
  {
B
Behdad Esfahbod 已提交
496
    page_map_t key = {get_major (g)};
B
Behdad Esfahbod 已提交
497 498 499 500 501
    const page_map_t *found = page_map.bsearch (&key);
    if (found)
      return &pages[found->index];
    return nullptr;
  }
B
Behdad Esfahbod 已提交
502 503 504 505
  inline page_t &page_at (unsigned int i) { return pages[page_map[i].index]; }
  inline const page_t &page_at (unsigned int i) const { return pages[page_map[i].index]; }
  inline unsigned int get_major (hb_codepoint_t g) const { return g / page_t::PAGE_BITS; }
  inline hb_codepoint_t major_start (unsigned int major) const { return major * page_t::PAGE_BITS; }
B
Behdad Esfahbod 已提交
506 507 508 509
};


#endif /* HB_SET_PRIVATE_HH */