hb-set-private.hh 10.1 KB
Newer Older
B
Behdad Esfahbod 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
/*
 * Copyright © 2012  Google, Inc.
 *
 *  This is part of HarfBuzz, a text shaping library.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and its documentation for any purpose, provided that the
 * above copyright notice and the following two paragraphs appear in
 * all copies of this software.
 *
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 *
 * Google Author(s): Behdad Esfahbod
 */

#ifndef HB_SET_PRIVATE_HH
#define HB_SET_PRIVATE_HH

#include "hb-private.hh"
#include "hb-object-private.hh"


B
Behdad Esfahbod 已提交
34 35 36 37 38
/*
 * The set digests here implement various "filters" that support
 * "approximate member query".  Conceptually these are like Bloom
 * Filter and Quotient Filter, however, much smaller, faster, and
 * designed to fit the requirements of our uses for glyph coverage
 * queries.
 *
 * Our filters are highly accurate if the lookup covers a fairly local
 * set of glyphs, but fully flooded and ineffective if coverage is
 * all over the place.
 *
 * The frozen-set can be used instead of a digest, to trade more
 * memory for 100% accuracy, but in practice, that doesn't look like
 * an attractive trade-off.
 */

50
template <typename mask_t, unsigned int shift>
B
Behdad Esfahbod 已提交
51 52 53 54
struct hb_set_digest_lowest_bits_t
{
  ASSERT_POD ();

B
Minor  
Behdad Esfahbod 已提交
55 56
  static const unsigned int mask_bytes = sizeof (mask_t);
  static const unsigned int mask_bits = sizeof (mask_t) * 8;
57
  static const unsigned int num_bits = 0
B
Minor  
Behdad Esfahbod 已提交
58 59 60 61 62
				     + (mask_bytes >= 1 ? 3 : 0)
				     + (mask_bytes >= 2 ? 1 : 0)
				     + (mask_bytes >= 4 ? 1 : 0)
				     + (mask_bytes >= 8 ? 1 : 0)
				     + (mask_bytes >= 16? 1 : 0)
63 64
				     + 0;

65 66
  static_assert ((shift < sizeof (hb_codepoint_t) * 8), "");
  static_assert ((shift + num_bits <= sizeof (hb_codepoint_t) * 8), "");
B
Behdad Esfahbod 已提交
67 68 69 70 71 72 73 74 75 76

  inline void init (void) {
    mask = 0;
  }

  inline void add (hb_codepoint_t g) {
    mask |= mask_for (g);
  }

  inline void add_range (hb_codepoint_t a, hb_codepoint_t b) {
B
Minor  
Behdad Esfahbod 已提交
77
    if ((b >> shift) - (a >> shift) >= mask_bits - 1)
78 79 80 81 82 83
      mask = (mask_t) -1;
    else {
      mask_t ma = mask_for (a);
      mask_t mb = mask_for (b);
      mask |= mb + (mb - ma) - (mb < ma);
    }
B
Behdad Esfahbod 已提交
84 85 86 87 88 89 90 91
  }

  inline bool may_have (hb_codepoint_t g) const {
    return !!(mask & mask_for (g));
  }

  private:

B
Behdad Esfahbod 已提交
92
  static inline mask_t mask_for (hb_codepoint_t g) {
B
Minor  
Behdad Esfahbod 已提交
93
    return ((mask_t) 1) << ((g >> shift) & (mask_bits - 1));
94
  }
B
Behdad Esfahbod 已提交
95 96 97
  mask_t mask;
};

B
Behdad Esfahbod 已提交
98 99
template <typename head_t, typename tail_t>
struct hb_set_digest_combiner_t
B
Behdad Esfahbod 已提交
100 101 102 103
{
  ASSERT_POD ();

  inline void init (void) {
B
Behdad Esfahbod 已提交
104 105
    head.init ();
    tail.init ();
B
Behdad Esfahbod 已提交
106 107 108
  }

  inline void add (hb_codepoint_t g) {
B
Behdad Esfahbod 已提交
109 110
    head.add (g);
    tail.add (g);
B
Behdad Esfahbod 已提交
111 112 113
  }

  inline void add_range (hb_codepoint_t a, hb_codepoint_t b) {
B
Behdad Esfahbod 已提交
114 115
    head.add_range (a, b);
    tail.add_range (a, b);
B
Behdad Esfahbod 已提交
116 117 118
  }

  inline bool may_have (hb_codepoint_t g) const {
B
Behdad Esfahbod 已提交
119
    return head.may_have (g) && tail.may_have (g);
B
Behdad Esfahbod 已提交
120 121 122
  }

  private:
B
Behdad Esfahbod 已提交
123 124
  head_t head;
  tail_t tail;
B
Behdad Esfahbod 已提交
125 126
};

B
Behdad Esfahbod 已提交
127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149

/*
 * hb_set_digest_t
 *
 * The combination of digests that performs "best": three
 * lowest-bits filters over unsigned long masks, keyed on the
 * codepoint bits starting at positions 4, 0 and 9 respectively.
 * There is not much science to this choice; it is the result of
 * intuition and testing.
 */
typedef hb_set_digest_combiner_t<
          hb_set_digest_lowest_bits_t<unsigned long, 4>,
          hb_set_digest_combiner_t<
            hb_set_digest_lowest_bits_t<unsigned long, 0>,
            hb_set_digest_lowest_bits_t<unsigned long, 9>
          >
        > hb_set_digest_t;



/*
 * hb_set_t
 */
B
Behdad Esfahbod 已提交
150

B
Behdad Esfahbod 已提交
151

B
Behdad Esfahbod 已提交
152
/* TODO Make this faster and more memory efficient. */
B
Behdad Esfahbod 已提交
153

154
struct hb_set_t
B
Behdad Esfahbod 已提交
155
{
B
Behdad Esfahbod 已提交
156 157
  friend struct hb_frozen_set_t;

158 159
  hb_object_header_t header;
  ASSERT_POD ();
160
  bool in_error;
161

162
  inline void init (void) {
163
    hb_object_init (this);
164 165
    clear ();
  }
B
Minor  
Behdad Esfahbod 已提交
166 167
  inline void fini (void) {
  }
B
Behdad Esfahbod 已提交
168
  inline void clear (void) {
169 170 171
    if (unlikely (hb_object_is_inert (this)))
      return;
    in_error = false;
B
Behdad Esfahbod 已提交
172 173
    memset (elts, 0, sizeof elts);
  }
B
Behdad Esfahbod 已提交
174
  inline bool is_empty (void) const {
B
Behdad Esfahbod 已提交
175 176 177 178 179
    for (unsigned int i = 0; i < ARRAY_LENGTH (elts); i++)
      if (elts[i])
        return false;
    return true;
  }
B
Behdad Esfahbod 已提交
180
  inline void add (hb_codepoint_t g)
B
Behdad Esfahbod 已提交
181
  {
182
    if (unlikely (in_error)) return;
183
    if (unlikely (g == INVALID)) return;
B
Behdad Esfahbod 已提交
184 185
    if (unlikely (g > MAX_G)) return;
    elt (g) |= mask (g);
B
Behdad Esfahbod 已提交
186
  }
187 188
  inline void add_range (hb_codepoint_t a, hb_codepoint_t b)
  {
189
    if (unlikely (in_error)) return;
B
Behdad Esfahbod 已提交
190
    /* TODO Speedup */
191 192 193
    for (unsigned int i = a; i < b + 1; i++)
      add (i);
  }
B
Behdad Esfahbod 已提交
194
  inline void del (hb_codepoint_t g)
B
Behdad Esfahbod 已提交
195
  {
196
    if (unlikely (in_error)) return;
B
Behdad Esfahbod 已提交
197 198
    if (unlikely (g > MAX_G)) return;
    elt (g) &= ~mask (g);
B
Behdad Esfahbod 已提交
199
  }
B
Behdad Esfahbod 已提交
200 201
  inline void del_range (hb_codepoint_t a, hb_codepoint_t b)
  {
202
    if (unlikely (in_error)) return;
B
Behdad Esfahbod 已提交
203 204 205 206
    /* TODO Speedup */
    for (unsigned int i = a; i < b + 1; i++)
      del (i);
  }
B
Behdad Esfahbod 已提交
207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222
  inline bool has (hb_codepoint_t g) const
  {
    if (unlikely (g > MAX_G)) return false;
    return !!(elt (g) & mask (g));
  }
  inline bool intersects (hb_codepoint_t first,
			  hb_codepoint_t last) const
  {
    if (unlikely (first > MAX_G)) return false;
    if (unlikely (last  > MAX_G)) last = MAX_G;
    unsigned int end = last + 1;
    for (hb_codepoint_t i = first; i < end; i++)
      if (has (i))
        return true;
    return false;
  }
B
Behdad Esfahbod 已提交
223
  inline bool is_equal (const hb_set_t *other) const
B
Behdad Esfahbod 已提交
224 225 226 227 228 229 230 231
  {
    for (unsigned int i = 0; i < ELTS; i++)
      if (elts[i] != other->elts[i])
        return false;
    return true;
  }
  inline void set (const hb_set_t *other)
  {
232
    if (unlikely (in_error)) return;
B
Behdad Esfahbod 已提交
233 234 235 236 237
    for (unsigned int i = 0; i < ELTS; i++)
      elts[i] = other->elts[i];
  }
  inline void union_ (const hb_set_t *other)
  {
238
    if (unlikely (in_error)) return;
B
Behdad Esfahbod 已提交
239 240 241 242 243
    for (unsigned int i = 0; i < ELTS; i++)
      elts[i] |= other->elts[i];
  }
  inline void intersect (const hb_set_t *other)
  {
244
    if (unlikely (in_error)) return;
B
Behdad Esfahbod 已提交
245 246 247 248 249
    for (unsigned int i = 0; i < ELTS; i++)
      elts[i] &= other->elts[i];
  }
  inline void subtract (const hb_set_t *other)
  {
250
    if (unlikely (in_error)) return;
B
Behdad Esfahbod 已提交
251 252 253
    for (unsigned int i = 0; i < ELTS; i++)
      elts[i] &= ~other->elts[i];
  }
B
Behdad Esfahbod 已提交
254 255
  inline void symmetric_difference (const hb_set_t *other)
  {
256
    if (unlikely (in_error)) return;
B
Behdad Esfahbod 已提交
257 258 259
    for (unsigned int i = 0; i < ELTS; i++)
      elts[i] ^= other->elts[i];
  }
260 261
  inline void invert (void)
  {
262
    if (unlikely (in_error)) return;
263 264 265
    for (unsigned int i = 0; i < ELTS; i++)
      elts[i] = ~elts[i];
  }
B
Behdad Esfahbod 已提交
266
  inline bool next (hb_codepoint_t *codepoint) const
B
Behdad Esfahbod 已提交
267
  {
268
    if (unlikely (*codepoint == INVALID)) {
B
Behdad Esfahbod 已提交
269
      hb_codepoint_t i = get_min ();
270
      if (i != INVALID) {
B
Behdad Esfahbod 已提交
271 272
        *codepoint = i;
	return true;
273 274
      } else {
	*codepoint = INVALID;
B
Behdad Esfahbod 已提交
275
        return false;
276
      }
B
Behdad Esfahbod 已提交
277 278 279 280 281 282
    }
    for (hb_codepoint_t i = *codepoint + 1; i < MAX_G + 1; i++)
      if (has (i)) {
        *codepoint = i;
	return true;
      }
283
    *codepoint = INVALID;
B
Behdad Esfahbod 已提交
284 285
    return false;
  }
B
Behdad Esfahbod 已提交
286 287 288 289 290 291
  inline bool next_range (hb_codepoint_t *first, hb_codepoint_t *last) const
  {
    hb_codepoint_t i;

    i = *last;
    if (!next (&i))
292 293
    {
      *last = *first = INVALID;
B
Behdad Esfahbod 已提交
294
      return false;
295
    }
B
Behdad Esfahbod 已提交
296 297 298 299 300 301 302 303 304 305 306 307 308 309 310

    *last = *first = i;
    while (next (&i) && i == *last + 1)
      (*last)++;

    return true;
  }

  inline unsigned int get_population (void) const
  {
    unsigned int count = 0;
    for (unsigned int i = 0; i < ELTS; i++)
      count += _hb_popcount32 (elts[i]);
    return count;
  }
311
  inline hb_codepoint_t get_min (void) const
B
Behdad Esfahbod 已提交
312 313 314
  {
    for (unsigned int i = 0; i < ELTS; i++)
      if (elts[i])
315
	for (unsigned int j = 0; j < BITS; j++)
B
Behdad Esfahbod 已提交
316
	  if (elts[i] & (1u << j))
B
Behdad Esfahbod 已提交
317
	    return i * BITS + j;
318
    return INVALID;
B
Behdad Esfahbod 已提交
319
  }
320
  inline hb_codepoint_t get_max (void) const
B
Behdad Esfahbod 已提交
321 322 323 324
  {
    for (unsigned int i = ELTS; i; i--)
      if (elts[i - 1])
	for (unsigned int j = BITS; j; j--)
B
Behdad Esfahbod 已提交
325
	  if (elts[i - 1] & (1u << (j - 1)))
B
Behdad Esfahbod 已提交
326
	    return (i - 1) * BITS + (j - 1);
327
    return INVALID;
B
Behdad Esfahbod 已提交
328
  }
B
Behdad Esfahbod 已提交
329 330

  typedef uint32_t elt_t;
B
Behdad Esfahbod 已提交
331
  static const unsigned int MAX_G = 65536 - 1; /* XXX Fix this... */
B
Behdad Esfahbod 已提交
332 333 334
  static const unsigned int SHIFT = 5;
  static const unsigned int BITS = (1 << SHIFT);
  static const unsigned int MASK = BITS - 1;
B
Behdad Esfahbod 已提交
335
  static const unsigned int ELTS = (MAX_G + 1 + (BITS - 1)) / BITS;
336
  static  const hb_codepoint_t INVALID = HB_SET_VALUE_INVALID;
B
Behdad Esfahbod 已提交
337 338

  elt_t &elt (hb_codepoint_t g) { return elts[g >> SHIFT]; }
B
Behdad Esfahbod 已提交
339
  elt_t const &elt (hb_codepoint_t g) const { return elts[g >> SHIFT]; }
B
Behdad Esfahbod 已提交
340 341
  elt_t mask (hb_codepoint_t g) const { return elt_t (1) << (g & MASK); }

B
Minor  
Behdad Esfahbod 已提交
342
  elt_t elts[ELTS]; /* XXX 8kb */
B
Behdad Esfahbod 已提交
343

344 345
  static_assert ((sizeof (elt_t) * 8 == BITS), "");
  static_assert ((sizeof (elt_t) * 8 * ELTS > MAX_G), "");
B
Behdad Esfahbod 已提交
346 347
};

B
Behdad Esfahbod 已提交
348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399
/*
 * A read-only snapshot of an hb_set_t.  Only the bitmap words between
 * the word holding the set's minimum member and the word holding its
 * maximum member are copied, into a heap buffer owned by this object.
 * Release it with fini().
 */
struct hb_frozen_set_t
{
  static const unsigned int SHIFT = hb_set_t::SHIFT;
  static const unsigned int BITS = hb_set_t::BITS;
  static const unsigned int MASK = hb_set_t::MASK;
  typedef hb_set_t::elt_t elt_t;

  /* Takes a snapshot of set.  If set is empty or the allocation
   * fails, the snapshot is empty (count == 0, elts == NULL) and has()
   * returns false for every codepoint. */
  inline void init (const hb_set_t &set)
  {
    start = count = 0;
    elts = NULL;

    unsigned int max = set.get_max ();
    if (max == set.INVALID)
      return;
    unsigned int min = set.get_min ();
    const elt_t &min_elt = set.elt (min);

    /* Align the start down to a word boundary so that the copied
     * words can be indexed with the same SHIFT/MASK arithmetic. */
    start = min & ~MASK;
    count = max - start + 1;
    unsigned int num_elts = (count + BITS - 1) / BITS;
    unsigned int elts_size = num_elts * sizeof (elt_t);
    elts = (elt_t *) malloc (elts_size);
    if (unlikely (!elts))
    {
      start = count = 0;
      return;
    }
    memcpy (elts, &min_elt, elts_size);
  }

  /* Releases the snapshot buffer and leaves the object empty. */
  inline void fini (void)
  {
    /* free (NULL) is a no-op, so no guard is needed. */
    free (elts);
    elts = NULL;
    start = count = 0;
  }

  /* Returns true if g was a member of the set at init() time. */
  inline bool has (hb_codepoint_t g) const
  {
    /* hb_codepoint_t is unsigned, so codepoints below start wrap to
     * huge values and are rejected by the same comparison. */
    g -= start;
    /* Valid offsets are [0, count - 1].  Using >= (not >) keeps us
     * from reading one bit past the snapshot, and from dereferencing
     * a NULL elts when the snapshot is empty (count == 0). */
    if (unlikely (g >= count)) return false;
    return !!(elt (g) & mask (g));
  }

  /* Word of the snapshot that holds offset g (already relative to start). */
  elt_t const &elt (hb_codepoint_t g) const { return elts[g >> SHIFT]; }
  /* Bit of that word that stands for offset g. */
  elt_t mask (hb_codepoint_t g) const { return elt_t (1) << (g & MASK); }

  private:
  hb_codepoint_t start, count;
  elt_t *elts;
};
B
Behdad Esfahbod 已提交
400 401 402


#endif /* HB_SET_PRIVATE_HH */