From f9edf1672511c3bcc3796cc79578ffea43b2bda1 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Thu, 15 Nov 2012 12:14:09 -0800 Subject: [PATCH] Add buffer serialization / deserialization API Two output formats for now: TEXT, and JSON. For example: hb-shape --output-format=json Deserialization API is added, but not implemented yet. --- src/hb-buffer.cc | 222 +++++++++++++++++++++++++++++++++++++++++++++++ src/hb-buffer.h | 42 +++++++++ util/hb-shape.cc | 21 ++++- util/options.cc | 57 ++++-------- util/options.hh | 7 +- 5 files changed, 304 insertions(+), 45 deletions(-) diff --git a/src/hb-buffer.cc b/src/hb-buffer.cc index 7a8cb159..a599153f 100644 --- a/src/hb-buffer.cc +++ b/src/hb-buffer.cc @@ -1023,3 +1023,225 @@ hb_buffer_normalize_glyphs (hb_buffer_t *buffer) } normalize_glyphs_cluster (buffer, start, end, backward); } + + +/* + * Serialize + */ + +static const char *serialize_formats[] = { + "TEXT", + "JSON", + NULL +}; + +hb_buffer_serialize_format_t +hb_buffer_serialize_format_from_string (const char *str, int len) +{ + /* Upper-case it. 
*/ + return (hb_buffer_serialize_format_t) (hb_tag_from_string (str, len) & ~0x20202020); +} + +const char * +hb_buffer_serialize_format_to_string (hb_buffer_serialize_format_t format) +{ + switch (format) + { + case HB_BUFFER_SERIALIZE_FORMAT_TEXT: return serialize_formats[0]; + case HB_BUFFER_SERIALIZE_FORMAT_JSON: return serialize_formats[1]; + default: + case HB_BUFFER_SERIALIZE_FORMAT_INVALID: return NULL; + } +} + +/* Serializes glyphs [start, end) of buffer into buf as comma-separated JSON objects. Stops at the first glyph whose text does not fit in the remaining buf_size; returns the number of glyphs written and stores the number of bytes written in *buf_consumed. */ +static unsigned int +_hb_buffer_serialize_glyphs_json (hb_buffer_t *buffer, + unsigned int start, + unsigned int end, + char *buf, + unsigned int buf_size, + unsigned int *buf_consumed, + hb_font_t *font, + hb_buffer_serialize_flags_t flags) +{ + hb_glyph_info_t *info = hb_buffer_get_glyph_infos (buffer, NULL); + hb_glyph_position_t *pos = hb_buffer_get_glyph_positions (buffer, NULL); + + *buf_consumed = 0; + for (unsigned int i = start; i < end; i++) + { + char b[1024]; + char *p = b; + + /* In the following code, we know b is large enough that no overflow can happen. */ + +#define APPEND(s) HB_STMT_START { strcpy (p, s); p += strlen (s); } HB_STMT_END + + if (i) + *p++ = ','; + + *p++ = '{'; + + APPEND ("\"g\":"); + if (!(flags & HB_BUFFER_SERIALIZE_FLAG_NO_GLYPH_NAMES)) + { + char g[128]; + hb_font_glyph_to_string (font, info[i].codepoint, g, sizeof (g)); + *p++ = '"'; + for (char *q = g; *q; q++) { + /* Escape both the quote and the backslash: an unescaped backslash in the glyph name would start a bogus escape sequence in the JSON string. */ + if (*q == '"' || *q == '\\') + *p++ = '\\'; + *p++ = *q; + } + *p++ = '"'; + } + else + p += snprintf (p, ARRAY_LENGTH (b) - (p - b), "%u", info[i].codepoint); + + if (!(flags & HB_BUFFER_SERIALIZE_FLAG_NO_CLUSTERS)) { + p += snprintf (p, ARRAY_LENGTH (b) - (p - b), ",\"cl\":%u", info[i].cluster); + } + + if (!(flags & HB_BUFFER_SERIALIZE_FLAG_NO_POSITIONS)) + { + p += snprintf (p, ARRAY_LENGTH (b) - (p - b), ",\"dx\":%d,\"dy\":%d", + pos[i].x_offset, pos[i].y_offset); + p += snprintf (p, ARRAY_LENGTH (b) - (p - b), ",\"ax\":%d,\"ay\":%d", + pos[i].x_advance, pos[i].y_advance); + } + + *p++ = '}'; + + if (buf_size > (p - b)) + { + unsigned int l = p - b; + 
memcpy (buf, b, l); + buf += l; + buf_size -= l; + *buf_consumed += l; + *buf = '\0'; + } else + return i - start; + } + + return end - start; +} + +/* Serializes glyphs [start, end) of buffer into buf in the '|'-separated text form. Stops at the first glyph whose text does not fit in the remaining buf_size; returns the number of glyphs written and stores the number of bytes written in *buf_consumed. */ +static unsigned int +_hb_buffer_serialize_glyphs_text (hb_buffer_t *buffer, + unsigned int start, + unsigned int end, + char *buf, + unsigned int buf_size, + unsigned int *buf_consumed, + hb_font_t *font, + hb_buffer_serialize_flags_t flags) +{ + hb_glyph_info_t *info = hb_buffer_get_glyph_infos (buffer, NULL); + hb_glyph_position_t *pos = hb_buffer_get_glyph_positions (buffer, NULL); + hb_direction_t direction = hb_buffer_get_direction (buffer); + + *buf_consumed = 0; + for (unsigned int i = start; i < end; i++) + { + char b[1024]; + char *p = b; + + /* In the following code, we know b is large enough that no overflow can happen. */ + + if (i) + *p++ = '|'; + + if (!(flags & HB_BUFFER_SERIALIZE_FLAG_NO_GLYPH_NAMES)) + { + hb_font_glyph_to_string (font, info[i].codepoint, p, 128); + p += strlen (p); + } + else + p += snprintf (p, ARRAY_LENGTH (b) - (p - b), "%u", info[i].codepoint); + + if (!(flags & HB_BUFFER_SERIALIZE_FLAG_NO_CLUSTERS)) { + p += snprintf (p, ARRAY_LENGTH (b) - (p - b), "=%u", info[i].cluster); + } + + if (!(flags & HB_BUFFER_SERIALIZE_FLAG_NO_POSITIONS)) + { + if (pos[i].x_offset || pos[i].y_offset) + p += snprintf (p, ARRAY_LENGTH (b) - (p - b), "@%d,%d", pos[i].x_offset, pos[i].y_offset); + + *p++ = '+'; + if (HB_DIRECTION_IS_HORIZONTAL (direction) || pos[i].x_advance) + p += snprintf (p, ARRAY_LENGTH (b) - (p - b), "%d", pos[i].x_advance); + /* Test this glyph's own vertical advance (pos[i]), not that of the first glyph in the buffer. */ + if (HB_DIRECTION_IS_VERTICAL (direction) || pos[i].y_advance) + p += snprintf (p, ARRAY_LENGTH (b) - (p - b), ",%d", pos[i].y_advance); + } + + if (buf_size > (p - b)) + { + unsigned int l = p - b; + memcpy (buf, b, l); + buf += l; + buf_size -= l; + *buf_consumed += l; + *buf = '\0'; + } else + return i - start; + } + + return end - start; +} + +/* Returns number of items, starting at start, that were serialized. 
*/ +unsigned int +hb_buffer_serialize_glyphs (hb_buffer_t *buffer, + unsigned int start, + unsigned int end, + char *buf, + unsigned int buf_size, + unsigned int *buf_consumed, + hb_font_t *font, /* May be NULL */ + hb_buffer_serialize_format_t format, + hb_buffer_serialize_flags_t flags) +{ + assert (start <= end && end <= buffer->len); + + *buf_consumed = 0; + + assert ((!buffer->len && buffer->content_type == HB_BUFFER_CONTENT_TYPE_INVALID) || + buffer->content_type == HB_BUFFER_CONTENT_TYPE_GLYPHS); + + if (unlikely (start == end)) + return 0; + + if (!font) + font = hb_font_get_empty (); + + switch (format) + { + case HB_BUFFER_SERIALIZE_FORMAT_TEXT: + return _hb_buffer_serialize_glyphs_text (buffer, start, end, + buf, buf_size, buf_consumed, + font, flags); + + case HB_BUFFER_SERIALIZE_FORMAT_JSON: + return _hb_buffer_serialize_glyphs_json (buffer, start, end, + buf, buf_size, buf_consumed, + font, flags); + + default: + case HB_BUFFER_SERIALIZE_FORMAT_INVALID: + return 0; + + } +} + +hb_bool_t +hb_buffer_deserialize_glyphs (hb_buffer_t *buffer, + const char *buf, + unsigned int buf_len, + unsigned int *buf_consumed, + hb_font_t *font, /* May be NULL */ + hb_buffer_serialize_format_t format) +{ + return false; +} diff --git a/src/hb-buffer.h b/src/hb-buffer.h index 8e1690d3..ba9cd1d8 100644 --- a/src/hb-buffer.h +++ b/src/hb-buffer.h @@ -36,6 +36,7 @@ #include "hb-common.h" #include "hb-unicode.h" +#include "hb-font.h" HB_BEGIN_DECLS @@ -238,6 +239,47 @@ hb_buffer_normalize_glyphs (hb_buffer_t *buffer); */ +typedef enum { + HB_BUFFER_SERIALIZE_FLAGS_DEFAULT = 0x00000000, + HB_BUFFER_SERIALIZE_FLAG_NO_CLUSTERS = 0x00000001, + HB_BUFFER_SERIALIZE_FLAG_NO_POSITIONS = 0x00000002, + HB_BUFFER_SERIALIZE_FLAG_NO_GLYPH_NAMES = 0x00000004 +} hb_buffer_serialize_flags_t; + +typedef enum { + HB_BUFFER_SERIALIZE_FORMAT_TEXT = HB_TAG('T','E','X','T'), + HB_BUFFER_SERIALIZE_FORMAT_JSON = HB_TAG('J','S','O','N'), + HB_BUFFER_SERIALIZE_FORMAT_INVALID = HB_TAG_NONE +} 
hb_buffer_serialize_format_t; + +/* len=-1 means str is NUL-terminated. */ +hb_buffer_serialize_format_t +hb_buffer_serialize_format_from_string (const char *str, int len); + +const char * +hb_buffer_serialize_format_to_string (hb_buffer_serialize_format_t format); + +/* Returns number of items, starting at start, that were serialized. */ +unsigned int +hb_buffer_serialize_glyphs (hb_buffer_t *buffer, + unsigned int start, + unsigned int end, + char *buf, + unsigned int buf_size, + unsigned int *buf_consumed, + hb_font_t *font, /* May be NULL */ + hb_buffer_serialize_format_t format, + hb_buffer_serialize_flags_t flags); + +hb_bool_t +hb_buffer_deserialize_glyphs (hb_buffer_t *buffer, + const char *buf, + unsigned int buf_len, + unsigned int *buf_consumed, + hb_font_t *font, /* May be NULL */ + hb_buffer_serialize_format_t format); + + HB_END_DECLS #endif /* HB_BUFFER_H */ diff --git a/util/hb-shape.cc b/util/hb-shape.cc index b23519bd..ef926dae 100644 --- a/util/hb-shape.cc +++ b/util/hb-shape.cc @@ -40,6 +40,20 @@ struct output_buffer_t gs = g_string_new (NULL); line_no = 0; font = hb_font_reference (font_opts->get_font ()); + + if (!options.output_format) + output_format = HB_BUFFER_SERIALIZE_FORMAT_TEXT; + else + output_format = hb_buffer_serialize_format_from_string (options.output_format, -1); + + unsigned int flags = HB_BUFFER_SERIALIZE_FLAGS_DEFAULT; + if (!format.show_glyph_names) + flags |= HB_BUFFER_SERIALIZE_FLAG_NO_GLYPH_NAMES; + if (!format.show_clusters) + flags |= HB_BUFFER_SERIALIZE_FLAG_NO_CLUSTERS; + if (!format.show_positions) + flags |= HB_BUFFER_SERIALIZE_FLAG_NO_POSITIONS; + format_flags = (hb_buffer_serialize_flags_t) flags; } void new_line (void) { @@ -51,7 +65,7 @@ struct output_buffer_t hb_bool_t utf8_clusters) { g_string_set_size (gs, 0); - format.serialize_buffer_of_text (buffer, line_no, text, text_len, font, utf8_clusters, gs); + format.serialize_buffer_of_text (buffer, line_no, text, text_len, font, gs); fprintf (options.fp, "%s", 
gs->str); } void shape_failed (hb_buffer_t *buffer, @@ -69,7 +83,8 @@ struct output_buffer_t hb_bool_t utf8_clusters) { g_string_set_size (gs, 0); - format.serialize_buffer_of_glyphs (buffer, line_no, text, text_len, font, utf8_clusters, gs); + format.serialize_buffer_of_glyphs (buffer, line_no, text, text_len, font, + output_format, format_flags, gs); fprintf (options.fp, "%s", gs->str); } void finish (const font_options_t *font_opts) @@ -87,6 +102,8 @@ struct output_buffer_t GString *gs; unsigned int line_no; hb_font_t *font; + hb_buffer_serialize_format_t output_format; + hb_buffer_serialize_flags_t format_flags; }; int diff --git a/util/options.cc b/util/options.cc index ca621bff..44c91736 100644 --- a/util/options.cc +++ b/util/options.cc @@ -615,46 +615,23 @@ format_options_t::serialize_unicode (hb_buffer_t *buffer, void format_options_t::serialize_glyphs (hb_buffer_t *buffer, hb_font_t *font, - hb_bool_t utf8_clusters, + hb_buffer_serialize_format_t output_format, + hb_buffer_serialize_flags_t flags, GString *gs) { - unsigned int num_glyphs = hb_buffer_get_length (buffer); - hb_glyph_info_t *info = hb_buffer_get_glyph_infos (buffer, NULL); - hb_glyph_position_t *pos = hb_buffer_get_glyph_positions (buffer, NULL); - hb_direction_t direction = hb_buffer_get_direction (buffer); - g_string_append_c (gs, '['); - for (unsigned int i = 0; i < num_glyphs; i++) - { - if (i) - g_string_append_c (gs, '|'); - - char glyph_name[128]; - if (show_glyph_names) { - hb_font_glyph_to_string (font, info->codepoint, glyph_name, sizeof (glyph_name)); - g_string_append_printf (gs, "%s", glyph_name); - } else - g_string_append_printf (gs, "%u", info->codepoint); - - if (show_clusters) { - g_string_append_printf (gs, "=%u", info->cluster); - if (utf8_clusters) - g_string_append (gs, "u8"); - } - - if (show_positions && (pos->x_offset || pos->y_offset)) { - g_string_append_printf (gs, "@%d,%d", pos->x_offset, pos->y_offset); - } - if (show_positions) { - g_string_append_c (gs, '+'); 
- if (HB_DIRECTION_IS_HORIZONTAL (direction) || pos->x_advance) - g_string_append_printf (gs, "%d", pos->x_advance); - if (HB_DIRECTION_IS_VERTICAL (direction) || pos->y_advance) - g_string_append_printf (gs, ",%d", pos->y_advance); - } - - info++; - pos++; + unsigned int num_glyphs = hb_buffer_get_length (buffer); + unsigned int start = 0; + + while (start < num_glyphs) { + char buf[1024]; + unsigned int consumed; + start += hb_buffer_serialize_glyphs (buffer, start, num_glyphs, + buf, sizeof (buf), &consumed, + font, output_format, flags); + if (!consumed) + break; + g_string_append (gs, buf); } g_string_append_c (gs, ']'); } @@ -671,7 +648,6 @@ format_options_t::serialize_buffer_of_text (hb_buffer_t *buffer, const char *text, unsigned int text_len, hb_font_t *font, - hb_bool_t utf8_clusters, GString *gs) { if (show_text) { @@ -703,10 +679,11 @@ format_options_t::serialize_buffer_of_glyphs (hb_buffer_t *buffer, const char *text, unsigned int text_len, hb_font_t *font, - hb_bool_t utf8_clusters, + hb_buffer_serialize_format_t output_format, + hb_buffer_serialize_flags_t format_flags, GString *gs) { serialize_line_no (line_no, gs); - serialize_glyphs (buffer, font, utf8_clusters, gs); + serialize_glyphs (buffer, font, output_format, format_flags, gs); g_string_append_c (gs, '\n'); } diff --git a/util/options.hh b/util/options.hh index be6878b4..093f052e 100644 --- a/util/options.hh +++ b/util/options.hh @@ -364,7 +364,8 @@ struct format_options_t : option_group_t GString *gs); void serialize_glyphs (hb_buffer_t *buffer, hb_font_t *font, - hb_bool_t utf8_clusters, + hb_buffer_serialize_format_t format, + hb_buffer_serialize_flags_t flags, GString *gs); void serialize_line_no (unsigned int line_no, GString *gs); @@ -373,7 +374,6 @@ struct format_options_t : option_group_t const char *text, unsigned int text_len, hb_font_t *font, - hb_bool_t utf8_clusters, GString *gs); void serialize_message (unsigned int line_no, const char *msg, @@ -383,7 +383,8 @@ struct 
format_options_t : option_group_t const char *text, unsigned int text_len, hb_font_t *font, - hb_bool_t utf8_clusters, + hb_buffer_serialize_format_t output_format, + hb_buffer_serialize_flags_t format_flags, GString *gs); -- GitLab