Unverified commit fec0338f authored by pyoung2778, committed by GitHub

Checkin seq_flow_lite (#10219)

Parent c6d7d57d
......@@ -16,10 +16,10 @@ http_archive(
http_archive(
name = "org_tensorflow",
sha256 = "fc6d7c57cd9427e695a38ad00fb6ecc3f623bac792dd44ad73a3f85b338b68be",
strip_prefix = "tensorflow-8a4ffe2e1ae722cff5306778df0cfca8b7f503fe",
sha256 = "40d3203ab5f246d83bae328288a24209a2b85794f1b3e2cd0329458d8e7c1985",
strip_prefix = "tensorflow-2.6.0",
urls = [
"https://github.com/tensorflow/tensorflow/archive/8a4ffe2e1ae722cff5306778df0cfca8b7f503fe.tar.gz",
"https://github.com/tensorflow/tensorflow/archive/v2.6.0.zip",
],
)
......@@ -49,41 +49,6 @@ PROTOC_VERSION = "3.9.0"
PROTOC_SHA256 = "15e395b648a1a6dda8fd66868824a396e9d3e89bc2c8648e3b9ab9801bea5d55"
reverb_protoc_deps(version = PROTOC_VERSION, sha256 = PROTOC_SHA256)
# ABSL cpp library.
http_archive(
name = "com_google_absl",
sha256 = "f368a8476f4e2e0eccf8a7318b98dafbe30b2600f4e3cf52636e5eb145aba06a", # SHARED_ABSL_SHA
strip_prefix = "abseil-cpp-df3ea785d8c30a9503321a3d35ee7d35808f190d",
urls = [
"https://storage.googleapis.com/mirror.tensorflow.org/github.com/abseil/abseil-cpp/archive/df3ea785d8c30a9503321a3d35ee7d35808f190d.tar.gz",
"https://github.com/abseil/abseil-cpp/archive/df3ea785d8c30a9503321a3d35ee7d35808f190d.tar.gz",
],
)
http_archive(
name = "rules_cc",
strip_prefix = "rules_cc-master",
urls = ["https://github.com/bazelbuild/rules_cc/archive/master.zip"],
)
# GoogleTest/GoogleMock framework. Used by most unit-tests.
http_archive(
name = "com_google_googletest",
urls = ["https://github.com/google/googletest/archive/master.zip"],
strip_prefix = "googletest-master",
)
# gflags needed by glog
http_archive(
name = "com_github_gflags_gflags",
sha256 = "6e16c8bc91b1310a44f3965e616383dbda48f83e8c1eaa2370a215057b00cabe",
strip_prefix = "gflags-77592648e3f3be87d6c7123eb81cbad75f9aef5a",
urls = [
"https://mirror.bazel.build/github.com/gflags/gflags/archive/77592648e3f3be87d6c7123eb81cbad75f9aef5a.tar.gz",
"https://github.com/gflags/gflags/archive/77592648e3f3be87d6c7123eb81cbad75f9aef5a.tar.gz",
],
)
# glog
http_archive(
name = "com_google_glog",
......@@ -92,16 +57,6 @@ http_archive(
urls = ["https://github.com/google/glog/archive/v0.4.0.tar.gz"],
)
http_archive(
name = "absl_py",
sha256 = "603febc9b95a8f2979a7bdb77d2f5e4d9b30d4e0d59579f88eba67d4e4cc5462",
strip_prefix = "abseil-py-pypi-v0.9.0",
urls = [
"https://storage.googleapis.com/mirror.tensorflow.org/github.com/abseil/abseil-py/archive/pypi-v0.9.0.tar.gz",
"https://github.com/abseil/abseil-py/archive/pypi-v0.9.0.tar.gz",
],
)
http_archive(
name = "utf_archive",
build_file = "@//third_party:utf.BUILD",
......@@ -113,25 +68,17 @@ http_archive(
)
#-----------------------------------------------------------------------------
# proto
#-----------------------------------------------------------------------------
# proto_library, cc_proto_library and java_proto_library rules implicitly depend
# on @com_google_protobuf//:proto, @com_google_protobuf//:cc_toolchain and
# @com_google_protobuf//:java_toolchain, respectively.
# This statement defines the @com_google_protobuf repo.
http_archive(
name = "com_google_protobuf",
strip_prefix = "protobuf-3.8.0",
urls = ["https://github.com/google/protobuf/archive/v3.8.0.zip"],
sha256 = "1e622ce4b84b88b6d2cdf1db38d1a634fe2392d74f0b7b74ff98f3a51838ee53",
)
load("@org_tensorflow//tensorflow:workspace3.bzl", "tf_workspace3")
tf_workspace3()
load("@org_tensorflow//tensorflow:workspace2.bzl", "tf_workspace2")
tf_workspace2()
load("//third_party/flatbuffers:workspace.bzl", flatbuffers = "repo")
flatbuffers()
load("@org_tensorflow//tensorflow:workspace1.bzl", "tf_workspace1")
tf_workspace1()
load("@org_tensorflow//tensorflow:workspace.bzl", "tf_workspace")
tf_workspace(tf_repo_name = "org_tensorflow")
load("@org_tensorflow//tensorflow:workspace0.bzl", "tf_workspace0")
tf_workspace0()
# TF submodule compilation doesn't take care of grpc deps. Do it manually here.
......@@ -168,7 +115,7 @@ new_git_repository(
remote = "https://github.com/unicode-org/icu",
build_file = "@//third_party:icu.BUILD",
patch_cmds = [
"find . -type f -exec sed -i 's/#\s*include \"unicode/#include \"icu4c\/source\/common\/unicode/g' {} \;",
"find . -type f -exec sed -i 's/#\\s*include \"unicode/#include \"icu4c\\/source\\/common\\/unicode/g' {} \\;",
],
)
......
......@@ -5,5 +5,6 @@ sh_binary(
"//tf_ops:sequence_string_projection_op_py",
"//tf_ops:sequence_string_projection_op_v2_py",
"//tf_ops:tf_custom_ops_py",
"//tflite_ops:registerer",
],
)
......@@ -30,3 +30,5 @@ cp -f "${RUNFILES_DIR}/tf_ops/libtf_custom_ops_py_gen_op.so" \
cp -f "${RUNFILES_DIR}/tf_ops/tf_custom_ops_py.py" \
"${BUILD_WORKSPACE_DIRECTORY}/tf_ops"
cp -f "${RUNFILES_DIR}/tflite_ops/registerer.so" \
"${BUILD_WORKSPACE_DIRECTORY}/tflite_ops"
......@@ -44,7 +44,7 @@ class _BazelBuildCommand(setuptools.Command):
setuptools.setup(
name='seq_flow_lite',
version='0.1',
packages=['tf_ops'],
packages=['tf_ops', 'tflite_ops'],
package_data={'': ['*.so']},
cmdclass={
'build': _BuildCommand,
......
......@@ -48,9 +48,9 @@ std::unique_ptr<tflite::Interpreter> CreateInterpreter(
tflite::ops::builtin::BuiltinOpResolver resolver;
resolver.AddCustom(
"SEQUENCE_STRING_PROJECTION",
tflite::ops::custom::Register_SEQUENCE_STRING_PROJECTION());
::seq_flow_lite::ops::custom::Register_SEQUENCE_STRING_PROJECTION());
resolver.AddCustom("ExpectedValueOp",
tflite::ops::custom::Register_EXPECTED_VALUE());
::seq_flow_lite::ops::custom::Register_EXPECTED_VALUE());
tflite::InterpreterBuilder(model, resolver,
/*error_reporter=*/nullptr)(&interpreter);
if (!interpreter) {
......@@ -105,7 +105,7 @@ std::vector<float> InvokeModel(
const size_t num_classes = output_dims[kClassOutputClassIndex];
for (int i = 0; i < num_classes; ++i) {
// Find class probability or log probability for the class index
classes.push_back(tflite::PodDequantize(*class_output, i));
classes.push_back(::seq_flow_lite::PodDequantize(*class_output, i));
}
return classes;
}
......
......@@ -30,6 +30,8 @@ from utils import tflite_utils # import seq_flow_lite module
FLAGS = flags.FLAGS
flags.DEFINE_string("output_dir", None, "The output or model directory.")
flags.DEFINE_enum("output", "sigmoid", ["logits", "sigmoid", "softmax"],
"Specification of the output tensor.")
def load_runner_config():
......@@ -51,12 +53,20 @@ def main(_):
encoder = model.Encoder(model_config, base_layers.TFLITE)
projection, seq_length = prxlayer(text)
logits = encoder(projection, seq_length)
if FLAGS.output == "logits":
outputs = logits
elif FLAGS.output == "sigmoid":
outputs = tf.math.sigmoid(logits)
else:
assert FLAGS.output == "softmax", "Unexpected output"
outputs = tf.nn.softmax(logits)
session.run(tf.global_variables_initializer())
session.run(tf.local_variables_initializer())
saver = tf.train.Saver()
saver.restore(session, tf.train.latest_checkpoint(FLAGS.output_dir))
tflite_fb = tflite_utils.generate_tflite(session, graph, [text], [logits])
tflite_fb = tflite_utils.generate_tflite(session, graph, [text],
[outputs])
output_file_name = os.path.join(FLAGS.output_dir, "tflite.fb")
with tf.gfile.Open(output_file_name, "wb") as f:
f.write(tflite_fb)
......
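Editor's note: the new --output flag above selects which head gets serialized into the TFLite flatbuffer. As a reminder of the semantics (sigmoid gives independent per-class probabilities for multi-label heads; softmax gives a distribution over classes), here is a plain numpy sketch, separate from the script itself:

import numpy as np

logits = np.array([1.5, -0.5, 0.2])

# Sigmoid: each class scored independently, entries need not sum to 1.
sigmoid = 1.0 / (1.0 + np.exp(-logits))

# Softmax: probabilities over classes that sum to 1.
softmax = np.exp(logits - logits.max())
softmax /= softmax.sum()

print(sigmoid)        # independent per-class probabilities
print(softmax.sum())  # 1.0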
......@@ -54,19 +54,23 @@ class BaseLayer(tf.keras.layers.Layer):
assert len(tensor.get_shape().as_list()) == rank
assert tensor.dtype == dtype
def add_qweight(self, shape, num_bits=8):
"""Return a quantized weight variable for the given shape."""
def add_weight_wrapper(self, shape):
"""Return a weight variable for the given shape."""
if self.parameters.initializer is not None:
initializer = self.parameters.initializer
else:
initializer = tf.keras.initializers.GlorotUniform()
weight = self.add_weight(
"weight", shape, initializer=initializer, trainable=True)
"weight",
shape,
initializer=initializer,
trainable=True,
dtype=tf.float32)
self.add_reg_loss(weight)
return self._weight_quantization(weight, num_bits=num_bits)
return weight
def _weight_quantization(self, tensor, num_bits=8):
"""Quantize weights when enabled."""
def quantize_parameter(self, tensor, num_bits=8):
"""Quantize parameters when enabled."""
# For infer mode, toco computes the min/max from the weights offline to
# quantize them. During train/eval this is computed from the current value
# in the session by the graph itself.
......@@ -98,21 +102,37 @@ class BaseLayer(tf.keras.layers.Layer):
def assign_moving_average(self, var, update, ema_decay):
return var.assign(var.read_value() * (1 - ema_decay) + (ema_decay) * update)
def qrange_sigmoid(self, tensor):
if self.parameters.quantize:
def quantize_tensor(self, tf_only):
if tf_only and self.parameters.mode == TFLITE:
return False
return self.parameters.quantize
def qrange_sigmoid(self, tensor, tf_only=False):
if self.quantize_tensor(tf_only):
return tf.quantization.fake_quant_with_min_max_args(tensor, 0.0, 1.0)
return tensor
def qrange_tanh(self, tensor):
if self.parameters.quantize:
def qrange_tanh(self, tensor, tf_only=False):
if self.quantize_tensor(tf_only):
return tf.quantization.fake_quant_with_min_max_args(tensor, -1.0, 1.0)
return tensor
def quantized_tanh(self, tensor):
return self.qrange_tanh(tf.tanh(tensor))
def quantized_tanh(self, tensor, tf_only=False):
return self.qrange_tanh(tf.tanh(tensor), tf_only)
def quantized_sigmoid(self, tensor):
return self.qrange_sigmoid(tf.sigmoid(tensor))
def quantized_sigmoid(self, tensor, tf_only=False):
return self.qrange_sigmoid(tf.sigmoid(tensor), tf_only)
def get_batch_dimension(self, tensor):
return tensor.get_shape().as_list()[0] or tf.shape(tensor)[0]
def inverse_normalizer(self, mask):
return tf.math.reciprocal(tf.reduce_sum(mask))
def random_drop_to_zero(self, tensor, zero_probability):
rnd = tf.random.uniform(
shape=tf.shape(tensor),
minval=-zero_probability,
maxval=(1.0 - zero_probability),
dtype=tensor.dtype)
return tf.math.ceil(rnd)
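Editor's note: the ceiling trick in random_drop_to_zero works because a uniform sample from [-p, 1 - p) is negative with probability p, and ceil maps negative samples to 0 and positive ones to 1, producing a Bernoulli mask without any comparison op. A minimal numpy sketch of the same idea (illustrative only):

import numpy as np

p = 0.3  # probability of dropping an element to zero
rnd = np.random.uniform(low=-p, high=1.0 - p, size=1_000_000)
mask = np.ceil(rnd)  # 0.0 with probability p, 1.0 otherwise
print(mask.mean())   # close to 1 - p = 0.7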
......@@ -60,7 +60,7 @@ class EncoderQConvolution(base_layers.BaseLayer):
assert len(input_shapes) == self.rank
self.in_filters = input_shapes[-1]
shape = self.ksize + [self.in_filters, self.out_filters]
self.filters = self.add_qweight(shape=shape)
self.filters = self.add_weight_wrapper(shape=shape)
if self.bias:
self.b = self.add_bias(shape=[self.out_filters])
......@@ -70,7 +70,7 @@ class EncoderQConvolution(base_layers.BaseLayer):
def _conv_r4(self, inputs, normalize_method):
outputs = tf.nn.conv2d(
inputs,
self.filters,
self.quantize_parameter(self.filters),
strides=self.strides,
padding=self.padding,
dilations=self.dilations)
......
......@@ -47,7 +47,7 @@ class BaseQDense(base_layers.BaseLayer):
assert input_shapes[1] == 1 or input_shapes[2] == 1
self.in_units = input_shapes[-1]
shape = [self.in_units, self.units]
self.w = self.add_qweight(shape=shape)
self.w = self.add_weight_wrapper(shape=shape)
if self.bias:
self.b = self.add_bias(shape=[self.units])
......@@ -55,7 +55,7 @@ class BaseQDense(base_layers.BaseLayer):
self.normalization = normalization_layers.BatchNormalization(**kwargs)
def _dense_r2(self, inputs, normalize_method):
outputs = tf.matmul(inputs, self.w)
outputs = tf.matmul(inputs, self.quantize_parameter(self.w))
if self.bias:
outputs = tf.nn.bias_add(outputs, self.b)
if self.normalize:
......@@ -98,7 +98,9 @@ class BaseQDenseVarLen(BaseQDense):
self.normalization = normalization_layers.VarLenBatchNormalization(
rank=2, **kwargs)
def call(self, inputs, mask, inverse_normalizer):
def call(self, inputs, mask, inverse_normalizer=None):
if inverse_normalizer is None:
inverse_normalizer = self.inverse_normalizer(mask)
def normalize_method(tensor):
maskr2 = tf.reshape(mask, [-1, 1])
......
......@@ -25,7 +25,7 @@ from tf_ops import sequence_string_projection_op_v2 as sspv2 # import seq_flow_l
class ProjectionLayer(base_layers.BaseLayer):
"""Base class for encoders."""
def __init__(self, model_config, mode):
def __init__(self, model_config, mode, **kwargs):
"""Create projection."""
def _get_params(varname, default_value=None):
......@@ -50,7 +50,7 @@ class ProjectionLayer(base_layers.BaseLayer):
if mode == base_layers.TRAIN:
_get_params("distortion_probability", 0.0)
parameters = base_layers.Parameters(mode, self.quantize)
super(ProjectionLayer, self).__init__(parameters=parameters)
super(ProjectionLayer, self).__init__(parameters=parameters, **kwargs)
def call(self, inputs):
projection, _, seq_length = ssp.sequence_string_projection(
......@@ -74,15 +74,14 @@ class ProjectionLayer(base_layers.BaseLayer):
batch_size = self.get_batch_dimension(inputs)
projection = tf.reshape(projection,
[batch_size, self.max_seq_len, self.feature_size])
if self.mode in modes:
projection = self.qrange_tanh(projection)
projection = self.qrange_tanh(projection)
return projection, seq_length
class ProjectionLayerPreSegmented(base_layers.BaseLayer):
"""Base class for encoders."""
def __init__(self, model_config, mode):
def __init__(self, model_config, mode, **kwargs):
"""Create projection."""
def _get_params(varname, default_value=None):
......@@ -101,11 +100,13 @@ class ProjectionLayerPreSegmented(base_layers.BaseLayer):
if mode == base_layers.TRAIN:
_get_params("distortion_probability", 0.0)
parameters = base_layers.Parameters(mode, self.quantize)
super(ProjectionLayerPreSegmented, self).__init__(parameters=parameters)
super(ProjectionLayerPreSegmented, self).__init__(
parameters=parameters, **kwargs)
def call(self, inputs, sequence_length):
def call(self, inputs):
tokens, sequence_length = inputs
projection = sspv2.sequence_string_projection_v2(
input=inputs,
input=tokens,
sequence_length=sequence_length,
feature_size=self.feature_size,
distortion_probability=self.distortion_probability,
......
......@@ -27,6 +27,8 @@ class ActivationQuantization(base_layers.BaseLayer):
self.ema_decay = ema_decay
self.num_bits = num_bits
super(ActivationQuantization, self).__init__(**kwargs)
def build(self, input_shapes):
if self.parameters.quantize:
self.min_var = self.add_weight(
"min", initializer=tf.keras.initializers.Zeros(), trainable=False)
......@@ -53,6 +55,7 @@ class ActivationQuantization(base_layers.BaseLayer):
return inputs
def quantize_using_range(self, inputs):
# This method can only be called after a call to the "call" method of this class.
if self.parameters.quantize:
return tf.quantization.fake_quant_with_min_max_vars(
inputs, self.min_var, self.max_var, num_bits=self.num_bits)
......@@ -66,21 +69,24 @@ class ConcatQuantization(ActivationQuantization):
self.axis = axis
super(ConcatQuantization, self).__init__(**kwargs)
def reduce_list(self, tensor_list, functor):
def _reduce_list(self, tensor_list, functor):
reduce_result = [functor(tensor) for tensor in tensor_list]
# Toco expects 0.0 to be part of the quantization range.
reduce_result.append(tf.constant(0.0))
return functor(tf.stack(reduce_result))
def call(self, tensors):
# Ignore empty invocations done to build the keras layer.
if tensors is None:
return
if self.parameters.quantize:
if self.parameters.mode == base_layers.TRAIN:
# Toco expects 0.0 to be part of the quantization range.
batch_min = self.reduce_list(tensors, tf.reduce_min)
batch_min = self._reduce_list(tensors, tf.reduce_min)
min_var = self.assign_moving_average(self.min_var, batch_min,
self.ema_decay)
batch_max = self.reduce_list(tensors, tf.reduce_max)
batch_max = self._reduce_list(tensors, tf.reduce_max)
max_var = self.assign_moving_average(self.max_var, batch_max,
self.ema_decay)
else:
......
......@@ -27,21 +27,17 @@ def classification_metric(per_example_loss, label_ids, logits):
}
THRESHOLDS = [0.5]
def labeling_metric(per_example_loss, label_ids, logits):
"""Compute eval metrics."""
scores = tf.math.sigmoid(logits)
binary_prediction = tf.math.greater_equal(scores, 0.5)
num_classes = label_ids.get_shape().as_list()[-1]
return_dict = {"eval_loss": tf.metrics.mean(per_example_loss)}
for idx in range(num_classes):
return_dict["auc/" + str(idx)] = tf.metrics.auc(label_ids[:, idx],
scores[:, idx])
return_dict["precision@" + str(THRESHOLDS) + "/" +
str(idx)] = tf.metrics.precision_at_thresholds(
label_ids[:, idx], scores[:, idx], thresholds=THRESHOLDS)
return_dict["recall@" + str(THRESHOLDS) + "/" +
str(idx)] = tf.metrics.recall_at_thresholds(
label_ids[:, idx], scores[:, idx], thresholds=THRESHOLDS)
return_dict["precision/" + str(idx)] = tf.metrics.precision(
label_ids[:, idx], binary_prediction[:, idx])
return_dict["recall/" + str(idx)] = tf.metrics.recall(
label_ids[:, idx], binary_prediction[:, idx])
return return_dict
......@@ -38,6 +38,7 @@ class PaddedMaskedVarLenConv(conv_layers.EncoderQConvolutionVarLen):
assert bool(ngram is None) != bool(skip_bigram is None)
self.kwidth = ngram if ngram is not None else (skip_bigram + 2)
mask = [1] * self.kwidth
self.skipgram = skip_bigram is not None
if skip_bigram is not None:
mask[1], mask[skip_bigram] = 0, 0
self.mask = np.array(mask, dtype="float32").reshape((1, self.kwidth, 1, 1))
......@@ -56,10 +57,10 @@ class PaddedMaskedVarLenConv(conv_layers.EncoderQConvolutionVarLen):
return result * mask + (1 - mask) * self.invalid_value
return result
def add_qweight(self, shape, num_bits=8):
weight = super(PaddedMaskedVarLenConv, self).add_qweight(
shape=shape, num_bits=num_bits)
return weight * tf.convert_to_tensor(self.mask)
def quantize_parameter(self, weight, num_bits=8):
weight = super(PaddedMaskedVarLenConv, self).quantize_parameter(
weight, num_bits=num_bits)
return weight * tf.convert_to_tensor(self.mask) if self.skipgram else weight
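Editor's note: to make the masking above concrete, a skip-bigram of gap n is realized as a convolution of width n + 2 whose interior taps are zeroed, so only the two end tokens contribute. A small numpy sketch mirroring the mask construction in __init__ (illustrative only):

import numpy as np

def skip_bigram_mask(skip_bigram):
    # Mirrors PaddedMaskedVarLenConv: width is skip_bigram + 2, with the
    # interior taps zeroed so only the two end tokens contribute.
    kwidth = skip_bigram + 2
    mask = [1.0] * kwidth
    mask[1], mask[skip_bigram] = 0.0, 0.0
    return np.array(mask)

print(skip_bigram_mask(1))  # [1. 0. 1.]
print(skip_bigram_mask(2))  # [1. 0. 0. 1.]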
class AttentionPoolReduce(base_layers.BaseLayer):
......@@ -97,8 +98,8 @@ class AttentionPoolReduce(base_layers.BaseLayer):
class Encoder(tf.keras.layers.Layer):
"""A PRADO keras model."""
def __init__(self, config, mode):
super(Encoder, self).__init__()
def __init__(self, config, mode, **kwargs):
super(Encoder, self).__init__(**kwargs)
def _get_params(varname, default_value=None):
value = config[varname] if varname in config else default_value
......@@ -118,7 +119,7 @@ class Encoder(tf.keras.layers.Layer):
_get_params("skip1bigram_channels", 0)
_get_params("skip2bigram_channels", 0)
_get_params("network_regularizer_scale", 1e-4)
_get_params("keep_prob", 0.5)
_get_params("keep_prob", 1.0)
self.num_classes = len(self.labels)
self.parameters = base_layers.Parameters(
......@@ -129,7 +130,6 @@ class Encoder(tf.keras.layers.Layer):
units=self.embedding_size, rank=3, parameters=self.parameters)
self.attention_fc = dense_layers.BaseQDenseVarLen(
units=self.embedding_size, rank=3, parameters=self.parameters)
self.dropout = tf.keras.layers.Dropout(rate=(1 - self.keep_prob))
self.parameters = copy.copy(self.parameters)
self.parameters.regularizer_scale = self.network_regularizer_scale
......@@ -161,8 +161,8 @@ class Encoder(tf.keras.layers.Layer):
def _apply_fc_dropout(self, layer, inputs, mask, inverse_normalizer):
outputs = layer(inputs, mask, inverse_normalizer)
if self.parameters.mode == base_layers.TRAIN:
return self.dropout(outputs)
if self.parameters.mode == base_layers.TRAIN and self.keep_prob < 1.0:
return tf.nn.dropout(outputs, rate=(1 - self.keep_prob))
return outputs
def call(self, projection, seq_length):
......@@ -178,14 +178,17 @@ class Encoder(tf.keras.layers.Layer):
layer(values_in, attention_in, maskr3, inverse_normalizer)
for layer in self.attention_pool_layers
]
assert tensors, "no ngram channels have been configured"
pre_logits = self.concat_quantizer(tensors)
return self.final_fc(pre_logits)
class Model(Encoder):
def __init__(self, config, mode):
super(Model, self).__init__(config, mode)
def __init__(self, config, mode, **kwargs):
super(Model, self).__init__(config, mode, **kwargs)
self.projection = projection_layers.ProjectionLayer(config, mode)
def call(self, inputs):
......
......@@ -93,8 +93,8 @@ py_binary(
# Expect numpy installed
# package TFLite flex delegate
# package TFLite interpreter
"@org_tflite_support//tensorflow_lite_support/custom_ops/kernel:ngrams_op_resolver",
"@org_tflite_support//tensorflow_lite_support/custom_ops/kernel:whitespace_tokenizer_op_resolver",
"@org_tflite_support//tensorflow_lite_support/custom_ops/kernel:_pywrap_ngrams_op_resolver",
"@org_tflite_support//tensorflow_lite_support/custom_ops/kernel:_pywrap_whitespace_tokenizer_op_resolver",
# Expect tensorflow text installed
],
)
......
......@@ -10,15 +10,6 @@ package(
],
)
py_library(
name = "text_projection",
srcs = ["text_projection.py"],
srcs_version = "PY3",
deps = [
":sequence_string_projection_op_py",
],
)
cc_library(
name = "sequence_string_projection_op",
srcs = [
......@@ -30,7 +21,6 @@ cc_library(
":projection_util",
":text_distorter",
"@com_google_absl//absl/container:flat_hash_map",
"@com_google_absl//absl/random",
"@tensorflow_includes//:includes",
"@tensorflow_solib//:framework_lib",
],
......@@ -71,11 +61,9 @@ cc_library(
srcs = ["text_distorter.cc"],
hdrs = ["text_distorter.h"],
deps = [
"@com_google_absl//absl/strings",
"@icu4c",
"@tensorflow_includes//:includes",
"@tensorflow_solib//:framework_lib",
"@utf_archive//:utf",
],
)
......@@ -102,7 +90,6 @@ cc_library(
"@tensorflow_includes//:includes",
"@tensorflow_solib//:framework_lib",
"@com_google_absl//absl/container:flat_hash_map",
"@com_google_absl//absl/random",
],
alwayslink = 1,
)
......
......@@ -79,7 +79,7 @@ std::string ContractToken(const char* input_ptr, size_t len, size_t num_chars) {
// Count how many times this pattern appeared.
int num_cur_patterns = 0;
if (cur_pattern.find(" ") == std::string::npos && !IsDigit(cur_pattern)) {
if (cur_pattern.find(' ') == std::string::npos && !IsDigit(cur_pattern)) {
num_cur_patterns =
GetNumPattern(char_tokens, i + num_chars, num_chars, cur_pattern);
}
......
......@@ -25,25 +25,28 @@ limitations under the License.
namespace {
constexpr int kInvalid = -1;
constexpr char kSpace = ' ';
} // namespace
// A HashEngine that uses MurmurHash to convert text to hashcodes.
class MurmurHash : public HashEngine {
public:
void GetHashCodes(const std::string& word, std::vector<uint64_t>* hash_codes,
int feature_size) override {
std::vector<uint64_t> GetHashCodes(const std::string& word,
int feature_size) override {
std::vector<uint64_t> hash_codes;
hash_codes.reserve(2 * (feature_size / 64 + 1));
uint64_t hash_low = 0;
uint64_t hash_high = 0;
for (int i = 0; i < feature_size; i += 64) {
if (i == 0) {
auto hash = MurmurHash128(word.c_str(), word.size());
auto hash = MurmurHash128(word.data(), word.size());
hash_low = hash.first;
hash_high = hash.second;
} else {
GetMoreBits(hash_low, hash_high, &hash_low, &hash_high);
}
hash_codes->push_back(hash_low);
hash_codes->push_back(hash_high);
hash_codes.push_back(hash_low);
hash_codes.push_back(hash_high);
}
return hash_codes;
}
private:
......@@ -78,7 +81,7 @@ class MurmurHash : public HashEngine {
std::pair<uint64_t, uint64_t> MurmurHash128(const char* buf,
const size_t len) {
// Initialize the hashing value.
uint64_t hash = len * kMul;
uint64_t hash1 = len * kMul;
// hash2 will be xored by hash1 during the hash computation iterations.
// In the end we use an alternative mixture multiplier for mixing
// the bits in hash2.
......@@ -90,34 +93,38 @@ class MurmurHash : public HashEngine {
for (const char* p = buf; p != end; p += 8) {
// Manually unrolling this loop 2x did not help on Intel Core 2.
hash = MurmurStep(hash, Load64VariableLength(p, 8));
hash2 ^= hash;
hash1 = MurmurStep(hash1, Load64VariableLength(p, 8));
hash2 ^= hash1;
}
if ((len & 0x7) != 0) {
const uint64_t data = Load64VariableLength(end, len & 0x7);
hash ^= data;
hash *= kMul;
hash2 ^= hash;
hash1 ^= data;
hash1 *= kMul;
hash2 ^= hash1;
}
hash = ShiftMix(hash) * kMul;
hash2 ^= hash;
hash = ShiftMix(hash);
hash1 = ShiftMix(hash1) * kMul;
hash2 ^= hash1;
hash1 = ShiftMix(hash1);
// kMul2 is a prime just above the golden ratio. It is used to ensure that
// the impact of the last few bytes is different for the upper and lower
// 64 bits.
return std::make_pair(hash, hash2);
return {hash1, hash2};
}
};
// A HashEngine that uses a prefix and suffix preserving hash to convert text
// to hashcodes.
class XFixHash : public HashEngine {
public:
explicit XFixHash(int bits_per_char)
: bits_per_char_(bits_per_char), bit_mask_((1ULL << bits_per_char) - 1) {}
void GetHashCodes(const std::string& word, std::vector<uint64_t>* hash_codes,
int feature_size) override {
std::vector<uint64_t> GetHashCodes(const std::string& word,
int feature_size) override {
std::vector<uint64_t> hash_codes;
hash_codes.reserve(2 * (feature_size / 64 + 1));
auto token_ptr = reinterpret_cast<const uint8_t*>(word.c_str());
size_t token_size = word.size();
int token_idx = 0;
......@@ -134,9 +141,10 @@ class XFixHash : public HashEngine {
hash_low = (hash_low << bits_per_char_) | (frhash & bit_mask_);
hash_high = (hash_high << bits_per_char_) | (brhash & bit_mask_);
}
hash_codes->push_back(hash_low);
hash_codes->push_back(hash_high);
hash_codes.push_back(hash_low);
hash_codes.push_back(hash_high);
}
return hash_codes;
}
private:
......@@ -146,6 +154,8 @@ class XFixHash : public HashEngine {
const uint64_t bit_mask_;
};
// A HashEngine that performs a position preserving unicode level hashing to
// convert text to hashcodes.
class UnicodeHash : public HashEngine {
public:
// bits_per_unicode should be a divisor of 64.
......@@ -154,8 +164,10 @@ class UnicodeHash : public HashEngine {
bit_mask_(((1ULL << bits_per_unicode) - 1) << (64 - bits_per_unicode)) {
}
void GetHashCodes(const std::string& word, std::vector<uint64_t>* hash_codes,
int feature_size) override {
std::vector<uint64_t> GetHashCodes(const std::string& word,
int feature_size) override {
std::vector<uint64_t> hash_codes;
hash_codes.reserve(2 * (feature_size / 64 + 1));
auto word_ptr = word.c_str();
int utflength = utflen(const_cast<char*>(word_ptr));
// Both `feature_size` and `bits_per_unicode` are bit lengths.
......@@ -187,8 +199,9 @@ class UnicodeHash : public HashEngine {
hash = hash >> bits_per_unicode_;
}
}
hash_codes->push_back(hash);
hash_codes.push_back(hash);
}
return hash_codes;
}
private:
......@@ -197,6 +210,8 @@ class UnicodeHash : public HashEngine {
const uint64_t bit_mask_;
};
} // namespace
bool Hasher::SupportedHashType(const std::string& hash_type) {
std::unordered_set<std::string> supported({kMurmurHash, kUnicodeHash8,
kUnicodeHash16, kXfixHash8,
......@@ -225,7 +240,7 @@ Hasher* Hasher::CreateHasher(int feature_size, const std::string& hash_type) {
Hasher::Hasher(int feature_size, HashEngine* hash_engine)
: feature_size_(feature_size), hash_engine_(hash_engine) {
hash_engine_->GetHashCodes(empty_string_, &null_hash_codes_, feature_size_);
null_hash_codes_ = hash_engine_->GetHashCodes(empty_string_, feature_size_);
}
std::string ProjectionUnicodeHandler::LowerCaseUTF8WithSupportedUnicodes(
......
......@@ -21,23 +21,26 @@ limitations under the License.
#include "libutf/utf.h"
constexpr int kFirstCapOffset = 3;
constexpr int kAllCapsOffset = 4;
constexpr int kWordNoveltyOffset = 1;
constexpr int kDocSizeOffset = 2;
const char kMurmurHash[] = "murmur";
const char kXfixHash8[] = "xfixhash8";
const char kXfixHash16[] = "xfixhash16";
const char kXfixHash32[] = "xfixhash32";
const char kUnicodeHash8[] = "unicodehash8";
const char kUnicodeHash16[] = "unicodehash16";
inline constexpr int kFirstCapOffset = 3;
inline constexpr int kAllCapsOffset = 4;
inline constexpr int kWordNoveltyOffset = 1;
inline constexpr int kDocSizeOffset = 2;
inline constexpr char kMurmurHash[] = "murmur";
inline constexpr char kXfixHash8[] = "xfixhash8";
inline constexpr char kXfixHash16[] = "xfixhash16";
inline constexpr char kXfixHash32[] = "xfixhash32";
inline constexpr char kUnicodeHash8[] = "unicodehash8";
inline constexpr char kUnicodeHash16[] = "unicodehash16";
// A base class that specifies the interface for a hash engine used by the
// projection operator.
class HashEngine {
public:
virtual void GetHashCodes(const std::string& word,
std::vector<uint64_t>* hash_codes,
int feature_size) = 0;
// Takes a string token `word` and a `feature_size` (measured in bits) and
// returns hash codes that represent the token.
virtual std::vector<uint64_t> GetHashCodes(const std::string& word,
int feature_size) = 0;
virtual ~HashEngine() {}
};
......@@ -50,13 +53,12 @@ class Hasher {
const std::string& hash_type = kMurmurHash);
static bool SupportedHashType(const std::string& hash_type);
bool GetHashCodes(const std::string& word,
std::vector<uint64_t>* hash_codes) {
std::vector<uint64_t>& hash_codes) {
if (!hash_engine_) return false;
if (word.empty()) {
*hash_codes = null_hash_codes_;
hash_codes = null_hash_codes_;
} else {
hash_codes->clear();
hash_engine_->GetHashCodes(word, hash_codes, feature_size_);
hash_codes = hash_engine_->GetHashCodes(word, feature_size_);
}
return true;
}
......@@ -64,8 +66,13 @@ class Hasher {
private:
explicit Hasher(int feature_size, HashEngine* hash_engine);
const std::string empty_string_ = "<null>";
// Size of the projection feature, i.e. the number of bits of hash codes
// that will be generated by this class.
const int feature_size_;
// The hash engine used by this class.
std::unique_ptr<HashEngine> hash_engine_;
// Hash codes for empty text are precalculated and stored below to speed
// up projection.
std::vector<uint64_t> null_hash_codes_;
};
......@@ -90,7 +97,8 @@ class ProjectionUnicodeHandler {
}
// Performs language independent lower case and returns a string with
// supported unicode segments.
// supported unicode segments, plus two additional flags, first_cap and
// all_caps, which when true indicate that the text is Firstcap or ALLCAPS.
std::string LowerCaseUTF8WithSupportedUnicodes(
const std::pair<const char*, size_t>& source, bool* first_cap = nullptr,
bool* all_caps = nullptr) const;
......@@ -126,14 +134,19 @@ class ProjectionUnicodeHandler {
int max_tokens);
private:
// Parses and extracts supported unicode segments from a utf8 string.
// Parses and extracts supported or allowed unicode segments, also referred
// to as vocabulary, from a utf8 string.
void InitializeVocabulary(const std::string& vocabulary);
// A variable that maps a valid Unicode rune to its index in valid character
// vocabulary.
std::unordered_map<Rune, int> valid_chars_;
// Controls whether to exclude non-alphabetic, non-space characters from the
// output text.
bool exclude_nonalphaspace_unicodes_;
};
static constexpr size_t kEntireString = SIZE_MAX;
static constexpr size_t kAllTokens = SIZE_MAX;
inline constexpr size_t kEntireString = SIZE_MAX;
inline constexpr size_t kAllTokens = SIZE_MAX;
std::vector<std::string> SplitBySpace(const char* input_ptr, size_t len,
size_t max_input, size_t max_tokens);
......
......@@ -198,7 +198,7 @@ cc_library(
),
includes = ["tensorflow_includes"],
deps = [
"@eigen_archive//:eigen",
"@eigen_archive//:eigen3",
"@protobuf_archive//:includes",
"@zlib_includes//:includes",
"@snappy_includes//:includes",
......
......@@ -12,16 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tf_ops/projection_normalizer_util.h" // seq_flow_lite
#include "tf_ops/projection_tokenizer_util.h" // seq_flow_lite
#include "tf_ops/projection_util.h" // seq_flow_lite
#include "tf_ops/text_distorter.h" // seq_flow_lite
#include "absl/container/flat_hash_map.h"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tf_ops/projection_normalizer_util.h" // seq_flow_lite
#include "tf_ops/projection_tokenizer_util.h" // seq_flow_lite
#include "tf_ops/projection_util.h" // seq_flow_lite
#include "tf_ops/text_distorter.h" // seq_flow_lite
using ::tensorflow::int32;
using ::tensorflow::int64;
......@@ -51,10 +51,11 @@ float* AllocateTensor(OpKernelContext* ctx, const std::string& tensor_name,
return &tensor->flat<float>()(0);
}
// OpKernel for the sequence string projection op.
class SequenceStringProjectionOp : public OpKernel {
public:
explicit SequenceStringProjectionOp(OpKernelConstruction* context)
: OpKernel(context) {
: OpKernel(context), philox_(171), generator_(&philox_) {
OP_REQUIRES_OK(context, context->GetAttr("feature_size", &feature_size_));
std::string hashtype;
OP_REQUIRES_OK(context, context->GetAttr("hashtype", &hashtype));
......@@ -159,7 +160,10 @@ class SequenceStringProjectionOp : public OpKernel {
}
const int64 seq_len =
static_cast<int64>(bos_tag_ + words.size() + eos_tag_);
CHECK_GT(seq_len, 0);
CHECK_GT(seq_len, 0)
<< "Projection models expect input text to have at least one valid "
"token. If empty text is a valid input for your model, please set "
"add_bos_tag to true.";
max_seq_len = std::max(max_seq_len, seq_len);
words_batches.emplace_back(std::move(words));
}
......@@ -208,7 +212,7 @@ class SequenceStringProjectionOp : public OpKernel {
CHECK_EQ(eos_tag_, 1);
word = kEndTokenTSP;
}
hasher_->GetHashCodes(word, &hash_codes);
hasher_->GetHashCodes(word, hash_codes);
for (int hindex = 0, k = 0; hindex < hash_codes.size(); hindex++) {
auto hash = hash_codes[hindex];
for (int kmax = std::min(k + increment, feature_size_); k < kmax;) {
......@@ -229,7 +233,7 @@ class SequenceStringProjectionOp : public OpKernel {
doc_size_feature;
}
if (add_first_cap_feature_ > 0.0f) {
if (text_distorter_->BernouilleSample(add_first_cap_feature_)) {
if (generator_.RandFloat() <= add_first_cap_feature_) {
projection[offset0 + feature_size_ - kFirstCapOffset] =
first_cap ? 1.0 : -1.0;
} else {
......@@ -237,7 +241,7 @@ class SequenceStringProjectionOp : public OpKernel {
}
}
if (add_all_caps_feature_ > 0.0f) {
if (text_distorter_->BernouilleSample(add_all_caps_feature_)) {
if (generator_.RandFloat() <= add_all_caps_feature_) {
projection[offset0 + feature_size_ - kAllCapsOffset] =
all_caps ? 1.0 : -1.0;
} else {
......@@ -252,21 +256,49 @@ class SequenceStringProjectionOp : public OpKernel {
}
private:
// Objects used for random number generation.
tensorflow::random::PhiloxRandom philox_;
tensorflow::random::SimplePhilox generator_;
// Dimensionality of the ternary vector for each token in the text.
int32 feature_size_;
// An object used to hash tokens in the text.
std::unique_ptr<Hasher> hasher_;
// An object used for distorting text before projection.
std::unique_ptr<TextDistorter> text_distorter_;
// An object used for manipulating unicode in the text. It performs tasks such
// as retaining only whitelisted unicodes in the text tokens and lowercasing
// them.
std::unique_ptr<ProjectionUnicodeHandler> unicode_handler_;
// An object used for normalizing tokens in the text. This performs tasks
// such as identifying repeated characters and replacing them with a single
// instance.
std::unique_ptr<ProjectionNormalizer> projection_normalizer_;
// Character whitelist used by the projection operator.
std::string vocabulary_;
// Size of the character whitelist.
int vocabulary_size_;
// Maximum number of splits allowed in the text. The number of tokens in the
// text after segmentation will be at most max_splits_ + 1.
int32 max_splits_;
// A flag that indicates how to segment text. When true, text is segmented
// on spaces; otherwise it is segmented on unicode boundaries.
bool split_on_space_;
// When true, an end of sentence token is included in the projection.
int eos_tag_;
// When true, a begin of sentence token is included in the projection.
int bos_tag_;
// Number of bits used to capture word novelty. See the tensorflow op
// documentation below for details.
int word_novelty_bits_;
// Number of levels used to capture document size. See the tensorflow op
// documentation below for details.
int doc_size_levels_;
// Distance between levels used for word novelty.
float word_novelty_offset_;
// Probability with which a boolean feature indicating first_cap text is added.
float add_first_cap_feature_;
// Probability with which a boolean feature indicating all_caps text is added.
float add_all_caps_feature_;
};
......
......@@ -40,6 +40,8 @@ constexpr char kEndTokenTSP[] = "<EOS>";
constexpr float kMappingTable[4] = {0, 1, -1, 0};
constexpr int kIncrement = 32;
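Editor's note on kMappingTable and kIncrement: each 64-bit hash code is consumed two bits at a time, and every bit pair indexes the table to produce a ternary feature in {-1, 0, 1}, so one code supplies up to 32 (= kIncrement) features. A sketch of that decoding in plain Python (bit order assumed low-to-high, for illustration only):

MAPPING = (0.0, 1.0, -1.0, 0.0)  # mirrors kMappingTable

def decode(hash_code, num_features=32):
    """Decode up to 32 ternary features from one 64-bit hash code."""
    features = []
    for _ in range(num_features):
        features.append(MAPPING[hash_code & 0b11])  # low two bits pick the value
        hash_code >>= 2
    return features

print(decode(0b_10_01_00_11, num_features=4))  # [0.0, 0.0, 1.0, -1.0]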
// Version 2 OpKernel for the sequence string projection op.
// Template T can be int32 or int64.
template <typename T>
class SequenceStringProjectionOpV2 : public OpKernel {
public:
......@@ -136,7 +138,7 @@ class SequenceStringProjectionOpV2 : public OpKernel {
} else {
word = kEndTokenTSP;
}
hasher_->GetHashCodes(word, &hash_codes);
hasher_->GetHashCodes(word, hash_codes);
for (int hindex = 0, k = 0; hindex < hash_codes.size(); hindex++) {
auto hash = hash_codes[hindex];
for (int kmax = std::min(k + kIncrement, feature_size_); k < kmax;) {
......@@ -153,13 +155,25 @@ class SequenceStringProjectionOpV2 : public OpKernel {
}
private:
// Dimensionality of the ternary vector for each token in the text.
int32 feature_size_;
// An object used to hash tokens in the text.
std::unique_ptr<Hasher> hasher_;
// An object used for distorting text before projection.
std::unique_ptr<TextDistorter> text_distorter_;
// An object used for manipulating unicode in the text. It performs tasks such
// as retaining only whitelisted unicodes in the text tokens and lowercasing
// them.
std::unique_ptr<ProjectionUnicodeHandler> unicode_handler_;
// An object used for normalizing tokens in the text. This performs tasks
// such as identifying repeated characters and replacing them with a single
// instance.
std::unique_ptr<ProjectionNormalizer> projection_normalizer_;
// Character whitelist used by the projection operator.
std::string vocabulary_;
// When true, an end of sentence token is included in the projection.
int eos_tag_;
// When true, a begin of sentence token is included in the projection.
int bos_tag_;
};
......
......@@ -32,7 +32,6 @@ class TextDistorter {
assert(distortion_probability_ <= 1.0);
}
std::string DistortText(icu::UnicodeString* uword);
bool BernouilleSample(float p) { return (generator_.RandFloat() <= p); }
private:
tensorflow::random::PhiloxRandom philox_;
......
......@@ -20,30 +20,6 @@ limitations under the License.
using ::tensorflow::int32;
class PoolingOp : public tensorflow::OpKernel {
public:
explicit PoolingOp(tensorflow::OpKernelConstruction* context)
: tensorflow::OpKernel(context) {}
void Compute(tensorflow::OpKernelContext* ctx) override {}
};
REGISTER_KERNEL_BUILDER(Name("PoolingOp").Device(::tensorflow::DEVICE_CPU),
PoolingOp);
REGISTER_OP("PoolingOp")
.Input("multiplier: float32")
.Input("constant: float32")
.Input("forward: float32")
.Output("state: float32")
.SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
c->set_output(0, c->input(0));
return tensorflow::Status::OK();
})
.Doc(R"doc(
Dummy pooling op.
)doc");
class ExpectedValueOp : public tensorflow::OpKernel {
public:
explicit ExpectedValueOp(tensorflow::OpKernelConstruction* context)
......@@ -93,33 +69,3 @@ REGISTER_OP("LayerNorm")
.Doc(R"doc(
Dummy layer norm op.
)doc");
class UniformCausalAttnOp : public tensorflow::OpKernel {
public:
explicit UniformCausalAttnOp(tensorflow::OpKernelConstruction* context)
: tensorflow::OpKernel(context) {}
void Compute(tensorflow::OpKernelContext* ctx) override {}
};
REGISTER_KERNEL_BUILDER(
Name("UniformCausalAttn").Device(::tensorflow::DEVICE_CPU),
UniformCausalAttnOp);
REGISTER_OP("UniformCausalAttn")
.Input("input: float32")
.Input("time_step: int32")
.Input("selected_beams: int32")
.Attr("feature_size: int")
.Attr("beam_size: int")
.Output("output: float32")
.SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
auto batch_size = c->Dim(c->input(0), 0);
int32 feature_size;
TF_RETURN_IF_ERROR(c->GetAttr("feature_size", &feature_size));
c->set_output(0, c->MakeShape({batch_size, 1, feature_size}));
return tensorflow::Status::OK();
})
.Doc(R"doc(
Dummy uniform causal attn op.
)doc");
# TFLite ops for sequence string projection.
load("@org_tensorflow//tensorflow:tensorflow.bzl", "pybind_extension")
load("@org_tensorflow//tensorflow/lite:build_def.bzl", "tflite_copts")
licenses(["notice"])
......@@ -100,3 +101,16 @@ cc_test(
"@flatbuffers",
],
)
pybind_extension(
name = "registerer",
srcs = ["registerer.cc"],
module_name = "registerer",
deps = [
":expected_value",
":layer_norm",
":sequence_string_projection",
"@org_tensorflow//tensorflow/lite:framework",
"@pybind11",
],
)
......@@ -18,7 +18,7 @@ limitations under the License.
#include "tflite_ops/quantization_util.h" // seq_flow_lite
namespace tflite {
namespace seq_flow_lite {
namespace ops {
namespace custom {
......@@ -156,4 +156,4 @@ TfLiteRegistration* Register_EXPECTED_VALUE() {
} // namespace custom
} // namespace ops
} // namespace tflite
} // namespace seq_flow_lite
......@@ -17,7 +17,7 @@ limitations under the License.
#include "tensorflow/lite/kernels/register.h"
namespace tflite {
namespace seq_flow_lite {
namespace ops {
namespace custom {
......@@ -25,6 +25,6 @@ TfLiteRegistration* Register_EXPECTED_VALUE();
} // namespace custom
} // namespace ops
} // namespace tflite
} // namespace seq_flow_lite
#endif // TENSORFLOW_MODELS_SEQUENCE_PROJECTION_TFLITE_OPS_EXPECTED_VALUE_H_
......@@ -17,10 +17,10 @@ limitations under the License.
#include <unordered_set>
#include <vector>
#include "tflite_ops/quantization_util.h" // seq_flow_lite
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tflite_ops/quantization_util.h" // seq_flow_lite
namespace tflite {
namespace seq_flow_lite {
namespace ops {
namespace custom {
......@@ -213,6 +213,102 @@ TfLiteStatus FlexibleLayerNorm(const TfLiteTensor* input, const float scale,
return kTfLiteOk;
}
/*
* Layer normalization is optimized as follows in integer arithmetic
*
* Algorithm
* *********
* Subscript i \in {1, ..., N}, Inputs q_i, Outputs oq_i.
*
* x_i = (q_i - input_zero_point) * input_scale
* mean = sum_i x_i / N
* var = sum_i (x_i * x_i / N) - mean * mean
* std = sqrt(var + tolerance)
* xn_i = (x_i - mean) / std
* y_i = xn_i * scale + offset
* o_i = round(y_i / output_scale + output_zero_point)
* oq_i = clamp(o_i, 0, 255)
*
* Optimizations
* *************
* Applying linear expansion
* x_i = q_i * input_scale - input_zero_point * input_scale
* or x_i = m * q_i + c
* mean = m * mean_q + c
* Variance is not affected by a constant shift to the input
* var = m^2 * var_q
* std = m * sqrt(var_q + tolerance)
* Expanding x_i, mean, std in the equation for xn_i
* xn_i = (m * q_i + c - m * mean_q - c) / (m * sqrt(var_q + tolerance))
* Simplifying
* xn_i = (q_i - mean_q) / sqrt(var_q + tolerance)
* Setting inv_std_q = 1 / sqrt(var_q + tolerance)
* xn_i = q_i * inv_std_q - mean_q * inv_std_q
* y_i = q_i * inv_std_q * scale - mean_q * inv_std_q * scale + offset
* o_i = round(q_i * inv_std_q * scale / output_scale
* - mean_q * inv_std_q * scale / output_scale
* + offset / output_scale
* + output_zero_point)
* Setting
* static_bias = offset / output_scale + output_zero_point
* static_scale = scale / output_scale
* o_i = round(q_i * inv_std_q * static_scale
* - mean_q * inv_std_q * static_scale
* + static_bias)
* Setting
* dynamic_scale = inv_std_q * static_scale
* dynamic_bias = static_bias - mean_q * dynamic_scale
* o_i = round(q_i * dynamic_scale + dynamic_bias)
* oq_i = clamp(round(q_i * dynamic_scale + dynamic_bias), 0, 255)
*
* This results in the optimized implementation below. The strategy is to
* first compute first and second order summary statistics for q_i in a loop,
* then compute mean_q, var_q and finally dynamic_scale/dynamic_bias. This
* allows oq_i to be computed quickly in a tight loop.
*/
TfLiteStatus IntegerLayerNorm(const TfLiteTensor* input, const float scale,
const float offset, TfLiteTensor* output) {
const int input_rank = input->dims->size;
const int num_features = input->dims->data[input_rank - 1];
const int time_steps =
static_cast<int>(GetNumberOfSteps(input) / num_features);
const float out_inverse_scale = 1.0f / output->params.scale;
const float static_scale = scale * out_inverse_scale;
const float static_bias = static_cast<float>(output->params.zero_point) +
offset * out_inverse_scale;
const float inverse_num_features = 1.0f / num_features;
const uint8_t* const in_ptr = input->data.uint8;
uint8_t* out_ptr = output->data.uint8;
for (int i = 0; i < time_steps; ++i) {
int32_t i32_sum_q = 0;
int32_t i32_sum_qq = 0;
const int32_t index = i * num_features;
for (int j = index; j < index + num_features; ++j) {
const int32_t q_i = static_cast<int32_t>(in_ptr[j]);
// Compute first and second order statistics for qi.
i32_sum_q += q_i;
i32_sum_qq += q_i * q_i;
}
const float second_moment_qq = i32_sum_qq * inverse_num_features;
const float mean_q = i32_sum_q * inverse_num_features;
const float var_q = second_moment_qq - mean_q * mean_q;
const float inv_std_q = 1.0f / sqrt(var_q + 1e-6);
const float dynamic_scale = inv_std_q * static_scale;
const float dynamic_bias = static_bias - mean_q * dynamic_scale;
for (int j = index; j < index + num_features; ++j) {
const int32_t invalue = static_cast<int32_t>(in_ptr[j]);
const float value = invalue * dynamic_scale + dynamic_bias;
// Use an offset cast to perform float rounding.
const int32_t i32value =
static_cast<int32_t>(value + ((value >= 0.0) ? 0.5f : -0.5f));
// Clamp the result.
out_ptr[j] = static_cast<uint8_t>(std::max(std::min(255, i32value), 0));
}
}
return kTfLiteOk;
}
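Editor's note: as a sanity check on the algebra in the comment above, here is a small numpy sketch (quantization parameters are hypothetical, chosen only for illustration) comparing the dynamic_scale/dynamic_bias path against a direct dequantize-normalize-requantize reference:

import numpy as np

np.random.seed(0)
in_scale, in_zp = 0.05, 128      # input:  x = (q - in_zp) * in_scale
out_scale, out_zp = 0.04, 128    # output: oq = round(y / out_scale + out_zp)
scale, offset, tol = 1.0, 0.0, 1e-6

q = np.random.randint(0, 256, size=64).astype(np.float64)

# Reference path: dequantize, layer-normalize in float, requantize.
x = (q - in_zp) * in_scale
y = (x - x.mean()) / np.sqrt(x.var() + tol) * scale + offset
ref = np.clip(np.round(y / out_scale + out_zp), 0, 255)

# Optimized path: statistics computed on the raw quantized values only.
mean_q = q.mean()
var_q = (q * q).mean() - mean_q * mean_q
dynamic_scale = (1.0 / np.sqrt(var_q + tol)) * scale / out_scale
dynamic_bias = offset / out_scale + out_zp - mean_q * dynamic_scale
opt = np.clip(np.round(q * dynamic_scale + dynamic_bias), 0, 255)

# The two paths agree up to one quantization step (the tolerance term is
# added in different domains, which perturbs std by a negligible amount).
assert np.max(np.abs(ref - opt)) <= 1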
TfLiteStatus DefaultLayerNormFloat(const TfLiteTensor* input, const float scale,
const float offset, TfLiteTensor* output) {
const int input_rank = input->dims->size;
......@@ -298,7 +394,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
if (num_axis == 1 && (axis->data.i32[0] == -1 ||
axis->data.i32[0] == (input->dims->size - 1))) {
if (input->type == kTfLiteUInt8) {
return DefaultLayerNorm(input, scale, offset, output);
return IntegerLayerNorm(input, scale, offset, output);
} else if (input->type == kTfLiteFloat32) {
return DefaultLayerNormFloat(input, scale, offset, output);
} else {
......@@ -328,4 +424,4 @@ TfLiteRegistration* Register_LAYER_NORM() {
} // namespace custom
} // namespace ops
} // namespace tflite
} // namespace seq_flow_lite
......@@ -17,7 +17,7 @@ limitations under the License.
#include "tensorflow/lite/kernels/register.h"
namespace tflite {
namespace seq_flow_lite {
namespace ops {
namespace custom {
......@@ -25,6 +25,6 @@ TfLiteRegistration* Register_LAYER_NORM();
} // namespace custom
} // namespace ops
} // namespace tflite
} // namespace seq_flow_lite
#endif // LEARNING_EXPANDER_POD_DEEP_POD_TFLITE_HANDLERS_LAYER_NORM_H_
......@@ -20,40 +20,35 @@ limitations under the License.
#include "flatbuffers/flexbuffers.h" // flatbuffer
#include "tensorflow/lite/kernels/test_util.h"
namespace tflite {
namespace seq_flow_lite {
namespace ops {
namespace custom {
namespace {
class LayerNormModel : public SingleOpModel {
using ::testing::ElementsAreArray;
using ::tflite::ArrayFloatNear;
using ::tflite::Dequantize;
using ::tflite::TensorType_INT32;
using ::tflite::TensorType_UINT8;
class LayerNormModel : public ::tflite::SingleOpModel {
public:
explicit LayerNormModel(const TensorData& input, float output_min,
explicit LayerNormModel(std::initializer_list<int> input_shape,
float input_min, float input_max, float output_min,
float output_max, float scale, float offset,
std::initializer_list<int> axis_shape,
std::initializer_list<int> axis)
std::initializer_list<int> axes)
: scale_value_(scale), offset_value_(offset) {
input_ = AddInput(input);
const int num_axes = axes.size();
input_ = AddInput({TensorType_UINT8, input_shape, input_min, input_max});
scale_ = AddInput(
{TensorType_UINT8, {1}, std::min(scale, 0.0f), std::max(scale, 0.0f)});
offset_ = AddInput({TensorType_UINT8,
{1},
std::min(offset, 0.0f),
std::max(offset, 0.0f)});
axis_ = AddConstInput(TensorType_INT32, axis, axis_shape);
axis_ = AddConstInput(TensorType_INT32, axes, {num_axes});
output_ = AddOutput({TensorType_UINT8, {}, output_min, output_max});
flexbuffers::Builder fbb;
fbb.Map([&] {
{
size_t start = fbb.StartVector("axes");
for (const int& aval : axis) {
fbb.Int(aval);
}
fbb.EndVector(start, /*typed=*/true, /*fixed=*/false);
}
});
fbb.Finish();
SetCustomOp("LayerNorm", fbb.GetBuffer(), Register_LAYER_NORM);
SetCustomOp("LayerNorm", {}, Register_LAYER_NORM);
BuildInterpreter({GetShape(input_)});
}
......@@ -88,8 +83,9 @@ TEST(LayerNormModelTest, RegularInput) {
const std::vector<float> expected_output = {0.0, -1.6, 0.53, 1.07,
0.0, -1.13, 1.59, -0.45};
LayerNormModel m({TensorType_UINT8, {1, 2, 4}, -10, 10}, -10, 10, 1.0, 0.0,
{1}, {2});
LayerNormModel m(/*input_shape=*/{1, 2, 4}, /*input_min=*/-10,
/*input_max=*/10, /*output_min=*/-10, /*output_max=*/10,
/*scale=*/1.0, /*offset=*/0.0, /*axes=*/{2});
m.SetInput(input);
m.Invoke();
EXPECT_THAT(
......@@ -106,8 +102,9 @@ TEST(LayerNormModelTest, NegativeScale) {
// Standard deviation values are 3.74, 4.41
const std::vector<float> expected_output = {0.0, 1.6, -0.53, -1.07,
0.0, 1.13, -1.59, 0.45};
LayerNormModel m({TensorType_UINT8, {1, 2, 4}, -10, 10}, -10, 10, -1.0, 0.0,
{1}, {2});
LayerNormModel m(/*input_shape=*/{1, 2, 4}, /*input_min=*/-10,
/*input_max=*/10, /*output_min=*/-10, /*output_max=*/10,
/*scale=*/-1.0, /*offset=*/0.0, /*axes=*/{2});
m.SetInput(input);
m.Invoke();
EXPECT_THAT(
......@@ -124,8 +121,9 @@ TEST(LayerNormModelTest, NegativeOffset) {
// Standard deviation values are 3.74, 4.41
const std::vector<float> expected_output = {-1.0, -2.6, -0.53, 0.07,
-1.0, -2.13, 0.59, -1.45};
LayerNormModel m({TensorType_UINT8, {1, 2, 4}, -10, 10}, -10, 10, 1.0, -1.0,
{1}, {2});
LayerNormModel m(/*input_shape=*/{1, 2, 4}, /*input_min=*/-10,
/*input_max=*/10, /*output_min=*/-10, /*output_max=*/10,
/*scale=*/1.0, /*offset=*/-1.0, /*axes=*/{2});
m.SetInput(input);
m.Invoke();
EXPECT_THAT(
......@@ -142,8 +140,9 @@ TEST(LayerNormModelTest, NegativeScaleAndOffset) {
// Standard deviation values are 3.74, 4.41
const std::vector<float> expected_output = {-1.0, 0.6, -1.53, -2.07,
-1.0, 0.13, -2.59, -0.55};
LayerNormModel m({TensorType_UINT8, {1, 2, 4}, -10, 10}, -10, 10, -1.0, -1.0,
{1}, {2});
LayerNormModel m(/*input_shape=*/{1, 2, 4}, /*input_min=*/-10,
/*input_max=*/10, /*output_min=*/-10, /*output_max=*/10,
/*scale=*/-1.0, /*offset=*/-1.0, /*axes=*/{2});
m.SetInput(input);
m.Invoke();
EXPECT_THAT(
......@@ -160,8 +159,9 @@ TEST(LayerNormModelTest, MultipleAxis) {
1.12, -2.08, 0.48, -0.16, -0.95, -1.46, -0.95, 0.06,
-0.69, -0.23, -1.60, -1.15, -0.80, -0.16, 0.48, 1.12};
LayerNormModel m({TensorType_UINT8, {1, 2, 3, 4}, -3, 3}, -3, 3, 1.0, 0.0,
{2}, {1, 3});
LayerNormModel m(/*input_shape=*/{1, 2, 3, 4}, /*input_min=*/-3,
/*input_max=*/3, /*output_min=*/-3, /*output_max=*/3,
/*scale=*/1.0, /*offset=*/0.0, /*axes=*/{1, 3});
m.SetInput(input);
m.Invoke();
EXPECT_THAT(
......@@ -178,8 +178,9 @@ TEST(LayerNormModelTest, MultipleNegativeAxis) {
1.12, -2.08, 0.48, -0.16, -0.95, -1.46, -0.95, 0.06,
-0.69, -0.23, -1.60, -1.15, -0.80, -0.16, 0.48, 1.12};
LayerNormModel m({TensorType_UINT8, {1, 2, 3, 4}, -3, 3}, -3, 3, 1.0, 0.0,
{2}, {-3, -1});
LayerNormModel m(/*input_shape=*/{1, 2, 3, 4}, /*input_min=*/-3,
/*input_max=*/3, /*output_min=*/-3, /*output_max=*/3,
/*scale=*/1.0, /*offset=*/0.0, /*axes=*/{-3, -1});
m.SetInput(input);
m.Invoke();
EXPECT_THAT(
......@@ -199,8 +200,9 @@ TEST(LayerNormModelTest, MultipleAxisWithLargeDepth) {
2.05, 2.05, -0.67, -0.28, 1.27, 1.27, -1.06, -1.06, -0.28,
0., -0.85, -0.42, 0., 0.42, -0.85, -0.42, 0., 0.42};
LayerNormModel m({TensorType_UINT8, {1, 2, 2, 9}, -1.0, 1.0}, -3.0, 3.0, 1.0,
0.0, {2}, {1, 3});
LayerNormModel m(/*input_shape=*/{1, 2, 2, 9}, /*input_min=*/-1.0,
/*input_max=*/1.0, /*output_min=*/-3.0, /*output_max=*/3.0,
/*scale=*/1.0, /*offset=*/0.0, /*axes=*/{1, 3});
m.SetInput(input);
m.Invoke();
EXPECT_THAT(
......@@ -211,4 +213,4 @@ TEST(LayerNormModelTest, MultipleAxisWithLargeDepth) {
} // namespace
} // namespace custom
} // namespace ops
} // namespace tflite
} // namespace seq_flow_lite
......@@ -20,7 +20,7 @@ limitations under the License.
#include "tensorflow/lite/context.h"
namespace tflite {
namespace seq_flow_lite {
// Returns the original (dequantized) value of an 8-bit quantized value.
inline float PodDequantizeValue(const TfLiteTensor& tensor, uint8_t value) {
......@@ -48,6 +48,6 @@ inline uint8_t PodQuantize(float value, int32_t zero_point,
return static_cast<uint8_t>(std::max(std::min(255, integer_value), 0));
}
} // namespace tflite
} // namespace seq_flow_lite
#endif // TENSORFLOW_MODELS_SEQUENCE_PROJECTION_TFLITE_OPS_QUANTIZATION_UTIL_H_
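Editor's note: the PodQuantize/PodDequantize helpers above implement the standard affine uint8 scheme. A minimal numpy restatement of the round-trip (scale and zero point are illustrative values, not taken from any model):

import numpy as np

scale, zero_point = 0.05, 128

def pod_quantize(value):
    # Mirror of PodQuantize: round-to-nearest via an offset cast, then clamp.
    i = int(value / scale + (0.5 if value >= 0 else -0.5)) + zero_point
    return int(np.clip(i, 0, 255))

def pod_dequantize(q):
    # Mirror of PodDequantizeValue: scale * (value - zero_point).
    return scale * (q - zero_point)

q = pod_quantize(1.337)
print(q, pod_dequantize(q))  # 155, ~1.35; quantization error is at most scale / 2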
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "pybind11/pybind11.h"
#include "pybind11/pytypes.h"
#include "tensorflow/lite/mutable_op_resolver.h"
#include "tflite_ops/expected_value.h" // seq_flow_lite
#include "tflite_ops/layer_norm.h" // seq_flow_lite
#include "tflite_ops/sequence_string_projection.h" // seq_flow_lite
PYBIND11_MODULE(registerer, m) {
m.doc() =
"Module that provides a registerer from the seq flow lite custom ops";
m.def(
"RegisterCustomOps",
[](uintptr_t ptr) {
::tflite::MutableOpResolver* resolver =
reinterpret_cast<::tflite::MutableOpResolver*>(ptr);
resolver->AddCustom(
"ExpectedValueOp",
::seq_flow_lite::ops::custom::Register_EXPECTED_VALUE());
resolver->AddCustom(
"LayerNorm", ::seq_flow_lite::ops::custom::Register_LAYER_NORM());
resolver->AddCustom("SEQUENCE_STRING_PROJECTION",
::seq_flow_lite::ops::custom::
Register_SEQUENCE_STRING_PROJECTION());
resolver->AddCustom("SequenceStringProjection",
::seq_flow_lite::ops::custom::
Register_SEQUENCE_STRING_PROJECTION());
resolver->AddCustom("SEQUENCE_STRING_PROJECTION_V2",
::seq_flow_lite::ops::custom::
Register_SEQUENCE_STRING_PROJECTION_V2());
resolver->AddCustom("SequenceStringProjectionV2",
::seq_flow_lite::ops::custom::
Register_SEQUENCE_STRING_PROJECTION_V2());
},
"Register custom ops used by seq flow lite layers");
}
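Editor's note: a sketch of how this extension might be used from Python, assuming TF's InterpreterWithCustomOps wrapper (which invokes each registerer callable with the address of its MutableOpResolver); the model path is a placeholder:

from tensorflow.lite.python import interpreter as interpreter_lib

from tflite_ops import registerer  # import seq_flow_lite module

# The wrapper calls RegisterCustomOps with a pointer to its resolver, which
# the pybind function above casts back to a MutableOpResolver*.
interpreter = interpreter_lib.InterpreterWithCustomOps(
    model_path="tflite.fb",  # placeholder path
    custom_op_registerers=[registerer.RegisterCustomOps])
interpreter.allocate_tensors()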
......@@ -31,7 +31,7 @@ limitations under the License.
#include "tf_ops/projection_util.h" // seq_flow_lite
#include "tflite_ops/quantization_util.h" // seq_flow_lite
namespace tflite {
namespace seq_flow_lite {
namespace ops {
namespace custom {
......@@ -163,7 +163,7 @@ class ProjectionParams {
DocSizeFeature(&doc_size_feature, num_tokens);
*data = PodQuantize(doc_size_feature, 127.0f, 127);
}
void Hash(const std::string& word, std::vector<uint64_t>* hash_codes) {
void Hash(const std::string& word, std::vector<uint64_t>& hash_codes) {
hasher_->GetHashCodes(word, hash_codes);
}
// Lower cases the input text and eliminates all unsupported
......@@ -269,6 +269,8 @@ class ProjectionParamsV2 : public ProjectionParams {
num_tokens, dims->data[1]);
return kTfLiteError;
}
tokens_.clear();
tokens_.reserve(num_tokens);
for (int i = 0; i < num_tokens; ++i) {
const tflite::StringRef strref = tflite::GetString(input_t, i);
tokens_.push_back(std::pair<const char*, size_t>(strref.str, strref.len));
......@@ -412,7 +414,7 @@ void TypedEval(const T* mapping_table, ProjectionParams* params, T* data) {
} else {
word = kEndToken;
}
params->Hash(word, &hash_codes);
params->Hash(word, hash_codes);
for (int hindex = 0, k = 0; hindex < hash_codes.size(); hindex++) {
auto hash = hash_codes[hindex];
for (int kmax = std::min(k + kIncrement, params->FeatureSize());
......@@ -505,4 +507,4 @@ TfLiteRegistration* Register_SEQUENCE_STRING_PROJECTION_V2() {
} // namespace custom
} // namespace ops
} // namespace tflite
} // namespace seq_flow_lite
......@@ -16,7 +16,7 @@ limitations under the License.
#define TENSORFLOW_MODELS_SEQUENCE_PROJECTION_TFLITE_OPS_SEQUENCE_STRING_PROJECTION_H_
#include "tensorflow/lite/kernels/register.h"
namespace tflite {
namespace seq_flow_lite {
namespace ops {
namespace custom {
......@@ -29,6 +29,6 @@ extern const char kSequenceStringProjectionV2[];
TfLiteRegistration* Register_SEQUENCE_STRING_PROJECTION_V2();
} // namespace custom
} // namespace ops
} // namespace tflite
} // namespace seq_flow_lite
#endif // TENSORFLOW_MODELS_SEQUENCE_PROJECTION_TFLITE_OPS_SEQUENCE_STRING_PROJECTION_H_
......@@ -25,29 +25,32 @@ limitations under the License.
#include "tf_ops/projection_util.h" // seq_flow_lite
#include "tflite_ops/tf_tflite_diff_test_util.h" // seq_flow_lite
namespace tflite {
namespace seq_flow_lite {
namespace ops {
namespace custom {
namespace {
using ::seq_flow_lite::testing::AttrValue;
using ::seq_flow_lite::testing::FloatTensor;
using ::seq_flow_lite::testing::IntTensor;
using ::seq_flow_lite::testing::OpEquivTestCase;
using ::seq_flow_lite::testing::StringTensor;
using ::seq_flow_lite::testing::TensorflowTfLiteOpTest;
using ::testing::ElementsAreArray;
using ::tflite::testing::AttrValue;
using ::tflite::testing::FloatTensor;
using ::tflite::testing::IntTensor;
using ::tflite::testing::OpEquivTestCase;
using ::tflite::testing::StringTensor;
using ::tflite::testing::TensorflowTfLiteOpTest;
class SequenceStringProjectionModel : public SingleOpModel {
using ::tflite::TensorType_FLOAT32;
using ::tflite::TensorType_STRING;
using ::tflite::TensorType_UINT8;
class SequenceStringProjectionModel : public ::tflite::SingleOpModel {
public:
explicit SequenceStringProjectionModel(
bool split_on_space, int max_splits, int word_novelty_bits,
int doc_size_levels, bool add_eos_tag, TensorType output_type,
int doc_size_levels, bool add_eos_tag, ::tflite::TensorType output_type,
const std::string& token_separators = "",
bool normalize_repetition = false, float add_first_cap = 0.0,
float add_all_caps = 0.0, const string& hashtype = kMurmurHash) {
float add_all_caps = 0.0, const std::string& hashtype = kMurmurHash) {
flexbuffers::Builder fbb;
fbb.Map([&] {
fbb.Int("feature_size", 4);
......@@ -798,11 +801,11 @@ INSTANTIATE_TEST_SUITE_P(
SequenceStringProjectionTests, SequenceStringProjectionTest,
::testing::ValuesIn(SequenceStringProjectionTestCases()));
class SequenceStringProjectionV2Model : public SingleOpModel {
class SequenceStringProjectionV2Model : public ::tflite::SingleOpModel {
public:
explicit SequenceStringProjectionV2Model(
std::vector<std::vector<int>> input_shapes,
const string& hashtype = kMurmurHash) {
const std::string& hashtype = kMurmurHash) {
flexbuffers::Builder fbb;
fbb.Map([&] {
fbb.Int("feature_size", 4);
......@@ -827,6 +830,7 @@ class SequenceStringProjectionV2Model : public SingleOpModel {
<< "Cannot allocate tensors";
return SingleOpModel::InvokeUnchecked();
}
std::vector<int> GetOutputShape() { return GetTensorShape(output_); }
private:
int input_;
......@@ -884,6 +888,15 @@ TEST(SequenceStringProjectionV2Test, RegularInputUint8) {
m.Invoke({"hello", "world"}, kTfLiteOk);
}
TEST(SequenceStringProjectionV2Test, NumberProjectionsForMultipleInputs) {
SequenceStringProjectionV2Model m({{1, 2}});
std::vector<std::string> input = {"hello", "world"};
m.Invoke(input, kTfLiteOk);
EXPECT_EQ(m.GetOutputShape()[1], input.size());
m.Invoke(input, kTfLiteOk);
EXPECT_EQ(m.GetOutputShape()[1], input.size());
}
class SequenceStringProjectionV2Test : public TensorflowTfLiteOpTest {
std::function<TfLiteRegistration*()> TfLiteOpRegistration() override {
return ops::custom::Register_SEQUENCE_STRING_PROJECTION_V2;
......@@ -986,7 +999,7 @@ INSTANTIATE_TEST_SUITE_P(
} // namespace
} // namespace custom
} // namespace ops
} // namespace tflite
} // namespace seq_flow_lite
int main(int argc, char** argv) {
// On Linux, add: absl::SetFlag(&FLAGS_logtostderr, true);
......
......@@ -19,11 +19,16 @@ limitations under the License.
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/lib/core/status_test_util.h"
namespace tflite {
namespace seq_flow_lite {
namespace testing {
using ::tensorflow::TensorProto;
using ::testing::FloatNear;
using ::tflite::TensorType_BOOL;
using ::tflite::TensorType_FLOAT32;
using ::tflite::TensorType_INT32;
using ::tflite::TensorType_STRING;
using ::tflite::TensorType_UINT8;
::tflite::TensorType TfTypeToTfLiteType(::tensorflow::DataType dtype) {
switch (dtype) {
......@@ -324,7 +329,7 @@ void TensorflowTfLiteOpTest::CompareOpOutput() {
const auto& quantization_params =
GetParam().output_tensors[i].quantization_params;
if (quantization_params.scale != 0.0) {
auto tflite_output_values = Dequantize(
auto tflite_output_values = tflite::Dequantize(
tflite_op_.ExtractVector<uint8_t>(tflite_outputs_[i]),
quantization_params.scale, quantization_params.zero_point);
for (int i = 0; i < tf_output_values.size(); i++) {
......
......@@ -25,7 +25,7 @@ limitations under the License.
#include "tensorflow/core/kernels/ops_testutil.h"
#include "tensorflow/lite/kernels/test_util.h"
namespace tflite {
namespace seq_flow_lite {
namespace testing {
// Convenience constructors.
......@@ -144,6 +144,6 @@ class TensorflowTfLiteOpTest
};
} // namespace testing
} // namespace tflite
} // namespace seq_flow_lite
#endif // TENSORFLOW_MODELS_SEQUENCE_PROJECTION_TFLITE_OPS_TF_TFLITE_DIFF_TEST_UTIL_H_
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# Lint as: python3
"""Binary to train PRADO model with TF 2.0."""
import importlib
import json
from absl import app
from absl import flags
from absl import logging
import tensorflow as tf
import input_fn_reader # import root module
FLAGS = flags.FLAGS
flags.DEFINE_string("config_path", None, "Path to a RunnerConfig.")
flags.DEFINE_enum("runner_mode", "train", ["train", "train_and_eval", "eval"],
"Runner mode.")
flags.DEFINE_string("master", None, "TensorFlow master URL.")
flags.DEFINE_string(
"output_dir", "/tmp/testV2",
"The output directory where the model checkpoints will be written.")
flags.DEFINE_bool("use_tpu", False, "Whether to use TPU or GPU/CPU.")
flags.DEFINE_integer(
"num_tpu_cores", 8,
"Only used if `use_tpu` is True. Total number of TPU cores to use.")
def load_runner_config():
with tf.io.gfile.GFile(FLAGS.config_path, "r") as f:
return json.loads(f.read())
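For reference, a hypothetical RunnerConfig showing only the keys this script reads: "name" (the importable module defining Encoder), "batch_size", and "model_config", whose "multilabel" flag selects the loss below. Real configs carry additional model-specific fields:

{
  "name": "prado",
  "batch_size": 32,
  "model_config": {
    "multilabel": false
  }
}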
def compute_loss(logits, labels, model_config, mode):
"""Creates a sequence labeling model."""
if mode != tf.estimator.ModeKeys.PREDICT:
if not model_config["multilabel"]:
loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
labels=labels, logits=logits)
else:
loss = tf.nn.sigmoid_cross_entropy_with_logits(
labels=labels, logits=logits)
loss = tf.reduce_mean(loss)
else:
loss = None
return loss
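A shape sketch of the two branches (the shapes follow from the TF loss ops, not from this change): the single-label path takes integer class ids, while the multilabel path takes float multi-hot targets shaped like the logits:

logits = tf.random.normal([8, 5])  # batch of 8, 5 classes
single = compute_loss(logits, tf.zeros([8], tf.int32),
                      {"multilabel": False}, tf.estimator.ModeKeys.TRAIN)
multi = compute_loss(logits, tf.zeros([8, 5], tf.float32),
                     {"multilabel": True}, tf.estimator.ModeKeys.TRAIN)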
def model_fn_builder(runner_config, mode):
"""Returns `model_fn` closure for TPUEstimator."""
rel_module_path = "" # empty base dir
model = importlib.import_module(rel_module_path + runner_config["name"])
model_config = runner_config["model_config"]
return model.Encoder(model_config, mode)
def main(_):
runner_config = load_runner_config()
if FLAGS.output_dir:
tf.io.gfile.makedirs(FLAGS.output_dir)
train_model = model_fn_builder(runner_config, tf.estimator.ModeKeys.TRAIN)
optimizer = tf.keras.optimizers.Adam()
train_input_fn = input_fn_reader.create_input_fn(
runner_config=runner_config,
mode=tf.estimator.ModeKeys.TRAIN,
drop_remainder=True)
params = {"batch_size": runner_config["batch_size"]}
train_ds = train_input_fn(params)
train_loss = tf.keras.metrics.Mean(name="train_loss")
@tf.function
def train_step(features):
with tf.GradientTape() as tape:
logits = train_model(features["projection"], features["seq_length"])
loss = compute_loss(logits, features["label"],
runner_config["model_config"],
tf.estimator.ModeKeys.TRAIN)
gradients = tape.gradient(loss, train_model.trainable_variables)
optimizer.apply_gradients(zip(gradients, train_model.trainable_variables))
train_loss(loss)
for epoch in range(1):
train_loss.reset_states()
for features in train_ds:
train_step(features)
step = optimizer.iterations.numpy()
if step % 100 == 0:
logging.info("Running step %s in epoch %s", step, epoch)
logging.info("Training loss: %s, epoch: %s, step: %s",
round(train_loss.result().numpy(), 4), epoch, step)
if __name__ == "__main__":
app.run(main)
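A typical invocation (the script name and paths are placeholders; in this revision only the training loop is wired up, so runner_mode values other than "train" have no effect):

python trainer.py \
  --config_path=/path/to/runner_config.json \
  --runner_mode=train \
  --output_dir=/tmp/prado_v2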
......@@ -29,13 +29,49 @@ def _dump_graph_in_text_format(filename, graph_def):
class InterpreterWithCustomOps(tf.lite.Interpreter):
"""Extended tf.lite.Interpreter."""
def __init__(self, model_content, custom_op_registerers):
self._custom_op_registerers = custom_op_registerers
def __init__(self, model_content, custom_op_registerers=None):
self._custom_op_registerers = custom_op_registerers or []
super(InterpreterWithCustomOps, self).__init__(model_content=model_content)
def op_details(self):
op_details = []
try:
op_details = self._get_ops_details() # Accessing experimental method.
except AttributeError:
print('Unable to access op details')
return op_details
def set_output_quantized_for_custom_ops(graph_def):
def op_histogram(self):
op_hist = {}
op_list = self.op_details()
for op in op_list:
if op['op_name'] in op_hist:
op_hist[op['op_name']] += 1
else:
op_hist[op['op_name']] = 1
return op_hist
def check_op_histogram(self, expected):
passed = True
for k, v in self.op_histogram().items():
if k not in expected:
print('Unexpected key {} found {} times.'.format(k, v))
passed = False
continue
elif expected[k] != v:
print('Expected {} counts of key {} found {}.'.format(
expected[k], k, v))
passed = False
del expected[k]
for k, v in expected.items():
print('Missing expected key {} value {}.'.format(k, v))
passed = False
return passed
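A sketch of how the histogram check might be used in a test (the model bytes and expected counts are illustrative, and `registerer` is the hypothetical pybind module from earlier). Note that check_op_histogram consumes its argument (matched keys are deleted from `expected`), so pass a fresh dict on each call:

interpreter = InterpreterWithCustomOps(
    model_content=tflite_model,
    custom_op_registerers=[registerer.RegisterCustomOps])
assert interpreter.check_op_histogram({
    'SEQUENCE_STRING_PROJECTION': 1,
    'EXPECTED_VALUE': 1,
})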
def set_output_quantized_for_custom_ops(graph_def, use_mlir=True):
"""Set output types/quantized flag for custom/unsupported ops."""
quantized_custom_ops = {
'SequenceStringProjection': [tf.float32.as_datatype_enum],
......@@ -44,6 +80,8 @@ def set_output_quantized_for_custom_ops(graph_def):
'ExpectedValueOp': [tf.float32.as_datatype_enum],
'LayerNorm': [tf.float32.as_datatype_enum],
'UniformCausalAttn': [tf.float32.as_datatype_enum],
'RnnDecoderReadState': [tf.float32.as_datatype_enum],
'RnnDecoderWriteState': [tf.float32.as_datatype_enum],
}
custom_op_renames = {
'SequenceStringProjection': 'SEQUENCE_STRING_PROJECTION',
......@@ -52,30 +90,27 @@ def set_output_quantized_for_custom_ops(graph_def):
for node in graph_def.node:
if node.op in quantized_custom_ops:
node.attr['_output_quantized'].b = True
node.attr['_output_types'].list.type[:] = quantized_custom_ops[node.op]
if node.op in custom_op_renames:
if use_mlir:
node.attr['_tfl_quant_trait'].s = str.encode('fully_quantizable')
else:
node.attr['_output_quantized'].b = True
node.attr['_output_types'].list.type[:] = quantized_custom_ops[node.op]
if not use_mlir and node.op in custom_op_renames:
node.op = custom_op_renames[node.op]
def generate_tflite(session, graph, input_tensors, output_tensors):
def generate_tflite(session,
graph,
input_tensors,
output_tensors,
use_mlir=True):
"""Generate TFLite model from a session, graph and input/output tensors."""
output_nodes = [tensor.name.split(':')[0] for tensor in output_tensors]
graph_def = tf.graph_util.convert_variables_to_constants(
session, graph.as_graph_def(), output_nodes)
set_output_quantized_for_custom_ops(graph_def)
# TODO(b/171063452): Bug needs to be fixed to handle this correctly.
# def _node_name(tensor):
# return tensor.name.split(':')[0]
set_output_quantized_for_custom_ops(graph_def, use_mlir)
# input_arrays_with_shape = [
# (_node_name(tensor), None) for tensor in input_tensors
# ]
# output_arrays = [_node_name(tensor) for tensor in output_tensors]
# converter = tf.lite.TFLiteConverter(graph_def, None, None,
# input_arrays_with_shape, output_arrays)
converter = tf.lite.TFLiteConverter(graph_def, input_tensors, output_tensors)
converter.inference_type = tf.uint8
converter.default_ranges_stats = (127.5, 127.5)
......@@ -83,5 +118,5 @@ def generate_tflite(session, graph, input_tensors, output_tensors):
tensor.op.name: (127.5, 127.5) for tensor in input_tensors
}
converter.allow_custom_ops = True
converter.experimental_new_converter = False
converter.experimental_new_converter = use_mlir
return converter.convert()
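A minimal end-to-end sketch in TF1-style graph mode (the toy graph is illustrative; any graph whose input and output tensors carry fake-quant ranges will do):

graph = tf.Graph()
with graph.as_default():
  inp = tf.compat.v1.placeholder(tf.float32, [1, 4], name='input')
  out = tf.quantization.fake_quant_with_min_max_args(inp, -1.0, 1.0)
with tf.compat.v1.Session(graph=graph) as session:
  session.run(tf.compat.v1.global_variables_initializer())
  tflite_model = generate_tflite(session, graph, [inp], [out])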