From fea5af4fb8ff4b3b01ab9c669afa568cf25a6473 Mon Sep 17 00:00:00 2001 From: liutuo Date: Tue, 5 Jun 2018 17:41:56 +0800 Subject: [PATCH] fast style transfer model transform --- mace/kernels/deconv_2d.h | 2 +- mace/kernels/opencl/cl/eltwise.cl | 4 +- mace/kernels/opencl/cl/reduce_mean.cl | 8 ++-- mace/kernels/opencl/reduce_mean_opencl.cc | 2 +- .../tools/converter_tool/base_converter.py | 1 + .../converter_tool/tensorflow_converter.py | 39 +++++++++++++++---- .../tools/converter_tool/transformer.py | 12 +++++- 7 files changed, 50 insertions(+), 18 deletions(-) diff --git a/mace/kernels/deconv_2d.h b/mace/kernels/deconv_2d.h index e3080e6f..e50e7aff 100644 --- a/mace/kernels/deconv_2d.h +++ b/mace/kernels/deconv_2d.h @@ -146,7 +146,7 @@ struct Deconv2dFunctorBase { static void CalcDeconvPaddingAndInputSize( const index_t *input_shape, // NHWC - const index_t *filter_shape, // OIHW + const index_t *filter_shape, // HWOI const int *strides, Padding padding, const index_t *output_shape, diff --git a/mace/kernels/opencl/cl/eltwise.cl b/mace/kernels/opencl/cl/eltwise.cl index e3cd7ecf..52ee65eb 100644 --- a/mace/kernels/opencl/cl/eltwise.cl +++ b/mace/kernels/opencl/cl/eltwise.cl @@ -74,9 +74,9 @@ __kernel void eltwise(KERNEL_ERROR_PARAMS out = diff * diff; #elif ELTWISE_TYPE == 9 #ifdef SWAPPED - out = pow(in0, in1); - #else out = pow(in1, in0); + #else + out = pow(in0, in1); #endif #endif diff --git a/mace/kernels/opencl/cl/reduce_mean.cl b/mace/kernels/opencl/cl/reduce_mean.cl index ceaac871..ee693321 100644 --- a/mace/kernels/opencl/cl/reduce_mean.cl +++ b/mace/kernels/opencl/cl/reduce_mean.cl @@ -3,7 +3,7 @@ __kernel void reduce_mean(KERNEL_ERROR_PARAMS GLOBAL_WORK_GROUP_SIZE_DIM3 __read_only image2d_t input, - __local float4* group_sum, + __local DATA_TYPE4 *group_sum, __private const int group_size, __private const int partial_len, __private const int remain_index, @@ -19,12 +19,10 @@ __kernel void reduce_mean(KERNEL_ERROR_PARAMS const int k = get_global_id(2); #ifndef NON_UNIFORM_WORK_GROUP - if (i >= local_size_dim0 || j >= local_size_dim1 || k >= global_size_dim2) + if (k >= global_size_dim2) return; - const int dim0_size = local_size_dim0; -#else - const int dim0_size = get_local_size(0); #endif + const int dim0_size = get_local_size(0); DATA_TYPE4 tmp = (DATA_TYPE4){0, 0, 0, 0}; const int index = j * dim0_size + i; const int b = k / channel_blocks; diff --git a/mace/kernels/opencl/reduce_mean_opencl.cc b/mace/kernels/opencl/reduce_mean_opencl.cc index a8737c7f..82b6f913 100644 --- a/mace/kernels/opencl/reduce_mean_opencl.cc +++ b/mace/kernels/opencl/reduce_mean_opencl.cc @@ -98,7 +98,7 @@ MaceStatus ReduceMeanFunctor::operator()( kernel_.setArg(idx++, gws[2]); } kernel_.setArg(idx++, *(input->opencl_image())); - kernel_.setArg(idx++, (group_size * 4 * sizeof(float)), + kernel_.setArg(idx++, (group_size * 4 * sizeof(T)), nullptr); kernel_.setArg(idx++, static_cast(group_size)); kernel_.setArg(idx++, static_cast(partial_len)); diff --git a/mace/python/tools/converter_tool/base_converter.py b/mace/python/tools/converter_tool/base_converter.py index ac48726c..b5ef56b9 100644 --- a/mace/python/tools/converter_tool/base_converter.py +++ b/mace/python/tools/converter_tool/base_converter.py @@ -149,6 +149,7 @@ class MaceKeyword(object): mace_device = 'device' mace_value_str = 'value' mace_wino_block_size = 'wino_block_size' + mace_output_shape_str = 'output_shape' mace_begin_mask_str = 'begin_mask' mace_end_mask_str = 'end_mask' mace_ellipsis_mask_str = 'ellipsis_mask' diff --git a/mace/python/tools/converter_tool/tensorflow_converter.py b/mace/python/tools/converter_tool/tensorflow_converter.py index 65211cfd..63d046bd 100644 --- a/mace/python/tools/converter_tool/tensorflow_converter.py +++ b/mace/python/tools/converter_tool/tensorflow_converter.py @@ -57,6 +57,7 @@ TFSupportedOps = [ 'Max', 'Neg', 'Abs', + 'Pow', 'RealDiv', 'Square', 'SquaredDifference', @@ -119,6 +120,7 @@ class TensorflowConverter(base_converter.ConverterInterface): TFOpType.Max.name: EltwiseType.MAX, TFOpType.Neg.name: EltwiseType.NEG, TFOpType.Abs.name: EltwiseType.ABS, + TFOpType.Pow.name: EltwiseType.POW, TFOpType.RealDiv.name: EltwiseType.DIV, TFOpType.SquaredDifference.name: EltwiseType.SQR_DIFF, TFOpType.Square.name: EltwiseType.POW, @@ -145,6 +147,7 @@ class TensorflowConverter(base_converter.ConverterInterface): TFOpType.Max.name: self.convert_elementwise, TFOpType.Neg.name: self.convert_elementwise, TFOpType.Abs.name: self.convert_elementwise, + TFOpType.Pow.name: self.convert_elementwise, TFOpType.RealDiv.name: self.convert_elementwise, TFOpType.SquaredDifference.name: self.convert_elementwise, TFOpType.Square.name: self.convert_elementwise, @@ -327,8 +330,17 @@ class TensorflowConverter(base_converter.ConverterInterface): dilation_val = tf_op.get_attr(tf_dilations_str)[1:3] except ValueError: dilation_val = [1, 1] - dilation_arg.ints.extend(dilation_val) + else: + del op.input[1:] + output_shape_arg = op.arg.add() + output_shape_arg.name = MaceKeyword.mace_output_shape_str + output_shape_value = tf_op.inputs[0].eval().astype(np.int32).flat + output_shape_arg.ints.extend(output_shape_value) + self._skip_tensor.add(tf_op.inputs[0].name) + del op.input[0] + if len(tf_op.inputs) >= 3: + op.input.extend([tf_op.inputs[2].name, tf_op.inputs[1].name]) def convert_elementwise(self, tf_op): op = self.convert_general_op(tf_op) @@ -348,7 +360,6 @@ class TensorflowConverter(base_converter.ConverterInterface): value_arg.f = -0.5 if type_arg.i != EltwiseType.NEG.value \ - and type_arg.i != EltwiseType.POW.value \ and type_arg.i != EltwiseType.ABS.value: if len(tf_op.inputs[0].shape) == 0: value_arg = op.arg.add() @@ -578,18 +589,30 @@ class TensorflowConverter(base_converter.ConverterInterface): op = self.convert_general_op(tf_op) del op.input[1:] - reduce_dims = tf_op.inputs[1].eval() op.type = MaceOp.ReduceMean.name axis_arg = op.arg.add() axis_arg.name = MaceKeyword.mace_axis_str + if len(tf_op.inputs) > 1: + reduce_dims = tf_op.inputs[1].eval() + else: + try: + reduce_dims = tf_op.get_attr('axis') + except ValueError: + try: + reduce_dims = tf_op.get_attr('reduction_indices') + except ValueError: + reduce_dims = [] axis_arg.ints.extend(reduce_dims) + keep_dims_arg = op.arg.add() + keep_dims_arg.name = MaceKeyword.mace_keepdims_str try: - keep_dims = tf_op.get_attr(MaceKeyword.mace_keepdims_str) - keep_dims_arg = op.arg.add() - keep_dims_arg.name = MaceKeyword.mace_keepdims_str - keep_dims_arg.i = keep_dims + keep_dims = tf_op.get_attr('keepdims') except ValueError: - pass + try: + keep_dims = tf_op.get_attr('keep_dims') + except ValueError: + keep_dims = 0 + keep_dims_arg.i = keep_dims self._skip_tensor.add(tf_op.inputs[1].name) diff --git a/mace/python/tools/converter_tool/transformer.py b/mace/python/tools/converter_tool/transformer.py index 5345129e..4a9e3fbe 100644 --- a/mace/python/tools/converter_tool/transformer.py +++ b/mace/python/tools/converter_tool/transformer.py @@ -919,7 +919,10 @@ class Transformer(base_converter.ConverterInterface): filter = self._consts[op.input[1]] filter_data = np.array(filter.float_data).reshape( filter.dims) - filter_data = filter_data.transpose(3, 2, 0, 1) + if op.type == MaceOp.Deconv2D.name: + filter_data = filter_data.transpose(2, 3, 0, 1) + else: + filter_data = filter_data.transpose(3, 2, 0, 1) filter.float_data[:] = filter_data.flat filter.dims[:] = filter_data.shape if op.type == MaceOp.FullyConnected.name: @@ -993,6 +996,13 @@ class Transformer(base_converter.ConverterInterface): self.buffer_to_image(op, 2, OpenCLBufferType.ARGUMENT) elif op.type == MaceOp.BiasAdd.name: self.buffer_to_image(op, 1, OpenCLBufferType.ARGUMENT) + elif op.type == MaceOp.Eltwise.name and len(op.input) == 2: + if op.input[0] in self._consts \ + and len(self._consts[op.input[0]].dims) == 1: + self.buffer_to_image(op, 0, OpenCLBufferType.ARGUMENT) + if op.input[1] in self._consts \ + and len(self._consts[op.input[1]].dims) == 1: + self.buffer_to_image(op, 1, OpenCLBufferType.ARGUMENT) elif op.type == MaceOp.FoldedBatchNorm.name: self.buffer_to_image(op, 1, OpenCLBufferType.ARGUMENT) self.buffer_to_image(op, 2, OpenCLBufferType.ARGUMENT) -- GitLab