diff --git a/mace/kernels/reduce_mean.h b/mace/kernels/reduce_mean.h index 2b250e365e3e5dc5cdcd07ad95c03b264adbb9d6..81fc3d9c786b42b92b6a2b4392ad40d62245f22c 100644 --- a/mace/kernels/reduce_mean.h +++ b/mace/kernels/reduce_mean.h @@ -13,8 +13,10 @@ #include #include "mace/core/future.h" -#include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/tensor.h" +#ifdef MACE_ENABLE_OPENCL +#include "mace/core/runtime/opencl/cl2_header.h" +#endif namespace mace { namespace kernels { @@ -37,8 +39,7 @@ struct ReduceMeanFunctor : ReduceFunctorBase{ const bool keep_dims) : ReduceFunctorBase(axis, keep_dims) {} - void Simplify(const Tensor *input, - const bool keep_dims) { + void Simplify(const Tensor *input) { std::vector bitmap(static_cast(input->dim_size()), false); if (axis_.size() == 0) { for (int i = 0; i < input->dim_size(); ++i) { @@ -56,7 +57,7 @@ struct ReduceMeanFunctor : ReduceFunctorBase{ for (unsigned int i = 0; i < input->dim_size(); ++i) { if (!bitmap[i]) { out_shape_.push_back(input->dim(i)); - } else if (keep_dims) { + } else if (keep_dims_) { out_shape_.push_back(1); } } @@ -198,7 +199,7 @@ struct ReduceMeanFunctor : ReduceFunctorBase{ Tensor *output, StatsFuture *future) { MACE_UNUSED(future); - Simplify(input, true); + Simplify(input); output->Resize(out_shape_); Compute(input, output); return MACE_SUCCESS; diff --git a/mace/ops/reduce_mean.h b/mace/ops/reduce_mean.h index 4a317259730ef437978e630f4182d2c50c3ad0bc..d099f04ae66c40bca5a1ea372fa114e4212f0fa4 100644 --- a/mace/ops/reduce_mean.h +++ b/mace/ops/reduce_mean.h @@ -20,7 +20,7 @@ class ReduceMeanOp : public Operator { ReduceMeanOp(const OperatorDef &operator_def, Workspace *ws) : Operator(operator_def, ws), functor_(OperatorBase::GetRepeatedArgs("axis"), - OperatorBase::GetOptionalArg("keepdims", true)) {} + OperatorBase::GetOptionalArg("keepdims", false)) {} MaceStatus Run(StatsFuture *future) override { const Tensor *input = this->Input(INPUT); diff --git a/mace/ops/reduce_mean_test.cc b/mace/ops/reduce_mean_test.cc index 025f9c04143912490c4f66df768bb5a13bf7b08b..06686f4b61a3e38059c3fb56c1ed00355b569a3f 100644 --- a/mace/ops/reduce_mean_test.cc +++ b/mace/ops/reduce_mean_test.cc @@ -17,7 +17,8 @@ void Simple(const std::vector &input_shape, const std::vector &input, const std::vector &axis, const std::vector &output_shape, - const std::vector &output) { + const std::vector &output, + const bool keepdims = true) { // Construct graph OpsTestNet net; // Add input data @@ -27,6 +28,7 @@ void Simple(const std::vector &input_shape, OpDefBuilder("ReduceMean", "ReduceMeanTest") .Input("Input") .AddIntsArg("axis", axis) + .AddIntArg("keepdims", keepdims ? 1 : 0) .Output("Output") .Finalize(net.NewOperatorDef()); // Run @@ -37,6 +39,7 @@ void Simple(const std::vector &input_shape, OpDefBuilder("ReduceMean", "ReduceMeanTest") .Input("InputImg") .AddIntsArg("axis", axis) + .AddIntArg("keepdims", keepdims ? 1 : 0) .Output("OutputImg") .Finalize(net.NewOperatorDef()); // Run @@ -302,6 +305,18 @@ TEST_F(ReduceMeanOpTest, CPUSimple3Axis) { Simple3Axis(); } +TEST_F(ReduceMeanOpTest, CPUSimpleReduceDims) { + Simple({2, 2, 3, 4}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}, + {1, 2}, + {2, 4}, + {10, 11, 12, 13, + 10, 11, 12, 13}, + false); +} namespace { template @@ -329,6 +344,7 @@ void RandomTest(const std::vector &input_shape, OpDefBuilder("ReduceMean", "ReduceMeanTest") .Input("InputNCHW") .AddIntsArg("axis", axis_cpu) + .AddIntArg("keepdims", 1) .Output("OutputNCHW") .Finalize(net.NewOperatorDef()); // Run @@ -340,6 +356,7 @@ void RandomTest(const std::vector &input_shape, OpDefBuilder("ReduceMean", "ReduceMeanTest") .Input("InputImg") .AddIntsArg("axis", axis) + .AddIntArg("keepdims", 1) .Output("OutputImg") .Finalize(net.NewOperatorDef()); // Run diff --git a/mace/python/tools/converter_tool/base_converter.py b/mace/python/tools/converter_tool/base_converter.py index 78226c52d3173940e47c67617389539a599788ae..ac48726c633718b0de7110d44d951b8795e4ed7e 100644 --- a/mace/python/tools/converter_tool/base_converter.py +++ b/mace/python/tools/converter_tool/base_converter.py @@ -149,6 +149,13 @@ class MaceKeyword(object): mace_device = 'device' mace_value_str = 'value' mace_wino_block_size = 'wino_block_size' + mace_begin_mask_str = 'begin_mask' + mace_end_mask_str = 'end_mask' + mace_ellipsis_mask_str = 'ellipsis_mask' + mace_new_axis_mask_str = 'new_axis_mask' + mace_shrink_axis_mask_str = 'shrink_axis_mask' + mace_transpose_a_str = 'transpose_a' + mace_transpose_b_str = 'transpose_b' class TransformerRule(Enum): diff --git a/mace/python/tools/converter_tool/tensorflow_converter.py b/mace/python/tools/converter_tool/tensorflow_converter.py index 9076658d37c628915f8bdb062ea1675481bd07ec..65211cfdfda8f1f284c01a1b30019d54ccc4d11c 100644 --- a/mace/python/tools/converter_tool/tensorflow_converter.py +++ b/mace/python/tools/converter_tool/tensorflow_converter.py @@ -58,8 +58,9 @@ TFSupportedOps = [ 'Neg', 'Abs', 'RealDiv', + 'Square', 'SquaredDifference', - 'Pow', + 'Rsqrt', 'Relu', 'Relu6', 'Tanh', @@ -69,6 +70,7 @@ TFSupportedOps = [ 'MaxPool', 'Squeeze', 'MatMul', + 'BatchMatMul', 'Identity', 'Reshape', 'Shape', @@ -84,6 +86,11 @@ TFSupportedOps = [ 'ConcatV2', 'Mean', 'Const', + 'Gather', + 'StridedSlice', + 'Slice', + 'Stack', + 'Pack', ] TFOpType = Enum('TFOpType', [(op, op) for op in TFSupportedOps], type=str) @@ -114,7 +121,8 @@ class TensorflowConverter(base_converter.ConverterInterface): TFOpType.Abs.name: EltwiseType.ABS, TFOpType.RealDiv.name: EltwiseType.DIV, TFOpType.SquaredDifference.name: EltwiseType.SQR_DIFF, - TFOpType.Pow.name: EltwiseType.POW + TFOpType.Square.name: EltwiseType.POW, + TFOpType.Rsqrt.name: EltwiseType.POW } activation_type = { TFOpType.Relu.name: ActivationType.RELU, @@ -139,7 +147,8 @@ class TensorflowConverter(base_converter.ConverterInterface): TFOpType.Abs.name: self.convert_elementwise, TFOpType.RealDiv.name: self.convert_elementwise, TFOpType.SquaredDifference.name: self.convert_elementwise, - TFOpType.Pow.name: self.convert_elementwise, + TFOpType.Square.name: self.convert_elementwise, + TFOpType.Rsqrt.name: self.convert_elementwise, TFOpType.Relu.name: self.convert_activation, TFOpType.Relu6.name: self.convert_activation, TFOpType.Tanh.name: self.convert_activation, @@ -148,6 +157,7 @@ class TensorflowConverter(base_converter.ConverterInterface): TFOpType.AvgPool.name: self.convert_pooling, TFOpType.MaxPool.name: self.convert_pooling, TFOpType.MatMul.name: self.convert_matmul, + TFOpType.BatchMatMul.name: self.convert_matmul, TFOpType.Identity.name: self.convert_identity, TFOpType.Reshape.name: self.convert_reshape, TFOpType.Shape.name: self.convert_shape, @@ -164,6 +174,11 @@ class TensorflowConverter(base_converter.ConverterInterface): TFOpType.ConcatV2.name: self.convert_concat, TFOpType.Mean.name: self.convert_mean, TFOpType.Const.name: self.convert_nop, + TFOpType.Gather.name: self.convert_gather, + TFOpType.StridedSlice.name: self.convert_stridedslice, + TFOpType.Slice.name: self.convert_slice, + TFOpType.Pack.name: self.convert_stack, + TFOpType.Stack.name: self.convert_stack } self._option = option self._mace_net_def = mace_pb2.NetDef() @@ -323,18 +338,30 @@ class TensorflowConverter(base_converter.ConverterInterface): type_arg.name = MaceKeyword.mace_element_type_str type_arg.i = self.eltwise_type[tf_op.type].value - if len(tf_op.inputs[0].shape) == 0: + if tf_op.type == TFOpType.Square: value_arg = op.arg.add() value_arg.name = MaceKeyword.mace_value_str - value_arg.f = tf_op.inputs[0].eval().astype(np.float32) - self._skip_tensor.add(tf_op.inputs[0].name) - del op.input[0] - elif len(tf_op.inputs[1].shape) == 0: + value_arg.f = 2.0 + elif tf_op.type == TFOpType.Rsqrt: value_arg = op.arg.add() value_arg.name = MaceKeyword.mace_value_str - value_arg.f = tf_op.inputs[1].eval().astype(np.float32) - self._skip_tensor.add(tf_op.inputs[1].name) - del op.input[1] + value_arg.f = -0.5 + + if type_arg.i != EltwiseType.NEG.value \ + and type_arg.i != EltwiseType.POW.value \ + and type_arg.i != EltwiseType.ABS.value: + if len(tf_op.inputs[0].shape) == 0: + value_arg = op.arg.add() + value_arg.name = MaceKeyword.mace_value_str + value_arg.f = tf_op.inputs[0].eval().astype(np.float32) + self._skip_tensor.add(tf_op.inputs[0].name) + del op.input[0] + elif len(tf_op.inputs[1].shape) == 0: + value_arg = op.arg.add() + value_arg.name = MaceKeyword.mace_value_str + value_arg.f = tf_op.inputs[1].eval().astype(np.float32) + self._skip_tensor.add(tf_op.inputs[1].name) + del op.input[1] def convert_biasadd(self, tf_op): op = self.convert_general_op(tf_op) @@ -485,14 +512,28 @@ class TensorflowConverter(base_converter.ConverterInterface): axis = 4 + axis if axis < 0 else axis axis_arg.i = axis - mace_check(axis == 3, "only support concat at channel dimension") - self._skip_tensor.add(tf_op.inputs[-1].name) def convert_matmul(self, tf_op): op = self.convert_general_op(tf_op) op.type = MaceOp.MatMul.name + try: + adj_x = tf_op.get_attr('adj_x') + transpose_a_arg = op.arg.add() + transpose_a_arg.name = MaceKeyword.mace_transpose_a_str + transpose_a_arg.i = int(adj_x) + except ValueError: + pass + + try: + adj_y = tf_op.get_attr('adj_y') + transpose_b_arg = op.arg.add() + transpose_b_arg.name = MaceKeyword.mace_transpose_b_str + transpose_b_arg.i = int(adj_y) + except ValueError: + pass + def convert_shape(self, tf_op): op = self.convert_general_op(tf_op) op.type = MaceOp.Shape.name @@ -518,18 +559,20 @@ class TensorflowConverter(base_converter.ConverterInterface): axis_arg.ints.extend(axis_value) def convert_transpose(self, tf_op): + op = self.convert_general_op(tf_op) + op.type = MaceOp.Transpose.name + perm = tf_op.inputs[1].eval().astype(np.int32) ordered_perm = np.sort(perm) - mace_check(np.array_equal(perm, ordered_perm), - "Transpose not supported yet, only internal transpose" - " in composed ops might be supported") - - op = self.convert_general_op(tf_op) - op.type = 'Identity' - del op.input[1:] - - self._skip_tensor.add(tf_op.inputs[1].name) + if np.array_equal(perm, ordered_perm): + op.type = MaceOp.Identity.name + del op.input[1:] + self._skip_tensor.add(tf_op.inputs[1].name) + else: + dims_arg = op.arg.add() + dims_arg.name = MaceKeyword.mace_dims_str + dims_arg.ints.extend(perm) def convert_mean(self, tf_op): op = self.convert_general_op(tf_op) @@ -540,8 +583,63 @@ class TensorflowConverter(base_converter.ConverterInterface): axis_arg = op.arg.add() axis_arg.name = MaceKeyword.mace_axis_str axis_arg.ints.extend(reduce_dims) - keep_dims_arg = op.arg.add() - keep_dims_arg.name = MaceKeyword.mace_keepdims_str - keep_dims_arg.i = tf_op.get_attr(MaceKeyword.mace_keepdims_str) + try: + keep_dims = tf_op.get_attr(MaceKeyword.mace_keepdims_str) + keep_dims_arg = op.arg.add() + keep_dims_arg.name = MaceKeyword.mace_keepdims_str + keep_dims_arg.i = keep_dims + except ValueError: + pass self._skip_tensor.add(tf_op.inputs[1].name) + + def convert_gather(self, tf_op): + op = self.convert_general_op(tf_op) + op.type = MaceOp.Gather.name + + if len(tf_op.inputs) >= 3: + axis_arg = op.arg.add() + axis_arg.name = MaceKeyword.mace_axis_str + axis_arg.i = tf_op.inputs[2].eval() + + def convert_stridedslice(self, tf_op): + op = self.convert_general_op(tf_op) + op.type = MaceOp.StridedSlice.name + + begin_mask_arg = op.arg.add() + begin_mask_arg.name = MaceKeyword.mace_begin_mask_str + begin_mask_arg.i = tf_op.get_attr(MaceKeyword.mace_begin_mask_str) + + end_mask_arg = op.arg.add() + end_mask_arg.name = MaceKeyword.mace_end_mask_str + end_mask_arg.i = tf_op.get_attr(MaceKeyword.mace_end_mask_str) + + ellipsis_mask_arg = op.arg.add() + ellipsis_mask_arg.name = MaceKeyword.mace_ellipsis_mask_str + ellipsis_mask_arg.i = tf_op.get_attr( + MaceKeyword.mace_ellipsis_mask_str) + + new_axis_mask_arg = op.arg.add() + new_axis_mask_arg.name = MaceKeyword.mace_new_axis_mask_str + new_axis_mask_arg.i = tf_op.get_attr( + MaceKeyword.mace_new_axis_mask_str) + + shrink_axis_mask_arg = op.arg.add() + shrink_axis_mask_arg.name = MaceKeyword.mace_shrink_axis_mask_str + shrink_axis_mask_arg.i = tf_op.get_attr( + MaceKeyword.mace_shrink_axis_mask_str) + + def convert_slice(self, tf_op): + op = self.convert_general_op(tf_op) + op.type = MaceOp.StridedSlice.name + + def convert_stack(self, tf_op): + op = self.convert_general_op(tf_op) + op.type = MaceOp.Stack.name + + axis_arg = op.arg.add() + axis_arg.name = MaceKeyword.mace_axis_str + try: + axis_arg.i = tf_op.get_attr(MaceKeyword.mace_axis_str) + except ValueError: + axis_arg.i = 0 diff --git a/mace/python/tools/converter_tool/transformer.py b/mace/python/tools/converter_tool/transformer.py index b176e29dea18d11e54e8414dda7983d57cf0c530..5345129eaf7a3222d37f11dc7937c7373179bd06 100644 --- a/mace/python/tools/converter_tool/transformer.py +++ b/mace/python/tools/converter_tool/transformer.py @@ -1105,12 +1105,13 @@ class Transformer(base_converter.ConverterInterface): for op in net.op: if op.type == MaceOp.MatMul.name: input_shape = self.get_tensor_shape(op.input[0]) - _, h, w, _ = self.sort_feature_map_shape(input_shape, - ConverterUtil.data_format(self._producer[op.input[0]])) # noqa - if h == 1 and w == 1 and op.input[1] in self._consts: - weight = self._consts[op.input[1]] - if len(weight.dims) == 2: - op.type = MaceOp.FullyConnected.name + if len(input_shape) == 4: + _, h, w, _ = self.sort_feature_map_shape(input_shape, + ConverterUtil.data_format(self._producer[op.input[0]])) # noqa + if h == 1 and w == 1 and op.input[1] in self._consts: + weight = self._consts[op.input[1]] + if len(weight.dims) == 2: + op.type = MaceOp.FullyConnected.name return False