diff --git a/mace/ops/depth_to_space.cc b/mace/ops/depth_to_space.cc
index ba87830a9038ac2c791787a148b114d0a5c0c8f6..6efa4d24566972164fd39d848d037f8c850e12e2 100644
--- a/mace/ops/depth_to_space.cc
+++ b/mace/ops/depth_to_space.cc
@@ -25,7 +25,10 @@ namespace mace {
 namespace ops {
 
 template <DeviceType D, class T>
-class DepthToSpaceOp : public Operation {
+class DepthToSpaceOp;
+
+template<>
+class DepthToSpaceOp<DeviceType::CPU, float> : public Operation {
  public:
   explicit DepthToSpaceOp(OpConstructContext *context)
       : Operation(context),
@@ -55,8 +58,8 @@ class DepthToSpaceOp : public Operation {
 
     Tensor::MappingGuard logits_guard(input);
     Tensor::MappingGuard output_guard(output);
-    const T *input_ptr = input->data<T>();
-    T *output_ptr = output->mutable_data<T>();
+    const float *input_ptr = input->data<float>();
+    float *output_ptr = output->mutable_data<float>();
 
     for (index_t b = 0; b < batch_size; ++b) {
       for (index_t d = 0; d < output_depth; ++d) {
@@ -89,6 +92,73 @@ class DepthToSpaceOp : public Operation {
   const int block_size_;
 };
 
+#ifdef MACE_ENABLE_QUANTIZE
+template<>
+class DepthToSpaceOp<DeviceType::CPU, uint8_t> : public Operation {
+ public:
+  explicit DepthToSpaceOp(OpConstructContext *context)
+      : Operation(context),
+        block_size_(Operation::GetOptionalArg<int>("block_size", 1)) {}
+
+  MaceStatus Run(OpContext *context) override {
+    MACE_UNUSED(context);
+    const Tensor *input = this->Input(0);
+    Tensor *output = this->Output(0);
+    MACE_CHECK(input->dim_size() == 4, "input dim should be 4");
+    const index_t batch_size = input->dim(0);
+    const index_t input_depth = input->dim(3);
+    const index_t input_height = input->dim(1);
+    const index_t input_width = input->dim(2);
+
+    MACE_CHECK(input_depth % (block_size_ * block_size_) == 0,
+               "input depth should be dividable by block_size * block_size",
+               input_depth);
+
+    const index_t output_depth = input_depth / (block_size_ * block_size_);
+    const index_t output_width = input_width * block_size_;
+    const index_t output_height = input_height * block_size_;
+    std::vector<index_t>
+        output_shape = {batch_size, output_height, output_width, output_depth};
+
+    MACE_RETURN_IF_ERROR(output->Resize(output_shape));
+
+    Tensor::MappingGuard logits_guard(input);
+    Tensor::MappingGuard output_guard(output);
+    const uint8_t *input_ptr = input->data<uint8_t>();
+    uint8_t *output_ptr = output->mutable_data<uint8_t>();
+
+    for (index_t b = 0; b < batch_size; ++b) {
+      for (index_t h = 0; h < output_height; ++h) {
+        const index_t in_h = h / block_size_;
+        const index_t offset_h = (h % block_size_);
+        for (int w = 0; w < output_width; ++w) {
+          const index_t in_w = w / block_size_;
+          const index_t offset_w = w % block_size_;
+          const index_t offset_d =
+              (offset_h * block_size_ + offset_w) * output_depth;
+
+          for (index_t d = 0; d < output_depth; ++d) {
+            const index_t in_d = d + offset_d;
+            const index_t o_index =
+                ((b * output_height + h) * output_width + w) * output_depth
+                    + d;
+            const index_t i_index =
+                ((b * input_height + in_h) * input_width + in_w) * input_depth
+                    + in_d;
+            output_ptr[o_index] = input_ptr[i_index];
+          }
+        }
+      }
+    }
+
+    return MaceStatus::MACE_SUCCESS;
+  }
+
+ private:
+  const int block_size_;
+};
+#endif  // MACE_ENABLE_QUANTIZE
+
 #ifdef MACE_ENABLE_OPENCL
 template<>
 class DepthToSpaceOp<DeviceType::GPU, float> : public Operation {
@@ -118,6 +188,11 @@ void RegisterDepthToSpace(OpRegistryBase *op_registry) {
   MACE_REGISTER_OP(op_registry, "DepthToSpace", DepthToSpaceOp,
                    DeviceType::CPU, float);
 
+#ifdef MACE_ENABLE_QUANTIZE
+  MACE_REGISTER_OP(op_registry, "DepthToSpace",
+                   DepthToSpaceOp, DeviceType::CPU, uint8_t);
+#endif  // MACE_ENABLE_QUANTIZE
+
   MACE_REGISTER_GPU_OP(op_registry, "DepthToSpace", DepthToSpaceOp);
 }
 
diff --git a/mace/ops/space_to_depth.cc b/mace/ops/space_to_depth.cc
index d9b5473629da962985261bc955dc591ef4b3a0f7..59c1a342162d0637f8e2d30b33c9b1835fac61f5 100644
--- a/mace/ops/space_to_depth.cc
+++ b/mace/ops/space_to_depth.cc
@@ -25,7 +25,10 @@ namespace mace {
 namespace ops {
 
 template <DeviceType D, class T>
-class SpaceToDepthOp : public Operation {
+class SpaceToDepthOp;
+
+template<>
+class SpaceToDepthOp<DeviceType::CPU, float> : public Operation {
  public:
   explicit SpaceToDepthOp(OpConstructContext *context)
       : Operation(context),
@@ -55,8 +58,8 @@ class SpaceToDepthOp : public Operation {
 
     Tensor::MappingGuard logits_guard(input);
     Tensor::MappingGuard output_guard(output);
-    const T *input_ptr = input->data<T>();
-    T *output_ptr = output->mutable_data<T>();
+    const float *input_ptr = input->data<float>();
+    float *output_ptr = output->mutable_data<float>();
 
     for (index_t b = 0; b < batch_size; ++b) {
       for (index_t d = 0; d < input_depth; ++d) {
@@ -87,6 +90,71 @@ class SpaceToDepthOp : public Operation {
   const int block_size_;
 };
 
+#ifdef MACE_ENABLE_QUANTIZE
+template<>
+class SpaceToDepthOp<DeviceType::CPU, uint8_t> : public Operation {
+ public:
+  explicit SpaceToDepthOp(OpConstructContext *context)
+      : Operation(context),
+        block_size_(Operation::GetOptionalArg<int>("block_size", 1)) {}
+
+  MaceStatus Run(OpContext *context) override {
+    MACE_UNUSED(context);
+    const Tensor *input = this->Input(0);
+    Tensor *output = this->Output(0);
+    MACE_CHECK(input->dim_size() == 4, "input dim should be 4");
+    const index_t batch_size = input->dim(0);
+    const index_t input_depth = input->dim(3);
+    const index_t input_height = input->dim(1);
+    const index_t input_width = input->dim(2);
+
+    MACE_CHECK(
+        (input_width % block_size_ == 0) && (input_height % block_size_ == 0),
+        "input width and height should be dividable by block_size");
+
+    const index_t output_depth = input_depth * block_size_ * block_size_;
+    const index_t output_width = input_width / block_size_;
+    const index_t output_height = input_height / block_size_;
+    std::vector<index_t>
+        output_shape = {batch_size, output_height, output_width, output_depth};
+
+    MACE_RETURN_IF_ERROR(output->Resize(output_shape));
+
+    Tensor::MappingGuard logits_guard(input);
+    Tensor::MappingGuard output_guard(output);
+    const uint8_t *input_ptr = input->data<uint8_t>();
+    uint8_t *output_ptr = output->mutable_data<uint8_t>();
+
+    for (index_t b = 0; b < batch_size; ++b) {
+      for (index_t h = 0; h < input_height; ++h) {
+        const index_t out_h = h / block_size_;
+        const index_t offset_h = (h % block_size_);
+        for (index_t w = 0; w < input_width; ++w) {
+          const index_t out_w = w / block_size_;
+          const index_t offset_w = (w % block_size_);
+          const index_t offset_d =
+              (offset_h * block_size_ + offset_w) * input_depth;
+
+          for (index_t d = 0; d < input_depth; ++d) {
+            const index_t out_d = d + offset_d;
+            const index_t o_index =
+                ((b * output_height + out_h) * output_width + out_w)
+                    * output_depth + out_d;
+            const index_t i_index =
+                ((b * input_height + h) * input_width + w) * input_depth + d;
+            output_ptr[o_index] = input_ptr[i_index];
+          }
+        }
+      }
+    }
+    return MaceStatus::MACE_SUCCESS;
+  }
+
+ private:
+  const int block_size_;
+};
+#endif  // MACE_ENABLE_QUANTIZE
+
 #ifdef MACE_ENABLE_OPENCL
 template<>
 class SpaceToDepthOp<DeviceType::GPU, float> : public Operation {
@@ -116,6 +184,11 @@ void RegisterSpaceToDepth(OpRegistryBase *op_registry) {
   MACE_REGISTER_OP(op_registry, "SpaceToDepth", SpaceToDepthOp,
                    DeviceType::CPU, float);
 
+#ifdef MACE_ENABLE_QUANTIZE
+  MACE_REGISTER_OP(op_registry, "SpaceToDepth",
+                   SpaceToDepthOp, DeviceType::CPU, uint8_t);
+#endif  // MACE_ENABLE_QUANTIZE
+
   MACE_REGISTER_GPU_OP(op_registry, "SpaceToDepth", SpaceToDepthOp);
 }
 
diff --git a/mace/python/tools/converter_tool/transformer.py b/mace/python/tools/converter_tool/transformer.py
index 1f2e986d5c5b04a64bc9b5a5395716ccee5b2e28..e9559861220df330ad55459577b6bbf8ce301e38 100644
--- a/mace/python/tools/converter_tool/transformer.py
+++ b/mace/python/tools/converter_tool/transformer.py
@@ -1819,7 +1819,9 @@ class Transformer(base_converter.ConverterInterface):
                         MaceOp.Reshape.name,
                         MaceOp.ResizeBilinear.name,
                         MaceOp.BatchToSpaceND.name,
-                        MaceOp.SpaceToBatchND.name]:
+                        MaceOp.SpaceToBatchND.name,
+                        MaceOp.SpaceToDepth.name,
+                        MaceOp.DepthToSpace.name]:
             del op.quantize_info[:]
             producer_op = self._producer[op.input[0]]
             if producer_op.output[0] in self._option.input_nodes:
diff --git a/test/ccunit/mace/ops/depth_to_space_test.cc b/test/ccunit/mace/ops/depth_to_space_test.cc
index 3bf32efa3c849f3c25a872fe1c989c18c872d037..f093bae6e09a8cb64095bbdaff923dc41966f11d 100644
--- a/test/ccunit/mace/ops/depth_to_space_test.cc
+++ b/test/ccunit/mace/ops/depth_to_space_test.cc
@@ -262,6 +262,76 @@ TEST_F(DepthToSpaceOpTest, OPENCLRandomBatchHalf) {
   RandomTest<half>(2, {2, 384, 384, 8});
 }
 
+namespace {
+
+void TestDepthToSpaceQuantize(const int block_size,
+                              const std::vector<index_t> &shape) {
+  OpsTestNet net;
+  net.AddRandomInput<DeviceType::CPU, float>("Input",
+                                             shape,
+                                             false,
+                                             false,
+                                             true,
+                                             -1.f,
+                                             1.f);
+
+  // run cpu
+  net.TransformDataFormat<DeviceType::CPU, float>(
+      "Input", DataFormat::NHWC, "InputNCHW", DataFormat::NCHW);
+
+  OpDefBuilder("DepthToSpace", "DepthToSpaceTest")
+      .Input("InputNCHW")
+      .AddIntArg("block_size", block_size)
+      .Output("OutputNCHW")
+      .Finalize(net.NewOperatorDef());
+
+  net.RunOp(CPU);
+  net.TransformDataFormat<DeviceType::CPU, float>(
+      "OutputNCHW", DataFormat::NCHW, "OutputCPU", DataFormat::NHWC);
+
+  // run quantize
+  OpDefBuilder("Quantize", "QuantizeInput")
+      .Input("Input")
+      .Output("QuantizedInput")
+      .OutputType({DT_UINT8})
+      .AddIntArg("T", DT_UINT8)
+      .Finalize(net.NewOperatorDef());
+  net.RunOp();
+
+  OpDefBuilder("DepthToSpace", "DepthToSpaceTest")
+      .Input("QuantizedInput")
+      .Output("QuantizedOutput")
+      .AddIntArg("block_size", block_size)
+      .OutputType({DT_UINT8})
+      .AddIntArg("T", DT_UINT8)
+      .Finalize(net.NewOperatorDef());
+  net.RunOp();
+
+  Tensor *eq_output = net.GetTensor("QuantizedInput");
+  Tensor *q_output = net.GetTensor("QuantizedOutput");
+  q_output->SetScale(eq_output->scale());
+  q_output->SetZeroPoint(eq_output->zero_point());
+  OpDefBuilder("Dequantize", "DeQuantizeTest")
+      .Input("QuantizedOutput")
+      .Output("DequantizedOutput")
+      .OutputType({DT_FLOAT})
+      .AddIntArg("T", DT_UINT8)
+      .Finalize(net.NewOperatorDef());
+  net.RunOp();
+
+  // Check
+  ExpectTensorSimilar<float>(*net.GetOutput("OutputCPU"),
+                             *net.GetTensor("DequantizedOutput"), 0.01);
+}
+
+}  // namespace
+
+TEST_F(DepthToSpaceOpTest, Quantize) {
+  TestDepthToSpaceQuantize(2, {1, 192, 192, 4});
+  TestDepthToSpaceQuantize(3, {1, 111, 111, 9});
+  TestDepthToSpaceQuantize(5, {1, 20, 20, 25});
+  TestDepthToSpaceQuantize(7, {1, 14, 14, 49});
+}
+
 }  // namespace test
 }  // namespace ops
diff --git a/test/ccunit/mace/ops/space_to_depth_test.cc b/test/ccunit/mace/ops/space_to_depth_test.cc
index 226083b71344ffdbe22266b30e53f333cfc2d8fc..7a7091cad5a5918524f50cd0dc8c38b3446e33fa 100644
--- a/test/ccunit/mace/ops/space_to_depth_test.cc
+++ b/test/ccunit/mace/ops/space_to_depth_test.cc
@@ -253,6 +253,77 @@ TEST_F(SpaceToDepthOpTest, OPENCLBatchRandomHalf) {
   RandomTest<half>(2, {2, 384, 384, 32});
 }
 
+namespace {
+
+void TestSpaceToDepthQuantize(int block_size,
+                              const std::vector<index_t> &shape) {
+  OpsTestNet net;
+  net.AddRandomInput<DeviceType::CPU, float>("Input",
+                                             shape,
+                                             false,
+                                             false,
+                                             true,
+                                             -1.f,
+                                             1.f);
+
+  // run cpu
+  net.TransformDataFormat<DeviceType::CPU, float>(
+      "Input", DataFormat::NHWC, "InputNCHW", DataFormat::NCHW);
+
+  OpDefBuilder("SpaceToDepth", "SpaceToDepthTest")
+      .Input("InputNCHW")
+      .AddIntArg("block_size", block_size)
+      .Output("OutputNCHW")
+      .Finalize(net.NewOperatorDef());
+
+  net.RunOp(CPU);
+  net.TransformDataFormat<DeviceType::CPU, float>(
+      "OutputNCHW", DataFormat::NCHW, "OutputCPU", DataFormat::NHWC);
+
+  // run quantize
+  OpDefBuilder("Quantize", "QuantizeInput")
+      .Input("Input")
+      .Output("QuantizedInput")
+      .OutputType({DT_UINT8})
+      .AddIntArg("T", DT_UINT8)
+      .Finalize(net.NewOperatorDef());
+  net.RunOp();
+
+  OpDefBuilder("SpaceToDepth", "SpaceToDepthTest")
+      .Input("QuantizedInput")
+      .Output("QuantizedOutput")
+      .AddIntArg("block_size", block_size)
+      .OutputType({DT_UINT8})
+      .AddIntArg("T", DT_UINT8)
+      .Finalize(net.NewOperatorDef());
+  net.RunOp();
+
+  Tensor *eq_output = net.GetTensor("QuantizedInput");
+  Tensor *q_output = net.GetTensor("QuantizedOutput");
+  q_output->SetScale(eq_output->scale());
+  q_output->SetZeroPoint(eq_output->zero_point());
+  OpDefBuilder("Dequantize", "DeQuantizeTest")
+      .Input("QuantizedOutput")
+      .Output("DequantizedOutput")
+      .OutputType({DT_FLOAT})
+      .AddIntArg("T", DT_UINT8)
+      .Finalize(net.NewOperatorDef());
+  net.RunOp();
+
+  // Check
+  ExpectTensorSimilar<float>(*net.GetOutput("OutputCPU"),
+                             *net.GetTensor("DequantizedOutput"), 0.01);
+}
+
+TEST_F(SpaceToDepthOpTest, Quantize) {
+  TestSpaceToDepthQuantize(2, {1, 384, 384, 1});
+  TestSpaceToDepthQuantize(3, {1, 333, 333, 1});
+  TestSpaceToDepthQuantize(5, {1, 100, 100, 1});
+  TestSpaceToDepthQuantize(7, {1, 98, 98, 1});
+}
+
+}  // namespace
+
 }  // namespace test
 }  // namespace ops
 }  // namespace mace