Commit 86b23aa0 authored by 刘托 (Liu Tuo)

Merge branch 's2d' into 'master'

Add quantized space to depth and depth to space

See merge request !1121
......@@ -25,7 +25,10 @@ namespace mace {
namespace ops {
template<DeviceType D, class T>
class DepthToSpaceOp : public Operation {
class DepthToSpaceOp;
template<>
class DepthToSpaceOp<CPU, float> : public Operation {
public:
explicit DepthToSpaceOp(OpConstructContext *context)
: Operation(context),
......@@ -55,8 +58,8 @@ class DepthToSpaceOp : public Operation {
Tensor::MappingGuard logits_guard(input);
Tensor::MappingGuard output_guard(output);
const T *input_ptr = input->data<T>();
T *output_ptr = output->mutable_data<T>();
const float *input_ptr = input->data<float>();
float *output_ptr = output->mutable_data<float>();
for (index_t b = 0; b < batch_size; ++b) {
for (index_t d = 0; d < output_depth; ++d) {
......@@ -89,6 +92,73 @@ class DepthToSpaceOp : public Operation {
const int block_size_;
};
#ifdef MACE_ENABLE_QUANTIZE
// Quantized (uint8) CPU specialization of DepthToSpace.
//
// Rearranges channel data into spatial blocks (NHWC layout): each group of
// block_size * block_size input channels becomes a block_size x block_size
// spatial patch in the output. The op is a pure element permutation, so the
// quantization scale/zero_point of the input apply unchanged to the output.
template<>
class DepthToSpaceOp<CPU, uint8_t> : public Operation {
 public:
  explicit DepthToSpaceOp(OpConstructContext *context)
      : Operation(context),
        block_size_(Operation::GetOptionalArg<int>("block_size", 1)) {}

  MaceStatus Run(OpContext *context) override {
    MACE_UNUSED(context);
    const Tensor *input = this->Input(0);
    Tensor *output = this->Output(0);
    MACE_CHECK(input->dim_size() == 4, "input dim should be 4");
    const index_t batch_size = input->dim(0);
    const index_t input_depth = input->dim(3);
    const index_t input_height = input->dim(1);
    const index_t input_width = input->dim(2);
    // Channels must split evenly into block_size^2 spatial positions.
    MACE_CHECK(input_depth % (block_size_ * block_size_) == 0,
               "input depth should be dividable by block_size * block_size",
               input_depth);
    const index_t output_depth = input_depth / (block_size_ * block_size_);
    const index_t output_width = input_width * block_size_;
    const index_t output_height = input_height * block_size_;
    std::vector<index_t>
        output_shape = {batch_size, output_height, output_width, output_depth};
    MACE_RETURN_IF_ERROR(output->Resize(output_shape));

    Tensor::MappingGuard logits_guard(input);
    Tensor::MappingGuard output_guard(output);
    const uint8_t *input_ptr = input->data<uint8_t>();
    uint8_t *output_ptr = output->mutable_data<uint8_t>();

    // Iterate the output linearly (NHWC) and gather from the input.
    for (index_t b = 0; b < batch_size; ++b) {
      for (index_t h = 0; h < output_height; ++h) {
        const index_t in_h = h / block_size_;
        const index_t offset_h = (h % block_size_);
        // Fixed: loop index was declared `int`, inconsistent with the
        // index_t used for all other spatial extents and at risk of
        // overflow for very wide tensors.
        for (index_t w = 0; w < output_width; ++w) {
          const index_t in_w = w / block_size_;
          const index_t offset_w = w % block_size_;
          // Channel offset selecting which block_size^2 slice of the input
          // depth feeds this spatial position.
          const index_t offset_d =
              (offset_h * block_size_ + offset_w) * output_depth;
          for (index_t d = 0; d < output_depth; ++d) {
            const index_t in_d = d + offset_d;
            const index_t o_index =
                ((b * output_height + h) * output_width + w) * output_depth
                    + d;
            const index_t i_index =
                ((b * input_height + in_h) * input_width + in_w) * input_depth
                    + in_d;
            output_ptr[o_index] = input_ptr[i_index];
          }
        }
      }
    }
    return MaceStatus::MACE_SUCCESS;
  }

 private:
  const int block_size_;  // spatial block edge length (>= 1)
};
#endif  // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL
template<>
class DepthToSpaceOp<DeviceType::GPU, float> : public Operation {
......@@ -118,6 +188,11 @@ void RegisterDepthToSpace(OpRegistryBase *op_registry) {
MACE_REGISTER_OP(op_registry, "DepthToSpace",
DepthToSpaceOp, DeviceType::CPU, float);
#ifdef MACE_ENABLE_QUANTIZE
MACE_REGISTER_OP(op_registry, "DepthToSpace",
DepthToSpaceOp, DeviceType::CPU, uint8_t);
#endif // MACE_ENABLE_QUANTIZE
MACE_REGISTER_GPU_OP(op_registry, "DepthToSpace", DepthToSpaceOp);
}
......
......@@ -25,7 +25,10 @@ namespace mace {
namespace ops {
template<DeviceType D, class T>
class SpaceToDepthOp : public Operation {
class SpaceToDepthOp;
template<>
class SpaceToDepthOp<CPU, float> : public Operation {
public:
explicit SpaceToDepthOp(OpConstructContext *context)
: Operation(context),
......@@ -55,8 +58,8 @@ class SpaceToDepthOp : public Operation {
Tensor::MappingGuard logits_guard(input);
Tensor::MappingGuard output_guard(output);
const T *input_ptr = input->data<T>();
T *output_ptr = output->mutable_data<T>();
const float *input_ptr = input->data<float>();
float *output_ptr = output->mutable_data<float>();
for (index_t b = 0; b < batch_size; ++b) {
for (index_t d = 0; d < input_depth; ++d) {
......@@ -87,6 +90,71 @@ class SpaceToDepthOp : public Operation {
const int block_size_;
};
#ifdef MACE_ENABLE_QUANTIZE
// Quantized (uint8) CPU specialization of SpaceToDepth.
//
// Moves each block_size x block_size spatial patch of the input (NHWC) into
// the channel dimension of a single output pixel. Elements are only
// permuted, so the input's quantization parameters remain valid for the
// output.
template<>
class SpaceToDepthOp<CPU, uint8_t> : public Operation {
 public:
  explicit SpaceToDepthOp(OpConstructContext *context)
      : Operation(context),
        block_size_(Operation::GetOptionalArg<int>("block_size", 1)) {}

  MaceStatus Run(OpContext *context) override {
    MACE_UNUSED(context);
    const Tensor *input = this->Input(0);
    Tensor *output = this->Output(0);
    MACE_CHECK(input->dim_size() == 4, "input dim should be 4");

    const index_t batch_size = input->dim(0);
    const index_t input_height = input->dim(1);
    const index_t input_width = input->dim(2);
    const index_t input_depth = input->dim(3);
    // Spatial dims must tile exactly into block_size x block_size patches.
    MACE_CHECK(
        (input_width % block_size_ == 0) && (input_height % block_size_ == 0),
        "input width and height should be dividable by block_size");

    const index_t output_depth = input_depth * block_size_ * block_size_;
    const index_t output_width = input_width / block_size_;
    const index_t output_height = input_height / block_size_;
    std::vector<index_t> output_shape =
        {batch_size, output_height, output_width, output_depth};
    MACE_RETURN_IF_ERROR(output->Resize(output_shape));

    Tensor::MappingGuard input_guard(input);
    Tensor::MappingGuard output_guard(output);
    const uint8_t *src = input->data<uint8_t>();
    uint8_t *dst = output->mutable_data<uint8_t>();

    // Walk the input linearly (NHWC order); for every input pixel locate the
    // destination pixel plus its channel base, then copy the channel vector.
    const uint8_t *in = src;
    for (index_t b = 0; b < batch_size; ++b) {
      for (index_t h = 0; h < input_height; ++h) {
        const index_t out_h = h / block_size_;
        const index_t block_row = h % block_size_;
        for (index_t w = 0; w < input_width; ++w) {
          const index_t out_w = w / block_size_;
          const index_t block_col = w % block_size_;
          // Where this patch position lands inside the expanded channels.
          const index_t channel_base =
              (block_row * block_size_ + block_col) * input_depth;
          uint8_t *out = dst
              + ((b * output_height + out_h) * output_width + out_w)
                  * output_depth
              + channel_base;
          for (index_t d = 0; d < input_depth; ++d) {
            out[d] = *in++;
          }
        }
      }
    }
    return MaceStatus::MACE_SUCCESS;
  }

 private:
  const int block_size_;  // spatial block edge length (>= 1)
};
#endif  // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL
template<>
class SpaceToDepthOp<DeviceType::GPU, float> : public Operation {
......@@ -116,6 +184,11 @@ void RegisterSpaceToDepth(OpRegistryBase *op_registry) {
MACE_REGISTER_OP(op_registry, "SpaceToDepth",
SpaceToDepthOp, DeviceType::CPU, float);
#ifdef MACE_ENABLE_QUANTIZE
MACE_REGISTER_OP(op_registry, "SpaceToDepth",
SpaceToDepthOp, DeviceType::CPU, uint8_t);
#endif // MACE_ENABLE_QUANTIZE
MACE_REGISTER_GPU_OP(op_registry, "SpaceToDepth", SpaceToDepthOp);
}
......
......@@ -1819,7 +1819,9 @@ class Transformer(base_converter.ConverterInterface):
MaceOp.Reshape.name,
MaceOp.ResizeBilinear.name,
MaceOp.BatchToSpaceND.name,
MaceOp.SpaceToBatchND.name]:
MaceOp.SpaceToBatchND.name,
MaceOp.SpaceToDepth.name,
MaceOp.DepthToSpace.name]:
del op.quantize_info[:]
producer_op = self._producer[op.input[0]]
if producer_op.output[0] in self._option.input_nodes:
......
......@@ -262,6 +262,76 @@ TEST_F(DepthToSpaceOpTest, OPENCLRandomBatchHalf) {
RandomTest<DeviceType::GPU, half>(2, {2, 384, 384, 8});
}
namespace {
// End-to-end check of the quantized DepthToSpace kernel:
//   1. run the float CPU op as the reference,
//   2. quantize the input, run the uint8 op, dequantize its output,
//   3. require the dequantized result to be close to the float reference.
void TestDepthToSpaceQuantize(const int block_size,
const std::vector<index_t> &shape) {
OpsTestNet net;
// Random NHWC input; the -1.f/1.f args presumably bound the value range so
// quantization error stays small — TODO confirm AddRandomInput signature.
net.AddRandomInput<CPU, float>("Input",
shape,
false,
false,
true,
-1.f,
1.f);
// Float reference: the CPU float op works in NCHW, so convert in and out.
net.TransformDataFormat<DeviceType::CPU, float>(
"Input", DataFormat::NHWC, "InputNCHW", DataFormat::NCHW);
OpDefBuilder("DepthToSpace", "DepthToSpaceTest")
.Input("InputNCHW")
.AddIntArg("block_size", block_size)
.Output("OutputNCHW")
.Finalize(net.NewOperatorDef());
net.RunOp(CPU);
net.TransformDataFormat<DeviceType::CPU, float>(
"OutputNCHW", DataFormat::NCHW, "OutputCPU", DataFormat::NHWC);
// Quantized path: quantize the NHWC input to uint8 ...
OpDefBuilder("Quantize", "QuantizeInput")
.Input("Input")
.Output("QuantizedInput")
.OutputType({DT_UINT8})
.AddIntArg("T", DT_UINT8)
.Finalize(net.NewOperatorDef());
net.RunOp();
// ... then run the uint8 DepthToSpace kernel under test.
OpDefBuilder("DepthToSpace", "DepthToSpaceTest")
.Input("QuantizedInput")
.Output("QuantizedOutput")
.AddIntArg("block_size", block_size)
.OutputType({DT_UINT8})
.AddIntArg("T", DT_UINT8)
.Finalize(net.NewOperatorDef());
net.RunOp();
// DepthToSpace only permutes elements, so the output shares the input's
// quantization parameters; copy them over so Dequantize can use them.
Tensor *eq_output = net.GetTensor("QuantizedInput");
Tensor *q_output = net.GetTensor("QuantizedOutput");
q_output->SetScale(eq_output->scale());
q_output->SetZeroPoint(eq_output->zero_point());
OpDefBuilder("Dequantize", "DeQuantizeTest")
.Input("QuantizedOutput")
.Output("DequantizedOutput")
.OutputType({DT_FLOAT})
.AddIntArg("T", DT_UINT8)
.Finalize(net.NewOperatorDef());
net.RunOp();
// Check: dequantized result vs. float reference, 1% similarity tolerance.
ExpectTensorSimilar<float>(*net.GetOutput("OutputCPU"),
*net.GetTensor("DequantizedOutput"), 0.01);
}
}  // namespace
// Each shape's channel count equals block_size^2 — the minimum depth the op
// accepts (output depth 1) — exercising several non-power-of-two blocks.
TEST_F(DepthToSpaceOpTest, Quantize) {
TestDepthToSpaceQuantize(2, {1, 192, 192, 4});
TestDepthToSpaceQuantize(3, {1, 111, 111, 9});
TestDepthToSpaceQuantize(5, {1, 20, 20, 25});
TestDepthToSpaceQuantize(7, {1, 14, 14, 49});
}
} // namespace test
} // namespace ops
......
......@@ -253,6 +253,77 @@ TEST_F(SpaceToDepthOpTest, OPENCLBatchRandomHalf) {
RandomTest<DeviceType::GPU, half>(2, {2, 384, 384, 32});
}
namespace {
// End-to-end check of the quantized SpaceToDepth kernel: run the float CPU
// op as the reference, then quantize -> uint8 SpaceToDepth -> dequantize,
// and require the two results to be similar.
void TestSpaceToDepthQuantize(int block_size,
const std::vector<index_t> &shape) {
OpsTestNet net;
// Random NHWC input; the -1.f/1.f args presumably bound the value range so
// quantization error stays small — TODO confirm AddRandomInput signature.
net.AddRandomInput<CPU, float>("Input",
shape,
false,
false,
true,
-1.f,
1.f);
// Float reference: the CPU float op works in NCHW, so convert in and out.
net.TransformDataFormat<DeviceType::CPU, float>(
"Input", DataFormat::NHWC, "InputNCHW", DataFormat::NCHW);
OpDefBuilder("SpaceToDepth", "SpaceToDepthTest")
.Input("InputNCHW")
.AddIntArg("block_size", block_size)
.Output("OutputNCHW")
.Finalize(net.NewOperatorDef());
net.RunOp(CPU);
net.TransformDataFormat<DeviceType::CPU, float>(
"OutputNCHW", DataFormat::NCHW, "OutputCPU", DataFormat::NHWC);
// Quantized path: quantize the NHWC input to uint8 ...
OpDefBuilder("Quantize", "QuantizeInput")
.Input("Input")
.Output("QuantizedInput")
.OutputType({DT_UINT8})
.AddIntArg("T", DT_UINT8)
.Finalize(net.NewOperatorDef());
net.RunOp();
// ... then run the uint8 SpaceToDepth kernel under test.
OpDefBuilder("SpaceToDepth", "SpaceToDepthTest")
.Input("QuantizedInput")
.Output("QuantizedOutput")
.AddIntArg("block_size", block_size)
.OutputType({DT_UINT8})
.AddIntArg("T", DT_UINT8)
.Finalize(net.NewOperatorDef());
net.RunOp();
// SpaceToDepth only permutes elements, so the output shares the input's
// quantization parameters; copy them over so Dequantize can use them.
Tensor *eq_output = net.GetTensor("QuantizedInput");
Tensor *q_output = net.GetTensor("QuantizedOutput");
q_output->SetScale(eq_output->scale());
q_output->SetZeroPoint(eq_output->zero_point());
OpDefBuilder("Dequantize", "DeQuantizeTest")
.Input("QuantizedOutput")
.Output("DequantizedOutput")
.OutputType({DT_FLOAT})
.AddIntArg("T", DT_UINT8)
.Finalize(net.NewOperatorDef());
net.RunOp();
// Check: dequantized result vs. float reference, 1% similarity tolerance.
ExpectTensorSimilar<float>(*net.GetOutput("OutputCPU"),
*net.GetTensor("DequantizedOutput"), 0.01);
}
// Single-channel inputs with heights/widths divisible by each block size;
// covers several non-power-of-two blocks.
TEST_F(SpaceToDepthOpTest, Quantize) {
TestSpaceToDepthQuantize(2, {1, 384, 384, 1});
TestSpaceToDepthQuantize(3, {1, 333, 333, 1});
TestSpaceToDepthQuantize(5, {1, 100, 100, 1});
TestSpaceToDepthQuantize(7, {1, 98, 98, 1});
}
}  // namespace
} // namespace test
} // namespace ops
} // namespace mace
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册