提交 13a7f090 编写于 作者: 叶剑武

Merge branch 'optimize_deconv_cpu' into 'master'

fix deconv bias add

See merge request !826
...@@ -34,19 +34,24 @@ ...@@ -34,19 +34,24 @@
namespace mace { namespace mace {
namespace kernels { namespace kernels {
enum FrameworkType {
TENSORFLOW = 0,
CAFFE = 1,
};
struct Deconv2dFunctorBase : OpKernel { struct Deconv2dFunctorBase : OpKernel {
Deconv2dFunctorBase(OpKernelContext *context, Deconv2dFunctorBase(OpKernelContext *context,
const std::vector<int> &strides, const std::vector<int> &strides,
const Padding &padding_type, const Padding &padding_type,
const std::vector<int> &paddings, const std::vector<int> &paddings,
const std::vector<index_t> &output_shape, const FrameworkType model_type,
const ActivationType activation, const ActivationType activation,
const float relux_max_limit) const float relux_max_limit)
: OpKernel(context), : OpKernel(context),
strides_(strides), strides_(strides),
padding_type_(padding_type), padding_type_(padding_type),
paddings_(paddings), paddings_(paddings),
output_shape_(output_shape), model_type_(model_type),
activation_(activation), activation_(activation),
relux_max_limit_(relux_max_limit) {} relux_max_limit_(relux_max_limit) {}
...@@ -156,7 +161,7 @@ struct Deconv2dFunctorBase : OpKernel { ...@@ -156,7 +161,7 @@ struct Deconv2dFunctorBase : OpKernel {
std::vector<int> strides_; // [stride_h, stride_w] std::vector<int> strides_; // [stride_h, stride_w]
const Padding padding_type_; const Padding padding_type_;
std::vector<int> paddings_; std::vector<int> paddings_;
std::vector<index_t> output_shape_; const FrameworkType model_type_;
const ActivationType activation_; const ActivationType activation_;
const float relux_max_limit_; const float relux_max_limit_;
}; };
...@@ -171,14 +176,14 @@ struct Deconv2dFunctor<DeviceType::CPU, float>: Deconv2dFunctorBase { ...@@ -171,14 +176,14 @@ struct Deconv2dFunctor<DeviceType::CPU, float>: Deconv2dFunctorBase {
const std::vector<int> &strides, const std::vector<int> &strides,
const Padding &padding_type, const Padding &padding_type,
const std::vector<int> &paddings, const std::vector<int> &paddings,
const std::vector<index_t> &output_shape, const FrameworkType model_type,
const ActivationType activation, const ActivationType activation,
const float relux_max_limit) const float relux_max_limit)
: Deconv2dFunctorBase(context, : Deconv2dFunctorBase(context,
strides, strides,
padding_type, padding_type,
paddings, paddings,
output_shape, model_type,
activation, activation,
relux_max_limit) {} relux_max_limit) {}
...@@ -277,19 +282,16 @@ struct Deconv2dFunctor<DeviceType::CPU, float>: Deconv2dFunctorBase { ...@@ -277,19 +282,16 @@ struct Deconv2dFunctor<DeviceType::CPU, float>: Deconv2dFunctorBase {
std::vector<int> paddings(2); std::vector<int> paddings(2);
std::vector<int> out_paddings(2); std::vector<int> out_paddings(2);
std::vector<index_t> output_shape(4); std::vector<index_t> output_shape(4);
if (paddings_.empty()) { // tensorflow if (model_type_ == FrameworkType::TENSORFLOW) { // tensorflow
paddings = std::vector<int>(2, 0); paddings = std::vector<int>(2, 0);
if (output_shape_.size() == 4) { MACE_CHECK_NOTNULL(output_shape_tensor);
output_shape = output_shape_; MACE_CHECK(output_shape_tensor->size() == 4);
} else { Tensor::MappingGuard output_shape_mapper(output_shape_tensor);
MACE_CHECK_NOTNULL(output_shape_tensor); auto output_shape_data =
MACE_CHECK(output_shape_tensor->size() == 4); output_shape_tensor->data<int32_t>();
Tensor::MappingGuard output_shape_mapper(output_shape_tensor); output_shape =
auto output_shape_data = std::vector<index_t>(output_shape_data, output_shape_data + 4);
output_shape_tensor->data<int32_t>();
output_shape =
std::vector<index_t>(output_shape_data, output_shape_data + 4);
}
const index_t t = output_shape[1]; const index_t t = output_shape[1];
output_shape[1] = output_shape[3]; output_shape[1] = output_shape[3];
output_shape[3] = output_shape[2]; output_shape[3] = output_shape[2];
...@@ -437,21 +439,6 @@ struct Deconv2dFunctor<DeviceType::CPU, float>: Deconv2dFunctorBase { ...@@ -437,21 +439,6 @@ struct Deconv2dFunctor<DeviceType::CPU, float>: Deconv2dFunctorBase {
padded_out_h == output_shape[2] && padded_out_w == output_shape[3]; padded_out_h == output_shape[2] && padded_out_w == output_shape[3];
float *out_data = no_pad ? output_data : padded_out_data; float *out_data = no_pad ? output_data : padded_out_data;
if (bias_data != nullptr) {
const index_t batch = output_shape[0];
const index_t channels = output_shape[1];
const index_t img_size = output_shape[2] * output_shape[3];
#pragma omp parallel for collapse(3)
for (index_t b = 0; b < batch; ++b) {
for (index_t c = 0; c < channels; ++c) {
for (index_t i = 0; i < img_size; ++i) {
output_data[(b * channels + c) * img_size + i] +=
bias_data[c];
}
}
}
}
deconv_func(input_data, deconv_func(input_data,
filter_data, filter_data,
in_shape, in_shape,
...@@ -466,7 +453,20 @@ struct Deconv2dFunctor<DeviceType::CPU, float>: Deconv2dFunctorBase { ...@@ -466,7 +453,20 @@ struct Deconv2dFunctor<DeviceType::CPU, float>: Deconv2dFunctorBase {
output_data); output_data);
} }
if (bias_data != nullptr) {
const index_t batch = output_shape[0];
const index_t channels = output_shape[1];
const index_t img_size = output_shape[2] * output_shape[3];
#pragma omp parallel for collapse(3)
for (index_t b = 0; b < batch; ++b) {
for (index_t c = 0; c < channels; ++c) {
for (index_t i = 0; i < img_size; ++i) {
output_data[(b * channels + c) * img_size + i] +=
bias_data[c];
}
}
}
}
DoActivation<float>(output_data, DoActivation<float>(output_data,
output_data, output_data,
...@@ -501,7 +501,7 @@ struct Deconv2dFunctor<DeviceType::GPU, T> : Deconv2dFunctorBase { ...@@ -501,7 +501,7 @@ struct Deconv2dFunctor<DeviceType::GPU, T> : Deconv2dFunctorBase {
const std::vector<int> &strides, const std::vector<int> &strides,
const Padding &padding_type, const Padding &padding_type,
const std::vector<int> &paddings, const std::vector<int> &paddings,
const std::vector<index_t> &output_shape, const FrameworkType model_type,
const ActivationType activation, const ActivationType activation,
const float relux_max_limit); const float relux_max_limit);
......
...@@ -24,14 +24,14 @@ Deconv2dFunctor<DeviceType::GPU, T>::Deconv2dFunctor( ...@@ -24,14 +24,14 @@ Deconv2dFunctor<DeviceType::GPU, T>::Deconv2dFunctor(
const std::vector<int> &strides, const std::vector<int> &strides,
const Padding &padding_type, const Padding &padding_type,
const std::vector<int> &paddings, const std::vector<int> &paddings,
const std::vector<index_t> &output_shape, const FrameworkType model_type,
const ActivationType activation, const ActivationType activation,
const float relux_max_limit) const float relux_max_limit)
: Deconv2dFunctorBase(context, : Deconv2dFunctorBase(context,
strides, strides,
padding_type, padding_type,
paddings, paddings,
output_shape, model_type,
activation, activation,
relux_max_limit) { relux_max_limit) {
if (context->device()->opencl_runtime()->UseImageMemory()) { if (context->device()->opencl_runtime()->UseImageMemory()) {
...@@ -55,19 +55,15 @@ MaceStatus Deconv2dFunctor<DeviceType::GPU, T>::operator()( ...@@ -55,19 +55,15 @@ MaceStatus Deconv2dFunctor<DeviceType::GPU, T>::operator()(
std::vector<int> paddings(2); std::vector<int> paddings(2);
std::vector<int> out_paddings(2); std::vector<int> out_paddings(2);
std::vector<index_t> output_shape(4); std::vector<index_t> output_shape(4);
if (paddings_.empty()) { if (model_type_ == FrameworkType::TENSORFLOW) {
paddings = std::vector<int>(2, 0); paddings = std::vector<int>(2, 0);
if (output_shape_.size() != 4) { MACE_CHECK_NOTNULL(output_shape_tensor);
MACE_CHECK_NOTNULL(output_shape_tensor); MACE_CHECK(output_shape_tensor->size() == 4);
MACE_CHECK(output_shape_tensor->size() == 4); Tensor::MappingGuard output_shape_mapper(output_shape_tensor);
Tensor::MappingGuard output_shape_mapper(output_shape_tensor); auto output_shape_data =
auto output_shape_data = output_shape_tensor->data<int32_t>();
output_shape_tensor->data<int32_t>(); output_shape =
output_shape = std::vector<index_t>(output_shape_data, output_shape_data + 4);
std::vector<index_t>(output_shape_data, output_shape_data + 4);
} else {
output_shape = output_shape_;
}
CalcDeconvPaddingAndInputSize(input->shape().data(), CalcDeconvPaddingAndInputSize(input->shape().data(),
filter->shape().data(), filter->shape().data(),
strides_.data(), strides_.data(),
......
...@@ -34,28 +34,39 @@ class Deconv2dOp : public Operator<D, T> { ...@@ -34,28 +34,39 @@ class Deconv2dOp : public Operator<D, T> {
static_cast<Padding>(OperatorBase::GetOptionalArg<int>( static_cast<Padding>(OperatorBase::GetOptionalArg<int>(
"padding", static_cast<int>(SAME))), "padding", static_cast<int>(SAME))),
OperatorBase::GetRepeatedArgs<int>("padding_values"), OperatorBase::GetRepeatedArgs<int>("padding_values"),
OperatorBase::GetRepeatedArgs<index_t>("output_shape"), static_cast<kernels::FrameworkType>(
OperatorBase::GetOptionalArg<int>("framework_type", 0)),
kernels::StringToActivationType( kernels::StringToActivationType(
OperatorBase::GetOptionalArg<std::string>("activation", OperatorBase::GetOptionalArg<std::string>("activation",
"NOOP")), "NOOP")),
OperatorBase::GetOptionalArg<float>("max_limit", 0.0f)) {} OperatorBase::GetOptionalArg<float>("max_limit", 0.0f)) {}
MaceStatus Run(StatsFuture *future) override { MaceStatus Run(StatsFuture *future) override {
const Tensor *input = this->Input(INPUT); MACE_CHECK(this->InputSize() >= 2, "deconv needs >= 2 inputs.");
const Tensor *filter = this->Input(FILTER); const Tensor *input = this->Input(0);
const Tensor *output_shape = const Tensor *filter = this->Input(1);
this->InputSize() >= 3 ? this->Input(OUTPUT_SHAPE) : nullptr; const kernels::FrameworkType model_type =
const Tensor *bias = this->InputSize() >= 4 ? this->Input(BIAS) : nullptr; static_cast<kernels::FrameworkType>(
Tensor *output = this->Output(OUTPUT); OperatorBase::GetOptionalArg<int>("framework_type", 0));
if (model_type == kernels::CAFFE) {
const Tensor *bias = this->InputSize() >= 3 ? this->Input(2) : nullptr;
Tensor *output = this->Output(OUTPUT);
return functor_(input, filter, bias, output_shape, output, future); return functor_(input, filter, bias, nullptr, output, future);
} else {
const Tensor *output_shape =
this->InputSize() >= 3 ? this->Input(2) : nullptr;
const Tensor *bias = this->InputSize() >= 4 ? this->Input(3) : nullptr;
Tensor *output = this->Output(OUTPUT);
return functor_(input, filter, bias, output_shape, output, future);
}
} }
private: private:
kernels::Deconv2dFunctor<D, T> functor_; kernels::Deconv2dFunctor<D, T> functor_;
protected: protected:
MACE_OP_INPUT_TAGS(INPUT, FILTER, OUTPUT_SHAPE, BIAS);
MACE_OP_OUTPUT_TAGS(OUTPUT); MACE_OP_OUTPUT_TAGS(OUTPUT);
}; };
......
...@@ -49,28 +49,35 @@ static void Deconv2d(int iters, ...@@ -49,28 +49,35 @@ static void Deconv2d(int iters,
net.AddRandomInput<D, float>("Filter", net.AddRandomInput<D, float>("Filter",
{output_channels, channels, kernel_h, {output_channels, channels, kernel_h,
kernel_w}); kernel_w});
net.AddRandomInput<D, float>("Bias", {output_channels});
net.AddInputFromArray<D, int32_t>("OutputShape", {4},
{batch, out_h, out_w, output_channels});
if (D == DeviceType::GPU) { if (D == DeviceType::GPU) {
BufferToImage<D, T>(&net, "Input", "InputImage", BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(&net, "Filter", "FilterImage", BufferToImage<D, T>(&net, "Filter", "FilterImage",
kernels::BufferType::CONV2D_FILTER); kernels::BufferType::CONV2D_FILTER);
BufferToImage<D, T>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("Deconv2D", "Deconv2dTest") OpDefBuilder("Deconv2D", "Deconv2dTest")
.Input("InputImage") .Input("InputImage")
.Input("FilterImage") .Input("FilterImage")
.Input("OutputShape")
.Input("BiasImage")
.Output("Output") .Output("Output")
.AddIntsArg("strides", {stride, stride}) .AddIntsArg("strides", {stride, stride})
.AddIntArg("padding", padding) .AddIntArg("padding", padding)
.AddIntsArg("output_shape", {batch, out_h, out_w, output_channels})
.AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value)) .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
} else { } else {
OpDefBuilder("Deconv2D", "Deconv2dTest") OpDefBuilder("Deconv2D", "Deconv2dTest")
.Input("Input") .Input("Input")
.Input("Filter") .Input("Filter")
.Input("OutputShape")
.Input("Bias")
.Output("Output") .Output("Output")
.AddIntsArg("strides", {stride, stride}) .AddIntsArg("strides", {stride, stride})
.AddIntArg("padding", padding) .AddIntArg("padding", padding)
.AddIntsArg("output_shape", {batch, out_h, out_w, output_channels})
.AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value)) .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
} }
......
...@@ -28,34 +28,58 @@ namespace { ...@@ -28,34 +28,58 @@ namespace {
template <DeviceType D> template <DeviceType D>
void RunTestSimple(const std::vector<index_t> &input_shape, void RunTestSimple(const std::vector<index_t> &input_shape,
const std::vector<float> &input_data, const std::vector<float> &input_data,
const std::vector<float> &bias_data,
const int stride, const int stride,
Padding padding, Padding padding,
const std::vector<int> &padding_size, const std::vector<int> &padding_size,
const std::vector<int> &output_shape, const std::vector<int32_t> &output_shape,
const std::vector<index_t> &filter_shape, const std::vector<index_t> &filter_shape,
const std::vector<float> &filter_data, const std::vector<float> &filter_data,
const std::vector<index_t> &expected_shape, const std::vector<index_t> &expected_shape,
const std::vector<float> &expected_data) { const std::vector<float> &expected_data,
kernels::FrameworkType model_type) {
OpsTestNet net; OpsTestNet net;
// Add input data // Add input data
const index_t batch = input_shape[0];
const index_t out_channels = filter_shape[2];
net.AddInputFromArray<D, float>("Input", input_shape, input_data); net.AddInputFromArray<D, float>("Input", input_shape, input_data);
net.AddInputFromArray<D, float>("Filter", filter_shape, filter_data); net.AddInputFromArray<D, float>("Filter", filter_shape, filter_data);
net.AddInputFromArray<D, float>("Bias", {out_channels}, bias_data);
net.TransformDataFormat<D, float>("Filter", HWOI, "FilterOIHW", OIHW); net.TransformDataFormat<D, float>("Filter", HWOI, "FilterOIHW", OIHW);
if (D == DeviceType::GPU) { if (D == DeviceType::GPU) {
BufferToImage<D, float>(&net, "Input", "InputImage", BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, float>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT);
BufferToImage<D, float>(&net, "FilterOIHW", "FilterImage", BufferToImage<D, float>(&net, "FilterOIHW", "FilterImage",
kernels::BufferType::CONV2D_FILTER); kernels::BufferType::CONV2D_FILTER);
OpDefBuilder("Deconv2D", "Deconv2dTest") if (model_type == kernels::FrameworkType::CAFFE) {
.Input("InputImage") OpDefBuilder("Deconv2D", "Deconv2dTest")
.Input("FilterImage") .Input("InputImage")
.Output("OutputImage") .Input("FilterImage")
.AddIntsArg("strides", {stride, stride}) .Input("BiasImage")
.AddIntArg("padding", padding) .Output("OutputImage")
.AddIntsArg("padding_values", padding_size) .AddIntsArg("strides", {stride, stride})
.AddIntsArg("output_shape", output_shape) .AddIntArg("padding", padding)
.Finalize(net.NewOperatorDef()); .AddIntsArg("padding_values", padding_size)
.AddIntArg("framework_type", model_type)
.Finalize(net.NewOperatorDef());
} else {
net.AddInputFromArray<D, int32_t>("OutputShape", {4}, output_shape);
OpDefBuilder("Deconv2D", "Deconv2dTest")
.Input("InputImage")
.Input("FilterImage")
.Input("OutputShape")
.Input("BiasImage")
.Output("OutputImage")
.AddIntsArg("strides", {stride, stride})
.AddIntArg("padding", padding)
.AddIntsArg("padding_values", padding_size)
.AddIntArg("framework_type", model_type)
.Finalize(net.NewOperatorDef());
}
net.RunOp(D); net.RunOp(D);
// Transfer output // Transfer output
...@@ -64,15 +88,34 @@ void RunTestSimple(const std::vector<index_t> &input_shape, ...@@ -64,15 +88,34 @@ void RunTestSimple(const std::vector<index_t> &input_shape,
} else { } else {
net.TransformDataFormat<DeviceType::CPU, float>("Input", NHWC, "InputNCHW", net.TransformDataFormat<DeviceType::CPU, float>("Input", NHWC, "InputNCHW",
NCHW); NCHW);
OpDefBuilder("Deconv2D", "Deconv2dTest")
.Input("InputNCHW") if (model_type == kernels::FrameworkType::CAFFE) {
.Input("FilterOIHW") OpDefBuilder("Deconv2D", "Deconv2dTest")
.Output("OutputNCHW") .Input("InputNCHW")
.AddIntsArg("strides", {stride, stride}) .Input("FilterOIHW")
.AddIntArg("padding", padding) .Input("Bias")
.AddIntsArg("padding_values", padding_size) .Output("OutputNCHW")
.AddIntsArg("output_shape", output_shape) .AddIntsArg("strides", {stride, stride})
.Finalize(net.NewOperatorDef()); .AddIntArg("padding", padding)
.AddIntsArg("padding_values", padding_size)
.AddIntArg("framework_type", model_type)
.Finalize(net.NewOperatorDef());
} else {
net.AddInputFromArray<D, int32_t>("OutputShape", {4}, output_shape);
OpDefBuilder("Deconv2D", "Deconv2dTest")
.Input("InputNCHW")
.Input("FilterOIHW")
.Input("OutputShape")
.Input("Bias")
.Output("OutputNCHW")
.AddIntsArg("strides", {stride, stride})
.AddIntArg("padding", padding)
.AddIntsArg("padding_values", padding_size)
.AddIntArg("framework_type", model_type)
.Finalize(net.NewOperatorDef());
}
// Run // Run
net.RunOp(D); net.RunOp(D);
net.TransformDataFormat<DeviceType::CPU, float>("OutputNCHW", NCHW, net.TransformDataFormat<DeviceType::CPU, float>("OutputNCHW", NCHW,
...@@ -85,144 +128,186 @@ void RunTestSimple(const std::vector<index_t> &input_shape, ...@@ -85,144 +128,186 @@ void RunTestSimple(const std::vector<index_t> &input_shape,
template <DeviceType D> template <DeviceType D>
void TestNHWCSimple3x3SAME_S1() { void TestNHWCSimple3x3SAME_S1() {
RunTestSimple<D>({1, 3, 3, 1}, {1, 1, 1, 1, 1, 1, 1, 1, 1}, 1, Padding::SAME, RunTestSimple<D>({1, 3, 3, 1}, {1, 1, 1, 1, 1, 1, 1, 1, 1}, {0.5, 0.6, 0.7},
{}, {1, 3, 3, 3}, {3, 3, 3, 1}, 1, Padding::SAME, {},
{1, 3, 3, 3}, {3, 3, 3, 1},
{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
{1, 3, 3, 3}, {4, 4, 4, 6, 6, 6, 4, 4, 4, 6, 6, 6, 9, 9, {1, 3, 3, 3},
9, 6, 6, 6, 4, 4, 4, 6, 6, 6, 4, 4, 4}); {4.5, 4.6, 4.7, 6.5, 6.6, 6.7, 4.5, 4.6, 4.7,
RunTestSimple<D>({1, 3, 3, 1}, {1, 1, 1, 1, 1, 1, 1, 1, 1}, 1, Padding::VALID, 6.5, 6.6, 6.7, 9.5, 9.6, 9.7, 6.5, 6.6, 6.7,
{2, 2}, {0}, {3, 3, 3, 1}, 4.5, 4.6, 4.7, 6.5, 6.6, 6.7, 4.5, 4.6, 4.7},
kernels::FrameworkType::TENSORFLOW);
RunTestSimple<D>({1, 3, 3, 1}, {1, 1, 1, 1, 1, 1, 1, 1, 1}, {0, 0, 0},
1, Padding::VALID, {2, 2},
{0}, {3, 3, 3, 1},
{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
{1, 3, 3, 3}, {4, 4, 4, 6, 6, 6, 4, 4, 4, 6, 6, 6, 9, 9, {1, 3, 3, 3},
9, 6, 6, 6, 4, 4, 4, 6, 6, 6, 4, 4, 4}); {4, 4, 4, 6, 6, 6, 4, 4, 4, 6, 6, 6, 9, 9,
RunTestSimple<D>({1, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9}, 1, Padding::SAME, 9, 6, 6, 6, 4, 4, 4, 6, 6, 6, 4, 4, 4},
{}, {1, 3, 3, 3}, {3, 3, 3, 1}, kernels::FrameworkType::CAFFE);
RunTestSimple<D>({1, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9}, {0, 0, 0},
1, Padding::SAME, {},
{1, 3, 3, 3}, {3, 3, 3, 1},
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27}, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27},
{1, 3, 3, 3}, {54, 66, 78, 126, 147, 168, 130, 146, 162, {1, 3, 3, 3},
198, 225, 252, 405, 450, 495, 366, 399, 432, {54, 66, 78, 126, 147, 168, 130, 146, 162,
354, 378, 402, 630, 669, 708, 502, 530, 558}); 198, 225, 252, 405, 450, 495, 366, 399, 432,
RunTestSimple<D>( 354, 378, 402, 630, 669, 708, 502, 530, 558},
{1, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9}, 1, Padding::SAME, {2, 2}, {0}, kernels::FrameworkType::TENSORFLOW);
{3, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, RunTestSimple<D>({1, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9}, {0, 0, 0},
1, Padding::SAME, {2, 2},
{0}, {3, 3, 3, 1},
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27}, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27},
{1, 3, 3, 3}, {1, 3, 3, 3},
{54, 66, 78, 126, 147, 168, 130, 146, 162, 198, 225, 252, 405, 450, {54, 66, 78, 126, 147, 168, 130, 146, 162,
495, 366, 399, 432, 354, 378, 402, 630, 669, 708, 502, 530, 558}); 198, 225, 252, 405, 450, 495, 366, 399, 432,
354, 378, 402, 630, 669, 708, 502, 530, 558},
kernels::FrameworkType::CAFFE);
} }
template <DeviceType D> template <DeviceType D>
void TestNHWCSimple3x3SAME_S2() { void TestNHWCSimple3x3SAME_S2() {
RunTestSimple<D>( RunTestSimple<D>({1, 3, 3, 1}, {1, 1, 1, 1, 1, 1, 1, 1, 1}, {0, 0, 0},
{1, 3, 3, 1}, {1, 1, 1, 1, 1, 1, 1, 1, 1}, 2, Padding::SAME, {}, 2, Padding::SAME, {},
{1, 6, 6, 3}, {3, 3, 3, 1}, {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, {1, 6, 6, 3},
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, {3, 3, 3, 1},
{1, 6, 6, 3}, {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
{1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 4, 4, {1, 6, 6, 3},
4, 2, 2, 2, 4, 4, 4, 2, 2, 2, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, {1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1,
2, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 4, 4, 4, 2, 2, 2, 4, 4, 4, 2, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1,
2, 2, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1}); 2, 2, 2, 2, 2, 2, 4, 4, 4, 2, 2, 2, 4, 4, 4, 2, 2, 2,
RunTestSimple<D>( 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1,
{1, 3, 3, 1}, {1, 1, 1, 1, 1, 1, 1, 1, 1}, 2, Padding::SAME, {2, 2}, {0}, 2, 2, 2, 2, 2, 2, 4, 4, 4, 2, 2, 2, 4, 4, 4, 2, 2, 2,
{3, 3, 3, 1}, {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1},
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, kernels::FrameworkType::TENSORFLOW);
{1, 5, 5, 3}, {1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 4, RunTestSimple<D>({1, 3, 3, 1}, {1, 1, 1, 1, 1, 1, 1, 1, 1}, {0, 0, 0},
4, 4, 2, 2, 2, 4, 4, 4, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 2, Padding::SAME, {2, 2},
1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 4, 4, 4, 2, 2, 2, 4, 4, 4, {0}, {3, 3, 3, 1},
2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1}); {1, 1, 1, 1, 1, 1, 1, 1, 1,
RunTestSimple<D>( 1, 1, 1, 1, 1, 1, 1, 1, 1,
{1, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9}, 2, Padding::SAME, {}, 1, 1, 1, 1, 1, 1, 1, 1, 1},
{1, 6, 6, 3}, {3, 3, 3, 1}, {1, 5, 5, 3},
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, {1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27}, 2, 2, 2, 4, 4, 4, 2, 2, 2, 4, 4, 4, 2, 2, 2,
{1, 6, 6, 3}, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1,
{1, 2, 3, 4, 5, 6, 9, 12, 15, 8, 10, 12, 17, 22, 2, 2, 2, 4, 4, 4, 2, 2, 2, 4, 4, 4, 2, 2, 2,
27, 12, 15, 18, 10, 11, 12, 13, 14, 15, 36, 39, 42, 26, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1},
28, 30, 62, 67, 72, 39, 42, 45, 23, 28, 33, 38, 43, 48, kernels::FrameworkType::CAFFE);
96, 108, 120, 64, 71, 78, 148, 164, 180, 90, 99, 108, 40, 44, RunTestSimple<D>({1, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9}, {0, 0, 0},
48, 52, 56, 60, 114, 123, 132, 65, 70, 75, 140, 151, 162, 78, 2, Padding::SAME, {},
84, 90, 83, 94, 105, 116, 127, 138, 252, 276, 300, 142, 155, 168, {1, 6, 6, 3}, {3, 3, 3, 1},
304, 332, 360, 168, 183, 198, 70, 77, 84, 91, 98, 105, 192, 207, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
222, 104, 112, 120, 218, 235, 252, 117, 126, 135}); 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27},
RunTestSimple<D>( {1, 6, 6, 3},
{1, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9}, 2, Padding::SAME, {2, 2}, {0}, {1, 2, 3, 4, 5, 6, 9, 12, 15, 8, 10, 12, 17, 22, 27, 12, 15,
{3, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 18,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27}, 10, 11, 12, 13, 14, 15, 36, 39, 42, 26, 28, 30, 62, 67, 72,
{1, 5, 5, 3}, 39, 42, 45,
{13, 14, 15, 36, 39, 42, 26, 28, 30, 62, 67, 72, 39, 23, 28, 33, 38, 43, 48, 96, 108, 120, 64, 71, 78, 148, 164,
42, 45, 38, 43, 48, 96, 108, 120, 64, 71, 78, 148, 164, 180, 90, 99, 108,
180, 90, 99, 108, 52, 56, 60, 114, 123, 132, 65, 70, 75, 40, 44, 48, 52, 56, 60, 114, 123, 132, 65, 70, 75, 140,
140, 151, 162, 78, 84, 90, 116, 127, 138, 252, 276, 300, 142, 151, 162, 78, 84, 90,
155, 168, 304, 332, 360, 168, 183, 198, 91, 98, 105, 192, 207, 83, 94, 105, 116, 127, 138, 252, 276, 300, 142, 155, 168,
222, 104, 112, 120, 218, 235, 252, 117, 126, 135}); 304, 332, 360, 168, 183, 198, 70, 77, 84, 91, 98, 105, 192,
207, 222, 104, 112, 120, 218, 235, 252, 117, 126, 135},
kernels::FrameworkType::TENSORFLOW);
RunTestSimple<D>({1, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9}, {0, 0, 0},
2, Padding::SAME, {2, 2},
{0}, {3, 3, 3, 1},
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27},
{1, 5, 5, 3},
{13, 14, 15, 36, 39, 42, 26, 28, 30, 62, 67, 72, 39, 42, 45,
38, 43, 48, 96, 108, 120, 64, 71, 78, 148, 164, 180,
90, 99, 108, 52, 56, 60, 114, 123, 132, 65, 70, 75,
140, 151, 162, 78, 84, 90, 116, 127, 138, 252, 276, 300,
142, 155, 168, 304, 332, 360, 168, 183, 198, 91, 98, 105,
192, 207, 222, 104, 112, 120, 218, 235, 252, 117, 126, 135},
kernels::FrameworkType::CAFFE);
} }
template <DeviceType D> template <DeviceType D>
void TestNHWCSimple3x3SAME_S2_1() { void TestNHWCSimple3x3SAME_S2_1() {
RunTestSimple<D>( RunTestSimple<D>({1, 3, 3, 1}, {12, 18, 12, 18, 27, 18, 12, 18, 12},
{1, 3, 3, 1}, {12, 18, 12, 18, 27, 18, 12, 18, 12}, 2, Padding::SAME, {0, 0, 0},
{}, {1, 5, 5, 3}, {3, 3, 3, 1}, 2, Padding::SAME, {},
{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, {1, 5, 5, 3}, {3, 3, 3, 1},
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
{1, 5, 5, 3}, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
{12, 12, 12, 30, 30, 30, 18, 18, 18, 30, 30, 30, 12, 12, 12, {1, 5, 5, 3},
30, 30, 30, 75, 75, 75, 45, 45, 45, 75, 75, 75, 30, 30, 30, {12, 12, 12, 30, 30, 30, 18, 18, 18, 30, 30, 30, 12, 12, 12,
18, 18, 18, 45, 45, 45, 27, 27, 27, 45, 45, 45, 18, 18, 18, 30, 30, 30, 75, 75, 75, 45, 45, 45, 75, 75, 75, 30, 30, 30,
30, 30, 30, 75, 75, 75, 45, 45, 45, 75, 75, 75, 30, 30, 30, 18, 18, 18, 45, 45, 45, 27, 27, 27, 45, 45, 45, 18, 18, 18,
12, 12, 12, 30, 30, 30, 18, 18, 18, 30, 30, 30, 12, 12, 12}); 30, 30, 30, 75, 75, 75, 45, 45, 45, 75, 75, 75, 30, 30, 30,
12, 12, 12, 30, 30, 30, 18, 18, 18, 30, 30, 30, 12, 12, 12},
kernels::FrameworkType::TENSORFLOW);
} }
template <DeviceType D> template <DeviceType D>
void TestNHWCSimple3x3VALID_S2() { void TestNHWCSimple3x3VALID_S2() {
RunTestSimple<D>( RunTestSimple<D>({1, 3, 3, 1}, {1, 1, 1, 1, 1, 1, 1, 1, 1}, {0, 0, 0},
{1, 3, 3, 1}, {1, 1, 1, 1, 1, 1, 1, 1, 1}, 2, Padding::VALID, {}, 2, Padding::VALID, {},
{1, 7, 7, 3}, {3, 3, 3, 1}, {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, {1, 7, 7, 3}, {3, 3, 3, 1},
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
{1, 7, 7, 3}, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
{1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, {1, 7, 7, 3},
1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, {1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1,
2, 2, 2, 2, 2, 2, 4, 4, 4, 2, 2, 2, 4, 4, 4, 2, 2, 2, 2, 2, 2, 1, 1, 1,
1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1,
2, 2, 2, 2, 2, 2, 4, 4, 4, 2, 2, 2, 4, 4, 4, 2, 2, 2, 2, 2, 2, 1, 1, 1,
1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 4, 4, 4, 2, 2, 2, 4, 4, 4, 2, 2, 2,
1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1}); 2, 2, 2,
1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1,
1, 1, 1,
2, 2, 2, 2, 2, 2, 4, 4, 4, 2, 2, 2, 4, 4, 4, 2, 2, 2,
2, 2, 2,
1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1,
1, 1, 1,
1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1,
1, 1, 1},
kernels::FrameworkType::TENSORFLOW);
} }
template <DeviceType D> template <DeviceType D>
void TestNHWCSimple3x3VALID_S1() { void TestNHWCSimple3x3VALID_S1() {
RunTestSimple<D>( RunTestSimple<D>({1, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9}, {0, 0, 0},
{1, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9}, 1, Padding::VALID, {}, 1, Padding::VALID, {},
{1, 5, 5, 3}, {3, 3, 3, 1}, {1, 5, 5, 3}, {3, 3, 3, 1},
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27}, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27},
{1, 5, 5, 3}, {1, 5, 5, 3},
{1, 2, 3, 6, 9, 12, 18, 24, 30, 26, 31, 36, 21, {1, 2, 3, 6, 9, 12, 18, 24, 30, 26, 31, 36, 21,
24, 27, 14, 19, 24, 54, 66, 78, 126, 147, 168, 130, 146, 24, 27, 14, 19, 24, 54, 66, 78, 126, 147, 168, 130, 146,
162, 90, 99, 108, 66, 78, 90, 198, 225, 252, 405, 450, 495, 162, 90, 99, 108, 66, 78, 90, 198, 225, 252, 405, 450, 495,
366, 399, 432, 234, 252, 270, 146, 157, 168, 354, 378, 402, 630, 366, 399, 432, 234, 252, 270, 146, 157, 168, 354, 378, 402,
669, 708, 502, 530, 558, 294, 309, 324, 133, 140, 147, 306, 321, 630, 669, 708, 502, 530, 558, 294, 309, 324, 133, 140, 147,
336, 522, 546, 570, 398, 415, 432, 225, 234, 243}); 306, 321, 336, 522, 546, 570, 398, 415, 432, 225, 234, 243},
kernels::FrameworkType::TENSORFLOW);
} }
template <DeviceType D> template <DeviceType D>
void TestNHWCSimple2x2SAME() { void TestNHWCSimple2x2SAME() {
RunTestSimple<D>({1, 2, 2, 1}, {1, 1, 1, 1}, 1, Padding::SAME, {}, RunTestSimple<D>({1, 2, 2, 1}, {1, 1, 1, 1}, {0}, 1, Padding::SAME, {},
{1, 2, 2, 1}, {3, 3, 1, 1}, {1, 2, 2, 1}, {3, 3, 1, 1},
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}, {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f},
{1, 2, 2, 1}, {4.f, 4.f, 4.f, 4.f}); {1, 2, 2, 1}, {4.f, 4.f, 4.f, 4.f},
kernels::FrameworkType::TENSORFLOW);
} }
template <DeviceType D> template <DeviceType D>
void TestNHWCSimple2x2VALID() { void TestNHWCSimple2x2VALID() {
RunTestSimple<D>( RunTestSimple<D>(
{1, 2, 2, 1}, {1, 1, 1, 1}, 2, Padding::VALID, {}, {1, 5, 5, 1}, {1, 2, 2, 1}, {1, 1, 1, 1}, {0}, 2, Padding::VALID, {}, {1, 5, 5, 1},
{3, 3, 1, 1}, {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}, {3, 3, 1, 1}, {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f},
{1, 5, 5, 1}, {1, 5, 5, 1},
{1.f, 1.f, 2.f, 1.f, 1.f, 1.f, 1.f, 2.f, 1.f, 1.f, 2.f, 2.f, 4.f, {1.f, 1.f, 2.f, 1.f, 1.f, 1.f, 1.f, 2.f, 1.f, 1.f, 2.f, 2.f, 4.f,
2.f, 2.f, 1.f, 1.f, 2.f, 1.f, 1.f, 1.f, 1.f, 2.f, 1.f, 1.f}); 2.f, 2.f, 1.f, 1.f, 2.f, 1.f, 1.f, 1.f, 1.f, 2.f, 1.f, 1.f},
kernels::FrameworkType::TENSORFLOW);
} }
} // namespace } // namespace
...@@ -311,7 +396,11 @@ void TestComplexDeconvNxNS12(const int batch, ...@@ -311,7 +396,11 @@ void TestComplexDeconvNxNS12(const int batch,
std::vector<int> paddings; std::vector<int> paddings;
std::vector<int> output_shape; std::vector<int> output_shape;
if (padding < 0) { kernels::FrameworkType model_type =
padding < 0 ?
kernels::FrameworkType::TENSORFLOW : kernels::FrameworkType::CAFFE;
if (model_type == kernels::FrameworkType::TENSORFLOW) {
if (type == Padding::SAME) { if (type == Padding::SAME) {
out_h = (height - 1) * stride_h + 1; out_h = (height - 1) * stride_h + 1;
out_w = (width - 1) * stride_w + 1; out_w = (width - 1) * stride_w + 1;
...@@ -323,24 +412,38 @@ void TestComplexDeconvNxNS12(const int batch, ...@@ -323,24 +412,38 @@ void TestComplexDeconvNxNS12(const int batch,
output_shape.push_back(out_h); output_shape.push_back(out_h);
output_shape.push_back(out_w); output_shape.push_back(out_w);
output_shape.push_back(output_channels); output_shape.push_back(output_channels);
net.AddInputFromArray<D, int32_t>("OutputShape", {4}, output_shape);
} else { } else {
// out_h = (height - 1) * stride + 1 + padding - kernel_h + 1; // out_h = (height - 1) * stride + 1 + padding - kernel_h + 1;
// out_w = (width -1) * stride + 1 + padding - kernel_w + 1; // out_w = (width -1) * stride + 1 + padding - kernel_w + 1;
paddings.push_back(padding); paddings.push_back(padding);
paddings.push_back(padding); paddings.push_back(padding);
} }
// Construct graph
OpDefBuilder("Deconv2D", "Deconv2dTest") if (model_type == kernels::FrameworkType::CAFFE) {
.Input("InputNCHW") OpDefBuilder("Deconv2D", "Deconv2dTest")
.Input("Filter") .Input("InputNCHW")
.Input("Bias") .Input("Filter")
.Output("OutputNCHW") .Input("Bias")
.AddIntsArg("strides", {stride_h, stride_w}) .Output("OutputNCHW")
.AddIntArg("padding", type) .AddIntsArg("strides", {stride_h, stride_w})
.AddIntsArg("padding_values", paddings) .AddIntsArg("padding_values", paddings)
.AddIntsArg("output_shape", output_shape) .AddIntArg("framework_type", model_type)
.AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value)) .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
} else {
OpDefBuilder("Deconv2D", "Deconv2dTest")
.Input("InputNCHW")
.Input("Filter")
.Input("OutputShape")
.Input("Bias")
.Output("OutputNCHW")
.AddIntsArg("strides", {stride_h, stride_w})
.AddIntArg("padding", type)
.AddIntArg("framework_type", model_type)
.AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
.Finalize(net.NewOperatorDef());
}
// run on cpu // run on cpu
net.RunOp(); net.RunOp();
...@@ -360,17 +463,30 @@ void TestComplexDeconvNxNS12(const int batch, ...@@ -360,17 +463,30 @@ void TestComplexDeconvNxNS12(const int batch,
BufferToImage<D, T>(&net, "Bias", "BiasImage", BufferToImage<D, T>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
OpDefBuilder("Deconv2D", "Deconv2dTest") if (model_type == kernels::FrameworkType::CAFFE) {
.Input("InputImage") OpDefBuilder("Deconv2D", "Deconv2dTest")
.Input("FilterImage") .Input("InputImage")
.Input("BiasImage") .Input("FilterImage")
.Output("OutputImage") .Input("BiasImage")
.AddIntsArg("strides", {stride_h, stride_w}) .Output("OutputImage")
.AddIntArg("padding", type) .AddIntsArg("strides", {stride_h, stride_w})
.AddIntsArg("padding_values", paddings) .AddIntsArg("padding_values", paddings)
.AddIntsArg("output_shape", output_shape) .AddIntArg("framework_type", model_type)
.AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value)) .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
} else {
OpDefBuilder("Deconv2D", "Deconv2dTest")
.Input("InputImage")
.Input("FilterImage")
.Input("OutputShape")
.Input("BiasImage")
.Output("OutputImage")
.AddIntsArg("strides", {stride_h, stride_w})
.AddIntArg("padding", type)
.AddIntArg("framework_type", model_type)
.AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
.Finalize(net.NewOperatorDef());
}
// Run on device // Run on device
net.RunOp(D); net.RunOp(D);
......
...@@ -70,6 +70,11 @@ class EltwiseType(Enum): ...@@ -70,6 +70,11 @@ class EltwiseType(Enum):
EQUAL = 10 EQUAL = 10
class FrameworkType(Enum):
TENSORFLOW = 0
CAFFE = 1
MaceSupportedOps = [ MaceSupportedOps = [
'Activation', 'Activation',
'AddN', 'AddN',
...@@ -176,6 +181,7 @@ class MaceKeyword(object): ...@@ -176,6 +181,7 @@ class MaceKeyword(object):
mace_seperate_buffer_str = 'seperate_buffer' mace_seperate_buffer_str = 'seperate_buffer'
mace_scalar_input_index_str = 'scalar_input_index' mace_scalar_input_index_str = 'scalar_input_index'
mace_opencl_mem_type = "opencl_mem_type" mace_opencl_mem_type = "opencl_mem_type"
mace_framework_type_str = "framework_type"
class TransformerRule(Enum): class TransformerRule(Enum):
......
...@@ -25,6 +25,7 @@ from mace.python.tools.converter_tool import shape_inference ...@@ -25,6 +25,7 @@ from mace.python.tools.converter_tool import shape_inference
from mace.python.tools.converter_tool.base_converter import PoolingType from mace.python.tools.converter_tool.base_converter import PoolingType
from mace.python.tools.converter_tool.base_converter import ActivationType from mace.python.tools.converter_tool.base_converter import ActivationType
from mace.python.tools.converter_tool.base_converter import EltwiseType from mace.python.tools.converter_tool.base_converter import EltwiseType
from mace.python.tools.converter_tool.base_converter import FrameworkType
from mace.python.tools.converter_tool.base_converter import DataFormat from mace.python.tools.converter_tool.base_converter import DataFormat
from mace.python.tools.converter_tool.base_converter import FilterFormat from mace.python.tools.converter_tool.base_converter import FilterFormat
from mace.python.tools.converter_tool.base_converter import MaceOp from mace.python.tools.converter_tool.base_converter import MaceOp
...@@ -351,6 +352,10 @@ class CaffeConverter(base_converter.ConverterInterface): ...@@ -351,6 +352,10 @@ class CaffeConverter(base_converter.ConverterInterface):
data_type_arg.name = 'T' data_type_arg.name = 'T'
data_type_arg.i = self._option.data_type data_type_arg.i = self._option.data_type
framework_type_arg = op.arg.add()
framework_type_arg.name = MaceKeyword.mace_framework_type_str
framework_type_arg.i = FrameworkType.CAFFE.value
ConverterUtil.add_data_format_arg(op, DataFormat.NCHW) ConverterUtil.add_data_format_arg(op, DataFormat.NCHW)
return op return op
......
...@@ -25,6 +25,7 @@ from mace.python.tools.converter_tool.base_converter import PoolingType ...@@ -25,6 +25,7 @@ from mace.python.tools.converter_tool.base_converter import PoolingType
from mace.python.tools.converter_tool.base_converter import PaddingMode from mace.python.tools.converter_tool.base_converter import PaddingMode
from mace.python.tools.converter_tool.base_converter import ActivationType from mace.python.tools.converter_tool.base_converter import ActivationType
from mace.python.tools.converter_tool.base_converter import EltwiseType from mace.python.tools.converter_tool.base_converter import EltwiseType
from mace.python.tools.converter_tool.base_converter import FrameworkType
from mace.python.tools.converter_tool.base_converter import DataFormat from mace.python.tools.converter_tool.base_converter import DataFormat
from mace.python.tools.converter_tool.base_converter import FilterFormat from mace.python.tools.converter_tool.base_converter import FilterFormat
from mace.python.tools.converter_tool.base_converter import MaceOp from mace.python.tools.converter_tool.base_converter import MaceOp
...@@ -372,6 +373,10 @@ class TensorflowConverter(base_converter.ConverterInterface): ...@@ -372,6 +373,10 @@ class TensorflowConverter(base_converter.ConverterInterface):
except ValueError: except ValueError:
data_type_arg.i = self._option.data_type data_type_arg.i = self._option.data_type
framework_type_arg = op.arg.add()
framework_type_arg.name = MaceKeyword.mace_framework_type_str
framework_type_arg.i = FrameworkType.TENSORFLOW.value
ConverterUtil.add_data_format_arg(op, DataFormat.NHWC) ConverterUtil.add_data_format_arg(op, DataFormat.NHWC)
return op return op
...@@ -414,13 +419,13 @@ class TensorflowConverter(base_converter.ConverterInterface): ...@@ -414,13 +419,13 @@ class TensorflowConverter(base_converter.ConverterInterface):
"deconv should have (>=) 3 inputs.") "deconv should have (>=) 3 inputs.")
output_shape_arg = op.arg.add() output_shape_arg = op.arg.add()
output_shape_arg.name = MaceKeyword.mace_output_shape_str output_shape_arg.name = MaceKeyword.mace_output_shape_str
if tf_op.inputs[0].op.type == TFOpType.Const.name: # if tf_op.inputs[0].op.type == TFOpType.Const.name:
output_shape_value = \ # output_shape_value = \
tf_op.inputs[0].eval().astype(np.int32).flat # tf_op.inputs[0].eval().astype(np.int32).flat
output_shape_arg.ints.extend(output_shape_value) # output_shape_arg.ints.extend(output_shape_value)
else: # else:
output_shape_value = {} # output_shape_value = {}
output_shape_arg.ints.extend(output_shape_value) # output_shape_arg.ints.extend(output_shape_value)
del op.input[:] del op.input[:]
op.input.extend([tf_op.inputs[2].name, op.input.extend([tf_op.inputs[2].name,
tf_op.inputs[1].name, tf_op.inputs[1].name,
......
...@@ -26,6 +26,7 @@ from mace.python.tools.converter_tool.base_converter import ConverterUtil ...@@ -26,6 +26,7 @@ from mace.python.tools.converter_tool.base_converter import ConverterUtil
from mace.python.tools.converter_tool.base_converter import DataFormat from mace.python.tools.converter_tool.base_converter import DataFormat
from mace.python.tools.converter_tool.base_converter import DeviceType from mace.python.tools.converter_tool.base_converter import DeviceType
from mace.python.tools.converter_tool.base_converter import EltwiseType from mace.python.tools.converter_tool.base_converter import EltwiseType
from mace.python.tools.converter_tool.base_converter import FrameworkType
from mace.python.tools.converter_tool.base_converter import FilterFormat from mace.python.tools.converter_tool.base_converter import FilterFormat
from mace.python.tools.converter_tool.base_converter import MaceKeyword from mace.python.tools.converter_tool.base_converter import MaceKeyword
from mace.python.tools.converter_tool.base_converter import MaceOp from mace.python.tools.converter_tool.base_converter import MaceOp
...@@ -810,12 +811,22 @@ class Transformer(base_converter.ConverterInterface): ...@@ -810,12 +811,22 @@ class Transformer(base_converter.ConverterInterface):
net = self._model net = self._model
for op in net.op: for op in net.op:
if (((op.type == MaceOp.Conv2D.name if (((op.type == MaceOp.Conv2D.name
or op.type == MaceOp.Deconv2D.name
or op.type == MaceOp.DepthwiseConv2d.name or op.type == MaceOp.DepthwiseConv2d.name
or op.type == MaceOp.FullyConnected.name) or op.type == MaceOp.FullyConnected.name)
and len(op.input) == 2) and len(op.input) == 2)
or (op.type == MaceOp.WinogradInverseTransform.name or (op.type == MaceOp.WinogradInverseTransform.name
and len(op.input) == 1)) \ and len(op.input) == 1)
or (op.type == MaceOp.Deconv2D.name
and ((ConverterUtil.get_arg(
op,
MaceKeyword.mace_framework_type_str).i ==
FrameworkType.CAFFE.value
and len(op.input) == 2)
or (ConverterUtil.get_arg(
op,
MaceKeyword.mace_framework_type_str).i
== FrameworkType.TENSORFLOW.value
and len(op.input) == 3)))) \
and len(self._consumers.get(op.output[0], [])) == 1: and len(self._consumers.get(op.output[0], [])) == 1:
consumer_op = self._consumers[op.output[0]][0] consumer_op = self._consumers[op.output[0]][0]
if consumer_op.type == MaceOp.BiasAdd.name: if consumer_op.type == MaceOp.BiasAdd.name:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册