Commit e9d1103b authored by liutuo

fold shape_stridedslice_stack

Parent 50666a81
@@ -90,20 +90,18 @@ void Deconv2dNCHW(const T *input,
} // namespace deconv
struct Deconv2dFunctorBase {
Deconv2dFunctorBase(const int *strides,
Deconv2dFunctorBase(const std::vector<int> &strides,
const Padding &padding_type,
const std::vector<int> &paddings,
const std::vector<index_t> &output_shape,
const ActivationType activation,
const float relux_max_limit,
const bool from_caffe)
const float relux_max_limit)
: strides_(strides),
padding_type_(padding_type),
paddings_(paddings),
output_shape_(output_shape),
activation_(activation),
relux_max_limit_(relux_max_limit),
from_caffe_(from_caffe) {}
relux_max_limit_(relux_max_limit) {}
static void CalcDeconvOutputSize(
const index_t *input_shape, // NHWC
@@ -202,31 +200,28 @@ struct Deconv2dFunctorBase {
padding_size[1] = std::max<int>(0, p_w);
}
const int *strides_; // [stride_h, stride_w]
std::vector<int> strides_; // [stride_h, stride_w]
const Padding padding_type_;
std::vector<int> paddings_;
std::vector<index_t> output_shape_;
const ActivationType activation_;
const float relux_max_limit_;
const bool from_caffe_;
};
template <DeviceType D, typename T>
struct Deconv2dFunctor : Deconv2dFunctorBase {
Deconv2dFunctor(const int *strides,
Deconv2dFunctor(const std::vector<int> &strides,
const Padding &padding_type,
const std::vector<int> &paddings,
const std::vector<index_t> &output_shape,
const ActivationType activation,
const float relux_max_limit,
const bool from_caffe)
const float relux_max_limit)
: Deconv2dFunctorBase(strides,
padding_type,
paddings,
output_shape,
activation,
relux_max_limit,
from_caffe) {}
relux_max_limit) {}
MaceStatus operator()(const Tensor *input, // NCHW
const Tensor *filter, // OIHW
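Note on the constructor change above: the from_caffe flag is gone, and the functor now infers the framework convention from which attributes the model actually supplies. A minimal sketch of the new dispatch rule, with a hypothetical helper name that is not part of the commit:

#include <vector>

// Hypothetical helper for illustration: TensorFlow-converted deconv ops carry
// an output_shape but no padding_values, while Caffe-converted ops carry
// padding_values. An empty paddings vector therefore selects the TensorFlow
// branch (derive paddings from the requested output shape); a non-empty one
// selects the Caffe branch (derive the output shape from the paddings).
inline bool UsesTensorFlowDeconvRule(const std::vector<int> &paddings) {
  return paddings.empty();
}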
@@ -239,13 +234,12 @@ struct Deconv2dFunctor : Deconv2dFunctorBase {
MACE_CHECK_NOTNULL(filter);
MACE_CHECK_NOTNULL(output);
if (!from_caffe_) { // tensorflow
std::vector<index_t> output_shape(4);
std::vector<int> paddings(2);
std::vector<index_t> output_shape(4);
if (paddings_.empty()) { // tensorflow
paddings = std::vector<int>(2, 0);
if (output_shape_.size() == 4) {
output_shape[0] = output_shape_[0];
output_shape[1] = output_shape_[3];
output_shape[2] = output_shape_[1];
output_shape[3] = output_shape_[2];
output_shape = output_shape_;
} else {
MACE_CHECK_NOTNULL(output_shape_tensor);
MACE_CHECK(output_shape_tensor->size() == 4);
@@ -255,36 +249,38 @@ struct Deconv2dFunctor : Deconv2dFunctorBase {
output_shape =
std::vector<index_t>(output_shape_data, output_shape_data + 4);
}
paddings_.clear();
paddings_ = std::vector<int>(2, 0);
const index_t t = output_shape[1];
output_shape[1] = output_shape[3];
output_shape[3] = output_shape[2];
output_shape[2] = t;
CalcDeconvPaddingAndInputSize(
input->shape().data(),
filter->shape().data(),
strides_, padding_type_,
strides_.data(), padding_type_,
output_shape.data(),
paddings_.data(), true);
MACE_RETURN_IF_ERROR(output->Resize(output_shape));
paddings.data(), true);
} else { // caffe
output_shape_.clear();
output_shape_ = std::vector<index_t>(4, 0);
paddings = paddings_;
output_shape = std::vector<index_t>(4, 0);
CalcDeconvOutputSize(input->shape().data(),
filter->shape().data(),
strides_,
output_shape_.data(),
paddings_.data(), true);
MACE_RETURN_IF_ERROR(output->Resize(output_shape_));
strides_.data(),
output_shape.data(),
paddings.data(), true);
}
MACE_RETURN_IF_ERROR(output->Resize(output_shape));
index_t kernel_h = filter->dim(2);
index_t kernel_w = filter->dim(3);
const index_t *in_shape = input->shape().data();
const index_t *out_shape = output->shape().data();
const index_t kernel_hw[2] = {kernel_h, kernel_w};
MACE_CHECK(filter->dim(0) == out_shape[1], filter->dim(0), " != ",
out_shape[1]);
MACE_CHECK(filter->dim(0) == output_shape[1], filter->dim(0), " != ",
output_shape[1]);
MACE_CHECK(filter->dim(1) == in_shape[1], filter->dim(1), " != ",
in_shape[1]);
MACE_CHECK(in_shape[0] == out_shape[0], "Input/Output batch size mismatch");
MACE_CHECK(in_shape[0] == output_shape[0],
"Input/Output batch size mismatch");
Tensor::MappingGuard input_mapper(input);
Tensor::MappingGuard filter_mapper(filter);
Tensor::MappingGuard bias_mapper(bias);
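On the shape juggling above: in the TensorFlow branch the requested output_shape arrives in NHWC order, while the CPU kernel computes in NCHW, hence the temporary-variable rotation before CalcDeconvPaddingAndInputSize. An illustrative standalone version (index_t is assumed here to mirror MACE's int64_t typedef):

#include <cstdint>
#include <vector>

using index_t = int64_t;  // assumption: matches MACE's index_t

// Illustration only: rotate an NHWC shape into the NCHW order the CPU
// deconv kernel expects, matching the swap done in the hunk above.
std::vector<index_t> NhwcToNchw(const std::vector<index_t> &s) {
  return {s[0], s[3], s[1], s[2]};  // {N, H, W, C} -> {N, C, H, W}
}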
@@ -294,15 +290,15 @@ struct Deconv2dFunctor : Deconv2dFunctorBase {
auto bias_data = bias == nullptr ? nullptr : bias->data<T>();
auto output_data = output->mutable_data<T>();
int padding[2];
padding[0] = (paddings_[0] + 1) >> 1;
padding[1] = (paddings_[1] + 1) >> 1;
padding[0] = (paddings[0] + 1) >> 1;
padding[1] = (paddings[1] + 1) >> 1;
deconv::Deconv2dNCHW(input_data,
filter_data,
bias_data,
in_shape,
out_shape,
output_shape.data(),
kernel_hw,
strides_,
strides_.data(),
padding,
output_data);
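The padding halving above, (p + 1) >> 1, converts the total padding stored in paddings into a per-side amount, rounding up so an odd total never under-pads. A worked example:

#include <cassert>

int main() {
  // (total + 1) >> 1 == ceil(total / 2): a total padding of 3 becomes 2 on
  // the side the kernel pads, matching the shift expressions above.
  const int total_padding = 3;
  const int per_side = (total_padding + 1) >> 1;
  assert(per_side == 2);
  return 0;
}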
@@ -319,20 +315,18 @@ struct Deconv2dFunctor : Deconv2dFunctorBase {
#ifdef MACE_ENABLE_OPENCL
template <typename T>
struct Deconv2dFunctor<DeviceType::GPU, T> : Deconv2dFunctorBase {
Deconv2dFunctor(const int *strides,
Deconv2dFunctor(const std::vector<int> &strides,
const Padding &padding_type,
const std::vector<int> &paddings,
const std::vector<index_t> &output_shape,
const ActivationType activation,
const float relux_max_limit,
const bool from_caffe)
const float relux_max_limit)
: Deconv2dFunctorBase(strides,
padding_type,
paddings,
output_shape,
activation,
relux_max_limit,
from_caffe) {}
relux_max_limit) {}
MaceStatus operator()(const Tensor *input,
const Tensor *filter,
......
@@ -15,8 +15,10 @@ __kernel void deconv_2d(KERNEL_ERROR_PARAMS
__private const int out_height,
__private const int out_width,
__private const int out_channel,
__private const int stride,
__private const float stride_r,
__private const int stride_h,
__private const int stride_w,
__private const float stride_h_r,
__private const float stride_w_r,
__private const int align_h,
__private const int align_w,
__private const int padding_h,
@@ -53,18 +55,18 @@ __kernel void deconv_2d(KERNEL_ERROR_PARAMS
DATA_TYPE4 out4 = 0;
#endif
const int n_stride = mad(w_id, stride_r, 0);
const int mod_stride = w_id - mul24(n_stride, stride);
const int w = mad24(mul24(n_stride, 5), stride, mod_stride);
const int n_stride = mad(w_id, stride_w_r, 0);
const int mod_stride = w_id - mul24(n_stride, stride_w);
const int w = mad24(mul24(n_stride, 5), stride_w, mod_stride);
const int b = hb / out_height;
const int h = hb - mul24(b, out_height);
if (w < out_width) {
int start_x = floor((float) (w + align_w) * stride_r);
int start_y = (h + align_h) * stride_r;
int start_x = floor((float) (w + align_w) * stride_w_r);
int start_y = (h + align_h) * stride_h_r;
start_y = max(0, start_y);
int f_start_x = mad24(start_x, stride, padding_w) - w;
int f_start_y = mad24(start_y, stride, padding_h) - h;
int f_start_x = mad24(start_x, stride_w, padding_w) - w;
int f_start_y = mad24(start_y, stride_h, padding_h) - h;
f_start_x = kernel_w - 1 - f_start_x;
f_start_y = kernel_h - 1 - f_start_y;
@@ -79,10 +81,10 @@ __kernel void deconv_2d(KERNEL_ERROR_PARAMS
f_pos_x1 = f_pos_x0 + 1;
f_pos_x2 = f_pos_x0 + 2;
f_pos_x3 = f_pos_x0 + 3;
for (int f_y = f_start_y, idx_h = start_y ; f_y >= 0; f_y -= stride, ++idx_h) {
for (int f_y = f_start_y, idx_h = start_y ; f_y >= 0; f_y -= stride_h, ++idx_h) {
index_y = mad24(b, in_height, idx_h);
in_pos.y = select(index_y, -1, idx_h < 0 || idx_h >= in_height);
for (int f_x = f_start_x, idx_w = start_x; f_x >= 0; f_x -= stride, ++idx_w) {
for (int f_x = f_start_x, idx_w = start_x; f_x >= 0; f_x -= stride_w, ++idx_w) {
f_pos_y = mad24(f_y, kernel_w, f_x);
f_pos_y = mad24(c, kernel_size, f_pos_y);
weight0 = READ_IMAGET(weights, SAMPLER, (int2)(f_pos_x0, f_pos_y));
@@ -141,24 +143,24 @@ __kernel void deconv_2d(KERNEL_ERROR_PARAMS
out_pos.x = mad24(c, out_width, ow);
WRITE_IMAGET(output, out_pos, out0);
ow += stride;
ow += stride_w;
if (ow >= out_width) return;
out_pos.x += stride;
out_pos.x += stride_w;
WRITE_IMAGET(output, out_pos, out1);
ow += stride;
ow += stride_w;
if (ow >= out_width) return;
out_pos.x += stride;
out_pos.x += stride_w;
WRITE_IMAGET(output, out_pos, out2);
ow += stride;
ow += stride_w;
if (ow >= out_width) return;
out_pos.x += stride;
out_pos.x += stride_w;
WRITE_IMAGET(output, out_pos, out3);
ow += stride;
ow += stride_w;
if (ow >= out_width) return;
out_pos.x += stride;
out_pos.x += stride_w;
WRITE_IMAGET(output, out_pos, out4);
}
}
\ No newline at end of file
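On the kernel arithmetic above: dividing the work-item index by the stride is replaced by a multiply with the precomputed reciprocal (stride_w_r) plus mul24/mad24, which is typically cheaper on GPUs. A host-side sketch of the same arithmetic; it is exact for the small positive strides used here, though a float reciprocal is not a general substitute for integer division:

#include <cassert>

int main() {
  const int stride_w = 2;
  const float stride_w_r = 1.0f / static_cast<float>(stride_w);
  const int w_id = 7;
  // mad(w_id, stride_w_r, 0) plus truncation == integer division by stride_w.
  const int n_stride = static_cast<int>(w_id * stride_w_r);
  // w_id - n_stride * stride_w recovers the remainder without a modulo.
  const int mod_stride = w_id - n_stride * stride_w;
  assert(n_stride == 3 && mod_stride == 1);
  return 0;
}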
@@ -24,7 +24,7 @@ MaceStatus Deconv2dOpencl(cl::Kernel *kernel,
const Tensor *input,
const Tensor *filter,
const Tensor *bias,
const int stride,
const int *strides,
const int *paddings,
const ActivationType activation,
const float relux_max_limit,
@@ -42,17 +42,20 @@ MaceStatus Deconv2dOpencl(cl::Kernel *kernel,
const index_t channel_blocks = RoundUpDiv4(channels);
const index_t input_channel_blocks = RoundUpDiv4(input_channels);
MACE_CHECK(stride > 0, "stride should > 0.");
const int stride_h = strides[0];
const int stride_w = strides[1];
MACE_CHECK(stride_w > 0 && stride_h > 0, "strides should be > 0.");
#define MACE_WIDTH_BLK 5
const index_t n_strides = (width + stride - 1) / stride;
const index_t n_strides = (width + stride_w - 1) / stride_w;
const index_t width_blocks =
((n_strides + MACE_WIDTH_BLK - 1) / MACE_WIDTH_BLK) * stride;
const float stride_r = 1.f / static_cast<float>(stride);
((n_strides + MACE_WIDTH_BLK - 1) / MACE_WIDTH_BLK) * stride_w;
const float stride_h_r = 1.f / static_cast<float>(stride_h);
const float stride_w_r = 1.f / static_cast<float>(stride_w);
const int padding_h = (paddings[0] + 1) >> 1;
const int padding_w = (paddings[0] + 1) >> 1;
const int padding_w = (paddings[1] + 1) >> 1;
const int align_h = stride - 1 - padding_h;
const int align_w = stride - 1 - padding_w;
const int align_h = stride_h - 1 - padding_h;
const int align_w = stride_w - 1 - padding_w;
const int kernel_size = filter->dim(2) * filter->dim(3);
auto runtime = OpenCLRuntime::Global();
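Besides splitting the stride into stride_h/stride_w, the hunk above also fixes a copy-paste bug: padding_w previously read paddings[0] (the height total) instead of paddings[1]. Illustrative values (my own, not from the commit) showing the difference and the derived alignment offsets:

int main() {
  const int paddings[2] = {2, 4};  // {total_h, total_w}, illustrative only
  const int stride_h = 2, stride_w = 2;
  const int padding_h = (paddings[0] + 1) >> 1;  // 1
  const int padding_w = (paddings[1] + 1) >> 1;  // 2 (the old code yielded 1)
  const int align_h = stride_h - 1 - padding_h;  // 0
  const int align_w = stride_w - 1 - padding_w;  // -1
  (void)align_h; (void)align_w;
  return 0;
}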
@@ -113,8 +116,10 @@ MaceStatus Deconv2dOpencl(cl::Kernel *kernel,
kernel->setArg(idx++, static_cast<int32_t>(height));
kernel->setArg(idx++, static_cast<int32_t>(width));
kernel->setArg(idx++, static_cast<int32_t>(channels));
kernel->setArg(idx++, static_cast<int32_t>(stride));
kernel->setArg(idx++, stride_r);
kernel->setArg(idx++, static_cast<int32_t>(stride_h));
kernel->setArg(idx++, static_cast<int32_t>(stride_w));
kernel->setArg(idx++, stride_h_r);
kernel->setArg(idx++, stride_w_r);
kernel->setArg(idx++, static_cast<int32_t>(align_h));
kernel->setArg(idx++, static_cast<int32_t>(align_w));
kernel->setArg(idx++, static_cast<int32_t>(padding_h));
@@ -152,34 +157,43 @@ MaceStatus Deconv2dFunctor<DeviceType::GPU, T>::operator()(
MACE_CHECK_NOTNULL(input);
MACE_CHECK_NOTNULL(filter);
MACE_CHECK_NOTNULL(output);
if (!from_caffe_) {
std::vector<int> paddings(2);
std::vector<index_t> output_shape(4);
if (paddings_.empty()) {
paddings = std::vector<int>(2, 0);
if (output_shape_.size() != 4) {
MACE_CHECK_NOTNULL(output_shape_tensor);
MACE_CHECK(output_shape_tensor->size() == 4);
Tensor::MappingGuard output_shape_mapper(output_shape_tensor);
auto output_shape_data =
output_shape_tensor->data<int32_t>();
output_shape_ =
output_shape =
std::vector<index_t>(output_shape_data, output_shape_data + 4);
} else {
output_shape = output_shape_;
}
paddings_.clear();
paddings_ = std::vector<int>(2, 0);
CalcDeconvPaddingAndInputSize(input->shape().data(), filter->shape().data(),
strides_, padding_type_, output_shape_.data(),
paddings_.data());
CalcDeconvPaddingAndInputSize(input->shape().data(),
filter->shape().data(),
strides_.data(),
padding_type_,
output_shape.data(),
paddings.data());
} else {
output_shape_.clear();
output_shape_ = std::vector<index_t>(4, 0);
CalcDeconvOutputSize(input->shape().data(), filter->shape().data(),
strides_, output_shape_.data(), paddings_.data());
paddings = paddings_;
output_shape = std::vector<index_t>(4, 0);
CalcDeconvOutputSize(input->shape().data(),
filter->shape().data(),
strides_.data(),
output_shape.data(),
paddings.data());
}
std::vector<size_t> output_image_shape;
CalImage2DShape(output_shape_, BufferType::IN_OUT_CHANNEL,
CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL,
&output_image_shape);
MACE_RETURN_IF_ERROR(output->ResizeImage(output_shape_, output_image_shape));
MACE_RETURN_IF_ERROR(output->ResizeImage(output_shape, output_image_shape));
return Deconv2dOpencl(&kernel_, input, filter, bias, strides_[0],
paddings_.data(), activation_, relux_max_limit_,
return Deconv2dOpencl(&kernel_, input, filter, bias, strides_.data(),
paddings.data(), activation_, relux_max_limit_,
DataTypeToEnum<T>::value, &input_shape_, output, future,
&kwg_size_, &kernel_error_);
}
......
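In both the CPU and GPU paths the computed paddings and output_shape now live in per-call locals instead of being written back into paddings_ and output_shape_. A plausible rationale (my reading, not stated in the commit): mutating the members made the branch selection state-dependent across runs, as sketched here:

#include <vector>

// Hypothetical reduction of the old pattern: after the first run filled
// paddings_, a TensorFlow-style op no longer satisfied paddings_.empty()
// and would take the Caffe branch on every subsequent run.
struct OldStyleFunctor {
  std::vector<int> paddings_;  // model attribute; empty for TensorFlow ops
  bool RanTensorFlowBranch() {
    if (paddings_.empty()) {
      paddings_ = std::vector<int>(2, 0);  // member mutation, as before
      return true;
    }
    return false;  // a second call lands here even for a TensorFlow op
  }
};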
@@ -19,23 +19,22 @@
#include "mace/core/operator.h"
#include "mace/kernels/deconv_2d.h"
#include "mace/ops/conv_pool_2d_base.h"
namespace mace {
namespace ops {
template <DeviceType D, typename T>
class Deconv2dOp : public ConvPool2dOpBase<D, T> {
class Deconv2dOp : public Operator<D, T> {
public:
Deconv2dOp(const OperatorDef &op_def, Workspace *ws)
: ConvPool2dOpBase<D, T>(op_def, ws),
functor_(this->strides_.data(),
this->padding_type_,
this->paddings_,
: Operator<D, T>(op_def, ws),
functor_(OperatorBase::GetRepeatedArgs<int>("strides"),
static_cast<Padding>(OperatorBase::GetOptionalArg<int>(
"padding", static_cast<int>(SAME))),
OperatorBase::GetRepeatedArgs<int>("padding_values"),
OperatorBase::GetRepeatedArgs<index_t>("output_shape"),
kernels::ActivationType::NOOP,
0.0f,
OperatorBase::GetOptionalArg<bool>("from_caffe", false)) {}
0.0f) {}
MaceStatus Run(StatsFuture *future) override {
const Tensor *input = this->Input(INPUT);
......
@@ -41,7 +41,6 @@ void RunTestSimple(const std::vector<index_t> &input_shape,
net.AddInputFromArray<D, float>("Input", input_shape, input_data);
net.AddInputFromArray<D, float>("Filter", filter_shape, filter_data);
net.TransformDataFormat<D, float>("Filter", HWOI, "FilterOIHW", OIHW);
bool from_caffe = output_shape.size() != 4;
if (D == DeviceType::GPU) {
BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL);
@@ -55,7 +54,6 @@ void RunTestSimple(const std::vector<index_t> &input_shape,
.AddIntArg("padding", padding)
.AddIntsArg("padding_values", padding_size)
.AddIntsArg("output_shape", output_shape)
.AddIntArg("from_caffe", from_caffe)
.Finalize(net.NewOperatorDef());
net.RunOp(D);
@@ -74,7 +72,6 @@ void RunTestSimple(const std::vector<index_t> &input_shape,
.AddIntArg("padding", padding)
.AddIntsArg("padding_values", padding_size)
.AddIntsArg("output_shape", output_shape)
.AddIntArg("from_caffe", from_caffe)
.Finalize(net.NewOperatorDef());
// Run
net.RunOp(D);
@@ -89,7 +86,7 @@ void RunTestSimple(const std::vector<index_t> &input_shape,
template <DeviceType D>
void TestNHWCSimple3x3SAME_S1() {
RunTestSimple<D>({1, 3, 3, 1}, {1, 1, 1, 1, 1, 1, 1, 1, 1}, 1, Padding::SAME,
{0, 0}, {1, 3, 3, 3}, {3, 3, 3, 1},
{}, {1, 3, 3, 3}, {3, 3, 3, 1},
{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
{1, 3, 3, 3}, {4, 4, 4, 6, 6, 6, 4, 4, 4, 6, 6, 6, 9, 9,
@@ -101,7 +98,7 @@ void TestNHWCSimple3x3SAME_S1() {
{1, 3, 3, 3}, {4, 4, 4, 6, 6, 6, 4, 4, 4, 6, 6, 6, 9, 9,
9, 6, 6, 6, 4, 4, 4, 6, 6, 6, 4, 4, 4});
RunTestSimple<D>({1, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9}, 1, Padding::SAME,
{0, 0}, {1, 3, 3, 3}, {3, 3, 3, 1},
{}, {1, 3, 3, 3}, {3, 3, 3, 1},
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27},
{1, 3, 3, 3}, {54, 66, 78, 126, 147, 168, 130, 146, 162,
@@ -119,7 +116,7 @@ void TestNHWCSimple3x3SAME_S1() {
template <DeviceType D>
void TestNHWCSimple3x3SAME_S2() {
RunTestSimple<D>(
{1, 3, 3, 1}, {1, 1, 1, 1, 1, 1, 1, 1, 1}, 2, Padding::SAME, {0, 0},
{1, 3, 3, 1}, {1, 1, 1, 1, 1, 1, 1, 1, 1}, 2, Padding::SAME, {},
{1, 6, 6, 3}, {3, 3, 3, 1}, {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
{1, 6, 6, 3},
@@ -137,7 +134,7 @@ void TestNHWCSimple3x3SAME_S2() {
1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 4, 4, 4, 2, 2, 2, 4, 4, 4,
2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1});
RunTestSimple<D>(
{1, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9}, 2, Padding::SAME, {0, 0},
{1, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9}, 2, Padding::SAME, {},
{1, 6, 6, 3}, {3, 3, 3, 1},
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27},
@@ -167,7 +164,7 @@ template <DeviceType D>
void TestNHWCSimple3x3SAME_S2_1() {
RunTestSimple<D>(
{1, 3, 3, 1}, {12, 18, 12, 18, 27, 18, 12, 18, 12}, 2, Padding::SAME,
{0, 0}, {1, 5, 5, 3}, {3, 3, 3, 1},
{}, {1, 5, 5, 3}, {3, 3, 3, 1},
{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
{1, 5, 5, 3},
@@ -181,7 +178,7 @@ void TestNHWCSimple3x3SAME_S2_1() {
template <DeviceType D>
void TestNHWCSimple3x3VALID_S2() {
RunTestSimple<D>(
{1, 3, 3, 1}, {1, 1, 1, 1, 1, 1, 1, 1, 1}, 2, Padding::VALID, {0, 0},
{1, 3, 3, 1}, {1, 1, 1, 1, 1, 1, 1, 1, 1}, 2, Padding::VALID, {},
{1, 7, 7, 3}, {3, 3, 3, 1}, {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
{1, 7, 7, 3},
@@ -197,7 +194,7 @@ void TestNHWCSimple3x3VALID_S2() {
template <DeviceType D>
void TestNHWCSimple3x3VALID_S1() {
RunTestSimple<D>(
{1, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9}, 1, Padding::VALID, {0, 0},
{1, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9}, 1, Padding::VALID, {},
{1, 5, 5, 3}, {3, 3, 3, 1},
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27},
@@ -212,7 +209,7 @@ void TestNHWCSimple3x3VALID_S1() {
template <DeviceType D>
void TestNHWCSimple2x2SAME() {
RunTestSimple<D>({1, 2, 2, 1}, {1, 1, 1, 1}, 1, Padding::SAME, {0, 0},
RunTestSimple<D>({1, 2, 2, 1}, {1, 1, 1, 1}, 1, Padding::SAME, {},
{1, 2, 2, 1}, {3, 3, 1, 1},
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f},
{1, 2, 2, 1}, {4.f, 4.f, 4.f, 4.f});
@@ -221,7 +218,7 @@ void TestNHWCSimple2x2SAME() {
template <DeviceType D>
void TestNHWCSimple2x2VALID() {
RunTestSimple<D>(
{1, 2, 2, 1}, {1, 1, 1, 1}, 2, Padding::VALID, {0, 0}, {1, 5, 5, 1},
{1, 2, 2, 1}, {1, 1, 1, 1}, 2, Padding::VALID, {}, {1, 5, 5, 1},
{3, 3, 1, 1}, {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f},
{1, 5, 5, 1},
{1.f, 1.f, 2.f, 1.f, 1.f, 1.f, 1.f, 2.f, 1.f, 1.f, 2.f, 2.f, 4.f,
@@ -333,7 +330,6 @@ void TestComplexDeconvNxNS12(const int batch,
paddings.push_back(padding);
paddings.push_back(padding);
}
bool from_caffe = output_shape.size() != 4;
// Construct graph
OpDefBuilder("Deconv2D", "Deconv2dTest")
.Input("InputNCHW")
@@ -344,7 +340,6 @@ void TestComplexDeconvNxNS12(const int batch,
.AddIntArg("padding", type)
.AddIntsArg("padding_values", paddings)
.AddIntsArg("output_shape", output_shape)
.AddIntArg("from_caffe", from_caffe)
.AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
.Finalize(net.NewOperatorDef());
@@ -375,7 +370,6 @@ void TestComplexDeconvNxNS12(const int batch,
.AddIntArg("padding", type)
.AddIntsArg("padding_values", paddings)
.AddIntsArg("output_shape", output_shape)
.AddIntArg("from_caffe", from_caffe)
.AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
.Finalize(net.NewOperatorDef());
// Run on device
......
@@ -39,8 +39,20 @@ class ShapeOp : public Operator<D, T> {
Tensor::MappingGuard output_guard(output);
int32_t *output_data = output->mutable_data<int32_t>();
for (index_t i = 0; i < input->dim_size(); ++i) {
output_data[i] = input->dim(i);
const int data_format =
OperatorBase::GetOptionalArg<int>("data_format", 0);
if (input->dim_size() == 4 &&
D == DeviceType::CPU &&
data_format == DataFormat::NCHW) {
// transpose NCHW to NHWC for cpu runtime
output_data[0] = static_cast<int32_t>(input->dim(0));
output_data[1] = static_cast<int32_t>(input->dim(2));
output_data[2] = static_cast<int32_t>(input->dim(3));
output_data[3] = static_cast<int32_t>(input->dim(1));
} else {
for (unsigned int i = 0; i < input->dim_size(); ++i) {
output_data[i] = static_cast<int32_t>(input->dim(i));
}
}
SetFutureDefaultWaitFn(future);
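The ShapeOp change mirrors the layout convention used elsewhere in this commit: on the CPU runtime 4-D tensors are stored NCHW, while the graph-level shape is NHWC, so Shape reports the dims in 0, 2, 3, 1 order. A standalone illustration:

#include <cstdint>

int main() {
  const int64_t nchw[4] = {1, 3, 6, 6};  // runtime layout: {N, C, H, W}
  // Reported layout: {N, H, W, C}, matching the reordering in the hunk above.
  const int32_t out[4] = {static_cast<int32_t>(nchw[0]),
                          static_cast<int32_t>(nchw[2]),
                          static_cast<int32_t>(nchw[3]),
                          static_cast<int32_t>(nchw[1])};
  return out[3] == 3 ? 0 : 1;
}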
......
@@ -167,7 +167,6 @@ class MaceKeyword(object):
mace_transpose_b_str = 'transpose_b'
mace_op_data_type_str = 'T'
mace_offset_str = 'offset'
mace_from_caffe_str = 'from_caffe'
mace_opencl_max_image_size = "opencl_max_image_size"
mace_seperate_buffer_str = 'seperate_buffer'
mace_scalar_input_index_str = 'scalar_input_index'
......
@@ -414,10 +414,6 @@ class CaffeConverter(base_converter.ConverterInterface):
op.type = MaceOp.Deconv2D.name
from_caffe_arg = op.arg.add()
from_caffe_arg.name = MaceKeyword.mace_from_caffe_str
from_caffe_arg.i = 1
self.add_stride_pad_kernel_arg(param, op)
# dilation is specific for convolution in caffe
dilations = [1, 1]
......