Commit fafb7998 authored by 李超

Merge branch 'fix-crop-bug' into 'master'

BUG: fix crop layer bugs.

See merge request !1025
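This change replaces the Caffe-style (axis, offset) cropping logic with a single 4-element offset vector, where an entry of -1 means the corresponding dimension is left uncropped. A minimal sketch of that shape rule follows (illustrative only, not the MACE API; the function name is hypothetical):

    def crop_output(shape0, shape1, offsets):
        """Compute the cropped output shape from input0's shape, input1's
        shape, and a per-dimension offset vector (-1 means "keep")."""
        out = list(shape0)
        for i, off in enumerate(offsets):
            if off >= 0:
                assert shape0[i] - off >= shape1[i], "crop out of bound"
                out[i] = shape1[i]
        return out

    # Example: crop only H and W, leaving N and C untouched.
    # crop_output([1, 5, 5, 3], [1, 3, 3, 3], [-1, 2, 2, -1]) -> [1, 3, 3, 3]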
......@@ -175,7 +175,6 @@ SerialNet::SerialNet(const OpRegistryBase *op_registry,
// NHWC -> NCHW
input_shape =
TransposeShape<index_t, index_t>(input_shape, {0, 3, 1, 2});
input_data_format = DataFormat::NCHW;
}
}
}
......
......@@ -15,21 +15,34 @@
#include <memory>
#include "mace/core/operator.h"
#include "mace/utils/math.h"
#include "mace/utils/memory.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/ops/opencl/image/crop.h"
#endif // MACE_ENABLE_OPENCL
#include "mace/utils/memory.h"
namespace mace {
namespace ops {
template <DeviceType D, class T>
class CropOp : public Operation {
class CropOp;
template <class T>
class CropOp<DeviceType::CPU, T> : public Operation {
public:
explicit CropOp(OpConstructContext *context)
: Operation(context),
axis_(Operation::GetOptionalArg<int>("axis", 2)),
offset_(Operation::GetRepeatedArgs<int>("offset")) {}
offset_(Operation::GetRepeatedArgs<int>("offset")) {
MACE_CHECK(offset_.size() == 4,
"crop op only supports 4-dims inputs now.");
auto has_df = Operation::GetOptionalArg<int>(
"has_data_format", 0);
if (has_df) {
// NHWC -> NCHW
offset_ = TransposeShape<int, int>(offset_, {0, 3, 1, 2});
}
}
MaceStatus Run(OpContext *context) override {
MACE_UNUSED(context);
......@@ -47,21 +60,13 @@ class CropOp : public Operation {
std::vector<index_t> output_shape(input0->shape());
for (index_t i = 0; i < in0_dims; ++i) {
int32_t crop_offset = 0;
index_t new_size = input0->dim(i);
if (i >= axis_) {
new_size = input1->dim(i);
if (offset_.size() == 1) {
crop_offset = offset_[0];
} else if (offset_.size() > 1) {
crop_offset = offset_[i - axis_];
}
MACE_CHECK(input0->dim(i) - crop_offset >= input1->dim(i))
<< "the crop for dimension" << i << "is out of bound with size"
<< input1->dim(i) << "and offset" << crop_offset;
if (offset_[i] >= 0) {
output_shape[i] = input1->dim(i);
offsets[i] = offset_[i];
MACE_CHECK(input0->dim(i) - offset_[i] >= input1->dim(i))
<< "the crop for dimension " << i << " is out of bound with size "
<< input1->dim(i) << " and offset " << offsets[i];
}
output_shape[i] = new_size;
offsets[i] = crop_offset;
}
MACE_RETURN_IF_ERROR(output->Resize(output_shape));
T *output_data = output->mutable_data<T>();
......@@ -103,7 +108,6 @@ class CropOp : public Operation {
}
private:
const int axis_;
std::vector<int> offset_;
};
......@@ -113,10 +117,9 @@ class CropOp<DeviceType::GPU, T> : public Operation {
public:
explicit CropOp(OpConstructContext *context)
: Operation(context) {
const int axis = Operation::GetOptionalArg<int>("axis", 2);
if (context->device()->gpu_runtime()->UseImageMemory()) {
kernel_ = make_unique<opencl::image::CropKernel<T>>(
axis, Operation::GetRepeatedArgs<int>("offset"));
Operation::GetRepeatedArgs<int>("offset"));
} else {
MACE_NOT_IMPLEMENTED;
}
......
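Because the converter emits crop offsets in NHWC order while the CPU kernel works in NCHW, the constructor above permutes them with {0, 3, 1, 2}. A rough illustration of that permutation (a hypothetical stand-in for MACE's TransposeShape, not the real template):

    def transpose_shape(values, perm):
        # new[i] = old[perm[i]]; the permutation {0, 3, 1, 2} maps NHWC -> NCHW
        return [values[p] for p in perm]

    # NHWC offsets [-1, 2, 2, -1] (no crop on N/C, offset 2 on H/W)
    # become NCHW offsets [-1, -1, 2, 2].
    assert transpose_shape([-1, 2, 2, -1], [0, 3, 1, 2]) == [-1, -1, 2, 2]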
......@@ -21,107 +21,80 @@ namespace test {
namespace {
template <DeviceType D, typename T>
void CropHelper(int iters, int crop_axis, int dim1, int offset) {
void CropHelper(int iters,
const std::vector<index_t> &shape0,
const std::vector<index_t> &shape1,
int crop_axis,
int offset) {
mace::testing::StopTiming();
OpsTestNet net;
OpDefBuilder("Crop", "CropBM")
.Input("Input0")
.Input("Input1")
.AddIntArg("axis", crop_axis)
.AddIntsArg("offset", {offset})
.Output("Output")
.Finalize(net.NewOperatorDef());
// Add input data
const int kDim0 = 100;
net.AddRandomInput<DeviceType::CPU, T>("Input0", {1, kDim0, dim1, dim1, });
net.AddRandomInput<DeviceType::CPU, T>("Input1",
{1, kDim0 / 2, dim1 / 2, dim1 / 2});
std::vector<int> offsets(4, -1);
// Warm-up
for (int i = 0; i < 5; ++i) {
net.RunOp(D);
for (int i = crop_axis; i < 4; ++i) {
offsets[i] = offset;
}
const int64_t tot = static_cast<int64_t>(iters) * kDim0 * dim1 * dim1;
testing::BytesProcessed(tot * sizeof(T));
mace::testing::StartTiming();
while (iters--) {
net.RunOp(D);
}
}
} // namespace
#define MACE_BM_CROP_CPU_MACRO(AXIS, DIM, OFFSET) \
static void MACE_BM_CROP_CPU_##AXIS##_##DIM##_##OFFSET(int iters) { \
CropHelper<DeviceType::CPU, float>(iters, AXIS, DIM, OFFSET); \
} \
MACE_BENCHMARK(MACE_BM_CROP_CPU_##AXIS##_##DIM##_##OFFSET)
MACE_BM_CROP_CPU_MACRO(1, 256, 3);
MACE_BM_CROP_CPU_MACRO(2, 256, 3);
MACE_BM_CROP_CPU_MACRO(3, 512, 3);
MACE_BM_CROP_CPU_MACRO(2, 512, 6);
namespace {
template <typename T>
void OpenCLCropHelper(int iters,
const std::vector<index_t> &shape0,
const std::vector<index_t> &shape1,
int crop_axis,
int offset) {
mace::testing::StopTiming();
OpsTestNet net;
// Add input data
net.AddRandomInput<DeviceType::GPU, float>("Input0", shape0);
net.AddRandomInput<DeviceType::GPU, float>("Input1", shape1);
if (D == DeviceType::CPU) {
auto input_shape0 = TransposeShape<index_t, index_t>(shape0, {0, 3, 1, 2});
auto input_shape1 = TransposeShape<index_t, index_t>(shape1, {0, 3, 1, 2});
net.AddRandomInput<D, float>("Input0", input_shape0);
net.AddRandomInput<D, float>("Input1", input_shape1);
} else if (D == DeviceType::GPU) {
// Add input data
net.AddRandomInput<D, T>("Input0", shape0);
net.AddRandomInput<D, T>("Input1", shape1);
} else {
MACE_NOT_IMPLEMENTED;
}
OpDefBuilder("Crop", "CropBM")
.Input("Input0")
.Input("Input1")
.AddIntArg("axis", crop_axis)
.AddIntsArg("offset", {offset})
.AddIntsArg("offset", offsets)
.AddIntArg("has_data_format", 1)
.Output("Output")
.AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
.Finalize(net.NewOperatorDef());
// Warm-up
for (int i = 0; i < 5; ++i) {
net.RunOp(DeviceType::GPU);
net.Setup(D);
for (int i = 0; i < 1; ++i) {
net.Run();
}
const int64_t tot =
static_cast<int64_t>(iters) *
(net.GetTensor("Input0")->size() + net.GetTensor("Input1")->size());
(net.GetTensor("Input0")->size());
testing::BytesProcessed(tot * sizeof(T));
mace::testing::StartTiming();
while (iters--) {
net.RunOp(DeviceType::GPU);
net.Run();
}
}
} // namespace
#define MACE_BM_CROP_GPU_MACRO(N, H, W, C, AXIS, OFFSET, TYPE) \
static void MACE_BM_CROP_GPU_##N##_##H##_##W##_##C##_##AXIS##_##OFFSET##\
_##TYPE(int iters) { \
std::vector<index_t> shape0 = {N, H, W, C}; \
std::vector<index_t> shape1 = {N / 2, H / 2, W / 2, C / 2}; \
OpenCLCropHelper<TYPE>(iters, shape0, shape1, AXIS, OFFSET); \
} \
MACE_BENCHMARK(MACE_BM_CROP_GPU_##N##_##H##_##W##_##C##_##AXIS##_##OFFSET\
##_##TYPE)
MACE_BM_CROP_GPU_MACRO(4, 32, 32, 32, 2, 4, float);
MACE_BM_CROP_GPU_MACRO(8, 32, 32, 64, 1, 0, float);
MACE_BM_CROP_GPU_MACRO(8, 32, 32, 128, 0, 0, float);
MACE_BM_CROP_GPU_MACRO(8, 32, 32, 256, 2, 4, float);
#define MACE_BM_CROP_MACRO(N, H, W, C, AXIS, OFFSET, DEVICE, TYPE) \
static void MACE_BM_CROP_##N##_##H##_##W##_##C##_##AXIS##_##OFFSET## \
_##DEVICE##_##TYPE(int iters) { \
std::vector<index_t> shape0 = {N, H, W, C}; \
std::vector<index_t> shape1 = {N / 2, H / 2, W / 2, C / 2}; \
CropHelper<DEVICE, TYPE>(iters, shape0, shape1, AXIS, OFFSET); \
} \
MACE_BENCHMARK(MACE_BM_CROP_##N##_##H##_##W##_##C##_##AXIS##_##OFFSET\
##_##DEVICE##_##TYPE)
#define MACE_BM_CROP(N, H, W, C, AXIS, OFFSET) \
MACE_BM_CROP_MACRO(N, H, W, C, AXIS, OFFSET, CPU, float); \
MACE_BM_CROP_MACRO(N, H, W, C, AXIS, OFFSET, GPU, float); \
MACE_BM_CROP_MACRO(N, H, W, C, AXIS, OFFSET, GPU, half);
MACE_BM_CROP(4, 32, 32, 32, 2, 4);
MACE_BM_CROP(8, 32, 32, 64, 1, 0);
MACE_BM_CROP(8, 32, 32, 128, 0, 0);
MACE_BM_CROP(8, 32, 32, 256, 2, 4);
MACE_BM_CROP_GPU_MACRO(4, 32, 32, 32, 2, 4, half);
MACE_BM_CROP_GPU_MACRO(8, 32, 32, 64, 1, 0, half);
MACE_BM_CROP_GPU_MACRO(8, 32, 32, 128, 0, 0, half);
MACE_BM_CROP_GPU_MACRO(8, 32, 32, 256, 2, 4, half);
} // namespace test
} // namespace ops
......
......@@ -26,7 +26,6 @@ void RunCrop(const std::vector<index_t> &input_shape,
const std::vector<float> &input_data,
const std::vector<index_t> &input_shape2,
const std::vector<int> &offset,
const int axis,
const std::vector<index_t> &expected_shape,
const std::vector<float> &expected_data) {
OpsTestNet net;
......@@ -39,7 +38,7 @@ void RunCrop(const std::vector<index_t> &input_shape,
.Input("Input1")
.Output("Output")
.AddIntsArg("offset", offset)
.AddIntArg("axis", axis)
.AddIntArg("has_data_format", 1)
.Finalize(net.NewOperatorDef());
} else if (D == CPU) {
net.TransformDataFormat<DeviceType::CPU, float>("Input0",
......@@ -55,7 +54,7 @@ void RunCrop(const std::vector<index_t> &input_shape,
.Input("InputNCHW1")
.Output("OutputNCHW")
.AddIntsArg("offset", offset)
.AddIntArg("axis", axis)
.AddIntArg("has_data_format", 1)
.Finalize(net.NewOperatorDef());
}
......@@ -113,7 +112,7 @@ TEST_F(CropTest, SimpleCPU) {
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0,
3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0,
4.0, 4.0, 4.0}, {1, 5, 5, 3}, {2, 2}, 2,
4.0, 4.0, 4.0}, {1, 5, 5, 3}, {-1, 2, 2, -1},
{1, 5, 5, 3},
{1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0,
2.0, 2.0, 2.0, 3.0, 3.0, 3.0,
......@@ -168,7 +167,7 @@ TEST_F(CropTest, SimpleGPU) {
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0,
3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0,
4.0, 4.0, 4.0}, {1, 5, 5, 3}, {2, 2}, 2,
4.0, 4.0, 4.0}, {1, 5, 5, 3}, {-1, 2, 2, -1},
{1, 5, 5, 3},
{1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0,
2.0, 2.0, 2.0, 3.0, 3.0, 3.0,
......
......@@ -34,16 +34,14 @@ template <typename T>
class CropKernel : public OpenCLCropKernel {
public:
explicit CropKernel(
const int axis,
const std::vector<int> &offset)
: axis_(axis), offset_(offset) {}
: offset_(offset) {}
MaceStatus Compute(
OpContext *context,
const std::vector<const Tensor *> &input_list,
Tensor *output) override;
private:
const int axis_;
std::vector<int> offset_;
cl::Kernel kernel_;
uint32_t kwg_size_;
......@@ -68,57 +66,14 @@ MaceStatus CropKernel<T>::Compute(
std::vector<int32_t> offsets(4, 0);
std::vector<index_t> output_shape(input0->shape());
switch (axis_) {
case 0:
if (offset_.size() == 1) {
offsets[0] = offset_[0];
offsets[1] = offset_[0];
offsets[2] = offset_[0];
offsets[3] = offset_[0];
} else if (offset_.size() == 4) {
offsets[0] = offset_[0];
offsets[1] = offset_[2];
offsets[2] = offset_[3];
offsets[3] = offset_[1];
}
for (int i = 0; i < 4; ++i) {
output_shape[i] = input1->dim(i);
}
break;
case 1:
if (offset_.size() == 1) {
offsets[1] = offset_[0];
offsets[2] = offset_[0];
offsets[3] = offset_[0];
} else if (offset_.size() == 3) {
offsets[1] = offset_[1];
offsets[2] = offset_[2];
offsets[3] = offset_[0];
}
for (int i = 1; i < 4; ++i) {
output_shape[i] = input1->dim(i);
}
break;
case 2:
if (offset_.size() == 1) {
offsets[1] = offset_[0];
offsets[2] = offset_[0];
} else if (offset_.size() == 2) {
offsets[1] = offset_[0];
offsets[2] = offset_[1];
}
output_shape[1] = input1->dim(1);
output_shape[2] = input1->dim(2);
break;
case 3:
if (offset_.size() == 1) {
offsets[2] = offset_[0];
}
output_shape[2] = input1->dim(2);
break;
default:
MACE_CHECK(axis_ >= 0 && axis_ < 4, "axis is out of boundary.");
break;
for (index_t i = 0; i < in0_dims; ++i) {
if (offset_[i] >= 0) {
output_shape[i] = input1->dim(i);
offsets[i] = offset_[i];
MACE_CHECK(input0->dim(i) - offset_[i] >= input1->dim(i))
<< "the crop for dimension " << i << " is out of bound with size "
<< input1->dim(i) << " and offset " << offsets[i];
}
}
MACE_CHECK(offsets[3] % 4 == 0,
"MACE opencl only supports cropping channel"
......
......@@ -552,18 +552,20 @@ class CaffeConverter(base_converter.ConverterInterface):
param = caffe_op.layer.crop_param
op.type = MaceOp.Crop.name
axis_arg = op.arg.add()
axis_arg.name = MaceKeyword.mace_axis_str
axis_arg.i = 2
if param.HasField(MaceKeyword.mace_axis_str):
axis_arg.i = param.axis
axis_arg.i = 4 + axis_arg.i if axis_arg.i < 0 else axis_arg.i
axis = param.axis
axis = 4 + axis if axis < 0 else axis
offset_value = -1 * np.ones(4, dtype=np.int32)
offset_len = len(param.offset)
if offset_len == 1:
while axis < 4:
offset_value[axis] = param.offset[0]
axis += 1
else:
offset_value[axis:] = param.offset
offset_arg = op.arg.add()
offset_arg.name = MaceKeyword.mace_offset_str
if len(param.offset) > 0:
offset_arg.ints.extend(list(param.offset))
else:
offset_arg.i = 0
offset_arg.ints.extend(offset_value)
def convert_concat(self, caffe_op):
op = self.convert_general_op(caffe_op)
......
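The converter now always emits a 4-element offset vector: dimensions before the (possibly negative) Caffe axis get -1, and dimensions from the axis onward take the Caffe offsets, broadcasting a single offset across the remaining dimensions when only one is given. A standalone sketch of that mapping (simplified, assumes 4-D inputs; names are illustrative):

    import numpy as np

    def caffe_crop_offsets(axis, offsets):
        axis = 4 + axis if axis < 0 else axis
        offset_value = -1 * np.ones(4, dtype=np.int32)
        if len(offsets) == 1:
            offset_value[axis:] = offsets[0]
        else:
            offset_value[axis:] = offsets
        return offset_value

    # Caffe's default axis=2 with offset=[2, 2] crops only H and W:
    # caffe_crop_offsets(2, [2, 2]) -> array([-1, -1, 2, 2], dtype=int32)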
......@@ -224,7 +224,12 @@ class ShapeInference(object):
def infer_shape_crop(self, op):
mace_check(len(op.input) == 2, "crop layer needs two inputs")
output_shape = self._output_shape_cache[op.input[1]]
output_shape = self._output_shape_cache[op.input[0]]
input1_shape = self._output_shape_cache[op.input[1]]
offsets = ConverterUtil.get_arg(op, MaceKeyword.mace_offset_str).ints
for i in range(len(offsets)):
if offsets[i] >= 0:
output_shape[i] = input1_shape[i]
self.add_output_shape(op, [output_shape])
def infer_shape_channel_shuffle(self, op):
......
......@@ -1012,7 +1012,8 @@ class Transformer(base_converter.ConverterInterface):
elif filter_format == DataFormat.OIHW:
weight.dims[:] = weight.dims[:] + [1, 1]
else:
mace_check("FC does not support filter format %s",
mace_check(False,
"FC does not support filter format %s" %
filter_format.name)
return False
......@@ -1084,6 +1085,16 @@ class Transformer(base_converter.ConverterInterface):
new_axises.sort()
arg.ints[:] = []
arg.ints.extend(new_axises)
elif op.type == MaceOp.Crop.name:
offset_arg = ConverterUtil.get_arg(op,
MaceKeyword.mace_offset_str)
mace_check(offset_arg and
ConverterUtil.data_format(op) == DataFormat.NCHW and
len(op.output_shape[0].dims) == 4,
"MACE only support crop with NCHW format")
print("Transpose crop args: %s(%s)"
% (op.name, op.type))
self.transpose_shape(offset_arg.ints, [0, 2, 3, 1])
# transpose op output shape
data_format = ConverterUtil.data_format(op)
......@@ -1147,7 +1158,7 @@ class Transformer(base_converter.ConverterInterface):
elif filter_format == DataFormat.OIHW:
transpose_order = [0, 2, 3, 1]
else:
mace_check("Quantize model does not support conv "
mace_check(False, "Quantize model does not support conv "
"filter format: %s" % filter_format.name)
for op in net.op:
......
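Two complementary permutations are involved: the transformer above converts the NCHW-ordered offsets coming from Caffe into NHWC with [0, 2, 3, 1], and the CPU CropOp constructor converts them back with {0, 3, 1, 2} when the graph runs in NCHW. The two are inverses, which a short check makes clear (illustrative only):

    nchw = [-1, -1, 2, 2]                           # offsets in N, C, H, W order
    nhwc = [nchw[p] for p in [0, 2, 3, 1]]          # transformer: NCHW -> NHWC
    assert [nhwc[p] for p in [0, 3, 1, 2]] == nchw  # CropOp ctor: back to NCHW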