Commit 3a424351 authored by liuqi

Refactor: Polish the Eltwise code and add some UTs.

Parent 9d3e2cc5
......@@ -14,6 +14,7 @@
#include <vector>
#include "mace/ops/common/conv_pool_2d_util.h"
#include "mace/ops/eltwise.h"
#include "mace/ops/ops_test_util.h"
......@@ -531,6 +532,100 @@ TEST_F(EltwiseOpTest, GPUSimpleTensorTensor) {
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {0, 0, 0, 0, 0, 25, 25, 25, 25, 25});
}
namespace {
// Runs an Eltwise op on the GPU and feeds its result through a 3x3 all-ones
// SAME-padding Conv2D, so that any overflow/precision error in the Eltwise
// GPU kernel is amplified and shows up in the convolved output.
//
// type         - element-wise operation under test.
// shape0/input0, shape1/input1 - the two Eltwise inputs (NHWC shapes).
// output_shape - shape of the Conv2D output (its last dim is the number of
//                conv output channels).
// output       - expected values AFTER the convolution, compared with a
//                tolerance of 1e-5.
template <typename T>
void GPUOverflowTest(const ops::EltwiseType type,
                     const std::vector<index_t> &shape0,
                     const std::vector<T> &input0,
                     const std::vector<index_t> &shape1,
                     const std::vector<T> &input1,
                     const std::vector<index_t> &output_shape,
                     const std::vector<T> &output) {
  // Construct graph
  OpsTestNet net;

  // Add input data
  net.AddInputFromArray<DeviceType::GPU, T>("Input0", shape0, input0);
  net.AddInputFromArray<DeviceType::GPU, T>("Input1", shape1, input1);

  OpDefBuilder("Eltwise", "EltwiseTest")
      .AddIntArg("T", DataTypeToEnum<T>::v())
      .Input("Input0")
      .Input("Input1")
      .AddIntArg("type", static_cast<int>(type))
      .OutputType({ops::IsLogicalType(type) ? DT_INT32 : DT_FLOAT})
      .Output("EltOutput")
      .OutputShape(output_shape)
      .Finalize(net.AddNewOperatorDef());

  // All-ones OIHW filter: the conv output is simply the sum over the 3x3
  // neighborhood across all input channels.
  // FIX: build the filter data as std::vector<T> (was std::vector<float>),
  // so the helper also compiles/behaves correctly when instantiated with
  // T != float (e.g. half).
  net.AddInputFromArray<DeviceType::GPU, T>(
      "Filter",
      {output_shape.back(), shape0.back(), 3, 3},
      std::vector<T>(output_shape.back() * shape0.back() * 9, 1));

  OpDefBuilder("Conv2D", "Conv2D")
      .AddIntArg("T", DataTypeToEnum<T>::v())
      .Input("EltOutput")
      .Input("Filter")
      .Output("Output")
      .OutputShape(output_shape)
      .AddIntsArg("strides", {1, 1})
      .AddIntArg("padding", Padding::SAME)
      .AddIntsArg("dilations", {1, 1})
      .Finalize(net.AddNewOperatorDef());

  // Run
  net.RunOp(DeviceType::GPU);

  auto expected = net.CreateTensor<T>(output_shape, output);
  ExpectTensorNear<T>(*expected, *net.GetOutput("Output"), 1e-5);
}
}  // namespace
TEST_F(EltwiseOpTest, GPUOverflowTest) {
  // Each case runs the element-wise op on GPU followed by a 3x3 all-ones
  // SAME convolution; the expected values are the post-convolution results.
  struct OverflowCase {
    ops::EltwiseType type;
    std::vector<index_t> shape0;
    std::vector<float> input0;
    std::vector<index_t> shape1;
    std::vector<float> input1;
    std::vector<index_t> out_shape;
    std::vector<float> expected;
  };
  const std::vector<OverflowCase> overflow_cases = {
      {ops::EltwiseType::SUM, {1, 2, 2, 2}, std::vector<float>(8, 1),
       {1, 1, 1, 2}, {1, 1},
       {1, 2, 2, 1}, {16, 16, 16, 16}},
      {ops::EltwiseType::SUB, {2, 2, 2, 2}, std::vector<float>(16, 1),
       {2, 1, 1, 2}, {1, 1, 2, 2},
       {2, 2, 2, 1}, {0, 0, 0, 0, -8, -8, -8, -8}},
      {ops::EltwiseType::PROD, {1, 3, 2, 1}, std::vector<float>(6, 1),
       {1, 3, 2, 1}, std::vector<float>(6, 1),
       {1, 3, 2, 1}, {4, 4, 6, 6, 4, 4}},
      {ops::EltwiseType::DIV, {2, 3, 2, 1}, std::vector<float>(12, 1),
       {2, 3, 2, 1}, std::vector<float>(12, 1),
       {2, 3, 2, 1}, {4, 4, 6, 6, 4, 4, 4, 4, 6, 6, 4, 4}},
      {ops::EltwiseType::MIN, {1, 2, 2, 2}, std::vector<float>(8, 1),
       {1, 1, 1, 2}, {1, 1},
       {1, 2, 2, 1}, {8, 8, 8, 8}},
      {ops::EltwiseType::MAX, {2, 2, 2, 2}, std::vector<float>(16, 1),
       {2, 1, 1, 2}, {1, 1, 2, 2},
       {2, 2, 2, 1}, {8, 8, 8, 8, 16, 16, 16, 16}},
      {ops::EltwiseType::NEG, {1, 3, 2, 1}, std::vector<float>(6, 1),
       {1, 1, 1, 1}, {0},
       {1, 3, 2, 1}, {-4, -4, -6, -6, -4, -4}},
      {ops::EltwiseType::ABS, {2, 3, 2, 1}, std::vector<float>(12, -1),
       {1, 1, 1, 1}, {0},
       {2, 3, 2, 1}, {4, 4, 6, 6, 4, 4, 4, 4, 6, 6, 4, 4}},
      {ops::EltwiseType::SQR_DIFF, {2, 2, 2, 2}, std::vector<float>(16, 1),
       {2, 1, 1, 2}, {1, 1, 2, 2},
       {2, 2, 2, 1}, {0, 0, 0, 0, 8, 8, 8, 8}},
      {ops::EltwiseType::POW, {1, 3, 2, 1}, std::vector<float>(6, 1),
       {1, 3, 2, 1}, std::vector<float>(6, 1),
       {1, 3, 2, 1}, {4, 4, 6, 6, 4, 4}},
      {ops::EltwiseType::FLOOR_DIV, {2, 2, 2, 2}, std::vector<float>(16, 1),
       {2, 1, 1, 2}, {1, 1, 2, 2},
       {2, 2, 2, 1}, {8, 8, 8, 8, 0, 0, 0, 0}},
  };
  for (const auto &c : overflow_cases) {
    GPUOverflowTest<float>(c.type, c.shape0, c.input0, c.shape1, c.input1,
                           c.out_shape, c.expected);
  }
}
namespace {
template <typename T>
void RandomTensorScalar(const ops::EltwiseType type,
......
......@@ -3,7 +3,7 @@
__kernel void eltwise(OUT_OF_RANGE_PARAMS
GLOBAL_WORK_GROUP_SIZE_DIM3
__read_only image2d_t input0,
#if INPUT_TYPE == 1
#if defined(INPUT_SCALAR)
__private const float value,
#else
__read_only image2d_t input1,
......@@ -28,14 +28,14 @@ __kernel void eltwise(OUT_OF_RANGE_PARAMS
const int pos = mad24(chan_idx, width, width_idx);
DATA_TYPE4 in0 = READ_IMAGET(input0, SAMPLER, (int2)(pos, hb));
#if INPUT_TYPE == 1
#if defined(INPUT_SCALAR)
DATA_TYPE4 in1 = (DATA_TYPE4)(value, value, value, value);
#elif INPUT_TYPE == 2
#elif defined(INPUT_VECTOR)
DATA_TYPE4 in1 = READ_IMAGET(input1, SAMPLER, (int2)(chan_idx, 0));
#elif defined(INPUT_BATCH_VECTOR)
const int batch_idx = hb / height;
DATA_TYPE4 in1 = READ_IMAGET(input1, SAMPLER, (int2)(chan_idx, batch_idx));
#elif INPUT_TYPE == 3
DATA_TYPE4 in1 = READ_IMAGET(input1, SAMPLER, (int2)(chan_idx, 0));
#elif INPUT_TYPE == 4
#elif defined(INPUT_TENSOR_BC_CHAN)
DATA_TYPE4 tmp = READ_IMAGET(input1, SAMPLER, (int2)(width_idx, hb));
DATA_TYPE4 in1 = (DATA_TYPE4)(tmp.x, tmp.x, tmp.x, tmp.x);
#else
......@@ -89,11 +89,11 @@ __kernel void eltwise(OUT_OF_RANGE_PARAMS
#endif
#endif
#if ((INPUT_TYPE == 1 || INPUT_TYPE == 4) && \
(ELTWISE_TYPE == 0 || ELTWISE_TYPE == 1 || ELTWISE_TYPE == 4 || \
ELTWISE_TYPE == 5 || ELTWISE_TYPE == 8 || ELTWISE_TYPE == 9)) || \
((INPUT_TYPE != 1 || INPUT_TYPE != 4) && \
(ELTWISE_TYPE == 3 || ELTWISE_TYPE == 9 || ELTWISE_TYPE == 11))
#if defined(NOT_DIVISIBLE_FOUR) && \
((ELTWISE_TYPE == 3 || ELTWISE_TYPE == 9 || ELTWISE_TYPE == 11) \
|| ((defined(INPUT_SCALAR) || defined(INPUT_TENSOR_BC_CHAN)) && \
(ELTWISE_TYPE == 0 || ELTWISE_TYPE == 1 || ELTWISE_TYPE == 4 || \
ELTWISE_TYPE == 5 || ELTWISE_TYPE == 8)))
const int remain_channel = channel - 4 * chan_idx;
if (remain_channel < 4) {
switch (remain_channel) {
......
......@@ -67,25 +67,52 @@ MaceStatus EltwiseKernel<T>::Compute(
const Tensor *input1,
Tensor *output) {
bool swapped = false;
if (input1 != nullptr) {
std::string input1_type = "";
if (input1 == nullptr) {
input1_type = "INPUT_SCALAR";
} else {
MACE_CHECK(input0->dim_size() == input1->dim_size() ||
input0->dim_size() == 1 || input1->dim_size() == 1)
<< "Inputs of Eltwise op must be same shape";
MACE_CHECK(type_ != EltwiseType::EQUAL)
<< "Eltwise op on GPU does not support EQUAL";
// broadcast
if (input0->size() != input1->size()) {
if (input0->size() < input1->size()) {
std::swap(input0, input1);
swapped = true;
}
if (input1->dim_size() == 1) {
MACE_CHECK(input0->dim(3) == input1->dim(0))
<< "Element-Wise op support broadcast on only-channel or non-channel dimension"; // NOLINT(whitespace/line_length)
} else {
MACE_CHECK(((input0->dim(0) == input1->dim(0) || input1->dim(0) == 1)
&& input0->dim(3) == input1->dim(3) && input1->dim(1) == 1 &&
input1->dim(2) == 1) || (input0->dim(0) == input1->dim(0) &&
input0->dim(1) == input1->dim(1) && input0->dim(2) == input1->dim(2)
&& input1->dim(3) == 1))
<< "Element-Wise op support broadcast on only-channel or non-channel dimension"; // NOLINT(whitespace/line_length)
if (input1->dim_size() == 1
|| (input1->dim(0) == 1 && input1->dim(1) == 1
&& input1->dim(2) == 1)) {
// Tensor-Vector element wise
if (input0->dim(3) == input1->dim(input1->dim_size()-1)) {
input1_type = "INPUT_VECTOR";
} else {
LOG(FATAL) << "Inputs not match the broadcast logic, "
<< MakeString(input0->shape()) << " vs "
<< MakeString(input1->shape());
}
} else { // must be 4-D
if (input0->dim(0) == input1->dim(0)
&& input1->dim(1) == 1
&& input1->dim(2) == 1
&& input0->dim(3) == input1->dim(3)) {
input1_type = "INPUT_BATCH_VECTOR";
} else if (input0->dim(0) == input1->dim(0)
&& input0->dim(1) == input1->dim(1)
&& input0->dim(2) == input1->dim(2)
&& input1->dim(3) == 1) {
// broadcast on channel dimension
input1_type = "INPUT_TENSOR_BC_CHAN";
} else {
LOG(FATAL) << "Element-Wise op only support broadcast on"
" channel dimension:"
"Tensor-BatchVector(4D-[N,1,1,C]) "
"and Tensor-Tensor(4D-[N,H,W,1]). but got "
<< MakeString(input0->shape()) << " vs "
<< MakeString(input1->shape());
}
}
}
}
......@@ -129,20 +156,11 @@ MaceStatus EltwiseKernel<T>::Compute(
built_options.emplace("-DDATA_TYPE=" + DtToUpCompatibleCLDt(dt));
built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpCompatibleCLCMDDt(dt));
built_options.emplace(MakeString("-DELTWISE_TYPE=", type_));
if (input1 == nullptr) {
built_options.emplace("-DINPUT_TYPE=1");
} else if (input0->size() != input1->size()) {
if (input0->dim(0) == input1->dim(0) && input0->dim(1) == input1->dim(1)
&& input0->dim(2) == input1->dim(2) && input1->dim(3) == 1) {
// only broadcast on channel
built_options.emplace("-DINPUT_TYPE=4");
} else if (input1->dim(0) == 1 || input1->dim_size() == 1) {
built_options.emplace("-DINPUT_TYPE=3");
} else {
built_options.emplace("-DINPUT_TYPE=2");
}
if (swapped) built_options.emplace("-DSWAPPED");
if (!input1_type.empty()) {
built_options.emplace("-D" + input1_type);
}
if (swapped) built_options.emplace("-DSWAPPED");
if (channels % 4 != 0) built_options.emplace("-DNOT_DIVISIBLE_FOUR");
if (!coeff_.empty()) built_options.emplace("-DCOEFF_SUM");
MACE_RETURN_IF_ERROR(runtime->BuildKernel("eltwise", kernel_name,
built_options, &kernel_));
......
......@@ -159,16 +159,16 @@ void OpTestContext::SetOCLImageAndBufferTestFlag() {
bool OpsTestNet::Setup(mace::DeviceType device) {
NetDef net_def;
for (auto &op_def_ : op_defs_) {
net_def.add_op()->CopyFrom(op_def_);
for (auto &op_def : op_defs_) {
net_def.add_op()->CopyFrom(op_def);
for (auto input : op_def_.input()) {
for (auto input : op_def.input()) {
if (ws_.GetTensor(input) != nullptr &&
!ws_.GetTensor(input)->is_weight()) {
auto input_info = net_def.add_input_info();
input_info->set_name(input);
auto data_format = ProtoArgHelper::GetOptionalArg<OperatorDef, int>(
op_def_, "data_format", DataFormat::DF_NONE);
op_def, "data_format", DataFormat::DF_NONE);
input_info->set_data_format(data_format);
auto &shape = ws_.GetTensor(input)->shape();
for (auto d : shape) {
......@@ -176,16 +176,16 @@ bool OpsTestNet::Setup(mace::DeviceType device) {
}
}
}
for (int i = 0; i < op_def_.output_size(); ++i) {
ws_.RemoveTensor(op_def_.output(i));
auto output_info = net_def.add_output_info();
output_info->set_name(op_def_.output(i));
if (op_def_.output_type_size() == op_def_.output_size()) {
output_info->set_data_type(op_def_.output_type(i));
} else {
output_info->set_data_type(DataType::DT_FLOAT);
}
}
auto op_def = op_defs_.back();
for (int i = 0; i < op_def.output_size(); ++i) {
ws_.RemoveTensor(op_def.output(i));
auto output_info = net_def.add_output_info();
output_info->set_name(op_def.output(i));
if (op_def.output_type_size() == op_def.output_size()) {
output_info->set_data_type(op_def.output_type(i));
} else {
output_info->set_data_type(DataType::DT_FLOAT);
}
}
MemoryOptimizer mem_optimizer;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
Please finish editing this message first!
To comment, please register.