提交 a8b3c4f4 编写于 作者: 刘托

Merge branch 'refactor-eltwise' into 'master'

Refactor: Polish the Eltwise code and add some UTs.

See merge request !978
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include <vector> #include <vector>
#include "mace/ops/common/conv_pool_2d_util.h"
#include "mace/ops/eltwise.h" #include "mace/ops/eltwise.h"
#include "mace/ops/ops_test_util.h" #include "mace/ops/ops_test_util.h"
...@@ -531,6 +532,100 @@ TEST_F(EltwiseOpTest, GPUSimpleTensorTensor) { ...@@ -531,6 +532,100 @@ TEST_F(EltwiseOpTest, GPUSimpleTensorTensor) {
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {0, 0, 0, 0, 0, 25, 25, 25, 25, 25}); {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {0, 0, 0, 0, 0, 25, 25, 25, 25, 25});
} }
namespace {
// Runs an Eltwise op on the GPU and feeds its output through an all-ones
// 3x3 SAME-padding Conv2D, then compares the conv result against `output`.
// Chaining a second op makes intermediate overflow / precision problems in
// the Eltwise GPU kernel visible in the final tensor.
//
// type         - element-wise operation under test
// shape0/input0, shape1/input1 - the two Eltwise operands (NHWC)
// output_shape/output          - expected shape/values AFTER the Conv2D
template <typename T>
void GPUOverflowTest(const ops::EltwiseType type,
                     const std::vector<index_t> &shape0,
                     const std::vector<T> &input0,
                     const std::vector<index_t> &shape1,
                     const std::vector<T> &input1,
                     const std::vector<index_t> &output_shape,
                     const std::vector<T> &output) {
  // Construct graph
  OpsTestNet net;
  // Add input data
  net.AddInputFromArray<DeviceType::GPU, T>("Input0", shape0, input0);
  net.AddInputFromArray<DeviceType::GPU, T>("Input1", shape1, input1);
  OpDefBuilder("Eltwise", "EltwiseTest")
      .AddIntArg("T", DataTypeToEnum<T>::v())
      .Input("Input0")
      .Input("Input1")
      .AddIntArg("type", static_cast<int>(type))
      .OutputType({ops::IsLogicalType(type) ? DT_INT32 : DT_FLOAT})
      .Output("EltOutput")
      .OutputShape(output_shape)
      .Finalize(net.AddNewOperatorDef());
  // All-ones OIHW filter. Use std::vector<T> (not hard-coded float) so the
  // helper stays type-consistent for every instantiation of T.
  net.AddInputFromArray<DeviceType::GPU, T>(
      "Filter",
      {output_shape.back(), shape0.back(), 3, 3},
      std::vector<T>(output_shape.back() * shape0.back() * 9, 1));
  OpDefBuilder("Conv2D", "Conv2D")
      .AddIntArg("T", DataTypeToEnum<T>::v())
      .Input("EltOutput")
      .Input("Filter")
      .Output("Output")
      .OutputShape(output_shape)
      .AddIntsArg("strides", {1, 1})
      .AddIntArg("padding", Padding::SAME)
      .AddIntsArg("dilations", {1, 1})
      .Finalize(net.AddNewOperatorDef());
  // Run
  net.RunOp(DeviceType::GPU);
  auto expected = net.CreateTensor<T>(output_shape, output);
  ExpectTensorNear<T>(*expected, *net.GetOutput("Output"), 1e-5);
}
}  // namespace
// Exercises every supported Eltwise type on the GPU through the
// Eltwise -> Conv2D chain in GPUOverflowTest. Expected values are the
// element-wise result after an all-ones 3x3 SAME conv (corner windows sum
// 4 contributions, edge windows 6 — hence the 4/6 patterns below).
TEST_F(EltwiseOpTest, GPUOverflowTest) {
  // SUM, tensor + vector broadcast: eltwise = 2 everywhere; 2x2 spatial x
  // 2 channels => 4*2*2 = 16 per output element.
  GPUOverflowTest<float>(
      ops::EltwiseType::SUM, {1, 2, 2, 2}, std::vector<float>(8, 1),
      {1, 1, 1, 2}, {1, 1},
      {1, 2, 2, 1}, {16, 16, 16, 16});
  // SUB, batch-vector broadcast: batch 0 subtracts 1s (result 0),
  // batch 1 subtracts 2s (result -1 => conv gives -8).
  GPUOverflowTest<float>(
      ops::EltwiseType::SUB, {2, 2, 2, 2}, std::vector<float>(16, 1),
      {2, 1, 1, 2}, {1, 1, 2, 2},
      {2, 2, 2, 1}, {0, 0, 0, 0, -8, -8, -8, -8});
  // PROD, same-shape tensors: eltwise = 1; 3x2 single-channel conv yields
  // window sizes 4 (corners) and 6 (middle row).
  GPUOverflowTest<float>(
      ops::EltwiseType::PROD, {1, 3, 2, 1}, std::vector<float>(6, 1),
      {1, 3, 2, 1}, std::vector<float>(6, 1),
      {1, 3, 2, 1}, {4, 4, 6, 6, 4, 4});
  // DIV, same-shape tensors: 1/1 = 1, two batches of the 4/6 pattern.
  GPUOverflowTest<float>(
      ops::EltwiseType::DIV, {2, 3, 2, 1}, std::vector<float>(12, 1),
      {2, 3, 2, 1}, std::vector<float>(12, 1),
      {2, 3, 2, 1}, {4, 4, 6, 6, 4, 4, 4, 4, 6, 6, 4, 4});
  // MIN, vector broadcast: min(1, 1) = 1 => 8 per output element.
  GPUOverflowTest<float>(
      ops::EltwiseType::MIN, {1, 2, 2, 2}, std::vector<float>(8, 1),
      {1, 1, 1, 2}, {1, 1},
      {1, 2, 2, 1}, {8, 8, 8, 8});
  // MAX, batch-vector broadcast: batch 0 max = 1 (conv 8), batch 1 max = 2
  // (conv 16).
  GPUOverflowTest<float>(
      ops::EltwiseType::MAX, {2, 2, 2, 2}, std::vector<float>(16, 1),
      {2, 1, 1, 2}, {1, 1, 2, 2},
      {2, 2, 2, 1}, {8, 8, 8, 8, 16, 16, 16, 16});
  // NEG, unary (scalar operand unused): -1 everywhere => negated 4/6 pattern.
  GPUOverflowTest<float>(
      ops::EltwiseType::NEG, {1, 3, 2, 1}, std::vector<float>(6, 1),
      {1, 1, 1, 1}, {0},
      {1, 3, 2, 1}, {-4, -4, -6, -6, -4, -4});
  // ABS, unary: |-1| = 1, two batches of the 4/6 pattern.
  GPUOverflowTest<float>(
      ops::EltwiseType::ABS, {2, 3, 2, 1}, std::vector<float>(12, -1),
      {1, 1, 1, 1}, {0},
      {2, 3, 2, 1}, {4, 4, 6, 6, 4, 4, 4, 4, 6, 6, 4, 4});
  // SQR_DIFF, batch-vector broadcast: batch 0 (1-1)^2 = 0, batch 1
  // (1-2)^2 = 1 => conv 8.
  GPUOverflowTest<float>(
      ops::EltwiseType::SQR_DIFF, {2, 2, 2, 2}, std::vector<float>(16, 1),
      {2, 1, 1, 2}, {1, 1, 2, 2},
      {2, 2, 2, 1}, {0, 0, 0, 0, 8, 8, 8, 8});
  // POW, same-shape tensors: 1^1 = 1 => 4/6 pattern.
  GPUOverflowTest<float>(
      ops::EltwiseType::POW, {1, 3, 2, 1}, std::vector<float>(6, 1),
      {1, 3, 2, 1}, std::vector<float>(6, 1),
      {1, 3, 2, 1}, {4, 4, 6, 6, 4, 4});
  // FLOOR_DIV, batch-vector broadcast: batch 0 floor(1/1) = 1 (conv 8),
  // batch 1 floor(1/2) = 0.
  GPUOverflowTest<float>(
      ops::EltwiseType::FLOOR_DIV, {2, 2, 2, 2}, std::vector<float>(16, 1),
      {2, 1, 1, 2}, {1, 1, 2, 2},
      {2, 2, 2, 1}, {8, 8, 8, 8, 0, 0, 0, 0});
}
namespace { namespace {
template <typename T> template <typename T>
void RandomTensorScalar(const ops::EltwiseType type, void RandomTensorScalar(const ops::EltwiseType type,
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
__kernel void eltwise(OUT_OF_RANGE_PARAMS __kernel void eltwise(OUT_OF_RANGE_PARAMS
GLOBAL_WORK_GROUP_SIZE_DIM3 GLOBAL_WORK_GROUP_SIZE_DIM3
__read_only image2d_t input0, __read_only image2d_t input0,
#if INPUT_TYPE == 1 #if defined(INPUT_SCALAR)
__private const float value, __private const float value,
#else #else
__read_only image2d_t input1, __read_only image2d_t input1,
...@@ -28,14 +28,14 @@ __kernel void eltwise(OUT_OF_RANGE_PARAMS ...@@ -28,14 +28,14 @@ __kernel void eltwise(OUT_OF_RANGE_PARAMS
const int pos = mad24(chan_idx, width, width_idx); const int pos = mad24(chan_idx, width, width_idx);
DATA_TYPE4 in0 = READ_IMAGET(input0, SAMPLER, (int2)(pos, hb)); DATA_TYPE4 in0 = READ_IMAGET(input0, SAMPLER, (int2)(pos, hb));
#if INPUT_TYPE == 1 #if defined(INPUT_SCALAR)
DATA_TYPE4 in1 = (DATA_TYPE4)(value, value, value, value); DATA_TYPE4 in1 = (DATA_TYPE4)(value, value, value, value);
#elif INPUT_TYPE == 2 #elif defined(INPUT_VECTOR)
DATA_TYPE4 in1 = READ_IMAGET(input1, SAMPLER, (int2)(chan_idx, 0));
#elif defined(INPUT_BATCH_VECTOR)
const int batch_idx = hb / height; const int batch_idx = hb / height;
DATA_TYPE4 in1 = READ_IMAGET(input1, SAMPLER, (int2)(chan_idx, batch_idx)); DATA_TYPE4 in1 = READ_IMAGET(input1, SAMPLER, (int2)(chan_idx, batch_idx));
#elif INPUT_TYPE == 3 #elif defined(INPUT_TENSOR_BC_CHAN)
DATA_TYPE4 in1 = READ_IMAGET(input1, SAMPLER, (int2)(chan_idx, 0));
#elif INPUT_TYPE == 4
DATA_TYPE4 tmp = READ_IMAGET(input1, SAMPLER, (int2)(width_idx, hb)); DATA_TYPE4 tmp = READ_IMAGET(input1, SAMPLER, (int2)(width_idx, hb));
DATA_TYPE4 in1 = (DATA_TYPE4)(tmp.x, tmp.x, tmp.x, tmp.x); DATA_TYPE4 in1 = (DATA_TYPE4)(tmp.x, tmp.x, tmp.x, tmp.x);
#else #else
...@@ -89,11 +89,11 @@ __kernel void eltwise(OUT_OF_RANGE_PARAMS ...@@ -89,11 +89,11 @@ __kernel void eltwise(OUT_OF_RANGE_PARAMS
#endif #endif
#endif #endif
#if ((INPUT_TYPE == 1 || INPUT_TYPE == 4) && \ #if defined(NOT_DIVISIBLE_FOUR) && \
(ELTWISE_TYPE == 0 || ELTWISE_TYPE == 1 || ELTWISE_TYPE == 4 || \ ((ELTWISE_TYPE == 3 || ELTWISE_TYPE == 9 || ELTWISE_TYPE == 11) \
ELTWISE_TYPE == 5 || ELTWISE_TYPE == 8 || ELTWISE_TYPE == 9)) || \ || ((defined(INPUT_SCALAR) || defined(INPUT_TENSOR_BC_CHAN)) && \
((INPUT_TYPE != 1 || INPUT_TYPE != 4) && \ (ELTWISE_TYPE == 0 || ELTWISE_TYPE == 1 || ELTWISE_TYPE == 4 || \
(ELTWISE_TYPE == 3 || ELTWISE_TYPE == 9 || ELTWISE_TYPE == 11)) ELTWISE_TYPE == 5 || ELTWISE_TYPE == 8)))
const int remain_channel = channel - 4 * chan_idx; const int remain_channel = channel - 4 * chan_idx;
if (remain_channel < 4) { if (remain_channel < 4) {
switch (remain_channel) { switch (remain_channel) {
......
...@@ -67,25 +67,52 @@ MaceStatus EltwiseKernel<T>::Compute( ...@@ -67,25 +67,52 @@ MaceStatus EltwiseKernel<T>::Compute(
const Tensor *input1, const Tensor *input1,
Tensor *output) { Tensor *output) {
bool swapped = false; bool swapped = false;
if (input1 != nullptr) { std::string input1_type = "";
if (input1 == nullptr) {
input1_type = "INPUT_SCALAR";
} else {
MACE_CHECK(input0->dim_size() == input1->dim_size() || MACE_CHECK(input0->dim_size() == input1->dim_size() ||
input0->dim_size() == 1 || input1->dim_size() == 1) input0->dim_size() == 1 || input1->dim_size() == 1)
<< "Inputs of Eltwise op must be same shape"; << "Inputs of Eltwise op must be same shape";
MACE_CHECK(type_ != EltwiseType::EQUAL)
<< "Eltwise op on GPU does not support EQUAL";
// broadcast
if (input0->size() != input1->size()) { if (input0->size() != input1->size()) {
if (input0->size() < input1->size()) { if (input0->size() < input1->size()) {
std::swap(input0, input1); std::swap(input0, input1);
swapped = true; swapped = true;
} }
if (input1->dim_size() == 1) { if (input1->dim_size() == 1
MACE_CHECK(input0->dim(3) == input1->dim(0)) || (input1->dim(0) == 1 && input1->dim(1) == 1
<< "Element-Wise op support broadcast on only-channel or non-channel dimension"; // NOLINT(whitespace/line_length) && input1->dim(2) == 1)) {
} else { // Tensor-Vector element wise
MACE_CHECK(((input0->dim(0) == input1->dim(0) || input1->dim(0) == 1) if (input0->dim(3) == input1->dim(input1->dim_size()-1)) {
&& input0->dim(3) == input1->dim(3) && input1->dim(1) == 1 && input1_type = "INPUT_VECTOR";
input1->dim(2) == 1) || (input0->dim(0) == input1->dim(0) && } else {
input0->dim(1) == input1->dim(1) && input0->dim(2) == input1->dim(2) LOG(FATAL) << "Inputs not match the broadcast logic, "
&& input1->dim(3) == 1)) << MakeString(input0->shape()) << " vs "
<< "Element-Wise op support broadcast on only-channel or non-channel dimension"; // NOLINT(whitespace/line_length) << MakeString(input1->shape());
}
} else { // must be 4-D
if (input0->dim(0) == input1->dim(0)
&& input1->dim(1) == 1
&& input1->dim(2) == 1
&& input0->dim(3) == input1->dim(3)) {
input1_type = "INPUT_BATCH_VECTOR";
} else if (input0->dim(0) == input1->dim(0)
&& input0->dim(1) == input1->dim(1)
&& input0->dim(2) == input1->dim(2)
&& input1->dim(3) == 1) {
// broadcast on channel dimension
input1_type = "INPUT_TENSOR_BC_CHAN";
} else {
LOG(FATAL) << "Element-Wise op only support broadcast on"
" channel dimension:"
"Tensor-BatchVector(4D-[N,1,1,C]) "
"and Tensor-Tensor(4D-[N,H,W,1]). but got "
<< MakeString(input0->shape()) << " vs "
<< MakeString(input1->shape());
}
} }
} }
} }
...@@ -129,20 +156,11 @@ MaceStatus EltwiseKernel<T>::Compute( ...@@ -129,20 +156,11 @@ MaceStatus EltwiseKernel<T>::Compute(
built_options.emplace("-DDATA_TYPE=" + DtToUpCompatibleCLDt(dt)); built_options.emplace("-DDATA_TYPE=" + DtToUpCompatibleCLDt(dt));
built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpCompatibleCLCMDDt(dt)); built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpCompatibleCLCMDDt(dt));
built_options.emplace(MakeString("-DELTWISE_TYPE=", type_)); built_options.emplace(MakeString("-DELTWISE_TYPE=", type_));
if (input1 == nullptr) { if (!input1_type.empty()) {
built_options.emplace("-DINPUT_TYPE=1"); built_options.emplace("-D" + input1_type);
} else if (input0->size() != input1->size()) {
if (input0->dim(0) == input1->dim(0) && input0->dim(1) == input1->dim(1)
&& input0->dim(2) == input1->dim(2) && input1->dim(3) == 1) {
// only broadcast on channel
built_options.emplace("-DINPUT_TYPE=4");
} else if (input1->dim(0) == 1 || input1->dim_size() == 1) {
built_options.emplace("-DINPUT_TYPE=3");
} else {
built_options.emplace("-DINPUT_TYPE=2");
}
if (swapped) built_options.emplace("-DSWAPPED");
} }
if (swapped) built_options.emplace("-DSWAPPED");
if (channels % 4 != 0) built_options.emplace("-DNOT_DIVISIBLE_FOUR");
if (!coeff_.empty()) built_options.emplace("-DCOEFF_SUM"); if (!coeff_.empty()) built_options.emplace("-DCOEFF_SUM");
MACE_RETURN_IF_ERROR(runtime->BuildKernel("eltwise", kernel_name, MACE_RETURN_IF_ERROR(runtime->BuildKernel("eltwise", kernel_name,
built_options, &kernel_)); built_options, &kernel_));
......
...@@ -159,16 +159,16 @@ void OpTestContext::SetOCLImageAndBufferTestFlag() { ...@@ -159,16 +159,16 @@ void OpTestContext::SetOCLImageAndBufferTestFlag() {
bool OpsTestNet::Setup(mace::DeviceType device) { bool OpsTestNet::Setup(mace::DeviceType device) {
NetDef net_def; NetDef net_def;
for (auto &op_def_ : op_defs_) { for (auto &op_def : op_defs_) {
net_def.add_op()->CopyFrom(op_def_); net_def.add_op()->CopyFrom(op_def);
for (auto input : op_def_.input()) { for (auto input : op_def.input()) {
if (ws_.GetTensor(input) != nullptr && if (ws_.GetTensor(input) != nullptr &&
!ws_.GetTensor(input)->is_weight()) { !ws_.GetTensor(input)->is_weight()) {
auto input_info = net_def.add_input_info(); auto input_info = net_def.add_input_info();
input_info->set_name(input); input_info->set_name(input);
auto data_format = ProtoArgHelper::GetOptionalArg<OperatorDef, int>( auto data_format = ProtoArgHelper::GetOptionalArg<OperatorDef, int>(
op_def_, "data_format", DataFormat::DF_NONE); op_def, "data_format", DataFormat::DF_NONE);
input_info->set_data_format(data_format); input_info->set_data_format(data_format);
auto &shape = ws_.GetTensor(input)->shape(); auto &shape = ws_.GetTensor(input)->shape();
for (auto d : shape) { for (auto d : shape) {
...@@ -176,16 +176,16 @@ bool OpsTestNet::Setup(mace::DeviceType device) { ...@@ -176,16 +176,16 @@ bool OpsTestNet::Setup(mace::DeviceType device) {
} }
} }
} }
}
for (int i = 0; i < op_def_.output_size(); ++i) { auto op_def = op_defs_.back();
ws_.RemoveTensor(op_def_.output(i)); for (int i = 0; i < op_def.output_size(); ++i) {
auto output_info = net_def.add_output_info(); ws_.RemoveTensor(op_def.output(i));
output_info->set_name(op_def_.output(i)); auto output_info = net_def.add_output_info();
if (op_def_.output_type_size() == op_def_.output_size()) { output_info->set_name(op_def.output(i));
output_info->set_data_type(op_def_.output_type(i)); if (op_def.output_type_size() == op_def.output_size()) {
} else { output_info->set_data_type(op_def.output_type(i));
output_info->set_data_type(DataType::DT_FLOAT); } else {
} output_info->set_data_type(DataType::DT_FLOAT);
} }
} }
MemoryOptimizer mem_optimizer; MemoryOptimizer mem_optimizer;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册