Commit ade50226 authored by lidanqing, committed by Tao Luo

[UT Coverage]Improve sum_mkldnn_op line coverage (#22275)

Parent 3099d9d4
paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc
@@ -54,102 +54,84 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     auto& dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
     const auto& mkldnn_engine = dev_ctx.GetEngine();
     auto in_vars = ctx.MultiInputVar("X");
-    const int N = in_vars.size();
-    auto out_var = ctx.OutputVar("Out");
-    bool in_place = out_var == in_vars[0];
-    if (out_var->IsType<framework::LoDTensor>()) {
-      LoDTensor* output = ctx.Output<LoDTensor>("Out");
-      T* output_data = output->mutable_data<T>(ctx.GetPlace());
-      auto dst_tz = framework::vectorize<int64_t>(output->dims());
-      auto src_tz = dst_tz;
-      MKLDNNMemoryFormat output_format{MKLDNNMemoryFormat::undef};
-      std::vector<float> scales;
-      std::vector<memory::desc> srcs_md;
-      std::vector<mkldnn::memory> srcs_mem;
-      PADDLE_ENFORCE_EQ(in_vars[0]->IsType<LoDTensor>(), true,
-                        "Input[0] must be LoDTensors");
-      auto& input0 = in_vars[0]->Get<LoDTensor>();
-      PADDLE_ENFORCE_EQ(input0.layout(), DataLayout::kMKLDNN,
-                        "Wrong layout set for inputs[0] tensor");
-      PADDLE_ENFORCE_NE(input0.format(), MKLDNNMemoryFormat::undef,
-                        "Wrong format set for inputs[0] tensor");
-      MKLDNNMemoryFormat input_format = input0.format();
-      for (int i = 0; i < N; i++) {
-        PADDLE_ENFORCE_EQ(in_vars[i]->IsType<LoDTensor>(), true,
-                          "all inputs must be all LoDTensors");
-        auto& input = in_vars[i]->Get<LoDTensor>();
-        PADDLE_ENFORCE_EQ(input.layout(), DataLayout::kMKLDNN,
-                          "Wrong layout set for inputs");
-        PADDLE_ENFORCE_NE(input.format(), MKLDNNMemoryFormat::undef,
-                          "Wrong format set for inputs");
-        if (input.numel() == 0) {
-          continue;
-        }
-        const T* input_data = input.data<T>();
-        auto src_md =
-            memory::desc(src_tz, memory::data_type::f32, input_format);
-        auto src_mem = memory(src_md, mkldnn_engine, to_void_cast(input_data));
-        srcs_md.push_back(src_md);
-        srcs_mem.push_back(src_mem);
-        scales.push_back(1.0);
-      }
-      auto dst_md =
-          memory::desc(dst_tz, memory::data_type::f32, MKLDNNMemoryFormat::any);
-      auto sum_pd = sum::primitive_desc(dst_md, scales, srcs_md, mkldnn_engine);
-      std::shared_ptr<memory> dst_mem;
-      if (in_place) {
-        dst_mem.reset(new memory(sum_pd.dst_desc(), mkldnn_engine));
-      } else {
-        dst_mem.reset(
-            new memory(sum_pd.dst_desc(), mkldnn_engine, output_data));
-      }
-      auto sum_prim = mkldnn::sum(sum_pd);
-      output_format = platform::GetMKLDNNFormat(sum_pd.dst_desc());
-      std::shared_ptr<mkldnn::reorder> reorder_p;
-      std::shared_ptr<memory> target_mem;
-      if (in_place) {
-        output_format = input_format;
-        target_mem.reset(
-            new memory({{src_tz}, memory::data_type::f32, output_format},
-                       mkldnn_engine, output_data));
-        reorder_p = std::make_shared<reorder>(*dst_mem, *target_mem);
-      }
-      mkldnn::stream astream(mkldnn_engine);
-      std::unordered_map<int, memory> args;
-      for (size_t i = 0; i < srcs_mem.size(); ++i) {
-        args.insert({MKLDNN_ARG_MULTIPLE_SRC + i, srcs_mem.at(i)});
-      }
-      args.insert({MKLDNN_ARG_DST, *dst_mem});
-      sum_prim.execute(astream, args);
-      astream.wait();
-      if (in_place) {
-        reorder_p->execute(astream, *dst_mem, *target_mem);
-        astream.wait();
-      }
-      output->set_layout(DataLayout::kMKLDNN);
-      output->set_format(output_format);
-    } else {  // Fallback to naive version
-      SumKernel<CPUDeviceContext, T> reference_kernel;
-      reference_kernel.Compute(ctx);
-    }
+    PADDLE_ENFORCE_NE(in_vars.empty(), true, platform::errors::InvalidArgument(
+                                                 "Input variable is empty."));
+    LoDTensor* output = ctx.Output<LoDTensor>("Out");
+    T* output_data = output->mutable_data<T>(ctx.GetPlace());
+    auto dst_tz = framework::vectorize<int64_t>(output->dims());
+    auto src_tz = dst_tz;
+    MKLDNNMemoryFormat output_format{MKLDNNMemoryFormat::undef};
+    std::vector<float> scales;
+    std::vector<memory::desc> srcs_md;
+    std::vector<mkldnn::memory> srcs_mem;
+    auto& input0 = in_vars[0]->Get<LoDTensor>();
+    bool in_place = (input0.numel() > 0) && (input0.data<T>() == output_data);
+    MKLDNNMemoryFormat input_format = input0.format();
+    for (size_t i = 0; i < in_vars.size(); i++) {
+      auto& input_it = in_vars[i]->Get<LoDTensor>();
+      if (input_it.numel() == 0) {
+        continue;
+      }
+      const T* input_data = input_it.data<T>();
+      auto src_md = memory::desc(src_tz, memory::data_type::f32, input_format);
+      auto src_mem = memory(src_md, mkldnn_engine, to_void_cast(input_data));
+      srcs_md.push_back(src_md);
+      srcs_mem.push_back(src_mem);
+      scales.push_back(1.0);
+    }
+    auto dst_md =
+        memory::desc(dst_tz, memory::data_type::f32, MKLDNNMemoryFormat::any);
+    auto sum_pd = sum::primitive_desc(dst_md, scales, srcs_md, mkldnn_engine);
+    std::shared_ptr<memory> dst_mem;
+    if (in_place) {
+      dst_mem.reset(new memory(sum_pd.dst_desc(), mkldnn_engine));
+    } else {
+      dst_mem.reset(new memory(sum_pd.dst_desc(), mkldnn_engine, output_data));
+    }
+    auto sum_prim = mkldnn::sum(sum_pd);
+    output_format = platform::GetMKLDNNFormat(sum_pd.dst_desc());
+    std::shared_ptr<mkldnn::reorder> reorder_p;
+    std::shared_ptr<memory> target_mem;
+    if (in_place) {
+      output_format = input_format;
+      target_mem.reset(
+          new memory({{src_tz}, memory::data_type::f32, output_format},
+                     mkldnn_engine, output_data));
+      reorder_p = std::make_shared<reorder>(*dst_mem, *target_mem);
+    }
+    mkldnn::stream astream(mkldnn_engine);
+    std::unordered_map<int, memory> args;
+    for (size_t i = 0; i < srcs_mem.size(); ++i) {
+      args.insert({MKLDNN_ARG_MULTIPLE_SRC + i, srcs_mem.at(i)});
+    }
+    args.insert({MKLDNN_ARG_DST, *dst_mem});
+    sum_prim.execute(astream, args);
+    astream.wait();
+    if (in_place) {
+      reorder_p->execute(astream, *dst_mem, *target_mem);
+      astream.wait();
+    }
+    output->set_layout(DataLayout::kMKLDNN);
+    output->set_format(output_format);
   }
 };
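For readers unfamiliar with the oneDNN (MKL-DNN 1.x) sum primitive the kernel above drives, the following standalone sketch shows the same call sequence in isolation: per-source memory descriptors, a sum::primitive_desc, then execution with MKLDNN_ARG_MULTIPLE_SRC + i and MKLDNN_ARG_DST arguments. This is a minimal illustration against the mkldnn 1.x C++ API this PR targets; the buffer names and the 2x4 shape are made up for the example.

    #include <unordered_map>
    #include <vector>
    #include "mkldnn.hpp"  // oneDNN 1.x (a.k.a. MKL-DNN), as used by this PR

    using namespace mkldnn;

    int main() {
      engine eng(engine::kind::cpu, 0);
      stream s(eng);

      // Two 2x4 f32 sources and one destination, all in plain "nc" layout.
      memory::dims tz = {2, 4};
      std::vector<float> a(8, 1.0f), b(8, 2.0f), out(8, 0.0f);

      auto md = memory::desc(tz, memory::data_type::f32, memory::format_tag::nc);
      memory src0(md, eng, a.data());
      memory src1(md, eng, b.data());
      memory dst(md, eng, out.data());

      // scales[i] weights source i; the kernel above always pushes 1.0.
      std::vector<float> scales = {1.0f, 1.0f};
      std::vector<memory::desc> srcs_md = {md, md};

      auto pd = sum::primitive_desc(md, scales, srcs_md, eng);
      auto prim = sum(pd);

      std::unordered_map<int, memory> args = {
          {MKLDNN_ARG_MULTIPLE_SRC + 0, src0},
          {MKLDNN_ARG_MULTIPLE_SRC + 1, src1},
          {MKLDNN_ARG_DST, dst}};
      prim.execute(s, args);
      s.wait();  // every element of `out` now holds 3.0f
      return 0;
    }

The Paddle kernel follows the same shape, with two extras: dst_md uses format_tag::any so oneDNN may pick an efficient destination layout, and the in-place path routes the result through a scratch destination plus a reorder back into the shared input/output buffer.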
paddle/fluid/operators/sum_op.cc
@@ -113,14 +113,6 @@ class SumOp : public framework::OperatorWithKernel {
     framework::LibraryType library{framework::LibraryType::kPlain};
     framework::DataLayout layout{framework::DataLayout::kAnyLayout};
-#ifdef PADDLE_WITH_MKLDNN
-    if (library == framework::LibraryType::kPlain &&
-        platform::CanMKLDNNBeUsed(ctx)) {
-      library = framework::LibraryType::kMKLDNN;
-      layout = framework::DataLayout::kMKLDNN;
-    }
-#endif
     if (x_vars[0]->IsType<framework::LoDTensor>()) {
       int dtype = -1;
       for (size_t idx = 0; idx < x_vars.size(); ++idx) {
@@ -141,6 +133,23 @@ class SumOp : public framework::OperatorWithKernel {
       PADDLE_ENFORCE_NE(dtype, -1,
                         "Sum operator should have at least one tensor");
+#ifdef PADDLE_WITH_MKLDNN
+      if (library == framework::LibraryType::kPlain &&
+          platform::CanMKLDNNBeUsed(ctx) &&
+          static_cast<framework::proto::VarType::Type>(dtype) ==
+              framework::proto::VarType::FP32 &&
+          ctx.OutputVar("Out")->IsType<framework::LoDTensor>()) {
+        if (std::all_of(x_vars.begin(), x_vars.end(),
+                        [](const framework::Variable* v) {
+                          return v->IsType<framework::LoDTensor>();
+                        })) {
+          return framework::OpKernelType(
+              framework::proto::VarType::FP32, ctx.GetPlace(),
+              framework::DataLayout::kMKLDNN, framework::LibraryType::kMKLDNN);
+        }
+      }
+#endif
       return framework::OpKernelType(
           static_cast<framework::proto::VarType::Type>(dtype), ctx.GetPlace(),
           layout, library);
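The relocated guard now dispatches to the MKLDNN kernel only when the deduced dtype is FP32, the output is a LoDTensor, and, via std::all_of, every input variable holds a dense LoDTensor. As a minimal illustration of that gating pattern only (toy variant types, not Paddle's actual Variable API), consider:

    #include <algorithm>
    #include <string>
    #include <variant>
    #include <vector>

    // Toy stand-ins for the contents of framework::Variable (illustrative only).
    struct LoDTensor {};
    struct SelectedRows {};
    using Variable = std::variant<LoDTensor, SelectedRows>;

    // Mirrors the guard added above: pick the MKLDNN kernel only if every
    // input is a dense tensor; otherwise keep the plain CPU kernel.
    std::string pick_kernel(const std::vector<Variable>& x_vars, bool fp32_out) {
      bool all_dense =
          std::all_of(x_vars.begin(), x_vars.end(), [](const Variable& v) {
            return std::holds_alternative<LoDTensor>(v);
          });
      return (fp32_out && all_dense) ? "mkldnn" : "plain";
    }

    int main() {
      std::vector<Variable> dense = {LoDTensor{}, LoDTensor{}};
      std::vector<Variable> mixed = {LoDTensor{}, SelectedRows{}};
      // Mixed inputs (e.g. SelectedRows) must fall back to the plain kernel.
      return pick_kernel(dense, true) == "mkldnn" &&
                     pick_kernel(mixed, true) == "plain"
                 ? 0
                 : 1;
    }

Checking this before returning the kernel type (rather than unconditionally flipping library/layout, as the removed block did) is what lets the MKLDNN sum kernel drop its own per-input type checks and fallback path.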
python/paddle/fluid/tests/unittests/mkldnn/test_sum_mkldnn_op.py
@@ -15,25 +15,26 @@
 from __future__ import print_function
 
 import unittest
+import paddle.fluid.core as core
 from paddle.fluid.tests.unittests.test_sum_op import TestSumOp
 import numpy as np
+import paddle.fluid.op as fluid_op
 
 
-class TestMKLDNN(TestSumOp):
+class TestSumMKLDNN(TestSumOp):
     def setUp(self):
         self.op_type = "sum"
-        self.init_kernel_type()
+        self.init_data_type()
         self.use_mkldnn = True
-        x0 = np.random.random((25, 4)).astype(self.dtype)
-        x1 = np.random.random((25, 4)).astype(self.dtype)
-        x2 = np.random.random((25, 4)).astype(self.dtype)
+        x0 = np.random.random((25, 8)).astype(self.dtype)
+        x1 = np.random.random((25, 8)).astype(self.dtype)
+        x2 = np.random.random((25, 8)).astype(self.dtype)
         self.inputs = {"X": [("x0", x0), ("x1", x1), ("x2", x2)]}
         y = x0 + x1 + x2
         self.outputs = {'Out': y}
         self.attrs = {'use_mkldnn': self.use_mkldnn}
 
-    def init_kernel_type(self):
+    def init_data_type(self):
         self.dtype = np.float32
 
     def test_check_output(self):
@@ -45,5 +46,44 @@ class TestMKLDNN(TestSumOp):
         self.check_grad(['x0'], 'Out', check_dygraph=False)
 
 
+class TestMKLDNNSumInplaceOp(unittest.TestCase):
+    def setUp(self):
+        self.op_type = "sum"
+        self.init_data_type()
+        self.use_mkldnn = True
+        self.x0 = np.random.random((25, 8)).astype(self.dtype)
+        self.x1 = np.random.random((25, 8)).astype(self.dtype)
+
+    def init_data_type(self):
+        self.dtype = np.float32
+
+    def test_check_output(self):
+        place = core.CPUPlace()
+        scope = core.Scope()
+        out_var_name = "x0"
+        inputs = {"X": [("x0", self.x0), ("x1", self.x1)]}
+        for input_key in inputs:
+            for per_input in inputs[input_key]:
+                var_name, var_value = per_input[0], per_input[1]
+                var = scope.var(var_name)
+                tensor = var.get_tensor()
+                tensor.set(var_value, place)
+        sum_op = fluid_op.Operator(
+            "sum", X=["x0", "x1"], Out=out_var_name, use_mkldnn=True)
+        expected_out = np.array(self.x0 + self.x1)
+        sum_op.run(scope, place)
+
+        out = scope.find_var("x0").get_tensor()
+        out_array = np.array(out)
+        self.assertTrue(
+            np.allclose(
+                expected_out, out_array, atol=1e-5),
+            "Inplace sum_mkldnn_op output has diff with expected output")
+
+    def test_check_grad(self):
+        pass
+
+
 if __name__ == '__main__':
     unittest.main()
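A note on the design of the new TestMKLDNNSumInplaceOp: by setting Out to "x0", the same variable name as the first input, the operator writes its result over an input buffer. That is exactly the condition the kernel detects via input0.data<T>() == output_data, so this test covers the in_place branch, including the scratch destination and the reorder back into the shared buffer, that the pre-existing TestSumMKLDNN case never reached. test_check_grad is stubbed out because the test only targets the forward in-place path.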