未验证 提交 d4f38476 编写于 作者: T Tao Luo 提交者: GitHub

Merge pull request #14987 from jczaja/prv-transpose-mkldnn-grad

[MKL-DNN] Added transpose/transpose2 MKLDNN grad ops
...@@ -29,10 +29,6 @@ class TransposeMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -29,10 +29,6 @@ class TransposeMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
void Compute(const paddle::framework::ExecutionContext& ctx) const override { void Compute(const paddle::framework::ExecutionContext& ctx) const override {
PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()), PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()),
"It must use CPUPlace."); "It must use CPUPlace.");
const bool is_test = ctx.Attr<bool>("is_test");
PADDLE_ENFORCE(
is_test == true,
"TransposeMKLDNN works only for inference!. Set is_test = True");
auto& dev_ctx = auto& dev_ctx =
ctx.template device_context<paddle::platform::MKLDNNDeviceContext>(); ctx.template device_context<paddle::platform::MKLDNNDeviceContext>();
const auto& mkldnn_engine = dev_ctx.GetEngine(); const auto& mkldnn_engine = dev_ctx.GetEngine();
...@@ -68,6 +64,57 @@ class TransposeMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -68,6 +64,57 @@ class TransposeMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
} }
}; };
template <typename T>
class TransposeMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
public:
void Compute(const paddle::framework::ExecutionContext& ctx) const override {
PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()),
"It must use CPUPlace.");
auto* out_grad =
ctx.Input<framework::Tensor>(framework::GradVarName("Out"));
auto* x_grad = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
if (!x_grad) return;
auto& dev_ctx =
ctx.template device_context<paddle::platform::MKLDNNDeviceContext>();
const auto& mkldnn_engine = dev_ctx.GetEngine();
std::vector<int> axis = ctx.Attr<std::vector<int>>("axis");
std::vector<int> reversed_axis(axis);
int ndims = axis.size();
if (ndims == 1) {
x_grad->ShareDataWith(*out_grad);
return;
}
for (size_t i = 0; i < axis.size(); i++) {
reversed_axis[axis[i]] = i;
}
const T* out_grad_data = out_grad->data<T>();
x_grad->mutable_data<T>(ctx.GetPlace());
std::vector<int> nchw_tz =
paddle::framework::vectorize2int(out_grad->dims());
const std::string key = platform::TransposeMKLDNNHandler::GetHash(
nchw_tz, axis, ctx.op().Output(framework::GradVarName("X")));
platform::TransposeMKLDNNHandler handler(nchw_tz, reversed_axis, dev_ctx,
mkldnn_engine, key);
auto transpose_src_memory_p = handler.AcquireSrcMemory(
out_grad->format(), platform::to_void_cast<T>(out_grad_data));
auto transpose_dst_memory_p =
handler.AcquireDstMemory(x_grad, ctx.GetPlace());
auto transpose_p = handler.AcquireTranspose(transpose_dst_memory_p,
transpose_src_memory_p);
std::vector<mkldnn::primitive> pipeline;
pipeline.push_back(*transpose_p);
mkldnn::stream(mkldnn::stream::kind::eager).submit(pipeline).wait();
}
};
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
...@@ -77,3 +124,8 @@ REGISTER_OP_KERNEL(transpose2, MKLDNN, ::paddle::platform::CPUPlace, ...@@ -77,3 +124,8 @@ REGISTER_OP_KERNEL(transpose2, MKLDNN, ::paddle::platform::CPUPlace,
ops::TransposeMKLDNNOpKernel<float>); ops::TransposeMKLDNNOpKernel<float>);
REGISTER_OP_KERNEL(transpose, MKLDNN, ::paddle::platform::CPUPlace, REGISTER_OP_KERNEL(transpose, MKLDNN, ::paddle::platform::CPUPlace,
ops::TransposeMKLDNNOpKernel<float>); ops::TransposeMKLDNNOpKernel<float>);
REGISTER_OP_KERNEL(transpose_grad, MKLDNN, ::paddle::platform::CPUPlace,
ops::TransposeMKLDNNGradOpKernel<float>);
REGISTER_OP_KERNEL(transpose2_grad, MKLDNN, ::paddle::platform::CPUPlace,
ops::TransposeMKLDNNGradOpKernel<float>);
...@@ -79,10 +79,6 @@ class TransposeOp : public framework::OperatorWithKernel { ...@@ -79,10 +79,6 @@ class TransposeOp : public framework::OperatorWithKernel {
class TransposeOpMaker : public framework::OpProtoAndCheckerMaker { class TransposeOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
void Make() override { void Make() override {
AddAttr<bool>("is_test",
"(bool, default false) Set to true for inference only, false "
"for training. Some layers may run faster when this is true.")
.SetDefault(false);
AddInput( AddInput(
"X", "X",
"(Tensor) The input tensor, tensors with rank up to 6 are supported."); "(Tensor) The input tensor, tensors with rank up to 6 are supported.");
...@@ -147,6 +143,24 @@ class TransposeOpGrad : public framework::OperatorWithKernel { ...@@ -147,6 +143,24 @@ class TransposeOpGrad : public framework::OperatorWithKernel {
ctx->SetOutputDim(framework::GradVarName("X"), x_dims); ctx->SetOutputDim(framework::GradVarName("X"), x_dims);
} }
} }
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext &ctx) const override {
framework::LibraryType library_{framework::LibraryType::kPlain};
std::string data_format = ctx.Attr<std::string>("data_format");
framework::DataLayout layout_ = framework::StringToDataLayout(data_format);
#ifdef PADDLE_WITH_MKLDNN
if (library_ == framework::LibraryType::kPlain &&
platform::CanMKLDNNBeUsed(ctx)) {
library_ = framework::LibraryType::kMKLDNN;
layout_ = framework::DataLayout::kMKLDNN;
}
#endif
return framework::OpKernelType(
ctx.Input<framework::LoDTensor>(framework::GradVarName("Out"))->type(),
ctx.GetPlace(), layout_, library_);
}
}; };
// FIXME(zcd): transpose2 adds an intermediate output(XShape) based on // FIXME(zcd): transpose2 adds an intermediate output(XShape) based on
...@@ -237,9 +251,19 @@ class Transpose2OpGrad : public framework::OperatorWithKernel { ...@@ -237,9 +251,19 @@ class Transpose2OpGrad : public framework::OperatorWithKernel {
protected: protected:
framework::OpKernelType GetExpectedKernelType( framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext &ctx) const override { const framework::ExecutionContext &ctx) const override {
framework::LibraryType library_{framework::LibraryType::kPlain};
std::string data_format = ctx.Attr<std::string>("data_format");
framework::DataLayout layout_ = framework::StringToDataLayout(data_format);
#ifdef PADDLE_WITH_MKLDNN
if (library_ == framework::LibraryType::kPlain &&
platform::CanMKLDNNBeUsed(ctx)) {
library_ = framework::LibraryType::kMKLDNN;
layout_ = framework::DataLayout::kMKLDNN;
}
#endif
return framework::OpKernelType( return framework::OpKernelType(
ctx.Input<framework::LoDTensor>(framework::GradVarName("Out"))->type(), ctx.Input<framework::LoDTensor>(framework::GradVarName("Out"))->type(),
ctx.device_context()); ctx.GetPlace(), layout_, library_);
} }
}; };
......
...@@ -23,16 +23,6 @@ class TestTransposeMKLDNN(TestTransposeOp): ...@@ -23,16 +23,6 @@ class TestTransposeMKLDNN(TestTransposeOp):
def init_op_type(self): def init_op_type(self):
self.op_type = "transpose2" self.op_type = "transpose2"
self.use_mkldnn = True self.use_mkldnn = True
self.is_test = True
return
def test_check_grad(self):
return
def test_check_grad_no_input(self):
return
def test_check_grad_no_filter(self):
return return
......
...@@ -27,7 +27,6 @@ class TestTransposeOp(OpTest): ...@@ -27,7 +27,6 @@ class TestTransposeOp(OpTest):
self.attrs = { self.attrs = {
'axis': list(self.axis), 'axis': list(self.axis),
'use_mkldnn': self.use_mkldnn, 'use_mkldnn': self.use_mkldnn,
'is_test': self.is_test,
} }
self.outputs = { self.outputs = {
'XShape': np.random.random(self.shape).astype("float32"), 'XShape': np.random.random(self.shape).astype("float32"),
...@@ -37,7 +36,6 @@ class TestTransposeOp(OpTest): ...@@ -37,7 +36,6 @@ class TestTransposeOp(OpTest):
def init_op_type(self): def init_op_type(self):
self.op_type = "transpose2" self.op_type = "transpose2"
self.use_mkldnn = False self.use_mkldnn = False
self.is_test = False
def test_check_output(self): def test_check_output(self):
self.check_output(no_check_set=['XShape']) self.check_output(no_check_set=['XShape'])
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册