未验证 提交 b8ae3858 编写于 作者: S Sławomir Siwek 提交者: GitHub

[PHI] Migrate softmax kernel (#47339)

* add extra attr property set

* add type_info for all context

* add onednn context to all context

* fix context compile error

* simplify conv kernel args

* pass runtime attr into dev_ctx

* fix macro error

* clear conv_grad_kernel extra args

* merge conv_grad_grad into conv_grad

* clear conv2d_grad_grad extra attrs

* remove redundant imports

* migrate softmax

* clear yaml and eager extra attr

* fix conv1d error

* change to thread local

* fix npu compile failed

* try to fix windows compile failed

* add conv2d onednn phi kernel

* fix ci bugs (#36)

* fix compile bugs (#38)

* fix extra input transform bug (#39)

* support dynamic created attr (#40)

* reset extra info gen code

* rm conv_grad_grad kernel

* reimpl pass attr adapting

* add int attr support

* remove vector inputnames creating

* merge dev

* fix map at error

* adjust attribute

* adapt funcs to PHI
Co-authored-by: Chen Weihang <chenweihang@baidu.com>
Co-authored-by: YuanRisheng <yuanrisheng@baidu.com>
上级 f9a0605d
...@@ -23,7 +23,7 @@ ...@@ -23,7 +23,7 @@
#include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/kernel_registry.h"
USE_OP_ITSELF(softmax); USE_OP_ITSELF(softmax);
USE_OP_DEVICE_KERNEL(softmax, MKLDNN); PD_DECLARE_KERNEL(softmax, OneDNN, ONEDNN);
USE_OP_ITSELF(elementwise_add); USE_OP_ITSELF(elementwise_add);
USE_OP_DEVICE_KERNEL(elementwise_add, MKLDNN); USE_OP_DEVICE_KERNEL(elementwise_add, MKLDNN);
USE_OP_ITSELF(leaky_relu); USE_OP_ITSELF(leaky_relu);
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/mkldnn_reuse.h"
#include "paddle/phi/kernels/funcs/axis_utils.h"
namespace paddle {
namespace operators {
using paddle::platform::MKLDNNDeviceContext;
using paddle::platform::MKLDNNMemDesc;
using dnnl::memory; // Note: paddle has also "memory" namespace
using dnnl::primitive;
using dnnl::prop_kind;
using dnnl::softmax_backward;
using dnnl::softmax_forward;
using dnnl::stream;
using platform::to_void_cast;
// oneDNN handler used by the legacy fluid softmax kernel.
//
// Construction verifies that the input and output tensors have identical
// dims and then acquires the forward primitive descriptor
// (prop_kind::forward_scoring) from the input's memory descriptor and the
// given axis.
template <typename T>
class SoftmaxMKLDNNHandler
    : public platform::MKLDNNHandlerNoCachingT<T,
                                               dnnl::softmax_forward,
                                               dnnl::softmax_backward> {
  // Short private name for the (long) base class type.
  using HandlerBase =
      platform::MKLDNNHandlerNoCachingT<T,
                                        dnnl::softmax_forward,
                                        dnnl::softmax_backward>;

 public:
  // mkldnn_engine / cpu_place are forwarded to the base handler.
  // input and output must have the same dims; axis is passed unchanged to
  // the forward primitive descriptor.
  SoftmaxMKLDNNHandler(const dnnl::engine mkldnn_engine,
                       platform::Place cpu_place,
                       const phi::DenseTensor* input,
                       phi::DenseTensor* output,
                       const int axis)
      : HandlerBase(mkldnn_engine, cpu_place) {
    // Operands are spelled out in place: the enforce macro may embed the
    // expression text in its error report.
    PADDLE_ENFORCE_EQ(
        input->dims(),
        output->dims(),
        platform::errors::InvalidArgument(
            "The shape of input and output tensor must be identical."));

    const auto& src_md = input->mem_desc();
    this->AcquireForwardPrimitiveDescriptor(
        prop_kind::forward_scoring, src_md, axis);
  }
};
// Legacy fluid forward softmax kernel backed by oneDNN (MKLDNN).
template <typename T>
class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel<T> {
 public:
  // Runs softmax over input "X" along attribute "axis" and writes "Out".
  //
  // When "X" and "Out" share a buffer the source memory object is reused as
  // the destination. When attribute "is_test" is false, every output element
  // is clamped from below to exp(-64) after the primitive has finished.
  void Compute(const paddle::framework::ExecutionContext& ctx) const override {
    auto& dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
    const auto& mkldnn_engine = dev_ctx.GetEngine();

    const phi::DenseTensor* input = ctx.Input<phi::DenseTensor>("X");
    phi::DenseTensor* output = ctx.Output<phi::DenseTensor>("Out");
    const bool is_inplaced = input->IsSharedBufferWith(*output);

    // Map the user-supplied axis through CanonicalAxis using the input rank.
    const int axis =
        phi::funcs::CanonicalAxis(ctx.Attr<int>("axis"), input->dims().size());

    SoftmaxMKLDNNHandler<T> handler(
        mkldnn_engine, ctx.GetPlace(), input, output, axis);

    auto softmax_src_memory_p = handler.AcquireSrcMemory(input);
    // For inplace, src and dst are the same memory object.
    std::shared_ptr<dnnl::memory> softmax_dst_memory_p = nullptr;
    if (is_inplaced) {
      softmax_dst_memory_p = softmax_src_memory_p;
      output->mutable_data<T>(ctx.GetPlace());
    } else {
      softmax_dst_memory_p = handler.AcquireDstMemory(output);
    }

    auto softmax_p = handler.AcquireForwardPrimitive();

    auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();
    softmax_p->execute(astream,
                       {{DNNL_ARG_SRC, *softmax_src_memory_p},
                        {DNNL_ARG_DST, *softmax_dst_memory_p}});
    // The clamp below reads the output on the host, so wait for the stream.
    astream.wait();

    const bool is_test = ctx.Attr<bool>("is_test");
    if (!is_test) {
      T* output_data = output->mutable_data<T>(ctx.GetPlace());
      // Loop-invariant lower bound: evaluate exp(-64) and convert it to T
      // once instead of once per element.
      // NOTE(review): presumably keeps exact zeros away from downstream
      // training ops - confirm the intent.
      const T lower_bound = static_cast<T>(exp(-64));
      std::for_each(output_data,
                    output_data + output->numel(),
                    [lower_bound](T& val) { val = std::max(val, lower_bound); });
    }

    output->set_mem_desc(softmax_dst_memory_p->get_desc());
  }
};
} // namespace operators
} // namespace paddle
// Short alias for the operators namespace, used in the registration below.
namespace ops = paddle::operators;
// Registers SoftmaxMKLDNNKernel for the "softmax" op with the MKLDNN library
// tag on CPUPlace, instantiated for float and bfloat16.
REGISTER_OP_KERNEL(softmax,
MKLDNN,
::paddle::platform::CPUPlace,
ops::SoftmaxMKLDNNKernel<float>,
ops::SoftmaxMKLDNNKernel<paddle::platform::bfloat16>);
...@@ -34,7 +34,7 @@ USE_OP_DEVICE_KERNEL(elementwise_mul, MKLDNN); ...@@ -34,7 +34,7 @@ USE_OP_DEVICE_KERNEL(elementwise_mul, MKLDNN);
USE_OP_ITSELF(relu); USE_OP_ITSELF(relu);
PD_DECLARE_KERNEL(relu, OneDNN, ONEDNN); PD_DECLARE_KERNEL(relu, OneDNN, ONEDNN);
USE_OP_ITSELF(softmax); USE_OP_ITSELF(softmax);
USE_OP_DEVICE_KERNEL(softmax, MKLDNN); PD_DECLARE_KERNEL(softmax, OneDNN, ONEDNN);
USE_OP_ITSELF(conv2d); USE_OP_ITSELF(conv2d);
PD_DECLARE_KERNEL(conv2d, OneDNN, ONEDNN); PD_DECLARE_KERNEL(conv2d, OneDNN, ONEDNN);
......
...@@ -32,8 +32,7 @@ USE_OP_DEVICE_KERNEL(elementwise_add, MKLDNN); ...@@ -32,8 +32,7 @@ USE_OP_DEVICE_KERNEL(elementwise_add, MKLDNN);
USE_OP_ITSELF(relu); USE_OP_ITSELF(relu);
PD_DECLARE_KERNEL(relu, OneDNN, ONEDNN); PD_DECLARE_KERNEL(relu, OneDNN, ONEDNN);
USE_OP_ITSELF(softmax); USE_OP_ITSELF(softmax);
USE_OP_DEVICE_KERNEL(softmax, MKLDNN); PD_DECLARE_KERNEL(softmax, OneDNN, ONEDNN);
PD_DECLARE_KERNEL(softmax, CPU, ALL_LAYOUT); PD_DECLARE_KERNEL(softmax, CPU, ALL_LAYOUT);
namespace paddle { namespace paddle {
......
...@@ -753,12 +753,19 @@ class SoftmaxOneDNNHandler ...@@ -753,12 +753,19 @@ class SoftmaxOneDNNHandler
public: public:
SoftmaxOneDNNHandler(const dnnl::engine onednn_engine, SoftmaxOneDNNHandler(const dnnl::engine onednn_engine,
Place cpu_place, Place cpu_place,
int axis,
const DenseTensor* x, const DenseTensor* x,
int axis) DenseTensor* out)
: OneDNNHandlerNoCachingT<T, : OneDNNHandlerNoCachingT<T,
dnnl::softmax_forward, dnnl::softmax_forward,
dnnl::softmax_backward>(onednn_engine, dnnl::softmax_backward>(onednn_engine,
cpu_place) { cpu_place) {
PADDLE_ENFORCE_EQ(
x->dims(),
out->dims(),
phi::errors::InvalidArgument(
"The shape of input and output tensor must be identical."));
const int canonical_axis = funcs::CanonicalAxis(axis, x->dims().size()); const int canonical_axis = funcs::CanonicalAxis(axis, x->dims().size());
this->AcquireForwardPrimitiveDescriptor( this->AcquireForwardPrimitiveDescriptor(
dnnl::prop_kind::forward_scoring, x->mem_desc(), canonical_axis); dnnl::prop_kind::forward_scoring, x->mem_desc(), canonical_axis);
......
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/softmax_kernel.h"
#include "paddle/phi/backends/onednn/onednn_reuse.h"
#include "paddle/phi/core/kernel_registry.h"
namespace phi {
// Forward softmax kernel backed by oneDNN.
//
// dev_ctx: supplies the oneDNN engine, the place, allocation, and the
//          optional "is_test" DNN attribute.
// x:       input tensor.
// axis:    softmax axis, forwarded as-is to SoftmaxOneDNNHandler.
// out:     output tensor; may share its buffer with x, in which case the
//          source memory object is reused as the destination.
//
// When "is_test" is absent or false, every output element is clamped from
// below to exp(-64) after the primitive has finished.
template <typename T, typename Context>
void SoftmaxKernel(const Context& dev_ctx,
                   const DenseTensor& x,
                   int axis,
                   DenseTensor* out) {
  funcs::SoftmaxOneDNNHandler<T> handler(
      dev_ctx.GetEngine(), dev_ctx.GetPlace(), axis, &x, out);

  auto src_memory_p = handler.AcquireSrcMemory(&x);
  std::shared_ptr<dnnl::memory> dst_memory_p = nullptr;
  if (x.IsSharedBufferWith(*out)) {
    // In-place: src and dst are the same memory object.
    dst_memory_p = src_memory_p;
    dev_ctx.template Alloc<T>(out);
  } else {
    dst_memory_p = handler.AcquireDstMemory(out);
  }

  auto softmax_p = handler.AcquireForwardPrimitive();

  auto& astream = OneDNNContext::tls().get_stream();
  softmax_p->execute(
      astream, {{DNNL_ARG_SRC, *src_memory_p}, {DNNL_ARG_DST, *dst_memory_p}});
  // The clamp below reads the output on the host, so wait for the stream.
  astream.wait();

  // A missing "is_test" attribute is treated as false.
  const bool is_test = dev_ctx.HasDnnAttr("is_test")
                           ? PADDLE_GET_CONST(bool, dev_ctx.GetDnnAttr("is_test"))
                           : false;
  if (!is_test) {
    T* out_data = dev_ctx.template Alloc<T>(out);
    // Loop-invariant lower bound: evaluate exp(-64) and convert it to T once
    // instead of once per element.
    // NOTE(review): presumably keeps exact zeros away from downstream
    // training ops - confirm the intent.
    const T lower_bound = static_cast<T>(exp(-64));
    std::for_each(out_data,
                  out_data + out->numel(),
                  [lower_bound](T& val) { val = std::max(val, lower_bound); });
  }

  out->set_mem_desc(dst_memory_p->get_desc());
}
} // namespace phi
// Registers phi::SoftmaxKernel under the kernel name "softmax" with the
// OneDNN / ONEDNN tags, instantiated for float and bfloat16. The empty
// braces are the (unused) body that the registration macro expects.
// NOTE(review): OneDNN / ONEDNN are presumably the backend and layout
// arguments - confirm against the macro definition.
PD_REGISTER_KERNEL(
softmax, OneDNN, ONEDNN, phi::SoftmaxKernel, float, phi::dtype::bfloat16) {}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册