Unverified commit 519e7426, authored by jakpiase, committed by GitHub

Added optimization pass for oneDNN layernorm kernel (#47782)

* optimization for ln

* fix

* added output to gpd

* added formatting

* fix
Parent commit: 626d7bcb
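In short, the pass precomputes the single ScaleShift tensor that oneDNN's layer normalization primitive expects, so the kernel no longer has to copy Scale and Bias into a temporary buffer on every inference call. A minimal standalone C++ sketch of that merge, assuming per-channel float parameters (the function name here is illustrative, not part of the patch):

```cpp
#include <cstring>
#include <vector>

// Illustrative sketch: pack per-channel scale and bias into one 2*C buffer
// laid out as {scale[0..C), bias[0..C)}, which is the layout the oneDNN
// layer_norm primitive consumes as its combined "ScaleShift" weights memory.
std::vector<float> MergeScaleShift(const std::vector<float>& scale,
                                   const std::vector<float>& bias) {
  const std::size_t C = scale.size();
  std::vector<float> scale_shift(2 * C);
  std::memcpy(scale_shift.data(), scale.data(), C * sizeof(float));
  std::memcpy(scale_shift.data() + C, bias.data(), C * sizeof(float));
  return scale_shift;
}
```

The pass below performs this merge once at graph-optimization time and stores the result as a persistable ScaleShift variable, which the oneDNN kernel can then bind directly.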
@@ -178,6 +178,7 @@ if(WITH_MKLDNN)
pass_library(elt_act_mkldnn_fuse_pass inference DIR mkldnn)
pass_library(matmul_elementwise_add_mkldnn_fuse_pass inference DIR mkldnn)
pass_library(matmul_activation_mkldnn_fuse_pass inference DIR mkldnn)
pass_library(layer_norm_onednn_optimization_pass inference DIR mkldnn)
pass_library(operator_scale_onednn_fuse_pass inference DIR mkldnn)
pass_library(squeeze2_transpose2_onednn_fuse_pass inference DIR mkldnn)
pass_library(operator_unsqueeze2_onednn_fuse_pass inference DIR mkldnn)
@@ -940,6 +940,29 @@ PDNode *patterns::ConvBN::operator()(paddle::framework::ir::PDNode *conv_input,
return bn_out_var;
}
PDNode *patterns::LayerNormShiftScale::operator()() {
auto layer_norm_in = pattern->NewNode(layer_norm_in_repr())
->AsInput()
->assert_is_op_input("layer_norm", "X");
auto layer_norm_bias = pattern->NewNode(layer_norm_bias_repr())
->AsInput()
->assert_is_op_input("layer_norm", "Bias");
auto layer_norm_scale = pattern->NewNode(layer_norm_scale_repr())
->AsInput()
->assert_is_op_input("layer_norm", "Scale");
auto layer_norm_op =
pattern->NewNode(layer_norm_op_repr())->assert_is_op("layer_norm");
auto layer_norm_out = pattern->NewNode(layer_norm_out_repr())
->assert_is_op_output("layer_norm", "Y")
->AsOutput();
layer_norm_op->LinksFrom({layer_norm_in, layer_norm_bias, layer_norm_scale})
.LinksTo({layer_norm_out});
return layer_norm_out;
}
PDNode *patterns::OperatorActivation::operator()(
    const std::string &operator_type, const std::string &activation_type) {
  auto *preceding_op =
@@ -526,6 +526,19 @@ struct ConvBN : public PatternBase {
PATTERN_DECL_NODE(bn_saved_variance);
};
struct LayerNormShiftScale : public PatternBase {
LayerNormShiftScale(PDPattern* pattern, const std::string& name_scope)
: PatternBase(pattern, name_scope, "layer_norm_shift_scale") {}
PDNode* operator()();
PATTERN_DECL_NODE(layer_norm_in);
PATTERN_DECL_NODE(layer_norm_op);
PATTERN_DECL_NODE(layer_norm_bias);
PATTERN_DECL_NODE(layer_norm_scale);
PATTERN_DECL_NODE(layer_norm_out);
};
struct OperatorActivation : public PatternBase {
  OperatorActivation(PDPattern* pattern, const std::string& name_scope)
      : PatternBase(pattern, name_scope, "operator_activation") {}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/ir/mkldnn/layer_norm_onednn_optimization_pass.h"
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/platform/mkldnn_reuse.h"
#include "paddle/fluid/string/pretty_log.h"
namespace paddle {
namespace framework {
namespace ir {
using string::PrettyLogDetail;
void LayerNormOneDNNOptimizationPass::ApplyImpl(Graph *graph) const {
PADDLE_ENFORCE_NOT_NULL(
graph, platform::errors::InvalidArgument("Graph cannot be nullptr."));
FusePassBase::Init("layer_norm_onednn_optimization_pass", graph);
GraphPatternDetector gpd;
patterns::LayerNormShiftScale layer_norm_shift_scale_pattern(
gpd.mutable_pattern(), "layer_norm_onednn_optimization_pass");
layer_norm_shift_scale_pattern();
int found_layer_norm = 0;
auto handler = [&](const GraphPatternDetector::subgraph_t &subgraph,
Graph *g) {
GET_IR_NODE_FROM_SUBGRAPH(
layer_norm_op, layer_norm_op, layer_norm_shift_scale_pattern);
GET_IR_NODE_FROM_SUBGRAPH(
layer_norm_bias, layer_norm_bias, layer_norm_shift_scale_pattern);
GET_IR_NODE_FROM_SUBGRAPH(
layer_norm_scale, layer_norm_scale, layer_norm_shift_scale_pattern);
if (layer_norm_op->Op()->HasAttr("use_mkldnn") &&
!(PADDLE_GET_CONST(bool, layer_norm_op->Op()->GetAttr("use_mkldnn")))) {
VLOG(4) << "Only oneDNN version of layer_norm can be optimized to "
"include Bias and Shift in a single tensor.";
return;
}
auto *scope = param_scope();
auto ln_bias_name = layer_norm_op->Op()->Input("Bias");
auto ln_scale_name = layer_norm_op->Op()->Input("Scale");
auto *ln_bias_tensor =
scope->FindVar(ln_bias_name[0])->GetMutable<phi::DenseTensor>();
auto *ln_scale_tensor =
scope->FindVar(ln_scale_name[0])->GetMutable<phi::DenseTensor>();
const int channels = ln_bias_tensor->dims()[0];
VarDesc scale_shift_desc(patterns::PDNodeName(
"layer_norm_onednn_optimization_pass", "ScaleShift"));
scale_shift_desc.SetShape({channels * 2});
scale_shift_desc.SetDataType(
framework::TransToProtoVarType(ln_bias_tensor->dtype()));
scale_shift_desc.SetPersistable(true);
auto scale_shift_node = g->CreateVarNode(&scale_shift_desc);
auto *scale_shift_tensor =
scope->Var(scale_shift_node->Name())->GetMutable<phi::DenseTensor>();
scale_shift_tensor->Resize(phi::make_ddim({channels * 2}));
memcpy(scale_shift_tensor->mutable_data<float>(platform::CPUPlace()),
ln_scale_tensor->data<float>(),
channels * sizeof(float));
memcpy(scale_shift_tensor->data<float>() + channels,
ln_bias_tensor->data<float>(),
channels * sizeof(float));
layer_norm_op->Op()->SetInput("ScaleShift", {scale_shift_node->Name()});
IR_NODE_LINK_TO(scale_shift_node, layer_norm_op);
found_layer_norm++;
};
gpd(graph, handler);
AddStatis(found_layer_norm);
if ((!Has("disable_logs") || !Get<bool>("disable_logs")) &&
found_layer_norm > 0)
PrettyLogDetail("--- optimized %d layer_norms by merging Scale and Bias",
found_layer_norm);
}
} // namespace ir
} // namespace framework
} // namespace paddle
REGISTER_PASS(layer_norm_onednn_optimization_pass,
paddle::framework::ir::LayerNormOneDNNOptimizationPass);
REGISTER_PASS_CAPABILITY(layer_norm_onednn_optimization_pass)
.AddCombination(
paddle::framework::compatible::OpVersionComparatorCombination().GE(
"layer_norm", 0));
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/graph.h"
namespace paddle {
namespace framework {
namespace ir {
class LayerNormOneDNNOptimizationPass : public FusePassBase {
public:
virtual ~LayerNormOneDNNOptimizationPass() {}
protected:
void ApplyImpl(Graph *graph) const override;
};
} // namespace ir
} // namespace framework
} // namespace paddle
@@ -346,6 +346,7 @@ void CpuPassStrategy::EnableMKLDNN() {
"softplus_activation_mkldnn_fuse_pass",   //
"shuffle_channel_mkldnn_detect_pass",     //
"elt_act_mkldnn_fuse_pass",               //
"layer_norm_onednn_optimization_pass", //
"operator_scale_onednn_fuse_pass", // "operator_scale_onednn_fuse_pass", //
"operator_unsqueeze2_onednn_fuse_pass", // "operator_unsqueeze2_onednn_fuse_pass", //
"operator_reshape2_onednn_fuse_pass", // "operator_reshape2_onednn_fuse_pass", //
@@ -443,6 +444,7 @@ void CpuPassStrategy::EnableMkldnnInt8() {
passes_.push_back("scale_matmul_fuse_pass");
passes_.push_back("reshape_transpose_matmul_mkldnn_fuse_pass");
passes_.push_back("matmul_elementwise_add_mkldnn_fuse_pass");
passes_.push_back("layer_norm_onednn_optimization_pass");
passes_.push_back("operator_scale_onednn_fuse_pass"); passes_.push_back("operator_scale_onednn_fuse_pass");
passes_.push_back("operator_unsqueeze2_onednn_fuse_pass"); passes_.push_back("operator_unsqueeze2_onednn_fuse_pass");
passes_.push_back("operator_reshape2_onednn_fuse_pass"); passes_.push_back("operator_reshape2_onednn_fuse_pass");
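Because the pass is appended to both CpuPassStrategy::EnableMKLDNN() and EnableMkldnnInt8(), it is picked up automatically whenever oneDNN is enabled on an inference config. A rough usage sketch with the Paddle Inference C++ API (model file names are placeholders):

```cpp
#include "paddle_inference_api.h"

int main() {
  paddle_infer::Config config;
  config.SetModel("model.pdmodel", "model.pdiparams");  // placeholder paths
  config.EnableMKLDNN();  // CPU pass list now includes
                          // layer_norm_onednn_optimization_pass
  auto predictor = paddle_infer::CreatePredictor(config);
  // ... bind inputs, then call predictor->Run() as usual
  return 0;
}
```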
@@ -41,19 +41,32 @@ class LayerNormOneDNNHandler
   }
 
   std::shared_ptr<dnnl::memory> AcquireScaleShiftMemory(
-      const phi::DenseTensor* scale, const phi::DenseTensor* shift) {
-    // OneDNN requires a single piece of memory for scale and shift data
-    const unsigned int C = phi::vectorize(scale->dims())[0];
-
-    auto scaleshift_memory =
-        this->AcquireMemoryFromPrimitive(this->fwd_pd_->weights_desc());
-
-    auto mem_data_handle =
-        reinterpret_cast<float*>(scaleshift_memory->get_data_handle());
-    std::copy(scale->data<float>(), scale->data<float>() + C, mem_data_handle);
-    std::copy(
-        shift->data<float>(), shift->data<float>() + C, mem_data_handle + C);
-    return scaleshift_memory;
+      const phi::DenseTensor* scale,
+      const phi::DenseTensor* shift,
+      const framework::ExecutionContext& ctx) {
+    // OneDNN requires a single piece of memory for scale and shift data. During
+    // inference both pieces of memory are merged inside
+    // layer_norm_onednn_optimization_pass, but during training we have to
+    // manually copy them into new memory buffer
+    auto* scaleshift = ctx.Input<phi::DenseTensor>("ScaleShift");
+    if (scaleshift) {
+      return this->AcquireMemoryFromPrimitive(
+          this->fwd_pd_->weights_desc(),
+          platform::to_void_cast(scaleshift->data<float>()));
+    } else {
+      const unsigned int C = phi::vectorize(scale->dims())[0];
+
+      auto scaleshift_memory =
+          this->AcquireMemoryFromPrimitive(this->fwd_pd_->weights_desc());
+
+      auto mem_data_handle =
+          reinterpret_cast<float*>(scaleshift_memory->get_data_handle());
+      std::copy(
+          scale->data<float>(), scale->data<float>() + C, mem_data_handle);
+      std::copy(
+          shift->data<float>(), shift->data<float>() + C, mem_data_handle + C);
+      return scaleshift_memory;
+    }
   }
 
   std::shared_ptr<dnnl::memory> AcquireMeanMemory(phi::DenseTensor* mean) {
@@ -77,9 +90,9 @@ class LayerNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
     auto* x = ctx.Input<phi::DenseTensor>("X");
-    auto* out = ctx.Output<phi::DenseTensor>("Y");
     auto* scale = ctx.Input<phi::DenseTensor>("Scale");
     auto* bias = ctx.Input<phi::DenseTensor>("Bias");
+    auto* out = ctx.Output<phi::DenseTensor>("Y");
 
     const float epsilon = ctx.Attr<float>("epsilon");
     const auto begin_norm_axis = ctx.Attr<int>("begin_norm_axis");
@@ -129,7 +142,7 @@ class LayerNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     if (with_scaleshift) {
       std::shared_ptr<dnnl::memory> scaleshift_memory =
-          handler.AcquireScaleShiftMemory(scale, bias);
+          handler.AcquireScaleShiftMemory(scale, bias, ctx);
       args.insert({DNNL_ARG_SCALE_SHIFT, *scaleshift_memory});
     }