From 4d5a5533279496e45ba900648a4a0f85692a9e68 Mon Sep 17 00:00:00 2001
From: Sławomir Siwek
Date: Thu, 15 Dec 2022 03:39:16 +0100
Subject: [PATCH] [PHI decoupling] Remove fluid imports from MKLDNN code
 (#48981)

* fix wrong handler name
* mkldnn_engine -> onednn_engine
* remove fluid/errors.h imports
* remove fluid/enforce.h imports
* remove note and unnecessary import
* remove fluid/pretty_log.h imports
* remove fluid/place.h imports
* remove fluid/data_layout_transform.h imports
* remove fluid/device_context.h imports
* remove mkldnn_helper code
* remove fluid/mkldnn_reuse.h imports
* pretty_log import
---
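Note for reviewers: almost every hunk below is the same mechanical rename.
A condensed before/after sketch of a typical call site (illustrative only;
CheckGraph is a made-up example, but every spelling is taken from hunks in
this patch):

    #include "paddle/fluid/framework/ir/graph.h"
    #include "paddle/phi/common/place.h"  // replaces paddle/fluid/platform/place.h
    #include "paddle/phi/core/enforce.h"  // replaces paddle/fluid/platform/enforce.h
    #include "paddle/phi/core/errors.h"   // replaces paddle/fluid/platform/errors.h

    void CheckGraph(paddle::framework::ir::Graph* graph) {
      // Places and error factories now come from phi instead of fluid:
      auto place = phi::CPUPlace();  // was paddle::platform::CPUPlace()
      PADDLE_ENFORCE_NOT_NULL(
          graph,
          phi::errors::InvalidArgument(  // was platform::errors::InvalidArgument
              "Pointer to graph argument cannot be NULL."));
    }

The same pattern covers pretty_log (paddle/fluid/string/pretty_log.h ->
paddle/utils/string/pretty_log.h), the oneDNN helpers
(paddle/fluid/platform/mkldnn_reuse.h -> paddle/phi/backends/onednn/onednn_reuse.h),
and local dnnl::engine variables (mkldnn_engine -> onednn_engine).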
 .../ir/mkldnn/batch_norm_act_fuse_pass.cc | 4 +-
 .../mkldnn/batch_norm_act_fuse_pass_tester.cc | 1 -
 .../compute_propagate_scales_mkldnn_pass.cc | 6 +-
 ...ute_propagate_scales_mkldnn_pass_tester.cc | 26 +-
 .../conv_activation_mkldnn_fuse_pass.cc | 4 +-
 .../conv_affine_channel_mkldnn_fuse_pass.cc | 6 +-
 .../ir/mkldnn/conv_bias_mkldnn_fuse_pass.cc | 6 +-
 .../conv_bias_mkldnn_fuse_pass_tester.cc | 4 +-
 .../conv_elementwise_add_mkldnn_fuse_pass.cc | 2 +-
 .../framework/ir/mkldnn/cpu_bfloat16_pass.cc | 2 +-
 .../ir/mkldnn/cpu_bfloat16_pass_tester.cc | 18 +-
 .../ir/mkldnn/cpu_bfloat16_placement_pass.cc | 2 +-
 .../framework/ir/mkldnn/cpu_quantize_pass.cc | 5 +-
 .../ir/mkldnn/cpu_quantize_pass_tester.cc | 4 +-
 .../ir/mkldnn/cpu_quantize_squash_pass.cc | 4 +-
 .../mkldnn/cpu_quantize_squash_pass_tester.cc | 4 +-
 .../ir/mkldnn/elt_act_mkldnn_fuse_pass.cc | 6 +-
 .../ir/mkldnn/fc_act_mkldnn_fuse_pass.cc | 4 +-
 .../fc_elementwise_add_mkldnn_fuse_pass.cc | 2 +-
 .../framework/ir/mkldnn/fc_mkldnn_pass.cc | 4 +-
 .../int8_scale_calculation_mkldnn_pass.cc | 2 +-
 .../ir/mkldnn/interpolate_mkldnn_pass.cc | 2 +-
 .../layer_norm_onednn_optimization_pass.cc | 6 +-
 .../matmul_activation_mkldnn_fuse_pass.cc | 4 +-
 ...matmul_elementwise_add_mkldnn_fuse_pass.cc | 2 +-
 ...tmul_transpose_reshape_mkldnn_fuse_pass.cc | 4 +-
 .../mkldnn/mkldnn_conv_bn_fuse_pass_tester.cc | 6 +-
 .../framework/ir/mkldnn/mkldnn_pass_util.h | 2 +-
 .../ir/mkldnn/multi_gru_fuse_pass.cc | 9 +-
 .../ir/mkldnn/multi_gru_seq_fuse_pass.cc | 9 +-
 .../operator_reshape2_onednn_fuse_pass.cc | 4 +-
 .../mkldnn/operator_scale_onednn_fuse_pass.cc | 4 +-
 .../operator_unsqueeze2_onednn_fuse_pass.cc | 4 +-
 .../mkldnn/params_quantization_mkldnn_pass.cc | 2 +-
 .../params_quantization_mkldnn_pass_tester.cc | 2 +-
 .../ir/mkldnn/quant_dequant_mkldnn_pass.cc | 15 +-
 ...shape_transpose_matmul_mkldnn_fuse_pass.cc | 4 +-
 ...ranspose_matmul_mkldnn_fuse_pass_tester.cc | 2 +-
 .../ir/mkldnn/scale_matmul_fuse_pass.cc | 2 +-
 ...uffle_channel_mkldnn_detect_pass_tester.cc | 2 +-
 .../softplus_activation_mkldnn_fuse_pass.cc | 6 +-
 .../squeeze2_transpose2_onednn_fuse_pass.cc | 4 +-
 .../fluid/inference/api/mkldnn_quantizer.cc | 6 +-
 .../inference/api/mkldnn_quantizer_tester.cc | 34 +-
 ...nalyzer_detect_functional_mkldnn_tester.cc | 5 +-
 .../fused/mkldnn/fusion_gru_mkldnn_op.cc | 12 +-
 .../fused/mkldnn/fusion_lstm_mkldnn_op.cc | 11 +-
 .../fused/mkldnn/fusion_rnn_mkldnn.h | 6 +-
 .../fused/mkldnn/multi_gru_mkldnn_op.cc | 7 +-
 .../operators/mkldnn/dequantize_mkldnn_op.cc | 11 +-
 paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc | 138 ++++++-
 .../operators/mkldnn/interpolate_mkldnn_op.cc | 12 +-
 .../operators/mkldnn/layer_norm_mkldnn_op.cc | 9 +-
 .../fluid/operators/mkldnn/lrn_mkldnn_op.cc | 26 +-
 .../operators/mkldnn/matmul_v2_mkldnn_op.cc | 240 ++++++++++++-
 .../operators/mkldnn/quantize_mkldnn_op.cc | 6 +-
 .../operators/mkldnn/requantize_mkldnn_op.cc | 3 +-
 .../operators/mkldnn/reshape_mkldnn_op.cc | 21 +-
 .../mkldnn/shuffle_channel_mkldnn_op.cc | 9 +-
 .../operators/mkldnn/test_mkldnn_caching.cc | 11 +-
 .../mkldnn/test_mkldnn_op_inplace.cc | 9 +-
 .../operators/mkldnn/test_mkldnn_op_nhwc.cc | 13 +-
 .../operators/mkldnn/transpose_mkldnn_op.cc | 7 +-
 paddle/fluid/operators/pool_op.h | 3 -
 paddle/fluid/platform/mkldnn_helper.h | 3 +-
 paddle/fluid/platform/mkldnn_reuse.h | 336 ------------------
 paddle/phi/kernels/onednn/conv_function.h | 2 +-
 paddle/phi/kernels/onednn/conv_handler.h | 4 +-
 68 files changed, 576 insertions(+), 579 deletions(-)
 delete mode 100644 paddle/fluid/platform/mkldnn_reuse.h

diff --git a/paddle/fluid/framework/ir/mkldnn/batch_norm_act_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/batch_norm_act_fuse_pass.cc
index daaef03f11..230971a2dd 100644
--- a/paddle/fluid/framework/ir/mkldnn/batch_norm_act_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/mkldnn/batch_norm_act_fuse_pass.cc
@@ -16,8 +16,8 @@

 #include "paddle/fluid/framework/ir/graph_pattern_detector.h"
 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/platform/enforce.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/phi/core/enforce.h"
+#include "paddle/utils/string/pretty_log.h"

 namespace paddle {
 namespace framework {
diff --git a/paddle/fluid/framework/ir/mkldnn/batch_norm_act_fuse_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/batch_norm_act_fuse_pass_tester.cc
index 9e989f343b..ab03c73adc 100644
--- a/paddle/fluid/framework/ir/mkldnn/batch_norm_act_fuse_pass_tester.cc
+++ b/paddle/fluid/framework/ir/mkldnn/batch_norm_act_fuse_pass_tester.cc
@@ -19,7 +19,6 @@
 #include "paddle/fluid/framework/op_desc.h"
 #include "paddle/fluid/framework/op_version_registry.h"
 #include "paddle/fluid/framework/program_desc.h"
-#include "paddle/fluid/platform/errors.h"

 namespace paddle {
 namespace framework {
diff --git a/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.cc b/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.cc
index 3fb7636f06..4e2c61ce7d 100644
--- a/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.cc
+++ b/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.cc
@@ -28,7 +28,7 @@ namespace ir {
 void ComputePropagateScalesMkldnnPass::GetTensorFromVector(
     const std::vector& data_v, phi::DenseTensor* tensor) const {
   const int size = static_cast(data_v.size());
-  auto* data = tensor->mutable_data({size}, platform::CPUPlace());
+  auto* data = tensor->mutable_data({size}, phi::CPUPlace());
   for (int i = 0; i < size; i++) {
     data[i] = data_v[i];
   }
@@ -123,7 +123,7 @@ void ComputePropagateScalesMkldnnPass::ComputeVarScales(
       std::vector reshape_dims = {dims[0], volume};
       tmp_tensor.Resize(phi::make_ddim(reshape_dims));
       auto* weight_data = weight_tensor->data();
-      auto* tmp_data = tmp_tensor.mutable_data(platform::CPUPlace());
+      auto* tmp_data = tmp_tensor.mutable_data(phi::CPUPlace());
       for (int i = 0; i < weight_tensor->numel(); i++) {
         tmp_data[i] = std::abs(weight_data[i]);
       }
@@ -365,7 +365,7 @@ void ComputePropagateScalesMkldnnPass::UpdateScaleOpInOutScales(
   auto pair = iter->second;
   const auto tensor = pair.second;
   tmp_tensor.Resize(tensor.dims());
-  auto* data = tmp_tensor.mutable_data(platform::CPUPlace());
+  auto* data = tmp_tensor.mutable_data(phi::CPUPlace());
   auto* src_data = tensor.data();
   for (int i = 0; i < tensor.numel(); i++) {
     if (out_iter != var_quant_scales->end()) {
diff --git a/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass_tester.cc
index 39ecfd2c0e..fc4ca24b2a 100644
--- a/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass_tester.cc
+++ b/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass_tester.cc
@@ -17,7 +17,7 @@

 #include "paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.h"
 #include "paddle/fluid/framework/naive_executor.h"
-#include "paddle/fluid/platform/place.h"
+#include "paddle/phi/common/place.h"

 namespace paddle {
 namespace framework {
@@ -119,7 +119,7 @@ class ComputePropagateScalesMkldnnPassTest : public testing::Test {
       const ProgramDesc& prog,
       Scope* scope,
       const std::initializer_list& variable_names) {
-    auto place = paddle::platform::CPUPlace();
+    auto place = phi::CPUPlace();
     NaiveExecutor exe{place};
     exe.CreateVariables(prog, 0, true, scope);
@@ -148,19 +148,19 @@ class ComputePropagateScalesMkldnnPassTest : public testing::Test {
     auto* wx_tensor = wx_var->GetMutable();
     wx_tensor->Resize(phi::make_dim(wx.size(), wx[0].size()));
     for (size_t i = 0; i < wx.size(); i++)
-      std::copy(begin(wx[i]),
-                end(wx[i]),
-                wx_tensor->mutable_data(platform::CPUPlace()) +
-                    i * wx[0].size());
+      std::copy(
+          begin(wx[i]),
+          end(wx[i]),
+          wx_tensor->mutable_data(phi::CPUPlace()) + i * wx[0].size());

     auto* wh_var = scope.FindVar(wh_var_names);
     auto* wh_tensor = wh_var->GetMutable();
     wh_tensor->Resize(phi::make_dim(wh.size(), wh[0].size()));
     for (size_t i = 0; i < wh.size(); i++)
-      std::copy(begin(wh[i]),
-                end(wh[i]),
-                wh_tensor->mutable_data(platform::CPUPlace()) +
-                    i * wh[0].size());
+      std::copy(
+          begin(wh[i]),
+          end(wh[i]),
+          wh_tensor->mutable_data(phi::CPUPlace()) + i * wh[0].size());
     if (type == "gru") {
       ComputeGruWeightScales(
           graph, &scope, wx_name, wh_name, &var_quant_scales);
@@ -283,7 +283,7 @@ TEST_F(ComputePropagateScalesMkldnnPassTest, get_scales_function) {
   var_tensor.Resize(phi::make_dim(values.size(), 1));
   std::copy(begin(values),
             end(values),
-            var_tensor.mutable_data(platform::CPUPlace()));
+            var_tensor.mutable_data(phi::CPUPlace()));

   std::vector results = GetScales(&var_tensor, 0);

   ASSERT_EQ(results.size(), std::size_t(1));
@@ -310,7 +310,7 @@ TEST_F(ComputePropagateScalesMkldnnPassTest, compute_var_scales) {
       weight_tensor->Resize(phi::make_dim(1, values.size()));
       std::copy(begin(values),
                 end(values),
-                weight_tensor->mutable_data(platform::CPUPlace()));
+                weight_tensor->mutable_data(phi::CPUPlace()));

       auto max_val = *std::max_element(values.begin(), values.end());

@@ -338,7 +338,7 @@ TEST_F(ComputePropagateScalesMkldnnPassTest, update_relu_output_scales) {
   StringPairMap var_quant_scales;
   for (auto& var_name : conv_variable_names) {
     phi::DenseTensor tensor;
-    auto* data = tensor.mutable_data({1}, platform::CPUPlace());
+    auto* data = tensor.mutable_data({1}, phi::CPUPlace());
     data[0] = 10;
     auto pair = std::make_pair(false, tensor);
     var_quant_scales.insert(std::make_pair(var_name, pair));
diff --git a/paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.cc
index a673aafadc..5ac92e5bb0 100644
--- a/paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.cc
@@ -15,8 +15,8 @@
 #include "paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.h"

 #include "paddle/fluid/framework/op_version_registry.h"
"paddle/fluid/platform/mkldnn_reuse.h" -#include "paddle/fluid/string/pretty_log.h" +#include "paddle/phi/backends/onednn/onednn_reuse.h" +#include "paddle/utils/string/pretty_log.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/mkldnn/conv_affine_channel_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/conv_affine_channel_mkldnn_fuse_pass.cc index 5bf025af11..07851350ca 100644 --- a/paddle/fluid/framework/ir/mkldnn/conv_affine_channel_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/conv_affine_channel_mkldnn_fuse_pass.cc @@ -80,7 +80,7 @@ void recompute_bias_and_weights(const Scope* scope, ac_bias_tensor.data(), ac_bias_tensor.numel(), 1); EigenVectorArrayMap eltwise_y_in_array( - eltwise_y_in_tensor->mutable_data(platform::CPUPlace()), + eltwise_y_in_tensor->mutable_data(phi::CPUPlace()), eltwise_y_in_tensor->numel(), 1); @@ -91,7 +91,7 @@ void recompute_bias_and_weights(const Scope* scope, scope->FindVar(conv_weight->Name())->GetMutable(); auto weights_shape = weights->dims(); auto weights_shape_2d = phi::flatten_to_2d(weights_shape, 1); - auto* weights_data = weights->mutable_data(platform::CPUPlace()); + auto* weights_data = weights->mutable_data(phi::CPUPlace()); EigenMatrixArrayMap weights_array_2d( weights_data, weights_shape_2d[0], weights_shape_2d[1]); @@ -233,7 +233,7 @@ void ConvAffineChannelFusePass::ApplyImpl(ir::Graph* graph) const { auto* eltwise_y_in_tensor = scope->Var(eltwise_y_in_node->Name())->GetMutable(); eltwise_y_in_tensor->Resize(ac_bias_tensor->dims()); - std::fill_n(eltwise_y_in_tensor->mutable_data(platform::CPUPlace()), + std::fill_n(eltwise_y_in_tensor->mutable_data(phi::CPUPlace()), eltwise_y_in_tensor->numel(), 0.0f); diff --git a/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.cc index 13cd875431..314991d347 100644 --- a/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.cc @@ -19,8 +19,8 @@ #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_version_registry.h" -#include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/string/pretty_log.h" +#include "paddle/phi/core/enforce.h" +#include "paddle/utils/string/pretty_log.h" namespace paddle { namespace framework { @@ -263,7 +263,7 @@ phi::DenseTensor tensor_apply_eltwise(const phi::DenseTensor& vec_a, vec_y.Resize(vec_a.dims()); const float* a = vec_a.data(); const float* b = vec_b.data(); - float* y = vec_y.mutable_data(platform::CPUPlace()); + float* y = vec_y.mutable_data(phi::CPUPlace()); for (int i = 0; i < vec_a.numel(); i++) { y[i] = f(a[i], b[i]); } diff --git a/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass_tester.cc index c5ee20b4b0..1cd1d0325a 100644 --- a/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass_tester.cc @@ -19,7 +19,7 @@ #include "paddle/fluid/framework/op_proto_maker.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/imperative/type_defs.h" -#include "paddle/fluid/platform/place.h" +#include "paddle/phi/common/place.h" namespace paddle { namespace framework { @@ -112,7 +112,7 @@ void InitTensorHolder(Scope* scope, void MainTest(bool convWithExistingBias) { auto prog = BuildProgramDesc(convWithExistingBias); std::unique_ptr graph(new 
   std::unique_ptr graph(new ir::Graph(prog));
-  auto place = paddle::platform::CPUPlace();
+  auto place = phi::CPUPlace();
   NaiveExecutor exe{place};
   Scope scope;
   // Init scope, as it is used in pass
diff --git a/paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass.cc
index dcaba82106..d43c092a38 100644
--- a/paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass.cc
@@ -16,7 +16,7 @@

 #include "paddle/fluid/framework/ir/graph_traits.h"
 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/utils/string/pretty_log.h"

 namespace paddle {
 namespace framework {
diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.cc b/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.cc
index 12a673b89d..528ba57472 100644
--- a/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.cc
+++ b/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.cc
@@ -18,7 +18,7 @@ limitations under the License. */

 #include "paddle/fluid/framework/ir/graph_pattern_detector.h"
 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/utils/string/pretty_log.h"

 namespace paddle {
 namespace framework {
diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass_tester.cc
index 54078135cf..770a3a7a1d 100644
--- a/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass_tester.cc
+++ b/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass_tester.cc
@@ -17,7 +17,6 @@
 #include "paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.h"
 #include "paddle/fluid/framework/naive_executor.h"
 #include "paddle/fluid/imperative/type_defs.h"
-#include "paddle/fluid/platform/place.h"

 namespace paddle {
 namespace framework {
@@ -68,23 +67,16 @@ void SetOp(ProgramDesc* prog,
 static const std::initializer_list variable_names{
     "z", "a", "b", "c", "d", "e", "f", "g", "h", "i"};

-void PreparePass(std::unique_ptr& graph,
-                 int* original_nodes_num,
-                 int* current_nodes_num) {
-  auto pass = PassRegistry::Instance().Get("cpu_bfloat16_pass");
-
-  *original_nodes_num = graph->Nodes().size();
-  graph.reset(pass->Apply(graph.release()));
-  *current_nodes_num = graph->Nodes().size();
-}
-
 void MainTest(const ProgramDesc& prog,
               const int& quant_count,
               const int& dequant_count,
               const int& added_nodes_count) {
   auto graph = std::make_unique(prog);
-  int original_nodes_num, current_nodes_num;
-  PreparePass(graph, &original_nodes_num, &current_nodes_num);
+  auto pass = PassRegistry::Instance().Get("cpu_bfloat16_pass");
+
+  int original_nodes_num = graph->Nodes().size();
+  graph.reset(pass->Apply(graph.release()));
+  int current_nodes_num = graph->Nodes().size();

   int quantize_nodes_count = 0;
   int dequantize_nodes_count = 0;
diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass.cc b/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass.cc
index fbdafbfe30..8741b00f68 100644
--- a/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass.cc
+++ b/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass.cc
@@ -19,7 +19,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/ir/graph_pattern_detector.h"
 #include "paddle/fluid/platform/mkldnn_helper.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/utils/string/pretty_log.h"

 namespace paddle {
 namespace framework {
diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc
index b650821a3d..a47bdfef32 100644
--- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc
+++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc
@@ -20,7 +20,7 @@

 #include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h"
 #include "paddle/fluid/platform/mkldnn_helper.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/utils/string/pretty_log.h"

 namespace paddle {
 namespace framework {
@@ -1204,8 +1204,7 @@ void CPUQuantizePass::QuantizeMultiGru(Graph* graph) const {
       auto* w_scale_tensor_dst =
           scope->Var(w_scale_node->Name())->GetMutable();
       w_scale_tensor_dst->Resize(scale_tensor_src.dims());
-      auto* dst_data =
-          w_scale_tensor_dst->mutable_data(platform::CPUPlace());
+      auto* dst_data = w_scale_tensor_dst->mutable_data(phi::CPUPlace());
       EigenVectorArrayMapFloat eigen_tensor_dst{dst_data,
                                                 w_scale_tensor_dst->numel()};
       eigen_tensor_dst =
diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc
index 55de1efed7..e340bb625d 100644
--- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc
+++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc
@@ -19,7 +19,7 @@
 #include "paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h"  // NOLINT
 #include "paddle/fluid/framework/naive_executor.h"
 #include "paddle/fluid/imperative/type_defs.h"
-#include "paddle/fluid/platform/place.h"
+#include "paddle/phi/common/place.h"

 namespace paddle {
 namespace framework {
@@ -146,7 +146,7 @@ void PreparePass(std::unique_ptr* graph,
                  int* current_nodes_num,
                  std::string var_without_scale = "",
                  std::string var_signed = "") {
-  auto place = paddle::platform::CPUPlace();
+  auto place = phi::CPUPlace();
   NaiveExecutor exe{place};
   Scope scope;
   exe.CreateVariables(prog, 0, true, &scope);
diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.cc
index 69cf01278b..3654c8bd2d 100644
--- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.cc
+++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.cc
@@ -18,9 +18,9 @@
 #include
 #include

-#include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/mkldnn_helper.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/phi/core/enforce.h"
+#include "paddle/utils/string/pretty_log.h"

 namespace paddle {
 namespace framework {
diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass_tester.cc
index 11f6098819..f94456f028 100644
--- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass_tester.cc
+++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass_tester.cc
@@ -16,7 +16,7 @@

 #include "paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.h"
 #include "paddle/fluid/framework/naive_executor.h"
-#include "paddle/fluid/platform/place.h"
+#include "paddle/phi/common/place.h"

 namespace paddle {
 namespace framework {
@@ -722,7 +722,7 @@ void InitTensorHolder(Scope* scope,
 }

 void PrepareGraph(std::unique_ptr* graph, const ProgramDesc& prog) {
-  auto place = paddle::platform::CPUPlace();
+  auto place = phi::CPUPlace();
   NaiveExecutor exe{place};
   Scope scope;
   exe.CreateVariables(prog, 0, true, &scope);
diff --git a/paddle/fluid/framework/ir/mkldnn/elt_act_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/elt_act_mkldnn_fuse_pass.cc
index 87b2e64061..618b699372 100644
--- a/paddle/fluid/framework/ir/mkldnn/elt_act_mkldnn_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/mkldnn/elt_act_mkldnn_fuse_pass.cc
@@ -16,9 +16,9 @@

 #include "paddle/fluid/framework/ir/graph_pattern_detector.h"
 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/platform/enforce.h"
-#include "paddle/fluid/platform/mkldnn_reuse.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/phi/backends/onednn/onednn_reuse.h"
+#include "paddle/phi/core/enforce.h"
+#include "paddle/utils/string/pretty_log.h"

 namespace paddle {
 namespace framework {
diff --git a/paddle/fluid/framework/ir/mkldnn/fc_act_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/fc_act_mkldnn_fuse_pass.cc
index 298e9cf49c..60ab407f00 100644
--- a/paddle/fluid/framework/ir/mkldnn/fc_act_mkldnn_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/mkldnn/fc_act_mkldnn_fuse_pass.cc
@@ -15,8 +15,8 @@
 #include "paddle/fluid/framework/ir/mkldnn/fc_act_mkldnn_fuse_pass.h"

 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/platform/mkldnn_reuse.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/phi/backends/onednn/onednn_reuse.h"
+#include "paddle/utils/string/pretty_log.h"

 namespace paddle {
 namespace framework {
diff --git a/paddle/fluid/framework/ir/mkldnn/fc_elementwise_add_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/fc_elementwise_add_mkldnn_fuse_pass.cc
index 9ddf9e161d..ef01acd88c 100644
--- a/paddle/fluid/framework/ir/mkldnn/fc_elementwise_add_mkldnn_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/mkldnn/fc_elementwise_add_mkldnn_fuse_pass.cc
@@ -16,7 +16,7 @@

 #include "paddle/fluid/framework/ir/graph_traits.h"
 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/utils/string/pretty_log.h"

 namespace paddle {
 namespace framework {
diff --git a/paddle/fluid/framework/ir/mkldnn/fc_mkldnn_pass.cc b/paddle/fluid/framework/ir/mkldnn/fc_mkldnn_pass.cc
index ceb73b0911..f4396d6d81 100644
--- a/paddle/fluid/framework/ir/mkldnn/fc_mkldnn_pass.cc
+++ b/paddle/fluid/framework/ir/mkldnn/fc_mkldnn_pass.cc
@@ -14,8 +14,8 @@

 #include "paddle/fluid/framework/ir/mkldnn/fc_mkldnn_pass.h"

-#include "paddle/fluid/platform/enforce.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/phi/core/enforce.h"
+#include "paddle/utils/string/pretty_log.h"

 namespace paddle {
 namespace framework {
diff --git a/paddle/fluid/framework/ir/mkldnn/int8_scale_calculation_mkldnn_pass.cc b/paddle/fluid/framework/ir/mkldnn/int8_scale_calculation_mkldnn_pass.cc
index a5481f5c6f..c06f6b5ec9 100644
--- a/paddle/fluid/framework/ir/mkldnn/int8_scale_calculation_mkldnn_pass.cc
+++ b/paddle/fluid/framework/ir/mkldnn/int8_scale_calculation_mkldnn_pass.cc
@@ -15,8 +15,8 @@
 #include "paddle/fluid/framework/ir/mkldnn/int8_scale_calculation_mkldnn_pass.h"

 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/mkldnn_helper.h"
+#include "paddle/phi/core/enforce.h"

 namespace paddle {
 namespace framework {
diff --git a/paddle/fluid/framework/ir/mkldnn/interpolate_mkldnn_pass.cc b/paddle/fluid/framework/ir/mkldnn/interpolate_mkldnn_pass.cc
index 17a9da8410..04a6f8d6b7 100644
--- a/paddle/fluid/framework/ir/mkldnn/interpolate_mkldnn_pass.cc
+++ b/paddle/fluid/framework/ir/mkldnn/interpolate_mkldnn_pass.cc
@@ -17,7 +17,7 @@
 #include
 #include

-#include "paddle/fluid/platform/enforce.h"
+#include "paddle/phi/core/enforce.h"

 namespace paddle {
 namespace framework {
diff --git a/paddle/fluid/framework/ir/mkldnn/layer_norm_onednn_optimization_pass.cc b/paddle/fluid/framework/ir/mkldnn/layer_norm_onednn_optimization_pass.cc
index e5c1a43d92..1fed263394 100644
--- a/paddle/fluid/framework/ir/mkldnn/layer_norm_onednn_optimization_pass.cc
+++ b/paddle/fluid/framework/ir/mkldnn/layer_norm_onednn_optimization_pass.cc
@@ -15,8 +15,8 @@
 #include "paddle/fluid/framework/ir/mkldnn/layer_norm_onednn_optimization_pass.h"

 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/platform/mkldnn_reuse.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/phi/backends/onednn/onednn_reuse.h"
+#include "paddle/utils/string/pretty_log.h"

 namespace paddle {
 namespace framework {
@@ -77,7 +77,7 @@ void LayerNormOneDNNOptimizationPass::ApplyImpl(Graph *graph) const {

       scale_shift_tensor->Resize(phi::make_ddim({channels * 2}));

-      memcpy(scale_shift_tensor->mutable_data(platform::CPUPlace()),
+      memcpy(scale_shift_tensor->mutable_data(phi::CPUPlace()),
             ln_scale_tensor->data(),
             channels * sizeof(float));
diff --git a/paddle/fluid/framework/ir/mkldnn/matmul_activation_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/matmul_activation_mkldnn_fuse_pass.cc
index 7a40a145bd..07a608c5a2 100644
--- a/paddle/fluid/framework/ir/mkldnn/matmul_activation_mkldnn_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/mkldnn/matmul_activation_mkldnn_fuse_pass.cc
@@ -15,8 +15,8 @@
 #include "paddle/fluid/framework/ir/mkldnn/matmul_activation_mkldnn_fuse_pass.h"

 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/platform/mkldnn_reuse.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/phi/backends/onednn/onednn_reuse.h"
+#include "paddle/utils/string/pretty_log.h"

 namespace paddle {
 namespace framework {
diff --git a/paddle/fluid/framework/ir/mkldnn/matmul_elementwise_add_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/matmul_elementwise_add_mkldnn_fuse_pass.cc
index 85e49c68ff..f045377465 100644
--- a/paddle/fluid/framework/ir/mkldnn/matmul_elementwise_add_mkldnn_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/mkldnn/matmul_elementwise_add_mkldnn_fuse_pass.cc
@@ -16,7 +16,7 @@

 #include "paddle/fluid/framework/ir/graph_traits.h"
 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/utils/string/pretty_log.h"

 namespace paddle {
 namespace framework {
diff --git a/paddle/fluid/framework/ir/mkldnn/matmul_transpose_reshape_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/matmul_transpose_reshape_mkldnn_fuse_pass.cc
index a719618376..40dbaa03a0 100644
--- a/paddle/fluid/framework/ir/mkldnn/matmul_transpose_reshape_mkldnn_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/mkldnn/matmul_transpose_reshape_mkldnn_fuse_pass.cc
@@ -13,9 +13,9 @@
 // limitations under the License.
#include "paddle/fluid/framework/ir/mkldnn/matmul_transpose_reshape_mkldnn_fuse_pass.h" -#include #include "paddle/fluid/framework/op_version_registry.h" -#include "paddle/fluid/platform/enforce.h" +#include "paddle/phi/core/enforce.h" +#include "paddle/utils/string/pretty_log.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/mkldnn/mkldnn_conv_bn_fuse_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/mkldnn_conv_bn_fuse_pass_tester.cc index 7911b125b1..5672ca2eb7 100644 --- a/paddle/fluid/framework/ir/mkldnn/mkldnn_conv_bn_fuse_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/mkldnn_conv_bn_fuse_pass_tester.cc @@ -23,7 +23,7 @@ #include "paddle/fluid/framework/ir/pass_tester_helper.h" #include "paddle/fluid/framework/naive_executor.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/platform/place.h" +#include "paddle/phi/common/place.h" #include "paddle/phi/core/kernel_registry.h" PD_DECLARE_KERNEL(conv2d_transpose, CPU, ALL_LAYOUT); @@ -195,7 +195,7 @@ class MKLDNNConvBatchNormPassTest { void FillTensorWithRandomData(phi::DenseTensor* tnsr, float lowb, float upb, - platform::CPUPlace place) { + phi::CPUPlace place) { float* ptr = tnsr->mutable_data(place); // Initialize input data std::uniform_real_distribution dist(static_cast(lowb), @@ -219,7 +219,7 @@ class MKLDNNConvBatchNormPassTest { std::unique_ptr graph(new ir::Graph(base_prog)); Scope scope; - auto place = paddle::platform::CPUPlace(); + auto place = phi::CPUPlace(); NaiveExecutor exe{place}; auto pass = PassRegistry::Instance().Get( diff --git a/paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h b/paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h index 6899a7202d..b091236ddd 100644 --- a/paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h +++ b/paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h @@ -140,7 +140,7 @@ static void GetInfoFromTheFirstOp(ir::Graph* graph, op_desc->GetAttr(vector_name)); phi::DenseTensor tensor; const int size = static_cast(scales_vector.size()); - auto data = tensor.mutable_data({size}, platform::CPUPlace()); + auto data = tensor.mutable_data({size}, phi::CPUPlace()); std::copy(scales_vector.begin(), scales_vector.end(), data); auto pair = std::make_pair(is_unsigned, tensor); info_map->insert(std::make_pair(var_name, pair)); diff --git a/paddle/fluid/framework/ir/mkldnn/multi_gru_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/multi_gru_fuse_pass.cc index eee1840675..e83cc1fa6a 100644 --- a/paddle/fluid/framework/ir/mkldnn/multi_gru_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/multi_gru_fuse_pass.cc @@ -18,8 +18,8 @@ #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" -#include "paddle/fluid/platform/errors.h" -#include "paddle/fluid/string/pretty_log.h" +#include "paddle/phi/core/errors.h" +#include "paddle/utils/string/pretty_log.h" namespace paddle { namespace framework { @@ -44,12 +44,11 @@ std::vector JoinInputs(Node* op1, void MultiGRUFusePass::ApplyImpl(ir::Graph* graph) const { VLOG(3) << "Fusing two concatenated multi_gru ops."; PADDLE_ENFORCE_NOT_NULL(graph, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Pointer to graph argument cannot be NULL.")); FusePassBase::Init(name_scope_, graph); PADDLE_ENFORCE_NOT_NULL( - param_scope(), - platform::errors::InvalidArgument("Scope cannot be nullptr.")); + param_scope(), phi::errors::InvalidArgument("Scope cannot be nullptr.")); GraphPatternDetector gpd; patterns::TwoFusionGruConcat 
   patterns::TwoFusionGruConcat pattern{gpd.mutable_pattern(), name_scope_};
diff --git a/paddle/fluid/framework/ir/mkldnn/multi_gru_seq_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/multi_gru_seq_fuse_pass.cc
index d143f08791..35813bc22d 100644
--- a/paddle/fluid/framework/ir/mkldnn/multi_gru_seq_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/mkldnn/multi_gru_seq_fuse_pass.cc
@@ -21,9 +21,9 @@

 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/ir/graph_pattern_detector.h"
-#include "paddle/fluid/platform/errors.h"
 #include "paddle/fluid/platform/mkldnn_helper.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/phi/core/errors.h"
+#include "paddle/utils/string/pretty_log.h"

 namespace paddle {
 namespace framework {
@@ -48,12 +48,11 @@ std::vector JoinInputs(Node* op1,
 void MultiGruSeqFusePass::ApplyImpl(ir::Graph* graph) const {
   VLOG(3) << "Fusing two consecutive multi_gru ops.";
   PADDLE_ENFORCE_NOT_NULL(graph,
-                          platform::errors::InvalidArgument(
+                          phi::errors::InvalidArgument(
                               "Pointer to graph argument cannot be NULL."));
   FusePassBase::Init(name_scope_, graph);
   PADDLE_ENFORCE_NOT_NULL(
-      param_scope(),
-      platform::errors::InvalidArgument("Scope cannot be nullptr."));
+      param_scope(), phi::errors::InvalidArgument("Scope cannot be nullptr."));

   GraphPatternDetector gpd;
   patterns::MultiGruSeq pattern{gpd.mutable_pattern(), name_scope_};
diff --git a/paddle/fluid/framework/ir/mkldnn/operator_reshape2_onednn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/operator_reshape2_onednn_fuse_pass.cc
index 0f8d0452aa..66e75193bb 100644
--- a/paddle/fluid/framework/ir/mkldnn/operator_reshape2_onednn_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/mkldnn/operator_reshape2_onednn_fuse_pass.cc
@@ -15,8 +15,8 @@
 #include "paddle/fluid/framework/ir/mkldnn/operator_reshape2_onednn_fuse_pass.h"

 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/platform/mkldnn_reuse.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/phi/backends/onednn/onednn_reuse.h"
+#include "paddle/utils/string/pretty_log.h"

 namespace paddle {
 namespace framework {
diff --git a/paddle/fluid/framework/ir/mkldnn/operator_scale_onednn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/operator_scale_onednn_fuse_pass.cc
index 31b9229bfc..cb06f6eb12 100644
--- a/paddle/fluid/framework/ir/mkldnn/operator_scale_onednn_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/mkldnn/operator_scale_onednn_fuse_pass.cc
@@ -15,8 +15,8 @@
 #include "paddle/fluid/framework/ir/mkldnn/operator_scale_onednn_fuse_pass.h"

 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/platform/mkldnn_reuse.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/phi/backends/onednn/onednn_reuse.h"
+#include "paddle/utils/string/pretty_log.h"

 namespace paddle {
 namespace framework {
diff --git a/paddle/fluid/framework/ir/mkldnn/operator_unsqueeze2_onednn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/operator_unsqueeze2_onednn_fuse_pass.cc
index 80f49613c6..716d148863 100644
--- a/paddle/fluid/framework/ir/mkldnn/operator_unsqueeze2_onednn_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/mkldnn/operator_unsqueeze2_onednn_fuse_pass.cc
@@ -15,8 +15,8 @@
 #include "paddle/fluid/framework/ir/mkldnn/operator_unsqueeze2_onednn_fuse_pass.h"

 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/platform/mkldnn_reuse.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/phi/backends/onednn/onednn_reuse.h"
+#include "paddle/utils/string/pretty_log.h"

 namespace paddle {
 namespace framework {
diff --git a/paddle/fluid/framework/ir/mkldnn/params_quantization_mkldnn_pass.cc b/paddle/fluid/framework/ir/mkldnn/params_quantization_mkldnn_pass.cc
index 5ad1e95cd7..c03caa7ee8 100644
--- a/paddle/fluid/framework/ir/mkldnn/params_quantization_mkldnn_pass.cc
+++ b/paddle/fluid/framework/ir/mkldnn/params_quantization_mkldnn_pass.cc
@@ -16,7 +16,7 @@

 #include "paddle/fluid/framework/op_version_registry.h"
 #include "paddle/fluid/platform/mkldnn_helper.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/utils/string/pretty_log.h"

 namespace paddle {
 namespace framework {
diff --git a/paddle/fluid/framework/ir/mkldnn/params_quantization_mkldnn_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/params_quantization_mkldnn_pass_tester.cc
index f6eee81cec..52f3843566 100755
--- a/paddle/fluid/framework/ir/mkldnn/params_quantization_mkldnn_pass_tester.cc
+++ b/paddle/fluid/framework/ir/mkldnn/params_quantization_mkldnn_pass_tester.cc
@@ -16,7 +16,7 @@

 #include "paddle/fluid/framework/ir/mkldnn/params_quantization_mkldnn_pass.h"  // NOLINT
 #include "paddle/fluid/imperative/type_defs.h"
-#include "paddle/fluid/platform/place.h"
+#include "paddle/phi/common/place.h"

 namespace paddle {
 namespace framework {
diff --git a/paddle/fluid/framework/ir/mkldnn/quant_dequant_mkldnn_pass.cc b/paddle/fluid/framework/ir/mkldnn/quant_dequant_mkldnn_pass.cc
index 433b9aba2e..932f5fb670 100755
--- a/paddle/fluid/framework/ir/mkldnn/quant_dequant_mkldnn_pass.cc
+++ b/paddle/fluid/framework/ir/mkldnn/quant_dequant_mkldnn_pass.cc
@@ -430,8 +430,8 @@ void QuantDequantMkldnnPass::TransposeWeight(phi::DenseTensor* input) const {

   phi::DenseTensor trans_tensor;
   trans_tensor.Resize(out_dims);
-  float* trans_data = trans_tensor.mutable_data(platform::CPUPlace());
-  float* in_data = input->mutable_data(platform::CPUPlace());
+  float* trans_data = trans_tensor.mutable_data(phi::CPUPlace());
+  float* in_data = input->mutable_data(phi::CPUPlace());

   for (int64_t out_idx = 0; out_idx < count; ++out_idx) {
     int64_t in_idx = 0;
@@ -493,8 +493,7 @@ void QuantDequantMkldnnPass::ConvertFromINT8ToFP32(
     weight_tensor->clear();  // clear int weight
     weight_tensor->Resize(phi::make_ddim(phi::vectorize(weight_dims)));
-    auto* new_weight_data =
-        weight_tensor->mutable_data(platform::CPUPlace());
+    auto* new_weight_data = weight_tensor->mutable_data(phi::CPUPlace());
     memcpy(new_weight_data,
            weight_data.data(),
            weight_tensor->numel() * sizeof(float));
@@ -536,8 +535,7 @@ void QuantDequantMkldnnPass::ConvertFromINT8ToFP32(
     }
     weight_tensor->clear();  // clear int weight
     weight_tensor->Resize(phi::make_ddim(phi::vectorize(weight_dims)));
-    auto* new_weight_data =
-        weight_tensor->mutable_data(platform::CPUPlace());
+    auto* new_weight_data = weight_tensor->mutable_data(phi::CPUPlace());
     memcpy(new_weight_data,
            weight_data.data(),
            weight_tensor->numel() * sizeof(float));
@@ -582,8 +580,7 @@ void QuantDequantMkldnnPass::DequantizeOpWeights(
                          weight_var_name,
                          op_desc->Type()));
   auto* weight_tensor = var->GetMutable();
-  float* fp32_weight_data =
-      weight_tensor->mutable_data(platform::CPUPlace());
+  float* fp32_weight_data = weight_tensor->mutable_data(phi::CPUPlace());
   ConvertFromINT8ToFP32(
       scales, weight_tensor, nullptr, fp32_weight_data, weight_var_name);
 }
@@ -628,7 +625,7 @@ void QuantDequantMkldnnPass::DequantizeOpWeightsFromONNXFormat(
                          op_desc->Type()));
   auto* weight_tensor = var->GetMutable();
   int8_t* int8_weight_data =
-      weight_tensor->mutable_data(platform::CPUPlace());
+      weight_tensor->mutable_data(phi::CPUPlace());
   ConvertFromINT8ToFP32(
       scales, weight_tensor, int8_weight_data, nullptr, weight_var_name);
diff --git a/paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass.cc
index bb6ceb6064..25a79509b5 100644
--- a/paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass.cc
@@ -14,8 +14,8 @@
 #include "paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass.h"

 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/platform/enforce.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/phi/core/enforce.h"
+#include "paddle/utils/string/pretty_log.h"

 namespace paddle {
 namespace framework {
diff --git a/paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass_tester.cc
index 111a28403f..2dd13573d9 100644
--- a/paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass_tester.cc
+++ b/paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass_tester.cc
@@ -26,7 +26,7 @@ void AddVarToScope(Scope* param_scope,
                    const DDim& dims) {
   auto* tensor = param_scope->Var(name)->GetMutable();
   tensor->Resize(dims);
-  tensor->mutable_data(platform::CPUPlace());
+  tensor->mutable_data(phi::CPUPlace());
 }

 Scope* CreateParamScope() {
diff --git a/paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.cc
index bcc44a53fe..9f50aefc46 100644
--- a/paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.cc
@@ -20,7 +20,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/ir/graph_pattern_detector.h"
 #include "paddle/fluid/framework/op_version_registry.h"
 #include "paddle/fluid/platform/mkldnn_helper.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/utils/string/pretty_log.h"

 namespace paddle {
 namespace framework {
diff --git a/paddle/fluid/framework/ir/mkldnn/shuffle_channel_mkldnn_detect_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/shuffle_channel_mkldnn_detect_pass_tester.cc
index 11c5b42af5..4c6fc3774e 100644
--- a/paddle/fluid/framework/ir/mkldnn/shuffle_channel_mkldnn_detect_pass_tester.cc
+++ b/paddle/fluid/framework/ir/mkldnn/shuffle_channel_mkldnn_detect_pass_tester.cc
@@ -28,7 +28,7 @@ void AddVarToScope(Scope* param_scope,
                    const DDim& dims) {
   auto* tensor = param_scope->Var(name)->GetMutable();
   tensor->Resize(dims);
-  tensor->mutable_data(platform::CPUPlace());
+  tensor->mutable_data(phi::CPUPlace());
 }

 Scope* CreateParamScope() {
diff --git a/paddle/fluid/framework/ir/mkldnn/softplus_activation_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/softplus_activation_mkldnn_fuse_pass.cc
index 38c86d225f..a4e74bb376 100644
--- a/paddle/fluid/framework/ir/mkldnn/softplus_activation_mkldnn_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/mkldnn/softplus_activation_mkldnn_fuse_pass.cc
@@ -16,9 +16,9 @@

 #include "paddle/fluid/framework/ir/graph_pattern_detector.h"
 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/platform/enforce.h"
-#include "paddle/fluid/platform/mkldnn_reuse.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/phi/backends/onednn/onednn_reuse.h"
+#include "paddle/phi/core/enforce.h"
+#include "paddle/utils/string/pretty_log.h"

 namespace paddle {
 namespace framework {
diff --git a/paddle/fluid/framework/ir/mkldnn/squeeze2_transpose2_onednn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/squeeze2_transpose2_onednn_fuse_pass.cc
index 9a3b420073..b71873a9dc 100644
--- a/paddle/fluid/framework/ir/mkldnn/squeeze2_transpose2_onednn_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/mkldnn/squeeze2_transpose2_onednn_fuse_pass.cc
@@ -13,8 +13,8 @@
 // limitations under the License.
#include "paddle/fluid/framework/ir/mkldnn/squeeze2_transpose2_onednn_fuse_pass.h" #include "paddle/fluid/framework/op_version_registry.h" -#include "paddle/fluid/platform/mkldnn_reuse.h" -#include "paddle/fluid/string/pretty_log.h" +#include "paddle/phi/backends/onednn/onednn_reuse.h" +#include "paddle/utils/string/pretty_log.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/inference/api/mkldnn_quantizer.cc b/paddle/fluid/inference/api/mkldnn_quantizer.cc index 53ed8c8134..5fec6e74d3 100644 --- a/paddle/fluid/inference/api/mkldnn_quantizer.cc +++ b/paddle/fluid/inference/api/mkldnn_quantizer.cc @@ -30,14 +30,14 @@ #include "paddle/fluid/inference/analysis/analyzer.h" #include "paddle/fluid/inference/api/analysis_predictor.h" #include "paddle/fluid/platform/mkldnn_helper.h" -#include "paddle/fluid/platform/place.h" -#include "paddle/fluid/string/pretty_log.h" +#include "paddle/phi/common/place.h" +#include "paddle/utils/string/pretty_log.h" namespace paddle { using framework::Variable; using framework::ir::Graph; -using platform::CPUPlace; +using phi::CPUPlace; using ConstEigenVectorArrayMap = Eigen::Map>; using EigenMatrixDoubleArray = diff --git a/paddle/fluid/inference/api/mkldnn_quantizer_tester.cc b/paddle/fluid/inference/api/mkldnn_quantizer_tester.cc index 79873a63af..5e699a8b4c 100644 --- a/paddle/fluid/inference/api/mkldnn_quantizer_tester.cc +++ b/paddle/fluid/inference/api/mkldnn_quantizer_tester.cc @@ -111,7 +111,7 @@ TEST_F(MkldnnQuantizerTest, histogram_inverted_min_max) { var_tensor.Resize(phi::make_dim(values.size())); std::copy(begin(values), end(values), - var_tensor.mutable_data(platform::CPUPlace())); + var_tensor.mutable_data(phi::CPUPlace())); ASSERT_THROW(Histogram(var_tensor, max_val, min_val, 3), platform::EnforceNotMet); @@ -127,7 +127,7 @@ TEST_F(MkldnnQuantizerTest, histogram_non_negative_to_3) { var_tensor.Resize(phi::make_dim(values.size())); std::copy(begin(values), end(values), - var_tensor.mutable_data(platform::CPUPlace())); + var_tensor.mutable_data(phi::CPUPlace())); std::vector histogram; float bin_width; @@ -151,7 +151,7 @@ TEST_F(MkldnnQuantizerTest, histogram_positive_and_negative_to_3) { var_tensor.Resize(phi::make_dim(values.size())); std::copy(begin(values), end(values), - var_tensor.mutable_data(platform::CPUPlace())); + var_tensor.mutable_data(phi::CPUPlace())); std::vector histogram; float bin_width; @@ -175,7 +175,7 @@ TEST_F(MkldnnQuantizerTest, histogram_zero_bins) { var_tensor.Resize(phi::make_dim(values.size())); std::copy(begin(values), end(values), - var_tensor.mutable_data(platform::CPUPlace())); + var_tensor.mutable_data(phi::CPUPlace())); ASSERT_THROW(Histogram(var_tensor, min_val, max_val, 0), platform::EnforceNotMet); @@ -188,7 +188,7 @@ TEST_F(MkldnnQuantizerTest, histogram_empty) { // zero tensor phi::DenseTensor var_tensor; var_tensor.Resize({0}); - var_tensor.mutable_data(platform::CPUPlace()); + var_tensor.mutable_data(phi::CPUPlace()); ASSERT_THROW(Histogram(var_tensor, -1, 1, 1), platform::EnforceNotMet); } @@ -200,7 +200,7 @@ TEST_F(MkldnnQuantizerTest, kl_scaling_factor_signed) { var_tensor.Resize(phi::make_dim(values.size())); std::copy(begin(values), end(values), - var_tensor.mutable_data(platform::CPUPlace())); + var_tensor.mutable_data(phi::CPUPlace())); bool is_unsigned; phi::DenseTensor lod_tensor; @@ -220,7 +220,7 @@ TEST_F(MkldnnQuantizerTest, max_scaling_factor_signed) { var_tensor.Resize(phi::make_dim(values.size())); std::copy(begin(values), end(values), - 
-            var_tensor.mutable_data(platform::CPUPlace()));
+            var_tensor.mutable_data(phi::CPUPlace()));

   bool is_unsigned;
   phi::DenseTensor lod_tensor;
@@ -240,7 +240,7 @@ TEST_F(MkldnnQuantizerTest, max_scaling_factor_unsigned) {
   var_tensor.Resize(phi::make_dim(values.size()));
   std::copy(begin(values),
             end(values),
-            var_tensor.mutable_data(platform::CPUPlace()));
+            var_tensor.mutable_data(phi::CPUPlace()));

   bool is_unsigned;
   phi::DenseTensor lod_tensor;
@@ -260,10 +260,10 @@ TEST_F(MkldnnQuantizerTest, max_scaling_factor_chwise_unsigned) {
   phi::DenseTensor var_tensor;
   var_tensor.Resize(phi::make_dim(channels, 1, 1, values.size()));
   for (int i = 0; i < channels; i++)
-    std::copy(begin(values),
-              end(values),
-              var_tensor.mutable_data(platform::CPUPlace()) +
-                  i * values.size());
+    std::copy(
+        begin(values),
+        end(values),
+        var_tensor.mutable_data(phi::CPUPlace()) + i * values.size());

   bool is_unsigned;
   phi::DenseTensor lod_tensor;
@@ -284,7 +284,7 @@ TEST_F(MkldnnQuantizerTest, kl_scaling_factor_unsigned) {
   var_tensor.Resize(phi::make_dim(values.size()));
   std::copy(begin(values),
             end(values),
-            var_tensor.mutable_data(platform::CPUPlace()));
+            var_tensor.mutable_data(phi::CPUPlace()));

   bool is_unsigned;
   phi::DenseTensor lod_tensor;
@@ -312,14 +312,14 @@ TEST_F(MkldnnQuantizerTest, max_ch_gru_scaling_factor) {
     std::copy(
         begin(wx[i]),
         end(wx[i]),
-        wx_tensor.mutable_data(platform::CPUPlace()) + i * wx[0].size());
+        wx_tensor.mutable_data(phi::CPUPlace()) + i * wx[0].size());

   wh_tensor.Resize(phi::make_dim(wh.size(), wh[0].size()));
   for (size_t i = 0; i < wh.size(); i++)
     std::copy(
         begin(wh[i]),
         end(wh[i]),
-        wh_tensor.mutable_data(platform::CPUPlace()) + i * wh[0].size());
+        wh_tensor.mutable_data(phi::CPUPlace()) + i * wh[0].size());

   bool is_unsigned;
   std::tie(is_unsigned, lod_tensor) =
@@ -342,14 +342,14 @@ TEST_F(MkldnnQuantizerTest, max_ch_lstm_scaling_factor) {
     std::copy(
         begin(wx[i]),
         end(wx[i]),
-        wx_tensor.mutable_data(platform::CPUPlace()) + i * wx[0].size());
+        wx_tensor.mutable_data(phi::CPUPlace()) + i * wx[0].size());

   wh_tensor.Resize(phi::make_dim(wh.size(), wh[0].size()));
   for (size_t i = 0; i < wh.size(); i++)
     std::copy(
         begin(wh[i]),
         end(wh[i]),
-        wh_tensor.mutable_data(platform::CPUPlace()) + i * wh[0].size());
+        wh_tensor.mutable_data(phi::CPUPlace()) + i * wh[0].size());

   bool is_unsigned;
   std::tie(is_unsigned, lod_tensor) =
diff --git a/paddle/fluid/inference/tests/api/analyzer_detect_functional_mkldnn_tester.cc b/paddle/fluid/inference/tests/api/analyzer_detect_functional_mkldnn_tester.cc
index f73798faa4..277f68d09b 100644
--- a/paddle/fluid/inference/tests/api/analyzer_detect_functional_mkldnn_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_detect_functional_mkldnn_tester.cc
@@ -18,8 +18,7 @@ limitations under the License. */
 #include

 #include "paddle/fluid/inference/tests/api/tester_helper.h"
-#include "paddle/fluid/platform/device_context.h"
-#include "paddle/fluid/platform/place.h"
+#include "paddle/phi/common/place.h"

 DEFINE_string(infer_shape, "", "data shape file");
 DEFINE_int32(sample, 20, "number of sample");
@@ -78,7 +77,7 @@ void SetInput(std::vector> *inputs,
 #ifdef PADDLE_WITH_MKLDNN
 int GetNumCachedObjects(void) {
   auto &pool = platform::DeviceContextPool::Instance();
-  platform::CPUPlace place;
+  phi::CPUPlace place;
   auto onednn_dev_ctx = dynamic_cast(pool.Get(place));
   return onednn_dev_ctx->GetCachedObjectsNumber();
 }
diff --git a/paddle/fluid/operators/fused/mkldnn/fusion_gru_mkldnn_op.cc b/paddle/fluid/operators/fused/mkldnn/fusion_gru_mkldnn_op.cc
index 00f0f94175..806c883228 100644
--- a/paddle/fluid/operators/fused/mkldnn/fusion_gru_mkldnn_op.cc
+++ b/paddle/fluid/operators/fused/mkldnn/fusion_gru_mkldnn_op.cc
@@ -15,6 +15,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/convert_utils.h"
 #include "paddle/fluid/operators/fused/fusion_gru_op.h"
 #include "paddle/fluid/operators/fused/mkldnn/fusion_rnn_mkldnn.h"
+#include "paddle/phi/backends/onednn/onednn_reuse.h"
 #include "paddle/phi/core/expect.h"

 namespace paddle {
@@ -24,13 +25,14 @@ using phi::OneDNNContext;
 using phi::funcs::OneDNNGetDataType;
 using phi::funcs::OneDNNMemDesc;
 using phi::funcs::RNNReorderType;
+using OneDNNMemoryFormat = dnnl::memory::format_tag;

 template
 class GRUMKLDNNHandler : public RNNMKLDNNHandler {
  public:
   GRUMKLDNNHandler(const paddle::framework::ExecutionContext& ctx,
                    const OneDNNContext& dev_ctx,
-                   const dnnl::engine mkldnn_engine,
+                   const dnnl::engine onednn_engine,
                    platform::Place cpu_place,
                    const phi::DenseTensor* input,
                    const phi::DenseTensor* weight_h,
@@ -44,7 +46,7 @@ class GRUMKLDNNHandler : public RNNMKLDNNHandler {
       : RNNMKLDNNHandler(
             ctx,
             dev_ctx,
-            mkldnn_engine,
+            onednn_engine,
             ctx.GetPlace(),
             input,
             weight_h,
@@ -256,7 +258,7 @@ class FusionGRUMKLDNNKernel : public framework::OpKernel {
   template
   void RunKernel(const framework::ExecutionContext& ctx) const {
     auto& dev_ctx = ctx.template device_context();
-    const auto& mkldnn_engine = dev_ctx.GetEngine();
+    const auto& onednn_engine = dev_ctx.GetEngine();

     // Get Tensors
     const auto* input = ctx.Input("X");
@@ -294,7 +296,7 @@ class FusionGRUMKLDNNKernel : public framework::OpKernel {
     GRUMKLDNNHandler handler(
         ctx,
         dev_ctx,
-        mkldnn_engine,
+        onednn_engine,
         ctx.GetPlace(),
         input,
         weight_h,
@@ -379,7 +381,7 @@ class FusionGRUMKLDNNKernel : public framework::OpKernel {
 namespace ops = paddle::operators;
 REGISTER_OP_KERNEL(fusion_gru,
                    MKLDNN,
-                   paddle::platform::CPUPlace,
+                   phi::CPUPlace,
                    ops::FusionGRUMKLDNNKernel,
                    ops::FusionGRUMKLDNNKernel,
                    ops::FusionGRUMKLDNNKernel);
diff --git a/paddle/fluid/operators/fused/mkldnn/fusion_lstm_mkldnn_op.cc b/paddle/fluid/operators/fused/mkldnn/fusion_lstm_mkldnn_op.cc
index 68ab4f55a5..bda5eab2d7 100644
--- a/paddle/fluid/operators/fused/mkldnn/fusion_lstm_mkldnn_op.cc
+++ b/paddle/fluid/operators/fused/mkldnn/fusion_lstm_mkldnn_op.cc
@@ -24,6 +24,7 @@ using phi::OneDNNContext;
 using phi::funcs::OneDNNGetDataType;
 using phi::funcs::OneDNNMemDesc;
 using phi::funcs::RNNReorderType;
+using OneDNNMemoryFormat = dnnl::memory::format_tag;

 template
 class LSTMMKLDNNHandler
@@ -31,7 +32,7 @@ class LSTMMKLDNNHandler
  public:
   LSTMMKLDNNHandler(const paddle::framework::ExecutionContext& ctx,
                     const OneDNNContext& dev_ctx,
-                    const dnnl::engine mkldnn_engine,
+                    const dnnl::engine onednn_engine,
                     platform::Place cpu_place,
                     const phi::DenseTensor* input,
                     const phi::DenseTensor* weight_h,
@@ -46,7 +47,7 @@ class LSTMMKLDNNHandler
       : RNNMKLDNNHandler(
             ctx,
             dev_ctx,
-            mkldnn_engine,
+            onednn_engine,
             ctx.GetPlace(),
             input,
             weight_h,
@@ -338,7 +339,7 @@ class FusionLSTMMKLDNNKernel : public framework::OpKernel {
   template
   void RunKernel(const framework::ExecutionContext& ctx) const {
     auto& dev_ctx = ctx.template device_context();
-    const auto& mkldnn_engine = dev_ctx.GetEngine();
+    const auto& onednn_engine = dev_ctx.GetEngine();

     // Get Tensors
     const auto* input = ctx.Input("X");
@@ -379,7 +380,7 @@ class FusionLSTMMKLDNNKernel : public framework::OpKernel {
     LSTMMKLDNNHandler handler(
         ctx,
         dev_ctx,
-        mkldnn_engine,
+        onednn_engine,
         ctx.GetPlace(),
         input,
         weight_h,
@@ -474,7 +475,7 @@ class FusionLSTMMKLDNNKernel : public framework::OpKernel {
 namespace ops = paddle::operators;
 REGISTER_OP_KERNEL(fusion_lstm,
                    MKLDNN,
-                   paddle::platform::CPUPlace,
+                   phi::CPUPlace,
                    ops::FusionLSTMMKLDNNKernel,
                    ops::FusionLSTMMKLDNNKernel,
                    ops::FusionLSTMMKLDNNKernel);
diff --git a/paddle/fluid/operators/fused/mkldnn/fusion_rnn_mkldnn.h b/paddle/fluid/operators/fused/mkldnn/fusion_rnn_mkldnn.h
index 51b2d9c995..b45af1b4bc 100644
--- a/paddle/fluid/operators/fused/mkldnn/fusion_rnn_mkldnn.h
+++ b/paddle/fluid/operators/fused/mkldnn/fusion_rnn_mkldnn.h
@@ -14,7 +14,8 @@ limitations under the License. */

 #pragma once

-#include "paddle/fluid/platform/mkldnn_reuse.h"
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/phi/backends/onednn/onednn_reuse.h"

 namespace paddle {
 namespace operators {
@@ -22,13 +23,14 @@ namespace operators {
 using phi::funcs::CreateKey;
 using phi::funcs::OneDNNGetDataType;
 using phi::funcs::RNNReorderType;
+using OneDNNMemoryFormat = dnnl::memory::format_tag;

 template
 class RNNMKLDNNHandler : public phi::funcs::OneDNNHandlerT {
  public:
   RNNMKLDNNHandler(const paddle::framework::ExecutionContext& ctx,
                    const phi::OneDNNContext& dev_ctx,
-                   const dnnl::engine mkldnn_engine,
+                   const dnnl::engine onednn_engine,
                    platform::Place cpu_place,
                    const phi::DenseTensor* input,
                    const phi::DenseTensor* weight_h,
diff --git a/paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc b/paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc
index a0cc218925..3ce6d18bde 100644
--- a/paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc
+++ b/paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc
@@ -18,10 +18,10 @@ limitations under the License. */
*/ #include "dnnl.hpp" // NOLINT #include "paddle/fluid/framework/mixed_vector.h" +#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/operators/fused/multi_gru_op.h" -#include "paddle/fluid/platform/errors.h" -#include "paddle/fluid/platform/mkldnn_reuse.h" +#include "paddle/phi/backends/onednn/onednn_reuse.h" namespace paddle { namespace operators { @@ -31,6 +31,7 @@ using phi::funcs::OneDNNGetDataType; using phi::funcs::OneDNNMemDesc; using Direction = dnnl::rnn_direction; using phi::OneDNNContext; +using OneDNNMemoryFormat = dnnl::memory::format_tag; namespace { @@ -721,6 +722,6 @@ class MultiGRUMKLDNNKernel : public framework::OpKernel { namespace ops = paddle::operators; REGISTER_OP_KERNEL(multi_gru, MKLDNN, - paddle::platform::CPUPlace, + phi::CPUPlace, ops::MultiGRUMKLDNNKernel, ops::MultiGRUMKLDNNKernel); diff --git a/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc index 146ee52fc6..6ed5aefd66 100644 --- a/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc @@ -14,11 +14,10 @@ limitations under the License. */ #include "paddle/fluid/operators/dequantize_op.h" -#include "paddle/fluid/framework/data_layout_transform.h" #include "paddle/fluid/framework/tensor.h" -#include "paddle/fluid/platform/errors.h" #include "paddle/fluid/platform/mkldnn_helper.h" -#include "paddle/fluid/platform/mkldnn_reuse.h" +#include "paddle/phi/backends/onednn/onednn_reuse.h" +#include "paddle/phi/core/errors.h" namespace paddle { namespace operators { @@ -39,11 +38,11 @@ class DeQuantOpKernel : public framework::OpKernel { auto* out = ctx.Output("Output"); PADDLE_ENFORCE(quantization_scale != 0.0f, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Dequantization scale must be different than 0.0f")); PADDLE_ENFORCE(quantization_shift <= 255 && quantization_shift >= 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Dequantization shift must be lower or equal to ", "255 and greater or equal to 0, but got %f", quantization_shift)); @@ -91,7 +90,7 @@ namespace ops = paddle::operators; REGISTER_OP_KERNEL(dequantize, MKLDNN, - ::paddle::platform::CPUPlace, + ::phi::CPUPlace, ops::DeQuantOpKernel, ops::DeQuantOpKernel, ops::DeQuantOpKernel); diff --git a/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc index fa376cd45e..31b4ca34a2 100644 --- a/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc @@ -14,9 +14,10 @@ limitations under the License. 
*/ #include +#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/fc_op.h" #include "paddle/fluid/platform/mkldnn_helper.h" -#include "paddle/fluid/platform/mkldnn_reuse.h" +#include "paddle/phi/backends/onednn/onednn_reuse.h" namespace paddle { namespace operators { @@ -51,10 +52,10 @@ class FCMKLDNNHandler const phi::DenseTensor* bias, phi::DenseTensor* out, const int in_num_col_dims, - dnnl::engine mkldnn_engine, + dnnl::engine onednn_engine, platform::Place cpu_place) : phi::funcs::OneDNNHandlerNoCachingT( - mkldnn_engine, cpu_place), + onednn_engine, cpu_place), dev_ctx_(dev_ctx) { this->memory_key_ = ctx.InputName("W"); @@ -122,7 +123,7 @@ class FCMKLDNNHandler post_operations.append_eltwise( activation_scale, dnnl::algorithm::eltwise_relu, 0.0f, 0.0f); } - platform::AppendActivation(ctx, post_operations, activation_scale); + AppendActivation(ctx, post_operations, activation_scale); if (ctx.HasAttr("fused_output_scale")) { float scale_alpha = ctx.Attr("fused_output_scale"); @@ -154,6 +155,59 @@ class FCMKLDNNHandler } } + void AppendActivation(const ExecutionContext& ctx, + dnnl::post_ops& post_ops, // NOLINT + float activation_scale = 1.0f) { + const auto invalid_attribute = + ctx.HasAttr("fuse_activation") + ? ctx.Attr("fuse_activation").empty() + : true; + if (invalid_attribute) return; + + const auto fuse_activation = ctx.Attr("fuse_activation"); + const auto fuse_alpha = + ctx.HasAttr("fuse_alpha") ? ctx.Attr("fuse_alpha") : 0.0f; + const auto fuse_beta = + ctx.HasAttr("fuse_beta") ? ctx.Attr("fuse_beta") : 0.0f; + + if (fuse_activation == "hard_sigmoid") { + post_ops.append_eltwise(activation_scale, + dnnl::algorithm::eltwise_linear, + fuse_alpha, + fuse_beta); + post_ops.append_eltwise( + activation_scale, dnnl::algorithm::eltwise_clip, 0.0f, 1.0f); + } else { + const std::unordered_map activation_map = { + {"abs", dnnl::algorithm::eltwise_abs}, + {"clip", dnnl::algorithm::eltwise_clip}, + {"gelu", dnnl::algorithm::eltwise_gelu_erf}, + {"gelu_erf", dnnl::algorithm::eltwise_gelu_erf}, + {"gelu_tanh", dnnl::algorithm::eltwise_gelu_tanh}, + {"hard_swish", dnnl::algorithm::eltwise_hardswish}, + {"leaky_relu", dnnl::algorithm::eltwise_relu}, + {"mish", dnnl::algorithm::eltwise_mish}, + {"relu", dnnl::algorithm::eltwise_relu}, + {"relu6", dnnl::algorithm::eltwise_bounded_relu}, + {"sigmoid", dnnl::algorithm::eltwise_logistic}, + {"sqrt", dnnl::algorithm::eltwise_sqrt}, + {"swish", dnnl::algorithm::eltwise_swish}, + {"tanh", dnnl::algorithm::eltwise_tanh}}; + + const auto& activation_type = activation_map.find(fuse_activation); + + PADDLE_ENFORCE_NE( + activation_type, + activation_map.end(), + platform::errors::InvalidArgument( + "Activation '%s' not found in oneDNN algorithms mapper", + fuse_activation)); + + post_ops.append_eltwise( + activation_scale, activation_type->second, fuse_alpha, fuse_beta); + } + } + // Correct output scale, to take into account scaling of input and weights // Since the data that comes out of input and weight multiplication is // scaled with its own scales, this data needs to be divided by @@ -396,10 +450,76 @@ class FCMKLDNNKernel : public framework::OpKernel { } } + void SetOutMemDescWithUnsqueeze2FuseSupport( + const framework::ExecutionContext& ctx, + phi::DenseTensor* out, + const dnnl::memory::desc& out_md) const { + const std::vector& fused_unsqueeze2_axes = + ctx.Attr>("fused_unsqueeze2_axes"); + const std::vector& op_tz = out_md.dims(); + std::vector unsqueezed_op_tz( + op_tz.size() + fused_unsqueeze2_axes.size(), 0); 
+ + for (const auto& axis : fused_unsqueeze2_axes) { + int positive_axis = axis < 0 ? unsqueezed_op_tz.size() + axis : axis; + unsqueezed_op_tz[positive_axis] = 1; + } + + int j = 0; + for (size_t i = 0; i < unsqueezed_op_tz.size(); ++i) { + if (unsqueezed_op_tz[i] == 0) { + unsqueezed_op_tz[i] = op_tz[j++]; + } + } + out->set_mem_desc(out_md.reshape(unsqueezed_op_tz)); + out->Resize(phi::make_ddim(unsqueezed_op_tz)); + } + + void SetOutMemDescWithReshape2FuseSupport( + const framework::ExecutionContext& ctx, + phi::DenseTensor* out, + const dnnl::memory::desc& out_md) const { + std::vector fused_reshape2_shape( + ctx.Attr>("fused_reshape2_shape").begin(), + ctx.Attr>("fused_reshape2_shape").end()); + + const int out_shape_numel = out->numel(); + const int new_shape_numel = std::accumulate(fused_reshape2_shape.begin(), + fused_reshape2_shape.end(), + 1, + std::multiplies()); + + for (size_t i = 0; i < fused_reshape2_shape.size(); ++i) { + if (fused_reshape2_shape[i] == -1) { + fused_reshape2_shape[i] = -out_shape_numel / new_shape_numel; + break; + } + } + + out->set_mem_desc(out_md.reshape(fused_reshape2_shape)); + out->Resize(phi::make_ddim(fused_reshape2_shape)); + } + + void SetOutMemDescWithLogicalLayoutFusesSupport( + const framework::ExecutionContext& ctx, + phi::DenseTensor* out, + const dnnl::memory::desc& out_md) const { + if (ctx.HasAttr("fused_unsqueeze2_axes")) { + SetOutMemDescWithUnsqueeze2FuseSupport(ctx, out, out_md); + } else if (ctx.HasAttr("fused_reshape2_shape")) { + SetOutMemDescWithReshape2FuseSupport(ctx, out, out_md); + } else if (ctx.HasAttr("fused_squeeze2_axes")) { + out->set_mem_desc(out_md); + out->Resize(phi::make_ddim(out_md.dims())); + } else { + out->set_mem_desc(out_md); + } + } + template void RunKernel(const framework::ExecutionContext& ctx) const { const auto& dev_ctx = ctx.template device_context(); - const auto& mkldnn_engine = dev_ctx.GetEngine(); + const auto& onednn_engine = dev_ctx.GetEngine(); const auto* x = ctx.Input("Input"); const auto* weights = ctx.Input("W"); @@ -433,7 +553,7 @@ class FCMKLDNNKernel : public framework::OpKernel { inner_product_cache->inner_product_p); src_memory_p = std::make_shared(inner_product_cache->src_mem); - PrepareSrcMem(fc_p, src_memory_p, x, mkldnn_engine); + PrepareSrcMem(fc_p, src_memory_p, x, onednn_engine); weights_memory_p = std::make_shared(inner_product_cache->weights_mem); @@ -463,7 +583,7 @@ class FCMKLDNNKernel : public framework::OpKernel { bias, out, in_col_dims, - mkldnn_engine, + onednn_engine, ctx.GetPlace()); src_memory_p = handler.AcquireSrcMemoryWithReorder(x); @@ -504,7 +624,7 @@ class FCMKLDNNKernel : public framework::OpKernel { dev_ctx.SetBlob(cache_key, ip_cache); } - platform::SetOutMemDescWithLogicalLayoutFusesSupport( + SetOutMemDescWithLogicalLayoutFusesSupport( ctx, out, dst_memory_p->get_desc().reshape(phi::vectorize(out->dims()))); @@ -541,7 +661,7 @@ namespace ops = paddle::operators; REGISTER_OP_KERNEL(fc, MKLDNN, - ::paddle::platform::CPUPlace, + ::phi::CPUPlace, ops::FCMKLDNNKernel, ops::FCMKLDNNKernel, ops::FCMKLDNNKernel, diff --git a/paddle/fluid/operators/mkldnn/interpolate_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/interpolate_mkldnn_op.cc index a4f97c8654..908d0b7481 100644 --- a/paddle/fluid/operators/mkldnn/interpolate_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/interpolate_mkldnn_op.cc @@ -12,9 +12,8 @@ See the License for the specific language governing permissions and limitations under the License. 
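The two fuse-support helpers above are easiest to follow on concrete numbers. A stand-alone sketch (all shapes hypothetical) that reproduces both computations: marking unsqueezed axes with 1s and filling the remaining slots in order, and inferring a -1 entry in a fused reshape from the element counts:

#include <cstdio>
#include <functional>
#include <numeric>
#include <vector>

int main() {
  // Unsqueeze2 fuse: out_md dims {6, 8}, fused_unsqueeze2_axes {0, 2}.
  // Mark the new axes with 1s, then fill the rest in order:
  // {0, 0, 0, 0} -> {1, 0, 1, 0} -> {1, 6, 1, 8}.
  std::vector<int64_t> op_tz = {6, 8};
  std::vector<int64_t> axes = {0, 2};
  std::vector<int64_t> unsqueezed(op_tz.size() + axes.size(), 0);
  for (int64_t axis : axes) {
    int64_t pos =
        axis < 0 ? static_cast<int64_t>(unsqueezed.size()) + axis : axis;
    unsqueezed[pos] = 1;
  }
  size_t j = 0;
  for (auto& d : unsqueezed) {
    if (d == 0) d = op_tz[j++];
  }

  // Reshape2 fuse: infer the -1 entry. For an output of 48 elements and a
  // fused shape {4, -1}, new_shape_numel = 4 * (-1) = -4, so the -1 entry
  // becomes -48 / -4 = 12, exactly as in the loop above.
  std::vector<int> shape = {4, -1};
  const int out_numel = 48;
  const int new_numel =
      std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>());
  for (auto& d : shape) {
    if (d == -1) {
      d = -out_numel / new_numel;
      break;
    }
  }

  std::printf("unsqueezed: %lld %lld %lld %lld, inferred: %d\n",
              (long long)unsqueezed[0], (long long)unsqueezed[1],
              (long long)unsqueezed[2], (long long)unsqueezed[3], shape[1]);
  return 0;
}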
*/ -#include "paddle/fluid/framework/data_layout_transform.h" #include "paddle/fluid/operators/interpolate_op.h" -#include "paddle/fluid/platform/mkldnn_reuse.h" +#include "paddle/phi/backends/onednn/onednn_reuse.h" namespace paddle { namespace operators { @@ -25,6 +24,7 @@ using dnnl::reorder; using dnnl::resampling_forward; using dnnl::stream; using phi::DataLayout; +using OneDNNMemoryFormat = dnnl::memory::format_tag; template class InterpolateOneDNNHandler @@ -131,7 +131,7 @@ class InterpolateOneDNNKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { const auto& dev_ctx = ctx.template device_context(); - const auto& mkldnn_engine = dev_ctx.GetEngine(); + const auto& onednn_engine = dev_ctx.GetEngine(); const auto* x = ctx.Input("X"); auto* out = ctx.Output("Out"); @@ -146,7 +146,7 @@ class InterpolateOneDNNKernel : public framework::OpKernel { out->Resize(dim_out); InterpolateOneDNNHandler handler( - algo, mkldnn_engine, ctx.GetPlace(), x, out); + algo, onednn_engine, ctx.GetPlace(), x, out); auto src_memory_p = handler.AcquireSrcMemory(x); auto dst_memory_p = handler.AcquireDstMemory(out); @@ -170,11 +170,11 @@ namespace ops = paddle::operators; REGISTER_OP_KERNEL(nearest_interp, MKLDNN, - ::paddle::platform::CPUPlace, + ::phi::CPUPlace, ops::InterpolateOneDNNKernel, ops::InterpolateOneDNNKernel, ops::InterpolateOneDNNKernel); REGISTER_OP_KERNEL(bilinear_interp, MKLDNN, - ::paddle::platform::CPUPlace, + ::phi::CPUPlace, ops::InterpolateOneDNNKernel); diff --git a/paddle/fluid/operators/mkldnn/layer_norm_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/layer_norm_mkldnn_op.cc index df3804ab76..c41a2aabee 100644 --- a/paddle/fluid/operators/mkldnn/layer_norm_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/layer_norm_mkldnn_op.cc @@ -13,7 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/platform/mkldnn_reuse.h" + +#include "paddle/phi/backends/onednn/onednn_reuse.h" #include "paddle/phi/common/data_type.h" namespace paddle { @@ -99,7 +100,7 @@ class LayerNormMKLDNNOpKernel : public paddle::framework::OpKernel { const bool is_test = ctx.Attr("is_test"); auto& dev_ctx = ctx.template device_context(); - const auto& mkldnn_engine = dev_ctx.GetEngine(); + const auto& onednn_engine = dev_ctx.GetEngine(); auto src_tz = phi::vectorize(x->dims()); PADDLE_ENFORCE_EQ(begin_norm_axis, @@ -117,7 +118,7 @@ class LayerNormMKLDNNOpKernel : public paddle::framework::OpKernel { } LayerNormOneDNNHandler handler( - src_tz, epsilon, flags, is_test, x, mkldnn_engine, ctx.GetPlace()); + src_tz, epsilon, flags, is_test, x, onednn_engine, ctx.GetPlace()); auto src_memory = handler.AcquireSrcMemory(x); auto dst_memory = handler.AcquireDstMemory(out); @@ -159,6 +160,6 @@ class LayerNormMKLDNNOpKernel : public paddle::framework::OpKernel { namespace ops = paddle::operators; REGISTER_OP_KERNEL(layer_norm, MKLDNN, - ::paddle::platform::CPUPlace, + ::phi::CPUPlace, ops::LayerNormMKLDNNOpKernel, ops::LayerNormMKLDNNOpKernel); diff --git a/paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc index e10e868c46..74c13c0727 100644 --- a/paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc @@ -12,7 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
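Several files in this patch (multi_gru, interpolate, fusion_rnn) now carry a local OneDNNMemoryFormat alias in place of the global one removed from mkldnn_helper.h near the end of the diff. A minimal compilable sketch of what the alias is and how it is used:

#include <dnnl.hpp>

// The per-file alias this patch introduces; dnnl::memory::format_tag
// enumerates plain layouts such as nchw and nhwc plus the wildcard `any`.
using OneDNNMemoryFormat = dnnl::memory::format_tag;

int main() {
  OneDNNMemoryFormat fmt = OneDNNMemoryFormat::nchw;  // plain NCHW layout
  return fmt == dnnl::memory::format_tag::nchw ? 0 : 1;
}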
See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/platform/mkldnn_reuse.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/phi/backends/onednn/onednn_reuse.h" namespace paddle { namespace operators { @@ -25,13 +26,13 @@ class LRNOneDNNHandler OneDNNHandlerNoCachingT { public: LRNOneDNNHandler(const framework::ExecutionContext& ctx, - const dnnl::engine mkldnn_engine, + const dnnl::engine onednn_engine, platform::Place cpu_place, const phi::DenseTensor* input) : phi::funcs:: OneDNNHandlerNoCachingT( - mkldnn_engine, cpu_place) { + onednn_engine, cpu_place) { const int n = ctx.Attr("n"); // MKL-DNN implements LRN in a caffe way: // http://caffe.berkeleyvision.org/tutorial/layers/lrn.html @@ -56,14 +57,14 @@ class LRNOneDNNHandler } LRNOneDNNHandler(const framework::ExecutionContext& ctx, - const dnnl::engine mkldnn_engine, + const dnnl::engine onednn_engine, platform::Place cpu_place, const phi::DenseTensor* in_x, const phi::DenseTensor* out_grad, phi::DenseTensor* in_x_grad) : phi::funcs:: OneDNNHandlerNoCachingT( - mkldnn_engine, cpu_place) { + onednn_engine, cpu_place) { PADDLE_ENFORCE_EQ( ctx.Attr("is_test"), false, @@ -125,13 +126,13 @@ class LRNMKLDNNOpKernel : public paddle::framework::OpKernel { paddle::platform::errors::PreconditionNotMet( "Operator DNNL LRN must use CPUPlace")); auto& dev_ctx = ctx.template device_context(); - const auto& mkldnn_engine = dev_ctx.GetEngine(); + const auto& onednn_engine = dev_ctx.GetEngine(); auto x = ctx.Input("X"); auto out = ctx.Output("Out"); auto mid = ctx.Output("MidOut"); - LRNOneDNNHandler handler(ctx, mkldnn_engine, ctx.GetPlace(), x); + LRNOneDNNHandler handler(ctx, onednn_engine, ctx.GetPlace(), x); auto src_memory = handler.AcquireSrcMemory(x); auto dst_memory = handler.AcquireDstMemory(out); @@ -179,10 +180,10 @@ class LRNMKLDNNGradOpKernel : public paddle::framework::OpKernel { auto in_x_grad = ctx.Output(framework::GradVarName("X")); auto& dev_ctx = ctx.template device_context(); - const auto& mkldnn_engine = dev_ctx.GetEngine(); + const auto& onednn_engine = dev_ctx.GetEngine(); LRNOneDNNHandler handler( - ctx, mkldnn_engine, ctx.GetPlace(), in_x, out_grad, in_x_grad); + ctx, onednn_engine, ctx.GetPlace(), in_x, out_grad, in_x_grad); auto src_memory = handler.AcquireSrcMemory(in_x); auto workspace = handler.AcquireBackwardWorkspaceMemory(mid); @@ -207,11 +208,8 @@ class LRNMKLDNNGradOpKernel : public paddle::framework::OpKernel { namespace ops = paddle::operators; -REGISTER_OP_KERNEL(lrn, - MKLDNN, - paddle::platform::CPUPlace, - ops::LRNMKLDNNOpKernel); +REGISTER_OP_KERNEL(lrn, MKLDNN, phi::CPUPlace, ops::LRNMKLDNNOpKernel); REGISTER_OP_KERNEL(lrn_grad, MKLDNN, - paddle::platform::CPUPlace, + phi::CPUPlace, ops::LRNMKLDNNGradOpKernel); diff --git a/paddle/fluid/operators/mkldnn/matmul_v2_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/matmul_v2_mkldnn_op.cc index c2556b6bfc..7bf66bae93 100644 --- a/paddle/fluid/operators/mkldnn/matmul_v2_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/matmul_v2_mkldnn_op.cc @@ -14,14 +14,13 @@ limitations under the License. 
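For readers who have not seen the primitive behind LRNOneDNNHandler, here is a self-contained forward-inference sketch against the raw oneDNN 2.x API. The shape and the n/alpha/beta/k values are made up; the real kernel reads them from op attributes as shown above:

#include <dnnl.hpp>

#include <vector>

int main() {
  dnnl::engine eng(dnnl::engine::kind::cpu, 0);
  dnnl::stream s(eng);

  // Hypothetical NCHW input; the kernel derives this from x->dims().
  dnnl::memory::desc src_md({1, 16, 8, 8},
                            dnnl::memory::data_type::f32,
                            dnnl::memory::format_tag::nchw);
  const dnnl::memory::dim n = 5;  // the "n" attribute read by the handler
  const float alpha = 1e-4f, beta = 0.75f, k = 2.0f;

  auto desc = dnnl::lrn_forward::desc(dnnl::prop_kind::forward_inference,
                                      dnnl::algorithm::lrn_across_channels,
                                      src_md, n, alpha, beta, k);
  auto pd = dnnl::lrn_forward::primitive_desc(desc, eng);

  std::vector<float> data(1 * 16 * 8 * 8, 1.0f);
  dnnl::memory src(pd.src_desc(), eng, data.data());
  dnnl::memory dst(pd.dst_desc(), eng);

  dnnl::lrn_forward(pd).execute(s, {{DNNL_ARG_SRC, src}, {DNNL_ARG_DST, dst}});
  s.wait();
  return 0;
}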
*/ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/tensor.h" -#include "paddle/fluid/platform/mkldnn_reuse.h" +#include "paddle/phi/backends/onednn/onednn_reuse.h" #include "paddle/phi/kernels/funcs/blas/blas.h" namespace { using dnnl::memory; using paddle::framework::ExecutionContext; using paddle::framework::GradVarName; -using paddle::platform::MatMulV2MKLDNNHandler; using phi::OneDNNContext; using phi::vectorize; using phi::funcs::OneDNNGetDataType; @@ -82,6 +81,239 @@ phi::DDim GetDimForInput(const ExecutionContext &ctx, std::string input_name) { return input_dims; } +template +class MatMulV2MKLDNNHandler + : public phi::funcs::OneDNNHandlerNoCachingT { + public: + MatMulV2MKLDNNHandler(const ExecutionContext &ctx, + const dnnl::engine engine, + paddle::platform::Place cpu_place, + const std::vector &x_org_dims, + bool trans_x, + const std::vector &y_org_dims, + bool trans_y, + bool is_output_fused, + const std::vector &x_strides_override, + const std::vector &y_strides_override) + : phi::funcs::OneDNNHandlerNoCachingT(engine, + cpu_place) { + // M X K * K X N + std::vector x_dims(x_org_dims); + std::vector y_dims(y_org_dims); + + const int MB_idx = x_dims.size() - 3; + const int H_idx = x_dims.size() - 2; + const int W_idx = x_dims.size() - 1; + + if (trans_x) std::swap(x_dims[H_idx], x_dims[W_idx]); + if (trans_y) std::swap(y_dims[H_idx], y_dims[W_idx]); + + const memory::dim M = x_dims[H_idx]; + const memory::dim K = x_dims[W_idx]; + const memory::dim N = y_dims[W_idx]; + + std::vector x_strides(x_dims.size() - 3, 1); + std::vector y_strides(x_dims.size() - 3, 1); + std::vector out_strides(x_dims.size() - 3, 1); + std::vector out_ddims(x_dims.size() - 3, 1); + + x_strides.reserve(x_dims.size()); + y_strides.reserve(x_dims.size()); + out_strides.reserve(x_dims.size()); + + if (!x_strides_override.empty()) { + x_strides = x_strides_override; + } else { + if (!trans_x) { + x_strides.insert(x_strides.end(), {M * K, K, 1}); + } else { + x_strides.insert(x_strides.end(), {M * K, 1, M}); + } + } + + if (!y_strides_override.empty()) { + y_strides = y_strides_override; + } else { + if (!trans_y) { + y_strides.insert(y_strides.end(), {N * K, N, 1}); + } else { + y_strides.insert(y_strides.end(), {N * K, 1, K}); + } + } + + out_strides.insert(out_strides.end(), {M * N, N, 1}); + out_ddims.insert(out_ddims.end(), + {std::max(x_dims[MB_idx], y_dims[MB_idx]), M, N}); + + for (int i = x_dims.size() - 4; i >= 0; --i) { + out_ddims[i] = std::max(x_dims[i], y_dims[i]); + if (x_strides_override.empty()) { + x_strides[i] = x_dims[i + 1] * x_strides[i + 1]; + } + if (y_strides_override.empty()) { + y_strides[i] = y_dims[i + 1] * y_strides[i + 1]; + } + out_strides[i] = out_ddims[i + 1] * out_strides[i + 1]; + } + + // TODO(jczaja): Why not for int8?? + if (!phi::funcs::is_int8() && is_output_fused) { + out_strides = FakeTransposeStrides(out_ddims); + } + + auto x_md = + memory::desc(x_dims, phi::funcs::OneDNNGetDataType(), x_strides); + auto y_md = + memory::desc(y_dims, phi::funcs::OneDNNGetDataType(), y_strides); + auto out_md = memory::desc( + out_ddims, phi::funcs::OneDNNGetDataType(), out_strides); + + const dnnl::primitive_attr matmul_attrs = CreateMatmulAttrs(ctx); + + this->AcquireForwardPrimitiveDescriptor(matmul_attrs, x_md, y_md, out_md); + } + + void AppendActivation(const ExecutionContext &ctx, + dnnl::post_ops &post_ops, // NOLINT + float activation_scale = 1.0f) { + const auto invalid_attribute = + ctx.HasAttr("fuse_activation") + ? 
ctx.Attr("fuse_activation").empty() + : true; + if (invalid_attribute) return; + + const auto fuse_activation = ctx.Attr("fuse_activation"); + const auto fuse_alpha = + ctx.HasAttr("fuse_alpha") ? ctx.Attr("fuse_alpha") : 0.0f; + const auto fuse_beta = + ctx.HasAttr("fuse_beta") ? ctx.Attr("fuse_beta") : 0.0f; + + if (fuse_activation == "hard_sigmoid") { + post_ops.append_eltwise(activation_scale, + dnnl::algorithm::eltwise_linear, + fuse_alpha, + fuse_beta); + post_ops.append_eltwise( + activation_scale, dnnl::algorithm::eltwise_clip, 0.0f, 1.0f); + } else { + const std::unordered_map activation_map = { + {"abs", dnnl::algorithm::eltwise_abs}, + {"clip", dnnl::algorithm::eltwise_clip}, + {"gelu", dnnl::algorithm::eltwise_gelu_erf}, + {"gelu_erf", dnnl::algorithm::eltwise_gelu_erf}, + {"gelu_tanh", dnnl::algorithm::eltwise_gelu_tanh}, + {"hard_swish", dnnl::algorithm::eltwise_hardswish}, + {"leaky_relu", dnnl::algorithm::eltwise_relu}, + {"mish", dnnl::algorithm::eltwise_mish}, + {"relu", dnnl::algorithm::eltwise_relu}, + {"relu6", dnnl::algorithm::eltwise_bounded_relu}, + {"sigmoid", dnnl::algorithm::eltwise_logistic}, + {"sqrt", dnnl::algorithm::eltwise_sqrt}, + {"swish", dnnl::algorithm::eltwise_swish}, + {"tanh", dnnl::algorithm::eltwise_tanh}}; + + const auto &activation_type = activation_map.find(fuse_activation); + + PADDLE_ENFORCE_NE( + activation_type, + activation_map.end(), + phi::errors::InvalidArgument( + "Activation '%s' not found in oneDNN algorithms mapper", + fuse_activation)); + + post_ops.append_eltwise( + activation_scale, activation_type->second, fuse_alpha, fuse_beta); + } + } + + float ComputeOutputScale(const ExecutionContext &ctx) { + float alpha = ctx.HasAttr("alpha") ? ctx.Attr("alpha") : 1.0f; + if (ctx.HasAttr("Scale_x") && ctx.HasAttr("Scale_y") && + ctx.HasAttr("Scale_out")) { + float scale_x = ctx.Attr("Scale_x"); + float scale_y = ctx.Attr("Scale_y"); + bool force_fp32_out = ctx.HasAttr("force_fp32_output") + ? ctx.Attr("force_fp32_output") + : false; + float scale_out = force_fp32_out ? 
1.f : ctx.Attr<float>("Scale_out");
+      alpha *= scale_out / (scale_x * scale_y);
+    }
+    return alpha;
+  }
+
+  dnnl::primitive_attr CreateMatmulAttrs(const ExecutionContext &ctx) {
+    dnnl::primitive_attr matmul_attrs;
+    dnnl::post_ops post_operations;
+
+    float scale_out = ComputeOutputScale(ctx);
+    if (scale_out != 1.0f) {
+      matmul_attrs.set_output_scales(0, {scale_out});
+    }
+
+    if (ctx.HasInput("ResidualData")) {
+      auto *residual_data = ctx.Input<phi::DenseTensor>("ResidualData");
+      auto residual_data_tz = phi::vectorize(residual_data->dims());
+      auto residual_data_md = memory::desc(residual_data_tz,
+                                           phi::funcs::OneDNNGetDataType<OT>(),
+                                           dnnl::memory::format_tag::any);
+      post_operations.append_binary(dnnl::algorithm::binary_add,
+                                    residual_data_md);
+      if (ctx.HasAttr("Scale_in_eltwise")) {
+        float sum_scale = scale_out / ctx.Attr<float>("Scale_in_eltwise");
+        post_operations.append_sum(sum_scale);
+      }
+    }
+
+    AppendActivation(ctx, post_operations);
+
+    if (ctx.HasAttr("fused_output_scale")) {
+      float scale_alpha = ctx.Attr<float>("fused_output_scale");
+      post_operations.append_eltwise(
+          1.0, dnnl::algorithm::eltwise_linear, scale_alpha, 0.0f);
+    }
+
+    matmul_attrs.set_post_ops(post_operations);
+    return matmul_attrs;
+  }
+
+  std::vector<int64_t> FakeTransposeStrides(
+      const std::vector<int64_t> &matmul_out_dims) const {
+    // fuse matmul_v2 + transpose + reshape guarantees that the output is 4D
+    // and the transpose axes are: {0, 2, 1, 3}
+    std::vector<int64_t> transpose_axis = {0, 2, 1, 3};
+    std::vector<int64_t> fake_strides(transpose_axis.size());
+    int ndims = static_cast<int>(transpose_axis.size());
+
+    int total_stride = 1;
+
+    for (int i = ndims - 1; i >= 0; --i) {
+      fake_strides[transpose_axis[i]] = total_stride;
+      total_stride *= matmul_out_dims[transpose_axis[i]];
+    }
+
+    return fake_strides;
+  }
+
+  std::shared_ptr<dnnl::memory> AcquireWeightsMemory(
+      const phi::DenseTensor *input) {
+    const YT *input_data = input->data<YT>();
+    return this->AcquireMemoryFromPrimitive(
+        this->fwd_pd_->weights_desc(),
+        phi::funcs::to_void_cast<YT>(input_data));
+  }
+
+  std::shared_ptr<dnnl::memory> AcquireDstMemory(phi::DenseTensor *output) {
+    // We cannot use the base AcquireDstMemory, as it makes an allocation
+    // request based on the DST memory primitive size. This is fine in
+    // general, but in MatMul we have a primitive that covers only one batch
+    // of data and then shifts the pointer for every new batch. Hence the
+    // phi::DenseTensor size is bigger than the dst memory primitive size,
+    // so we would request less memory than is there and trigger an
+    // assertion. So, as there is no 'any' format here, we can leave the
+    // default size of phi::DenseTensor as computed in ComputeInferShape.
+    OT *ptr = output->mutable_data<OT>(this->place_);
+    return this->AcquireMemoryFromPrimitive(this->fwd_pd_->dst_desc(), ptr);
+  }
+};
+
 template <typename XT, typename YT, typename OT>
 class MatMulMKLDNNHandler
     : public phi::funcs::OneDNNHandlerNoCachingT<XT, dnnl::matmul> {
@@ -696,7 +928,7 @@ class MatMulGradMKLDNNKernel : public paddle::framework::OpKernel<T> {
 
 REGISTER_OP_KERNEL(matmul,
                    MKLDNN,
-                   ::paddle::platform::CPUPlace,
+                   ::phi::CPUPlace,
                    MatMulMKLDNNKernel<float>,
                    MatMulMKLDNNKernel<paddle::platform::bfloat16>,
                    MatMulMKLDNNKernel<int8_t>,
@@ -704,6 +936,6 @@ REGISTER_OP_KERNEL(matmul,
 
 REGISTER_OP_KERNEL(matmul_grad,
                    MKLDNN,
-                   ::paddle::platform::CPUPlace,
+                   ::phi::CPUPlace,
                    MatMulGradMKLDNNKernel<float>,
                    MatMulGradMKLDNNKernel<paddle::platform::bfloat16>);
diff --git a/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc
index c23f247c9d..d78b8b6e18 100644
--- a/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc
@@ -14,10 +14,10 @@ limitations under the License.
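FakeTransposeStrides above makes the fused transpose free by writing output strides as if the {0, 2, 1, 3} permutation had already happened. Worked on a hypothetical 4-D output of dims {2, 3, 4, 5}, the loop yields strides {60, 5, 15, 1}:

#include <cstdio>
#include <vector>

int main() {
  // Same algorithm as FakeTransposeStrides, on hypothetical dims {2, 3, 4, 5}.
  const std::vector<int64_t> dims = {2, 3, 4, 5};
  const std::vector<int64_t> axis = {0, 2, 1, 3};
  std::vector<int64_t> fake(4);
  int64_t total = 1;
  for (int i = 3; i >= 0; --i) {
    fake[axis[i]] = total;       // innermost permuted dim gets stride 1, ...
    total *= dims[axis[i]];      // ... and strides grow outward from there
  }
  // fake == {60, 5, 15, 1}: element (n, h, m, w) lands where a contiguous
  // {2, 4, 3, 5} tensor would put it, so the transpose costs nothing at
  // write time.
  for (auto s : fake) std::printf("%lld ", static_cast<long long>(s));
  return 0;
}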
*/ #include "paddle/fluid/operators/quantize_op.h" -#include "paddle/fluid/framework/data_layout_transform.h" +#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/platform/mkldnn_helper.h" -#include "paddle/fluid/platform/mkldnn_reuse.h" +#include "paddle/phi/backends/onednn/onednn_reuse.h" namespace paddle { namespace operators { @@ -106,5 +106,5 @@ namespace ops = paddle::operators; REGISTER_OP_KERNEL(quantize, MKLDNN, - ::paddle::platform::CPUPlace, + ::phi::CPUPlace, ops::QuantOpKernel); diff --git a/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc index a9408ad38e..0d6708988c 100644 --- a/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc @@ -14,7 +14,6 @@ limitations under the License. */ #include // NOLINT #include "dnnl.hpp" // NOLINT -#include "paddle/fluid/framework/data_layout_transform.h" #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/operators/requantize_op.h" #include "paddle/phi/backends/onednn/onednn_helper.h" @@ -115,7 +114,7 @@ namespace ops = paddle::operators; REGISTER_OP_KERNEL(requantize, MKLDNN, - ::paddle::platform::CPUPlace, + ::phi::CPUPlace, ops::ReQuantOpKernel, ops::ReQuantOpKernel, ops::ReQuantOpKernel); diff --git a/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc index 5a540b802e..6a6c4df4a5 100644 --- a/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc @@ -12,9 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ +#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/flatten_op.h" #include "paddle/fluid/operators/squeeze_op.h" -#include "paddle/fluid/platform/mkldnn_reuse.h" +#include "paddle/phi/backends/onednn/onednn_reuse.h" namespace { enum class ReshapeKernelOpName { @@ -357,7 +358,7 @@ namespace ops = paddle::operators; REGISTER_OP_KERNEL( squeeze, MKLDNN, - paddle::platform::CPUPlace, + phi::CPUPlace, ops::ReshapeMKLDNNKernel, ops::ReshapeMKLDNNKernel); @@ -365,7 +366,7 @@ REGISTER_OP_KERNEL( REGISTER_OP_KERNEL( squeeze_grad, MKLDNN, - paddle::platform::CPUPlace, + phi::CPUPlace, ops::ReshapeGradMKLDNNKernel, ops::ReshapeGradMKLDNNKernel); @@ -373,7 +374,7 @@ REGISTER_OP_KERNEL( REGISTER_OP_KERNEL( reshape, MKLDNN, - paddle::platform::CPUPlace, + phi::CPUPlace, ops::ReshapeMKLDNNKernel, ops::ReshapeMKLDNNKernel); @@ -381,7 +382,7 @@ REGISTER_OP_KERNEL( REGISTER_OP_KERNEL( reshape_grad, MKLDNN, - paddle::platform::CPUPlace, + phi::CPUPlace, ops::ReshapeGradMKLDNNKernel, ops::ReshapeGradMKLDNNKernel); @@ -389,7 +390,7 @@ REGISTER_OP_KERNEL( REGISTER_OP_KERNEL( reshape2_grad, MKLDNN, - paddle::platform::CPUPlace, + phi::CPUPlace, ops::ReshapeGradMKLDNNKernel, ops::ReshapeGradMKLDNNKernel); @@ -397,7 +398,7 @@ REGISTER_OP_KERNEL( REGISTER_OP_KERNEL( flatten, MKLDNN, - paddle::platform::CPUPlace, + phi::CPUPlace, ops::ReshapeMKLDNNKernel, ops::ReshapeMKLDNNKernel); @@ -405,7 +406,7 @@ REGISTER_OP_KERNEL( REGISTER_OP_KERNEL( flatten_grad, MKLDNN, - paddle::platform::CPUPlace, + phi::CPUPlace, ops::ReshapeGradMKLDNNKernel, ops::ReshapeGradMKLDNNKernel); @@ -413,7 +414,7 @@ REGISTER_OP_KERNEL( REGISTER_OP_KERNEL( flatten2, MKLDNN, - paddle::platform::CPUPlace, + phi::CPUPlace, ops::ReshapeMKLDNNKernel, ops::ReshapeMKLDNNKernel); @@ -421,7 +422,7 @@ REGISTER_OP_KERNEL( REGISTER_OP_KERNEL( flatten2_grad, MKLDNN, - paddle::platform::CPUPlace, + phi::CPUPlace, ops::ReshapeGradMKLDNNKernel, ops::ReshapeGradMKLDNNKernel); diff --git a/paddle/fluid/operators/mkldnn/shuffle_channel_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/shuffle_channel_mkldnn_op.cc index 36ea07dfdd..783ef8bc50 100644 --- a/paddle/fluid/operators/mkldnn/shuffle_channel_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/shuffle_channel_mkldnn_op.cc @@ -12,7 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "paddle/fluid/platform/mkldnn_reuse.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/phi/backends/onednn/onednn_reuse.h" namespace paddle { namespace operators { @@ -38,7 +39,7 @@ class ShuffleChannelMKLDNNKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { const auto& dev_ctx = ctx.template device_context(); - const auto& mkldnn_engine = dev_ctx.GetEngine(); + const auto& onednn_engine = dev_ctx.GetEngine(); const auto* x = ctx.Input("X"); auto* out = ctx.Output("Out"); @@ -47,7 +48,7 @@ class ShuffleChannelMKLDNNKernel : public framework::OpKernel { const int group = x->dims()[1] / ctx.Attr("group"); ShuffleChannelMKLDNNHandler handler( - x, group, mkldnn_engine, ctx.GetPlace()); + x, group, onednn_engine, ctx.GetPlace()); auto src_memory_p = handler.AcquireSrcMemory(x); auto dst_memory_p = handler.AcquireDstMemory(out); @@ -69,6 +70,6 @@ class ShuffleChannelMKLDNNKernel : public framework::OpKernel { namespace ops = paddle::operators; REGISTER_OP_KERNEL(shuffle_channel, MKLDNN, - paddle::platform::CPUPlace, + phi::CPUPlace, ops::ShuffleChannelMKLDNNKernel, ops::ShuffleChannelMKLDNNKernel); diff --git a/paddle/fluid/operators/mkldnn/test_mkldnn_caching.cc b/paddle/fluid/operators/mkldnn/test_mkldnn_caching.cc index cbf0b918e6..24be9e518d 100644 --- a/paddle/fluid/operators/mkldnn/test_mkldnn_caching.cc +++ b/paddle/fluid/operators/mkldnn/test_mkldnn_caching.cc @@ -22,9 +22,8 @@ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/scope.h" -#include "paddle/fluid/platform/device_context.h" -#include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/platform/place.h" +#include "paddle/phi/common/place.h" +#include "paddle/phi/core/enforce.h" #include "paddle/phi/core/kernel_registry.h" USE_OP_ITSELF(elementwise_add); @@ -51,7 +50,7 @@ class CacheTester { CacheTester() { // Clear oneDNN cache auto &pool = platform::DeviceContextPool::Instance(); - platform::CPUPlace place; + phi::CPUPlace place; onednn_dev_ctx_ = dynamic_cast(pool.Get(place)); onednn_dev_ctx_->ResetBlobMap(nullptr); } @@ -140,7 +139,7 @@ void RunOperator(const platform::Place &place, TEST(test_conv2d_reuse_cache, cpu_place) { framework::DDim dims({1, 16, 32, 64}); - platform::CPUPlace p; + phi::CPUPlace p; CacheTester ct; RunOperator(p, "conv2d", dims, "input_signal"); RunOperator(p, "conv2d", dims, "input_signal"); @@ -152,7 +151,7 @@ TEST(test_conv2d_reuse_cache, cpu_place) { TEST(test_conv2d_noreuse_cache, cpu_place) { framework::DDim dims({1, 16, 32, 64}); - platform::CPUPlace p; + phi::CPUPlace p; CacheTester ct; RunOperator(p, "conv2d", dims, "input_signal"); RunOperator(p, "conv2d", dims, "input_signal2"); diff --git a/paddle/fluid/operators/mkldnn/test_mkldnn_op_inplace.cc b/paddle/fluid/operators/mkldnn/test_mkldnn_op_inplace.cc index 2c8ef7f098..4beb314fe6 100644 --- a/paddle/fluid/operators/mkldnn/test_mkldnn_op_inplace.cc +++ b/paddle/fluid/operators/mkldnn/test_mkldnn_op_inplace.cc @@ -22,9 +22,8 @@ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/scope.h" -#include "paddle/fluid/platform/device_context.h" -#include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/platform/place.h" +#include "paddle/phi/common/place.h" +#include "paddle/phi/core/enforce.h" #include "paddle/phi/core/kernel_registry.h" USE_OP_ITSELF(elementwise_add); @@ -137,13 +136,13 
@@ bool TestMain(const platform::Place &place, TEST(test_softmax_inplace, cpu_place) { framework::DDim dims({32, 64}); - platform::CPUPlace p; + phi::CPUPlace p; ASSERT_TRUE(TestMain(p, "softmax", dims, 1)); } TEST(test_relu_inplace, cpu_place) { framework::DDim dims({1, 12, 20, 20}); - platform::CPUPlace p; + phi::CPUPlace p; ASSERT_TRUE(TestMain(p, "relu", dims, 1)); } diff --git a/paddle/fluid/operators/mkldnn/test_mkldnn_op_nhwc.cc b/paddle/fluid/operators/mkldnn/test_mkldnn_op_nhwc.cc index 6b371e2317..456c683108 100644 --- a/paddle/fluid/operators/mkldnn/test_mkldnn_op_nhwc.cc +++ b/paddle/fluid/operators/mkldnn/test_mkldnn_op_nhwc.cc @@ -22,9 +22,8 @@ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/scope.h" -#include "paddle/fluid/platform/device_context.h" -#include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/platform/place.h" +#include "paddle/phi/common/place.h" +#include "paddle/phi/core/enforce.h" #include "paddle/phi/core/kernel_registry.h" USE_OP_ITSELF(pool2d); @@ -53,7 +52,7 @@ struct InputVars { TEST(test_pool2d_transpose_nhwc, cpu_place) { framework::DDim dims({1, 4, 8, 512}); // NHWC shape framework::DDim expected_dims({1, 7, 512, 3}); // NHWC expected shape - platform::CPUPlace p; + phi::CPUPlace p; framework::Scope scope; InputVars input_name = {"x", scope.Var("x")->GetMutable()}; @@ -109,7 +108,7 @@ TEST(test_pool2d_transpose_nhwc, cpu_place) { TEST(test_pool2d_relu_relu_nhwc, cpu_place) { framework::DDim dims({1, 4, 8, 512}); // NHWC shape framework::DDim expected_dims({1, 512, 3, 7}); // NCHW expected shape - platform::CPUPlace p; + phi::CPUPlace p; framework::Scope scope; InputVars input_name = {"x", scope.Var("x")->GetMutable()}; @@ -172,7 +171,7 @@ TEST(test_pool2d_relu_relu_nhwc, cpu_place) { TEST(test_pool2d_shape_nhwc, cpu_place) { framework::DDim dims({1, 4, 8, 512}); // NHWC shape std::vector expected_dims{1, 3, 7, 512}; // NHWC expected shape - platform::CPUPlace p; + phi::CPUPlace p; framework::Scope scope; InputVars input_name = {"x", scope.Var("x")->GetMutable()}; @@ -227,7 +226,7 @@ TEST(test_pool2d_shape_nhwc, cpu_place) { TEST(test_pool2d_crop_nhwc, cpu_place) { framework::DDim dims({1, 4, 8, 512}); // NHWC shape framework::DDim expected_dims({1, 3, 7, 512}); // NCHW expected shape - platform::CPUPlace p; + phi::CPUPlace p; framework::Scope scope; InputVars input_name = {"x", scope.Var("x")->GetMutable()}; diff --git a/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc index aa8c848791..8b947b4679 100644 --- a/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc @@ -12,10 +12,9 @@ See the License for the specific language governing permissions and limitations under the License. 
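These test changes compile unmodified because fluid's place types had already become aliases onto phi, so the patch swaps spellings, not types. A compile-time sketch of that claim, with the alias reproduced locally (a hypothetical minimal version of what paddle/fluid/platform/place.h does) so the snippet stands alone:

#include <type_traits>

namespace phi {
struct CPUPlace {};
}  // namespace phi
namespace paddle {
namespace platform {
using CPUPlace = phi::CPUPlace;  // the alias assumed here
}  // namespace platform
}  // namespace paddle

static_assert(std::is_same<paddle::platform::CPUPlace, phi::CPUPlace>::value,
              "the two spellings name one type, so the swap is textual");

int main() { return 0; }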
*/ -#include "paddle/fluid/framework/data_layout_transform.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/memory/malloc.h" -#include "paddle/fluid/platform/mkldnn_reuse.h" +#include "paddle/phi/backends/onednn/onednn_reuse.h" namespace paddle { namespace operators { @@ -166,10 +165,10 @@ namespace ops = paddle::operators; REGISTER_OP_KERNEL(transpose, MKLDNN, - ::paddle::platform::CPUPlace, + ::phi::CPUPlace, ops::TransposeMKLDNNOpKernel); REGISTER_OP_KERNEL(transpose_grad, MKLDNN, - ::paddle::platform::CPUPlace, + ::phi::CPUPlace, ops::TransposeMKLDNNGradOpKernel); diff --git a/paddle/fluid/operators/pool_op.h b/paddle/fluid/operators/pool_op.h index fd2c0ce15b..9bb7572c10 100644 --- a/paddle/fluid/operators/pool_op.h +++ b/paddle/fluid/operators/pool_op.h @@ -12,9 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -// NOTE(Ruibiao): Difficult to remove code from this header file because too -// many files rely on it through "mkldnn_reuse.h" - #pragma once #include "paddle/fluid/framework/op_registry.h" diff --git a/paddle/fluid/platform/mkldnn_helper.h b/paddle/fluid/platform/mkldnn_helper.h index a7f15dc129..86a8a24e84 100644 --- a/paddle/fluid/platform/mkldnn_helper.h +++ b/paddle/fluid/platform/mkldnn_helper.h @@ -23,11 +23,10 @@ limitations under the License. */ #include "dnnl.hpp" // NOLINT #include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/platform/place.h" #include "paddle/phi/backends/onednn/onednn_helper.h" +#include "paddle/phi/common/place.h" namespace paddle { #ifdef PADDLE_WITH_MKLDNN -using OneDNNMemoryFormat = dnnl::memory::format_tag; using phi::OneDNNContext; #endif namespace platform { diff --git a/paddle/fluid/platform/mkldnn_reuse.h b/paddle/fluid/platform/mkldnn_reuse.h deleted file mode 100644 index 0142fa2afd..0000000000 --- a/paddle/fluid/platform/mkldnn_reuse.h +++ /dev/null @@ -1,336 +0,0 @@ -/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ -#pragma once - -#include -#include -#include -#include -#include -#include - -#include "paddle/fluid/framework/data_layout_transform.h" -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/operators/pool_op.h" -#include "paddle/fluid/platform/mkldnn_helper.h" -#include "paddle/fluid/platform/place.h" -#include "paddle/phi/backends/onednn/onednn_reuse.h" - -namespace paddle { -namespace platform { - -using memory = dnnl::memory; - -static void AppendActivation(const framework::ExecutionContext& ctx, - dnnl::post_ops& post_ops, // NOLINT - float activation_scale = 1.0f) { - const auto invalid_attribute = - ctx.HasAttr("fuse_activation") - ? ctx.Attr("fuse_activation").empty() - : true; - if (invalid_attribute) return; - - const auto fuse_activation = ctx.Attr("fuse_activation"); - const auto fuse_alpha = - ctx.HasAttr("fuse_alpha") ? 
ctx.Attr("fuse_alpha") : 0.0f; - const auto fuse_beta = - ctx.HasAttr("fuse_beta") ? ctx.Attr("fuse_beta") : 0.0f; - - if (fuse_activation == "hard_sigmoid") { - post_ops.append_eltwise(activation_scale, - dnnl::algorithm::eltwise_linear, - fuse_alpha, - fuse_beta); - post_ops.append_eltwise( - activation_scale, dnnl::algorithm::eltwise_clip, 0.0f, 1.0f); - } else { - const std::unordered_map activation_map = { - {"abs", dnnl::algorithm::eltwise_abs}, - {"clip", dnnl::algorithm::eltwise_clip}, - {"gelu", dnnl::algorithm::eltwise_gelu_erf}, - {"gelu_erf", dnnl::algorithm::eltwise_gelu_erf}, - {"gelu_tanh", dnnl::algorithm::eltwise_gelu_tanh}, - {"hard_swish", dnnl::algorithm::eltwise_hardswish}, - {"leaky_relu", dnnl::algorithm::eltwise_relu}, - {"mish", dnnl::algorithm::eltwise_mish}, - {"relu", dnnl::algorithm::eltwise_relu}, - {"relu6", dnnl::algorithm::eltwise_bounded_relu}, - {"sigmoid", dnnl::algorithm::eltwise_logistic}, - {"sqrt", dnnl::algorithm::eltwise_sqrt}, - {"swish", dnnl::algorithm::eltwise_swish}, - {"tanh", dnnl::algorithm::eltwise_tanh}}; - - const auto& activation_type = activation_map.find(fuse_activation); - - PADDLE_ENFORCE_NE( - activation_type, - activation_map.end(), - platform::errors::InvalidArgument( - "Activation '%s' not found in oneDNN algorithms mapper", - fuse_activation)); - - post_ops.append_eltwise( - activation_scale, activation_type->second, fuse_alpha, fuse_beta); - } -} - -static void SetOutMemDescWithUnsqueeze2FuseSupport( - const framework::ExecutionContext& ctx, - phi::DenseTensor* out, - const dnnl::memory::desc& out_md) { - const std::vector& fused_unsqueeze2_axes = - ctx.Attr>("fused_unsqueeze2_axes"); - const std::vector& op_tz = out_md.dims(); - std::vector unsqueezed_op_tz( - op_tz.size() + fused_unsqueeze2_axes.size(), 0); - - for (const auto& axis : fused_unsqueeze2_axes) { - int positive_axis = axis < 0 ? 
unsqueezed_op_tz.size() + axis : axis; - unsqueezed_op_tz[positive_axis] = 1; - } - - int j = 0; - for (size_t i = 0; i < unsqueezed_op_tz.size(); ++i) { - if (unsqueezed_op_tz[i] == 0) { - unsqueezed_op_tz[i] = op_tz[j++]; - } - } - out->set_mem_desc(out_md.reshape(unsqueezed_op_tz)); - out->Resize(phi::make_ddim(unsqueezed_op_tz)); -} - -static void SetOutMemDescWithReshape2FuseSupport( - const framework::ExecutionContext& ctx, - phi::DenseTensor* out, - const dnnl::memory::desc& out_md) { - std::vector fused_reshape2_shape( - ctx.Attr>("fused_reshape2_shape").begin(), - ctx.Attr>("fused_reshape2_shape").end()); - - const int out_shape_numel = out->numel(); - const int new_shape_numel = std::accumulate(fused_reshape2_shape.begin(), - fused_reshape2_shape.end(), - 1, - std::multiplies()); - - for (size_t i = 0; i < fused_reshape2_shape.size(); ++i) { - if (fused_reshape2_shape[i] == -1) { - fused_reshape2_shape[i] = -out_shape_numel / new_shape_numel; - break; - } - } - - out->set_mem_desc(out_md.reshape(fused_reshape2_shape)); - out->Resize(phi::make_ddim(fused_reshape2_shape)); -} - -static void SetOutMemDescWithLogicalLayoutFusesSupport( - const framework::ExecutionContext& ctx, - phi::DenseTensor* out, - const dnnl::memory::desc& out_md) { - if (ctx.HasAttr("fused_unsqueeze2_axes")) { - SetOutMemDescWithUnsqueeze2FuseSupport(ctx, out, out_md); - } else if (ctx.HasAttr("fused_reshape2_shape")) { - SetOutMemDescWithReshape2FuseSupport(ctx, out, out_md); - } else if (ctx.HasAttr("fused_squeeze2_axes")) { - out->set_mem_desc(out_md); - out->Resize(phi::make_ddim(out_md.dims())); - } else { - out->set_mem_desc(out_md); - } -} - -template -class MatMulV2MKLDNNHandler - : public phi::funcs::OneDNNHandlerNoCachingT { - public: - MatMulV2MKLDNNHandler(const framework::ExecutionContext& ctx, - const dnnl::engine engine, - paddle::platform::Place cpu_place, - const std::vector& x_org_dims, - bool trans_x, - const std::vector& y_org_dims, - bool trans_y, - bool is_output_fused, - const std::vector& x_strides_override, - const std::vector& y_strides_override) - : phi::funcs::OneDNNHandlerNoCachingT(engine, - cpu_place) { - // M X K * K X N - std::vector x_dims(x_org_dims); - std::vector y_dims(y_org_dims); - - const int MB_idx = x_dims.size() - 3; - const int H_idx = x_dims.size() - 2; - const int W_idx = x_dims.size() - 1; - - if (trans_x) std::swap(x_dims[H_idx], x_dims[W_idx]); - if (trans_y) std::swap(y_dims[H_idx], y_dims[W_idx]); - - const memory::dim M = x_dims[H_idx]; - const memory::dim K = x_dims[W_idx]; - const memory::dim N = y_dims[W_idx]; - - std::vector x_strides(x_dims.size() - 3, 1); - std::vector y_strides(x_dims.size() - 3, 1); - std::vector out_strides(x_dims.size() - 3, 1); - std::vector out_ddims(x_dims.size() - 3, 1); - - x_strides.reserve(x_dims.size()); - y_strides.reserve(x_dims.size()); - out_strides.reserve(x_dims.size()); - - if (!x_strides_override.empty()) { - x_strides = x_strides_override; - } else { - if (!trans_x) { - x_strides.insert(x_strides.end(), {M * K, K, 1}); - } else { - x_strides.insert(x_strides.end(), {M * K, 1, M}); - } - } - - if (!y_strides_override.empty()) { - y_strides = y_strides_override; - } else { - if (!trans_y) { - y_strides.insert(y_strides.end(), {N * K, N, 1}); - } else { - y_strides.insert(y_strides.end(), {N * K, 1, K}); - } - } - - out_strides.insert(out_strides.end(), {M * N, N, 1}); - out_ddims.insert(out_ddims.end(), - {std::max(x_dims[MB_idx], y_dims[MB_idx]), M, N}); - - for (int i = x_dims.size() - 4; i >= 0; --i) { - 
out_ddims[i] = std::max(x_dims[i], y_dims[i]); - if (x_strides_override.empty()) { - x_strides[i] = x_dims[i + 1] * x_strides[i + 1]; - } - if (y_strides_override.empty()) { - y_strides[i] = y_dims[i + 1] * y_strides[i + 1]; - } - out_strides[i] = out_ddims[i + 1] * out_strides[i + 1]; - } - - // TODO(jczaja): Why not for int8?? - if (!phi::funcs::is_int8() && is_output_fused) { - out_strides = FakeTransposeStrides(out_ddims); - } - - auto x_md = - memory::desc(x_dims, phi::funcs::OneDNNGetDataType(), x_strides); - auto y_md = - memory::desc(y_dims, phi::funcs::OneDNNGetDataType(), y_strides); - auto out_md = memory::desc( - out_ddims, phi::funcs::OneDNNGetDataType(), out_strides); - - const dnnl::primitive_attr matmul_attrs = CreateMatmulAttrs(ctx); - - this->AcquireForwardPrimitiveDescriptor(matmul_attrs, x_md, y_md, out_md); - } - - float ComputeOutputScale(const framework::ExecutionContext& ctx) { - float alpha = ctx.HasAttr("alpha") ? ctx.Attr("alpha") : 1.0f; - if (ctx.HasAttr("Scale_x") && ctx.HasAttr("Scale_y") && - ctx.HasAttr("Scale_out")) { - float scale_x = ctx.Attr("Scale_x"); - float scale_y = ctx.Attr("Scale_y"); - bool force_fp32_out = ctx.HasAttr("force_fp32_output") - ? ctx.Attr("force_fp32_output") - : false; - float scale_out = force_fp32_out ? 1.f : ctx.Attr("Scale_out"); - alpha *= scale_out / (scale_x * scale_y); - } - return alpha; - } - - dnnl::primitive_attr CreateMatmulAttrs( - const framework::ExecutionContext& ctx) { - dnnl::primitive_attr matmul_attrs; - dnnl::post_ops post_operations; - - float scale_out = ComputeOutputScale(ctx); - if (scale_out != 1.0f) { - matmul_attrs.set_output_scales(0, {scale_out}); - } - - if (ctx.HasInput("ResidualData")) { - auto* residual_data = ctx.Input("ResidualData"); - auto residual_data_tz = phi::vectorize(residual_data->dims()); - auto residual_data_md = memory::desc(residual_data_tz, - phi::funcs::OneDNNGetDataType(), - dnnl::memory::format_tag::any); - post_operations.append_binary(dnnl::algorithm::binary_add, - residual_data_md); - if (ctx.HasAttr("Scale_in_eltwise")) { - float sum_scale = scale_out / ctx.Attr("Scale_in_eltwise"); - post_operations.append_sum(sum_scale); - } - } - - AppendActivation(ctx, post_operations); - - if (ctx.HasAttr("fused_output_scale")) { - float scale_alpha = ctx.Attr("fused_output_scale"); - post_operations.append_eltwise( - 1.0, dnnl::algorithm::eltwise_linear, scale_alpha, 0.0f); - } - - matmul_attrs.set_post_ops(post_operations); - return matmul_attrs; - } - - std::vector FakeTransposeStrides( - const std::vector& matmul_out_dims) const { - // fuse matmul_v2 + transpose + reshape guarantees that output is 4D and - // transpose axis are: {0, 2, 1, 3} - std::vector transpose_axis = {0, 2, 1, 3}; - std::vector fake_strides(transpose_axis.size()); - int ndims = static_cast(transpose_axis.size()); - - int total_stride = 1; - - for (int i = ndims - 1; i >= 0; --i) { - fake_strides[transpose_axis[i]] = total_stride; - total_stride *= matmul_out_dims[transpose_axis[i]]; - } - - return fake_strides; - } - - std::shared_ptr AcquireWeightsMemory(const phi::DenseTensor* input) { - const YT* input_data = input->data(); - return this->AcquireMemoryFromPrimitive( - this->fwd_pd_->weights_desc(), - phi::funcs::to_void_cast(input_data)); - } - - std::shared_ptr AcquireDstMemory(phi::DenseTensor* output) { - // We cannot use base AcquireDstMemory as it makes an allocation request - // base on DST memory primitive size. 
This is fine in general, but in MatMul - // we have primitive that covers only one batch of Data and then shift - // pointer for every new batch. Hence phi::DenseTensor size is bigger that - // dst memory primitive size. So would we request less memory that is there - // and it triggers an assertion. So as there is no 'any' format here we can - // leave default size of phi::DenseTensor as computed in ComputeInferShape - OT* ptr = output->mutable_data(this->place_); - return this->AcquireMemoryFromPrimitive(this->fwd_pd_->dst_desc(), ptr); - } -}; - -} // namespace platform -} // namespace paddle diff --git a/paddle/phi/kernels/onednn/conv_function.h b/paddle/phi/kernels/onednn/conv_function.h index 4b3c4d5889..eaa2f41d64 100644 --- a/paddle/phi/kernels/onednn/conv_function.h +++ b/paddle/phi/kernels/onednn/conv_function.h @@ -178,7 +178,7 @@ void ComputeINT8(const OneDNNContext& dev_ctx, const std::string& unique_name = dev_ctx.GetInputsName("Input")[0] + dev_ctx.GetInputsName("Filter")[0]; PD_VISIT_FLOAT_AND_INT8_TYPES( - filter->dtype(), "ConvMKLDNNHandlerT", ([&] { + filter->dtype(), "ConvOneDNNHandlerT", ([&] { onednn::ConvOneDNNHandlerT handler(dev_ctx, onednn_engine, dev_ctx.GetPlace(), diff --git a/paddle/phi/kernels/onednn/conv_handler.h b/paddle/phi/kernels/onednn/conv_handler.h index 0e99113594..113d54ce57 100644 --- a/paddle/phi/kernels/onednn/conv_handler.h +++ b/paddle/phi/kernels/onednn/conv_handler.h @@ -40,7 +40,7 @@ class ConvOneDNNHandlerT dnnl::convolution_backward_weights> { public: ConvOneDNNHandlerT(const OneDNNContext& dev_ctx, - const dnnl::engine mkldnn_engine, + const dnnl::engine onednn_engine, Place cpu_place, const phi::DenseTensor* input, const phi::DenseTensor* filter, @@ -63,7 +63,7 @@ class ConvOneDNNHandlerT dnnl::convolution_backward_data, dnnl::convolution_backward_weights>( dev_ctx, - mkldnn_engine, + onednn_engine, cpu_place, funcs::CreateKey( dev_ctx, phi::vectorize(input->dims()), unique_name)) { -- GitLab