diff --git a/paddle/fluid/framework/ir/mkldnn/batch_norm_act_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/batch_norm_act_fuse_pass.cc index daaef03f11010f3aadf9002900801781e2f342a6..230971a2dd286a39d3f24b58ec8ba3d867325dfe 100644 --- a/paddle/fluid/framework/ir/mkldnn/batch_norm_act_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/batch_norm_act_fuse_pass.cc @@ -16,8 +16,8 @@ #include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/op_version_registry.h" -#include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/string/pretty_log.h" +#include "paddle/phi/core/enforce.h" +#include "paddle/utils/string/pretty_log.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/mkldnn/batch_norm_act_fuse_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/batch_norm_act_fuse_pass_tester.cc index 9e989f343bb34aacd186925ada98e8f94eab3e7a..ab03c73adc44b90a63f1d75a3d79f1af99975f65 100644 --- a/paddle/fluid/framework/ir/mkldnn/batch_norm_act_fuse_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/batch_norm_act_fuse_pass_tester.cc @@ -19,7 +19,6 @@ #include "paddle/fluid/framework/op_desc.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/framework/program_desc.h" -#include "paddle/fluid/platform/errors.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.cc b/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.cc index 3fb7636f06fd0a1154b0bd4e6110738ce7766441..4e2c61ce7d91feacf1cba32f1a22d02698a2af72 100644 --- a/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.cc @@ -28,7 +28,7 @@ namespace ir { void ComputePropagateScalesMkldnnPass::GetTensorFromVector( const std::vector& data_v, phi::DenseTensor* tensor) const { const int size = static_cast(data_v.size()); - auto* data = tensor->mutable_data({size}, platform::CPUPlace()); + auto* data = tensor->mutable_data({size}, phi::CPUPlace()); for (int i = 0; i < size; i++) { data[i] = data_v[i]; } @@ -123,7 +123,7 @@ void ComputePropagateScalesMkldnnPass::ComputeVarScales( std::vector reshape_dims = {dims[0], volume}; tmp_tensor.Resize(phi::make_ddim(reshape_dims)); auto* weight_data = weight_tensor->data(); - auto* tmp_data = tmp_tensor.mutable_data(platform::CPUPlace()); + auto* tmp_data = tmp_tensor.mutable_data(phi::CPUPlace()); for (int i = 0; i < weight_tensor->numel(); i++) { tmp_data[i] = std::abs(weight_data[i]); } @@ -365,7 +365,7 @@ void ComputePropagateScalesMkldnnPass::UpdateScaleOpInOutScales( auto pair = iter->second; const auto tensor = pair.second; tmp_tensor.Resize(tensor.dims()); - auto* data = tmp_tensor.mutable_data(platform::CPUPlace()); + auto* data = tmp_tensor.mutable_data(phi::CPUPlace()); auto* src_data = tensor.data(); for (int i = 0; i < tensor.numel(); i++) { if (out_iter != var_quant_scales->end()) { diff --git a/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass_tester.cc index 39ecfd2c0e79a543053072bd032dbd253f9fd14e..fc4ca24b2ae639059aae9b589041977bdf877b8f 100644 --- a/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass_tester.cc @@ -17,7 +17,7 @@ #include "paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.h" 
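Reviewer note: the hunks in this patch lost their template arguments in transit (e.g. `mutable_data({size}, ...)` is really `mutable_data<T>(...)`). A minimal sketch of the migrated `GetTensorFromVector` as it presumably reads with the stripped `<float>`/`<int>` arguments restored; the element types are an assumption from context:

```cpp
#include <vector>
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/dense_tensor.h"

// Sketch of the migrated helper; <float>/<int> are assumed, restored from
// context. phi::CPUPlace replaces platform::CPUPlace throughout this PR.
void GetTensorFromVector(const std::vector<float>& data_v,
                         phi::DenseTensor* tensor) {
  const int size = static_cast<int>(data_v.size());
  auto* data = tensor->mutable_data<float>({size}, phi::CPUPlace());
  for (int i = 0; i < size; i++) {
    data[i] = data_v[i];
  }
}
```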
#include "paddle/fluid/framework/naive_executor.h" -#include "paddle/fluid/platform/place.h" +#include "paddle/phi/common/place.h" namespace paddle { namespace framework { @@ -119,7 +119,7 @@ class ComputePropagateScalesMkldnnPassTest : public testing::Test { const ProgramDesc& prog, Scope* scope, const std::initializer_list& variable_names) { - auto place = paddle::platform::CPUPlace(); + auto place = phi::CPUPlace(); NaiveExecutor exe{place}; exe.CreateVariables(prog, 0, true, scope); @@ -148,19 +148,19 @@ class ComputePropagateScalesMkldnnPassTest : public testing::Test { auto* wx_tensor = wx_var->GetMutable(); wx_tensor->Resize(phi::make_dim(wx.size(), wx[0].size())); for (size_t i = 0; i < wx.size(); i++) - std::copy(begin(wx[i]), - end(wx[i]), - wx_tensor->mutable_data(platform::CPUPlace()) + - i * wx[0].size()); + std::copy( + begin(wx[i]), + end(wx[i]), + wx_tensor->mutable_data(phi::CPUPlace()) + i * wx[0].size()); auto* wh_var = scope.FindVar(wh_var_names); auto* wh_tensor = wh_var->GetMutable(); wh_tensor->Resize(phi::make_dim(wh.size(), wh[0].size())); for (size_t i = 0; i < wh.size(); i++) - std::copy(begin(wh[i]), - end(wh[i]), - wh_tensor->mutable_data(platform::CPUPlace()) + - i * wh[0].size()); + std::copy( + begin(wh[i]), + end(wh[i]), + wh_tensor->mutable_data(phi::CPUPlace()) + i * wh[0].size()); if (type == "gru") { ComputeGruWeightScales( graph, &scope, wx_name, wh_name, &var_quant_scales); @@ -283,7 +283,7 @@ TEST_F(ComputePropagateScalesMkldnnPassTest, get_scales_function) { var_tensor.Resize(phi::make_dim(values.size(), 1)); std::copy(begin(values), end(values), - var_tensor.mutable_data(platform::CPUPlace())); + var_tensor.mutable_data(phi::CPUPlace())); std::vector results = GetScales(&var_tensor, 0); ASSERT_EQ(results.size(), std::size_t(1)); @@ -310,7 +310,7 @@ TEST_F(ComputePropagateScalesMkldnnPassTest, compute_var_scales) { weight_tensor->Resize(phi::make_dim(1, values.size())); std::copy(begin(values), end(values), - weight_tensor->mutable_data(platform::CPUPlace())); + weight_tensor->mutable_data(phi::CPUPlace())); auto max_val = *std::max_element(values.begin(), values.end()); @@ -338,7 +338,7 @@ TEST_F(ComputePropagateScalesMkldnnPassTest, update_relu_output_scales) { StringPairMap var_quant_scales; for (auto& var_name : conv_variable_names) { phi::DenseTensor tensor; - auto* data = tensor.mutable_data({1}, platform::CPUPlace()); + auto* data = tensor.mutable_data({1}, phi::CPUPlace()); data[0] = 10; auto pair = std::make_pair(false, tensor); var_quant_scales.insert(std::make_pair(var_name, pair)); diff --git a/paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.cc index a673aafadccfc4d759f0528c000d784e40e85357..5ac92e5bb05513be12f502c175258b39e34dd071 100644 --- a/paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.cc @@ -15,8 +15,8 @@ #include "paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.h" #include "paddle/fluid/framework/op_version_registry.h" -#include "paddle/fluid/platform/mkldnn_reuse.h" -#include "paddle/fluid/string/pretty_log.h" +#include "paddle/phi/backends/onednn/onednn_reuse.h" +#include "paddle/utils/string/pretty_log.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/mkldnn/conv_affine_channel_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/conv_affine_channel_mkldnn_fuse_pass.cc index 
5bf025af111a4cc69a8c486226695224c2d8b152..07851350cac9dc50ea4004b5474f5b3f208e69f4 100644 --- a/paddle/fluid/framework/ir/mkldnn/conv_affine_channel_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/conv_affine_channel_mkldnn_fuse_pass.cc @@ -80,7 +80,7 @@ void recompute_bias_and_weights(const Scope* scope, ac_bias_tensor.data(), ac_bias_tensor.numel(), 1); EigenVectorArrayMap eltwise_y_in_array( - eltwise_y_in_tensor->mutable_data(platform::CPUPlace()), + eltwise_y_in_tensor->mutable_data(phi::CPUPlace()), eltwise_y_in_tensor->numel(), 1); @@ -91,7 +91,7 @@ void recompute_bias_and_weights(const Scope* scope, scope->FindVar(conv_weight->Name())->GetMutable(); auto weights_shape = weights->dims(); auto weights_shape_2d = phi::flatten_to_2d(weights_shape, 1); - auto* weights_data = weights->mutable_data(platform::CPUPlace()); + auto* weights_data = weights->mutable_data(phi::CPUPlace()); EigenMatrixArrayMap weights_array_2d( weights_data, weights_shape_2d[0], weights_shape_2d[1]); @@ -233,7 +233,7 @@ void ConvAffineChannelFusePass::ApplyImpl(ir::Graph* graph) const { auto* eltwise_y_in_tensor = scope->Var(eltwise_y_in_node->Name())->GetMutable(); eltwise_y_in_tensor->Resize(ac_bias_tensor->dims()); - std::fill_n(eltwise_y_in_tensor->mutable_data(platform::CPUPlace()), + std::fill_n(eltwise_y_in_tensor->mutable_data(phi::CPUPlace()), eltwise_y_in_tensor->numel(), 0.0f); diff --git a/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.cc index 13cd87543160344eeae42ecad4249f8c798cad65..314991d3479fb215fa2ad8cc72b364740f9ff367 100644 --- a/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.cc @@ -19,8 +19,8 @@ #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_version_registry.h" -#include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/string/pretty_log.h" +#include "paddle/phi/core/enforce.h" +#include "paddle/utils/string/pretty_log.h" namespace paddle { namespace framework { @@ -263,7 +263,7 @@ phi::DenseTensor tensor_apply_eltwise(const phi::DenseTensor& vec_a, vec_y.Resize(vec_a.dims()); const float* a = vec_a.data(); const float* b = vec_b.data(); - float* y = vec_y.mutable_data(platform::CPUPlace()); + float* y = vec_y.mutable_data(phi::CPUPlace()); for (int i = 0; i < vec_a.numel(); i++) { y[i] = f(a[i], b[i]); } diff --git a/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass_tester.cc index c5ee20b4b016252e270877166d7840e17aa2bbe5..1cd1d0325ae446b9d2aa3eb39e6db5d41324fe9e 100644 --- a/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass_tester.cc @@ -19,7 +19,7 @@ #include "paddle/fluid/framework/op_proto_maker.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/imperative/type_defs.h" -#include "paddle/fluid/platform/place.h" +#include "paddle/phi/common/place.h" namespace paddle { namespace framework { @@ -112,7 +112,7 @@ void InitTensorHolder(Scope* scope, void MainTest(bool convWithExistingBias) { auto prog = BuildProgramDesc(convWithExistingBias); std::unique_ptr graph(new ir::Graph(prog)); - auto place = paddle::platform::CPUPlace(); + auto place = phi::CPUPlace(); NaiveExecutor exe{place}; Scope scope; // Init scope, as it is used in pass diff --git 
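`tensor_apply_eltwise` in the conv+bias hunk below combines two equally-shaped tensors elementwise through a functor. The core logic, sketched over plain buffers:

```cpp
#include <functional>

// Elementwise combine: y[i] = f(a[i], b[i]); mirrors tensor_apply_eltwise,
// which requires both inputs to share the same shape.
void ApplyEltwise(const float* a, const float* b, float* y, int n,
                  const std::function<float(float, float)>& f) {
  for (int i = 0; i < n; i++) {
    y[i] = f(a[i], b[i]);
  }
}
```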
a/paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass.cc index dcaba821066e9b942823665123f9f49dbda7dfd1..d43c092a3883feb3d59aae5d6fa8b40ba740b1ad 100644 --- a/paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass.cc @@ -16,7 +16,7 @@ #include "paddle/fluid/framework/ir/graph_traits.h" #include "paddle/fluid/framework/op_version_registry.h" -#include "paddle/fluid/string/pretty_log.h" +#include "paddle/utils/string/pretty_log.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.cc b/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.cc index 12a673b89d6810623087b5608d9ea129b052e251..528ba5747218ad25c37b74f37541eb9de1bdddf7 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.cc @@ -18,7 +18,7 @@ limitations under the License. */ #include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/op_version_registry.h" -#include "paddle/fluid/string/pretty_log.h" +#include "paddle/utils/string/pretty_log.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass_tester.cc index 54078135cf24443ae5e8efa3f57e26bab9f49f1c..770a3a7a1d117d34ee38795dcae9c0916fe4ede8 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass_tester.cc @@ -17,7 +17,6 @@ #include "paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.h" #include "paddle/fluid/framework/naive_executor.h" #include "paddle/fluid/imperative/type_defs.h" -#include "paddle/fluid/platform/place.h" namespace paddle { namespace framework { @@ -68,23 +67,16 @@ void SetOp(ProgramDesc* prog, static const std::initializer_list variable_names{ "z", "a", "b", "c", "d", "e", "f", "g", "h", "i"}; -void PreparePass(std::unique_ptr& graph, - int* original_nodes_num, - int* current_nodes_num) { - auto pass = PassRegistry::Instance().Get("cpu_bfloat16_pass"); - - *original_nodes_num = graph->Nodes().size(); - graph.reset(pass->Apply(graph.release())); - *current_nodes_num = graph->Nodes().size(); -} - void MainTest(const ProgramDesc& prog, const int& quant_count, const int& dequant_count, const int& added_nodes_count) { auto graph = std::make_unique(prog); - int original_nodes_num, current_nodes_num; - PreparePass(graph, &original_nodes_num, ¤t_nodes_num); + auto pass = PassRegistry::Instance().Get("cpu_bfloat16_pass"); + + int original_nodes_num = graph->Nodes().size(); + graph.reset(pass->Apply(graph.release())); + int current_nodes_num = graph->Nodes().size(); int quantize_nodes_count = 0; int dequantize_nodes_count = 0; diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass.cc b/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass.cc index fbdafbfe304ce488c26d7add1c4af166ca30270c..8741b00f689f503de60aa854561f498e37ba377a 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass.cc @@ -19,7 +19,7 @@ limitations under the License. 
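The bfloat16 tester refactor above inlines `PreparePass` into `MainTest`; the part worth noting is the ownership dance around `Pass::Apply`, which takes and returns a raw `Graph*`. A generic, self-contained sketch of that idiom:

```cpp
#include <memory>

struct Graph {};                          // stand-in for ir::Graph
Graph* ApplyPass(Graph* g) { return g; }  // stand-in for pass->Apply

int main() {
  auto graph = std::make_unique<Graph>();
  // Release ownership into the pass, then reclaim whatever graph it returns
  // (a pass may rewrite the graph in place or hand back a new one).
  graph.reset(ApplyPass(graph.release()));
}
```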
*/ #include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/platform/mkldnn_helper.h" -#include "paddle/fluid/string/pretty_log.h" +#include "paddle/utils/string/pretty_log.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc index b650821a3d507137aa8709fba73b00cdaba8a09b..a47bdfef321556d0055f252741adfb431da2a742 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc @@ -20,7 +20,7 @@ #include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h" #include "paddle/fluid/platform/mkldnn_helper.h" -#include "paddle/fluid/string/pretty_log.h" +#include "paddle/utils/string/pretty_log.h" namespace paddle { namespace framework { @@ -1204,8 +1204,7 @@ void CPUQuantizePass::QuantizeMultiGru(Graph* graph) const { auto* w_scale_tensor_dst = scope->Var(w_scale_node->Name())->GetMutable(); w_scale_tensor_dst->Resize(scale_tensor_src.dims()); - auto* dst_data = - w_scale_tensor_dst->mutable_data(platform::CPUPlace()); + auto* dst_data = w_scale_tensor_dst->mutable_data(phi::CPUPlace()); EigenVectorArrayMapFloat eigen_tensor_dst{dst_data, w_scale_tensor_dst->numel()}; eigen_tensor_dst = diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc index 55de1efed7943e172a3bb3d54192d4a3a30ac472..e340bb625de790a7b4a670036fda264589fb068c 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc @@ -19,7 +19,7 @@ #include "paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h" // NOLINT #include "paddle/fluid/framework/naive_executor.h" #include "paddle/fluid/imperative/type_defs.h" -#include "paddle/fluid/platform/place.h" +#include "paddle/phi/common/place.h" namespace paddle { namespace framework { @@ -146,7 +146,7 @@ void PreparePass(std::unique_ptr* graph, int* current_nodes_num, std::string var_without_scale = "", std::string var_signed = "") { - auto place = paddle::platform::CPUPlace(); + auto place = phi::CPUPlace(); NaiveExecutor exe{place}; Scope scope; exe.CreateVariables(prog, 0, true, &scope); diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.cc index 69cf01278b33a63dade0922783a6411203d9c06d..3654c8bd2df8b60f768deb91002dee0727af414a 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.cc @@ -18,9 +18,9 @@ #include #include -#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/mkldnn_helper.h" -#include "paddle/fluid/string/pretty_log.h" +#include "paddle/phi/core/enforce.h" +#include "paddle/utils/string/pretty_log.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass_tester.cc index 11f6098819d1b6de656ed0dba07c6d0fc94cd725..f94456f02896574ff10dead9fd269256d4ca5364 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass_tester.cc @@ -16,7 +16,7 @@ #include "paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.h" #include "paddle/fluid/framework/naive_executor.h" -#include "paddle/fluid/platform/place.h" +#include 
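The `QuantizeMultiGru` hunk above maps raw tensor memory into Eigen arrays before assigning scales. A self-contained sketch of the map-then-assign pattern, assuming Eigen is available:

```cpp
#include <Eigen/Core>

using EigenVectorArrayMapFloat =
    Eigen::Map<Eigen::Array<float, Eigen::Dynamic, 1>>;

// Wrap a raw buffer and scale it in place; the Map makes no copy of the data.
void ScaleInPlace(float* data, int n, float factor) {
  EigenVectorArrayMapFloat mapped{data, n};
  mapped = mapped * factor;
}
```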
"paddle/phi/common/place.h" namespace paddle { namespace framework { @@ -722,7 +722,7 @@ void InitTensorHolder(Scope* scope, } void PrepareGraph(std::unique_ptr* graph, const ProgramDesc& prog) { - auto place = paddle::platform::CPUPlace(); + auto place = phi::CPUPlace(); NaiveExecutor exe{place}; Scope scope; exe.CreateVariables(prog, 0, true, &scope); diff --git a/paddle/fluid/framework/ir/mkldnn/elt_act_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/elt_act_mkldnn_fuse_pass.cc index 87b2e6406137d1398851cbf352c86619727df933..618b6993729f54aafb229973c0ec7fc1f381ed87 100644 --- a/paddle/fluid/framework/ir/mkldnn/elt_act_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/elt_act_mkldnn_fuse_pass.cc @@ -16,9 +16,9 @@ #include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/op_version_registry.h" -#include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/platform/mkldnn_reuse.h" -#include "paddle/fluid/string/pretty_log.h" +#include "paddle/phi/backends/onednn/onednn_reuse.h" +#include "paddle/phi/core/enforce.h" +#include "paddle/utils/string/pretty_log.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/mkldnn/fc_act_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/fc_act_mkldnn_fuse_pass.cc index 298e9cf49caea74b0a7f728d839e0b7cf5a0d2ea..60ab407f00c5af29cb831437bda5d7554ba23321 100644 --- a/paddle/fluid/framework/ir/mkldnn/fc_act_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/fc_act_mkldnn_fuse_pass.cc @@ -15,8 +15,8 @@ #include "paddle/fluid/framework/ir/mkldnn/fc_act_mkldnn_fuse_pass.h" #include "paddle/fluid/framework/op_version_registry.h" -#include "paddle/fluid/platform/mkldnn_reuse.h" -#include "paddle/fluid/string/pretty_log.h" +#include "paddle/phi/backends/onednn/onednn_reuse.h" +#include "paddle/utils/string/pretty_log.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/mkldnn/fc_elementwise_add_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/fc_elementwise_add_mkldnn_fuse_pass.cc index 9ddf9e161db7d3aadc9157812784c791188c6b32..ef01acd88c0b7c57108ea4ac2ec32866a4f0ffd9 100644 --- a/paddle/fluid/framework/ir/mkldnn/fc_elementwise_add_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/fc_elementwise_add_mkldnn_fuse_pass.cc @@ -16,7 +16,7 @@ #include "paddle/fluid/framework/ir/graph_traits.h" #include "paddle/fluid/framework/op_version_registry.h" -#include "paddle/fluid/string/pretty_log.h" +#include "paddle/utils/string/pretty_log.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/mkldnn/fc_mkldnn_pass.cc b/paddle/fluid/framework/ir/mkldnn/fc_mkldnn_pass.cc index ceb73b091126767708e667bed8448561d914a3c4..f4396d6d8175af7d5e7efb518a06c3c2bb2686de 100644 --- a/paddle/fluid/framework/ir/mkldnn/fc_mkldnn_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/fc_mkldnn_pass.cc @@ -14,8 +14,8 @@ #include "paddle/fluid/framework/ir/mkldnn/fc_mkldnn_pass.h" -#include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/string/pretty_log.h" +#include "paddle/phi/core/enforce.h" +#include "paddle/utils/string/pretty_log.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/mkldnn/int8_scale_calculation_mkldnn_pass.cc b/paddle/fluid/framework/ir/mkldnn/int8_scale_calculation_mkldnn_pass.cc index a5481f5c6f30e0027a768515a5f20a525525352d..c06f6b5ec9249d6fdec335baa929746d90f13796 100644 --- a/paddle/fluid/framework/ir/mkldnn/int8_scale_calculation_mkldnn_pass.cc +++ 
b/paddle/fluid/framework/ir/mkldnn/int8_scale_calculation_mkldnn_pass.cc @@ -15,8 +15,8 @@ #include "paddle/fluid/framework/ir/mkldnn/int8_scale_calculation_mkldnn_pass.h" #include "paddle/fluid/framework/op_version_registry.h" -#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/phi/core/enforce.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/mkldnn/interpolate_mkldnn_pass.cc b/paddle/fluid/framework/ir/mkldnn/interpolate_mkldnn_pass.cc index 17a9da84100da1c6240b2f17d17fe10fe842faf6..04a6f8d6b770d5be54b964a50bc6412f1e597f71 100644 --- a/paddle/fluid/framework/ir/mkldnn/interpolate_mkldnn_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/interpolate_mkldnn_pass.cc @@ -17,7 +17,7 @@ #include #include -#include "paddle/fluid/platform/enforce.h" +#include "paddle/phi/core/enforce.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/mkldnn/layer_norm_onednn_optimization_pass.cc b/paddle/fluid/framework/ir/mkldnn/layer_norm_onednn_optimization_pass.cc index e5c1a43d92301cb342f40f8646de302efb38478e..1fed263394b492099fe47a907e08603a9ea2de5d 100644 --- a/paddle/fluid/framework/ir/mkldnn/layer_norm_onednn_optimization_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/layer_norm_onednn_optimization_pass.cc @@ -15,8 +15,8 @@ #include "paddle/fluid/framework/ir/mkldnn/layer_norm_onednn_optimization_pass.h" #include "paddle/fluid/framework/op_version_registry.h" -#include "paddle/fluid/platform/mkldnn_reuse.h" -#include "paddle/fluid/string/pretty_log.h" +#include "paddle/phi/backends/onednn/onednn_reuse.h" +#include "paddle/utils/string/pretty_log.h" namespace paddle { namespace framework { @@ -77,7 +77,7 @@ void LayerNormOneDNNOptimizationPass::ApplyImpl(Graph *graph) const { scale_shift_tensor->Resize(phi::make_ddim({channels * 2})); - memcpy(scale_shift_tensor->mutable_data(platform::CPUPlace()), + memcpy(scale_shift_tensor->mutable_data(phi::CPUPlace()), ln_scale_tensor->data(), channels * sizeof(float)); diff --git a/paddle/fluid/framework/ir/mkldnn/matmul_activation_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/matmul_activation_mkldnn_fuse_pass.cc index 7a40a145bdb4f369a8afbc18829730f606e533f0..07a608c5a2b4a8e48830281727e288bbc2c9d5dc 100644 --- a/paddle/fluid/framework/ir/mkldnn/matmul_activation_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/matmul_activation_mkldnn_fuse_pass.cc @@ -15,8 +15,8 @@ #include "paddle/fluid/framework/ir/mkldnn/matmul_activation_mkldnn_fuse_pass.h" #include "paddle/fluid/framework/op_version_registry.h" -#include "paddle/fluid/platform/mkldnn_reuse.h" -#include "paddle/fluid/string/pretty_log.h" +#include "paddle/phi/backends/onednn/onednn_reuse.h" +#include "paddle/utils/string/pretty_log.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/mkldnn/matmul_elementwise_add_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/matmul_elementwise_add_mkldnn_fuse_pass.cc index 85e49c68ff8243bc6e9e9b1759c3ae989009a2a0..f045377465e0322207d2d5ebdb888f74878e8d43 100644 --- a/paddle/fluid/framework/ir/mkldnn/matmul_elementwise_add_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/matmul_elementwise_add_mkldnn_fuse_pass.cc @@ -16,7 +16,7 @@ #include "paddle/fluid/framework/ir/graph_traits.h" #include "paddle/fluid/framework/op_version_registry.h" -#include "paddle/fluid/string/pretty_log.h" +#include "paddle/utils/string/pretty_log.h" namespace paddle { namespace framework { diff --git 
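The layer_norm optimization above packs the scale and shift parameters into one tensor of `channels * 2` floats; only the first `memcpy` is visible in the hunk, so the second half holding the shift values is inferred from the resize. Sketched standalone:

```cpp
#include <cstring>
#include <vector>

// Pack scale followed by shift into one 2*C buffer. The second memcpy for
// the shift half is an inference from the channels * 2 resize above.
std::vector<float> PackScaleShift(const std::vector<float>& scale,
                                  const std::vector<float>& shift) {
  std::vector<float> packed(scale.size() + shift.size());
  std::memcpy(packed.data(), scale.data(), scale.size() * sizeof(float));
  std::memcpy(packed.data() + scale.size(), shift.data(),
              shift.size() * sizeof(float));
  return packed;
}
```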
a/paddle/fluid/framework/ir/mkldnn/matmul_transpose_reshape_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/matmul_transpose_reshape_mkldnn_fuse_pass.cc index a71961837681cc45f6438b48e495d3c10dc9e742..40dbaa03a0615f1456c6530ed1340741d443f193 100644 --- a/paddle/fluid/framework/ir/mkldnn/matmul_transpose_reshape_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/matmul_transpose_reshape_mkldnn_fuse_pass.cc @@ -13,9 +13,9 @@ // limitations under the License. #include "paddle/fluid/framework/ir/mkldnn/matmul_transpose_reshape_mkldnn_fuse_pass.h" -#include #include "paddle/fluid/framework/op_version_registry.h" -#include "paddle/fluid/platform/enforce.h" +#include "paddle/phi/core/enforce.h" +#include "paddle/utils/string/pretty_log.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/mkldnn/mkldnn_conv_bn_fuse_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/mkldnn_conv_bn_fuse_pass_tester.cc index 7911b125b12154e4a832d0b72e718a06565b339a..5672ca2eb7b80fc75ac638fe18c0ba0bd43b89c0 100644 --- a/paddle/fluid/framework/ir/mkldnn/mkldnn_conv_bn_fuse_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/mkldnn_conv_bn_fuse_pass_tester.cc @@ -23,7 +23,7 @@ #include "paddle/fluid/framework/ir/pass_tester_helper.h" #include "paddle/fluid/framework/naive_executor.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/platform/place.h" +#include "paddle/phi/common/place.h" #include "paddle/phi/core/kernel_registry.h" PD_DECLARE_KERNEL(conv2d_transpose, CPU, ALL_LAYOUT); @@ -195,7 +195,7 @@ class MKLDNNConvBatchNormPassTest { void FillTensorWithRandomData(phi::DenseTensor* tnsr, float lowb, float upb, - platform::CPUPlace place) { + phi::CPUPlace place) { float* ptr = tnsr->mutable_data(place); // Initialize input data std::uniform_real_distribution dist(static_cast(lowb), @@ -219,7 +219,7 @@ class MKLDNNConvBatchNormPassTest { std::unique_ptr graph(new ir::Graph(base_prog)); Scope scope; - auto place = paddle::platform::CPUPlace(); + auto place = phi::CPUPlace(); NaiveExecutor exe{place}; auto pass = PassRegistry::Instance().Get( diff --git a/paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h b/paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h index 6899a7202da9cc3734dc01d6ab7c24d30a6a5606..b091236ddd8f3248598505eb10bd55f30aee1d6f 100644 --- a/paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h +++ b/paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h @@ -140,7 +140,7 @@ static void GetInfoFromTheFirstOp(ir::Graph* graph, op_desc->GetAttr(vector_name)); phi::DenseTensor tensor; const int size = static_cast(scales_vector.size()); - auto data = tensor.mutable_data({size}, platform::CPUPlace()); + auto data = tensor.mutable_data({size}, phi::CPUPlace()); std::copy(scales_vector.begin(), scales_vector.end(), data); auto pair = std::make_pair(is_unsigned, tensor); info_map->insert(std::make_pair(var_name, pair)); diff --git a/paddle/fluid/framework/ir/mkldnn/multi_gru_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/multi_gru_fuse_pass.cc index eee1840675547c6286b950d1753da0bdfccbd527..e83cc1fa6adf4cd3c5c27af45191e2bf9f383de8 100644 --- a/paddle/fluid/framework/ir/mkldnn/multi_gru_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/multi_gru_fuse_pass.cc @@ -18,8 +18,8 @@ #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" -#include "paddle/fluid/platform/errors.h" -#include "paddle/fluid/string/pretty_log.h" +#include "paddle/phi/core/errors.h" +#include "paddle/utils/string/pretty_log.h" 
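`FillTensorWithRandomData` in the conv+BN tester (whose signature now takes `phi::CPUPlace`) draws uniform floats into the tensor buffer. A sketch of the fill loop; the fixed seed is an assumption added here for test reproducibility:

```cpp
#include <random>

// Fill n floats uniformly from [lowb, upb); the fixed seed is an assumption
// so that repeated test runs see identical data.
void FillRandom(float* ptr, size_t n, float lowb, float upb) {
  std::mt19937 gen(42);
  std::uniform_real_distribution<float> dist(lowb, upb);
  for (size_t i = 0; i < n; ++i) ptr[i] = dist(gen);
}
```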
namespace paddle { namespace framework { @@ -44,12 +44,11 @@ std::vector JoinInputs(Node* op1, void MultiGRUFusePass::ApplyImpl(ir::Graph* graph) const { VLOG(3) << "Fusing two concatenated multi_gru ops."; PADDLE_ENFORCE_NOT_NULL(graph, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Pointer to graph argument cannot be NULL.")); FusePassBase::Init(name_scope_, graph); PADDLE_ENFORCE_NOT_NULL( - param_scope(), - platform::errors::InvalidArgument("Scope cannot be nullptr.")); + param_scope(), phi::errors::InvalidArgument("Scope cannot be nullptr.")); GraphPatternDetector gpd; patterns::TwoFusionGruConcat pattern{gpd.mutable_pattern(), name_scope_}; diff --git a/paddle/fluid/framework/ir/mkldnn/multi_gru_seq_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/multi_gru_seq_fuse_pass.cc index d143f087918c3ae17ad0c97316b0498f0743dcfd..35813bc22d6914c1f8362c6f01cc86695fd7a688 100644 --- a/paddle/fluid/framework/ir/mkldnn/multi_gru_seq_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/multi_gru_seq_fuse_pass.cc @@ -21,9 +21,9 @@ #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" -#include "paddle/fluid/platform/errors.h" #include "paddle/fluid/platform/mkldnn_helper.h" -#include "paddle/fluid/string/pretty_log.h" +#include "paddle/phi/core/errors.h" +#include "paddle/utils/string/pretty_log.h" namespace paddle { namespace framework { @@ -48,12 +48,11 @@ std::vector JoinInputs(Node* op1, void MultiGruSeqFusePass::ApplyImpl(ir::Graph* graph) const { VLOG(3) << "Fusing two consecutive multi_gru ops."; PADDLE_ENFORCE_NOT_NULL(graph, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Pointer to graph argument cannot be NULL.")); FusePassBase::Init(name_scope_, graph); PADDLE_ENFORCE_NOT_NULL( - param_scope(), - platform::errors::InvalidArgument("Scope cannot be nullptr.")); + param_scope(), phi::errors::InvalidArgument("Scope cannot be nullptr.")); GraphPatternDetector gpd; patterns::MultiGruSeq pattern{gpd.mutable_pattern(), name_scope_}; diff --git a/paddle/fluid/framework/ir/mkldnn/operator_reshape2_onednn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/operator_reshape2_onednn_fuse_pass.cc index 0f8d0452aa17ba84073e845a3f50bdc54b69f6fa..66e75193bbbe4a7363c2122412ec0c35dabfac6c 100644 --- a/paddle/fluid/framework/ir/mkldnn/operator_reshape2_onednn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/operator_reshape2_onednn_fuse_pass.cc @@ -15,8 +15,8 @@ #include "paddle/fluid/framework/ir/mkldnn/operator_reshape2_onednn_fuse_pass.h" #include "paddle/fluid/framework/op_version_registry.h" -#include "paddle/fluid/platform/mkldnn_reuse.h" -#include "paddle/fluid/string/pretty_log.h" +#include "paddle/phi/backends/onednn/onednn_reuse.h" +#include "paddle/utils/string/pretty_log.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/mkldnn/operator_scale_onednn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/operator_scale_onednn_fuse_pass.cc index 31b9229bfcedd196d28e3226f6e6fdc06137760f..cb06f6eb1205e94d0a1861183014edfc1a67de02 100644 --- a/paddle/fluid/framework/ir/mkldnn/operator_scale_onednn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/operator_scale_onednn_fuse_pass.cc @@ -15,8 +15,8 @@ #include "paddle/fluid/framework/ir/mkldnn/operator_scale_onednn_fuse_pass.h" #include "paddle/fluid/framework/op_version_registry.h" -#include "paddle/fluid/platform/mkldnn_reuse.h" -#include "paddle/fluid/string/pretty_log.h" +#include "paddle/phi/backends/onednn/onednn_reuse.h" 
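Both multi_gru hunks above show the error-namespace half of the migration: the enforce macros themselves are unchanged, only the error factory moves from `platform::errors` to `phi::errors`. The resulting call pattern, excerpted from the patch:

```cpp
// Before: platform::errors::InvalidArgument(...)
// After:  phi::errors::InvalidArgument(...)
PADDLE_ENFORCE_NOT_NULL(
    graph,
    phi::errors::InvalidArgument("Pointer to graph argument cannot be NULL."));
```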
+#include "paddle/utils/string/pretty_log.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/mkldnn/operator_unsqueeze2_onednn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/operator_unsqueeze2_onednn_fuse_pass.cc index 80f49613c63aca70504338f2024f9a1f1e783d1b..716d14886327a0d44e1b7e3dc79bbdc2ff0beb76 100644 --- a/paddle/fluid/framework/ir/mkldnn/operator_unsqueeze2_onednn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/operator_unsqueeze2_onednn_fuse_pass.cc @@ -15,8 +15,8 @@ #include "paddle/fluid/framework/ir/mkldnn/operator_unsqueeze2_onednn_fuse_pass.h" #include "paddle/fluid/framework/op_version_registry.h" -#include "paddle/fluid/platform/mkldnn_reuse.h" -#include "paddle/fluid/string/pretty_log.h" +#include "paddle/phi/backends/onednn/onednn_reuse.h" +#include "paddle/utils/string/pretty_log.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/mkldnn/params_quantization_mkldnn_pass.cc b/paddle/fluid/framework/ir/mkldnn/params_quantization_mkldnn_pass.cc index 5ad1e95cd79c069eb709aa38edd14e51c4bf6e3d..c03caa7ee83d479067b820ce59f7f463868ad0e1 100644 --- a/paddle/fluid/framework/ir/mkldnn/params_quantization_mkldnn_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/params_quantization_mkldnn_pass.cc @@ -16,7 +16,7 @@ #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/platform/mkldnn_helper.h" -#include "paddle/fluid/string/pretty_log.h" +#include "paddle/utils/string/pretty_log.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/mkldnn/params_quantization_mkldnn_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/params_quantization_mkldnn_pass_tester.cc index f6eee81cece4b25abc83e89369fdb11f138d89be..52f3843566f1f1c440b7ad5be9d3ea0a6595bb44 100755 --- a/paddle/fluid/framework/ir/mkldnn/params_quantization_mkldnn_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/params_quantization_mkldnn_pass_tester.cc @@ -16,7 +16,7 @@ #include "paddle/fluid/framework/ir/mkldnn/params_quantization_mkldnn_pass.h" // NOLINT #include "paddle/fluid/imperative/type_defs.h" -#include "paddle/fluid/platform/place.h" +#include "paddle/phi/common/place.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/mkldnn/quant_dequant_mkldnn_pass.cc b/paddle/fluid/framework/ir/mkldnn/quant_dequant_mkldnn_pass.cc index 433b9aba2e7f23a2ac86d4bbacd9867f1c697f22..932f5fb6707899145bd5c0bf540c1cb89fedfa1c 100755 --- a/paddle/fluid/framework/ir/mkldnn/quant_dequant_mkldnn_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/quant_dequant_mkldnn_pass.cc @@ -430,8 +430,8 @@ void QuantDequantMkldnnPass::TransposeWeight(phi::DenseTensor* input) const { phi::DenseTensor trans_tensor; trans_tensor.Resize(out_dims); - float* trans_data = trans_tensor.mutable_data(platform::CPUPlace()); - float* in_data = input->mutable_data(platform::CPUPlace()); + float* trans_data = trans_tensor.mutable_data(phi::CPUPlace()); + float* in_data = input->mutable_data(phi::CPUPlace()); for (int64_t out_idx = 0; out_idx < count; ++out_idx) { int64_t in_idx = 0; @@ -493,8 +493,7 @@ void QuantDequantMkldnnPass::ConvertFromINT8ToFP32( weight_tensor->clear(); // clear int weight weight_tensor->Resize(phi::make_ddim(phi::vectorize(weight_dims))); - auto* new_weight_data = - weight_tensor->mutable_data(platform::CPUPlace()); + auto* new_weight_data = weight_tensor->mutable_data(phi::CPUPlace()); memcpy(new_weight_data, weight_data.data(), weight_tensor->numel() * sizeof(float)); @@ -536,8 +535,7 @@ void 
QuantDequantMkldnnPass::ConvertFromINT8ToFP32( } weight_tensor->clear(); // clear int weight weight_tensor->Resize(phi::make_ddim(phi::vectorize(weight_dims))); - auto* new_weight_data = - weight_tensor->mutable_data(platform::CPUPlace()); + auto* new_weight_data = weight_tensor->mutable_data(phi::CPUPlace()); memcpy(new_weight_data, weight_data.data(), weight_tensor->numel() * sizeof(float)); @@ -582,8 +580,7 @@ void QuantDequantMkldnnPass::DequantizeOpWeights( weight_var_name, op_desc->Type())); auto* weight_tensor = var->GetMutable(); - float* fp32_weight_data = - weight_tensor->mutable_data(platform::CPUPlace()); + float* fp32_weight_data = weight_tensor->mutable_data(phi::CPUPlace()); ConvertFromINT8ToFP32( scales, weight_tensor, nullptr, fp32_weight_data, weight_var_name); } @@ -628,7 +625,7 @@ void QuantDequantMkldnnPass::DequantizeOpWeightsFromONNXFormat( op_desc->Type())); auto* weight_tensor = var->GetMutable(); int8_t* int8_weight_data = - weight_tensor->mutable_data(platform::CPUPlace()); + weight_tensor->mutable_data(phi::CPUPlace()); ConvertFromINT8ToFP32( scales, weight_tensor, int8_weight_data, nullptr, weight_var_name); diff --git a/paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass.cc index bb6ceb6064c638078bb6518c194b4f2c9fc8bd94..25a79509b53f531ce53cd354bea1e16f9680f5c0 100644 --- a/paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass.cc @@ -14,8 +14,8 @@ #include "paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass.h" #include "paddle/fluid/framework/op_version_registry.h" -#include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/string/pretty_log.h" +#include "paddle/phi/core/enforce.h" +#include "paddle/utils/string/pretty_log.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass_tester.cc index 111a28403fbdcf4482e3b66a18b7aee666c0aa14..2dd13573d98a054167db0a7686d106fb151af605 100644 --- a/paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass_tester.cc @@ -26,7 +26,7 @@ void AddVarToScope(Scope* param_scope, const DDim& dims) { auto* tensor = param_scope->Var(name)->GetMutable(); tensor->Resize(dims); - tensor->mutable_data(platform::CPUPlace()); + tensor->mutable_data(phi::CPUPlace()); } Scope* CreateParamScope() { diff --git a/paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.cc index bcc44a53fe50eccd52b3bf0aed841e1c5a39ac18..9f50aefc46ce5b16d35abba6eff502315db84b8a 100644 --- a/paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.cc @@ -20,7 +20,7 @@ limitations under the License. 
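`ConvertFromINT8ToFP32` above restores float weights from the stored int8 values before the pass rewrites the graph. The arithmetic core, sketched with a single per-tensor scale; the per-channel case and the exact scale convention (multiplier vs. divisor) follow the pass's stored scales and are assumptions here:

```cpp
#include <cstddef>
#include <cstdint>

// Reverse the int8 quantization: scale each stored value back to float.
// Assumes the stored scale is the multiplier that restores the float range.
void DequantizeWeights(const int8_t* src, float* dst, size_t n, float scale) {
  for (size_t i = 0; i < n; ++i) dst[i] = static_cast<float>(src[i]) * scale;
}
```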
*/ #include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/platform/mkldnn_helper.h" -#include "paddle/fluid/string/pretty_log.h" +#include "paddle/utils/string/pretty_log.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/mkldnn/shuffle_channel_mkldnn_detect_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/shuffle_channel_mkldnn_detect_pass_tester.cc index 11c5b42af57100b39d78701c5919441445c9b59f..4c6fc3774e8408e21312d1d5dfd0b817e1ff8970 100644 --- a/paddle/fluid/framework/ir/mkldnn/shuffle_channel_mkldnn_detect_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/shuffle_channel_mkldnn_detect_pass_tester.cc @@ -28,7 +28,7 @@ void AddVarToScope(Scope* param_scope, const DDim& dims) { auto* tensor = param_scope->Var(name)->GetMutable(); tensor->Resize(dims); - tensor->mutable_data(platform::CPUPlace()); + tensor->mutable_data(phi::CPUPlace()); } Scope* CreateParamScope() { diff --git a/paddle/fluid/framework/ir/mkldnn/softplus_activation_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/softplus_activation_mkldnn_fuse_pass.cc index 38c86d225f9705529190ec79ef842c1dcbf2a6ad..a4e74bb376daf7fdab322fac674e570e8e30b1ae 100644 --- a/paddle/fluid/framework/ir/mkldnn/softplus_activation_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/softplus_activation_mkldnn_fuse_pass.cc @@ -16,9 +16,9 @@ #include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/op_version_registry.h" -#include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/platform/mkldnn_reuse.h" -#include "paddle/fluid/string/pretty_log.h" +#include "paddle/phi/backends/onednn/onednn_reuse.h" +#include "paddle/phi/core/enforce.h" +#include "paddle/utils/string/pretty_log.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/mkldnn/squeeze2_transpose2_onednn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/squeeze2_transpose2_onednn_fuse_pass.cc index 9a3b420073ea825c9def560d090609cb54c457aa..b71873a9dcb190245cb838fec9faf7d2c8f2e3b9 100644 --- a/paddle/fluid/framework/ir/mkldnn/squeeze2_transpose2_onednn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/squeeze2_transpose2_onednn_fuse_pass.cc @@ -13,8 +13,8 @@ // limitations under the License. 
#include "paddle/fluid/framework/ir/mkldnn/squeeze2_transpose2_onednn_fuse_pass.h" #include "paddle/fluid/framework/op_version_registry.h" -#include "paddle/fluid/platform/mkldnn_reuse.h" -#include "paddle/fluid/string/pretty_log.h" +#include "paddle/phi/backends/onednn/onednn_reuse.h" +#include "paddle/utils/string/pretty_log.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/inference/api/mkldnn_quantizer.cc b/paddle/fluid/inference/api/mkldnn_quantizer.cc index 53ed8c8134937d5a831cb4e1261daa9b6c796631..5fec6e74d311090bf328fefebede6cf0ebf0383f 100644 --- a/paddle/fluid/inference/api/mkldnn_quantizer.cc +++ b/paddle/fluid/inference/api/mkldnn_quantizer.cc @@ -30,14 +30,14 @@ #include "paddle/fluid/inference/analysis/analyzer.h" #include "paddle/fluid/inference/api/analysis_predictor.h" #include "paddle/fluid/platform/mkldnn_helper.h" -#include "paddle/fluid/platform/place.h" -#include "paddle/fluid/string/pretty_log.h" +#include "paddle/phi/common/place.h" +#include "paddle/utils/string/pretty_log.h" namespace paddle { using framework::Variable; using framework::ir::Graph; -using platform::CPUPlace; +using phi::CPUPlace; using ConstEigenVectorArrayMap = Eigen::Map>; using EigenMatrixDoubleArray = diff --git a/paddle/fluid/inference/api/mkldnn_quantizer_tester.cc b/paddle/fluid/inference/api/mkldnn_quantizer_tester.cc index 79873a63afa9d576b9f05f435ccd865acce21c68..5e699a8b4c6a58f525ea16e9e4b2917ef2faf682 100644 --- a/paddle/fluid/inference/api/mkldnn_quantizer_tester.cc +++ b/paddle/fluid/inference/api/mkldnn_quantizer_tester.cc @@ -111,7 +111,7 @@ TEST_F(MkldnnQuantizerTest, histogram_inverted_min_max) { var_tensor.Resize(phi::make_dim(values.size())); std::copy(begin(values), end(values), - var_tensor.mutable_data(platform::CPUPlace())); + var_tensor.mutable_data(phi::CPUPlace())); ASSERT_THROW(Histogram(var_tensor, max_val, min_val, 3), platform::EnforceNotMet); @@ -127,7 +127,7 @@ TEST_F(MkldnnQuantizerTest, histogram_non_negative_to_3) { var_tensor.Resize(phi::make_dim(values.size())); std::copy(begin(values), end(values), - var_tensor.mutable_data(platform::CPUPlace())); + var_tensor.mutable_data(phi::CPUPlace())); std::vector histogram; float bin_width; @@ -151,7 +151,7 @@ TEST_F(MkldnnQuantizerTest, histogram_positive_and_negative_to_3) { var_tensor.Resize(phi::make_dim(values.size())); std::copy(begin(values), end(values), - var_tensor.mutable_data(platform::CPUPlace())); + var_tensor.mutable_data(phi::CPUPlace())); std::vector histogram; float bin_width; @@ -175,7 +175,7 @@ TEST_F(MkldnnQuantizerTest, histogram_zero_bins) { var_tensor.Resize(phi::make_dim(values.size())); std::copy(begin(values), end(values), - var_tensor.mutable_data(platform::CPUPlace())); + var_tensor.mutable_data(phi::CPUPlace())); ASSERT_THROW(Histogram(var_tensor, min_val, max_val, 0), platform::EnforceNotMet); @@ -188,7 +188,7 @@ TEST_F(MkldnnQuantizerTest, histogram_empty) { // zero tensor phi::DenseTensor var_tensor; var_tensor.Resize({0}); - var_tensor.mutable_data(platform::CPUPlace()); + var_tensor.mutable_data(phi::CPUPlace()); ASSERT_THROW(Histogram(var_tensor, -1, 1, 1), platform::EnforceNotMet); } @@ -200,7 +200,7 @@ TEST_F(MkldnnQuantizerTest, kl_scaling_factor_signed) { var_tensor.Resize(phi::make_dim(values.size())); std::copy(begin(values), end(values), - var_tensor.mutable_data(platform::CPUPlace())); + var_tensor.mutable_data(phi::CPUPlace())); bool is_unsigned; phi::DenseTensor lod_tensor; @@ -220,7 +220,7 @@ TEST_F(MkldnnQuantizerTest, max_scaling_factor_signed) { 
var_tensor.Resize(phi::make_dim(values.size())); std::copy(begin(values), end(values), - var_tensor.mutable_data(platform::CPUPlace())); + var_tensor.mutable_data(phi::CPUPlace())); bool is_unsigned; phi::DenseTensor lod_tensor; @@ -240,7 +240,7 @@ TEST_F(MkldnnQuantizerTest, max_scaling_factor_unsigned) { var_tensor.Resize(phi::make_dim(values.size())); std::copy(begin(values), end(values), - var_tensor.mutable_data(platform::CPUPlace())); + var_tensor.mutable_data(phi::CPUPlace())); bool is_unsigned; phi::DenseTensor lod_tensor; @@ -260,10 +260,10 @@ TEST_F(MkldnnQuantizerTest, max_scaling_factor_chwise_unsigned) { phi::DenseTensor var_tensor; var_tensor.Resize(phi::make_dim(channels, 1, 1, values.size())); for (int i = 0; i < channels; i++) - std::copy(begin(values), - end(values), - var_tensor.mutable_data(platform::CPUPlace()) + - i * values.size()); + std::copy( + begin(values), + end(values), + var_tensor.mutable_data(phi::CPUPlace()) + i * values.size()); bool is_unsigned; phi::DenseTensor lod_tensor; @@ -284,7 +284,7 @@ TEST_F(MkldnnQuantizerTest, kl_scaling_factor_unsigned) { var_tensor.Resize(phi::make_dim(values.size())); std::copy(begin(values), end(values), - var_tensor.mutable_data(platform::CPUPlace())); + var_tensor.mutable_data(phi::CPUPlace())); bool is_unsigned; phi::DenseTensor lod_tensor; @@ -312,14 +312,14 @@ TEST_F(MkldnnQuantizerTest, max_ch_gru_scaling_factor) { std::copy( begin(wx[i]), end(wx[i]), - wx_tensor.mutable_data(platform::CPUPlace()) + i * wx[0].size()); + wx_tensor.mutable_data(phi::CPUPlace()) + i * wx[0].size()); wh_tensor.Resize(phi::make_dim(wh.size(), wh[0].size())); for (size_t i = 0; i < wh.size(); i++) std::copy( begin(wh[i]), end(wh[i]), - wh_tensor.mutable_data(platform::CPUPlace()) + i * wh[0].size()); + wh_tensor.mutable_data(phi::CPUPlace()) + i * wh[0].size()); bool is_unsigned; std::tie(is_unsigned, lod_tensor) = @@ -342,14 +342,14 @@ TEST_F(MkldnnQuantizerTest, max_ch_lstm_scaling_factor) { std::copy( begin(wx[i]), end(wx[i]), - wx_tensor.mutable_data(platform::CPUPlace()) + i * wx[0].size()); + wx_tensor.mutable_data(phi::CPUPlace()) + i * wx[0].size()); wh_tensor.Resize(phi::make_dim(wh.size(), wh[0].size())); for (size_t i = 0; i < wh.size(); i++) std::copy( begin(wh[i]), end(wh[i]), - wh_tensor.mutable_data(platform::CPUPlace()) + i * wh[0].size()); + wh_tensor.mutable_data(phi::CPUPlace()) + i * wh[0].size()); bool is_unsigned; std::tie(is_unsigned, lod_tensor) = diff --git a/paddle/fluid/inference/tests/api/analyzer_detect_functional_mkldnn_tester.cc b/paddle/fluid/inference/tests/api/analyzer_detect_functional_mkldnn_tester.cc index f73798faa49894452ebe9fbf9546455f4e6bf52c..277f68d09bd116178f4dbcf61ca859f58868efbd 100644 --- a/paddle/fluid/inference/tests/api/analyzer_detect_functional_mkldnn_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_detect_functional_mkldnn_tester.cc @@ -18,8 +18,7 @@ limitations under the License. 
*/ #include #include "paddle/fluid/inference/tests/api/tester_helper.h" -#include "paddle/fluid/platform/device_context.h" -#include "paddle/fluid/platform/place.h" +#include "paddle/phi/common/place.h" DEFINE_string(infer_shape, "", "data shape file"); DEFINE_int32(sample, 20, "number of sample"); @@ -78,7 +77,7 @@ void SetInput(std::vector> *inputs, #ifdef PADDLE_WITH_MKLDNN int GetNumCachedObjects(void) { auto &pool = platform::DeviceContextPool::Instance(); - platform::CPUPlace place; + phi::CPUPlace place; auto onednn_dev_ctx = dynamic_cast(pool.Get(place)); return onednn_dev_ctx->GetCachedObjectsNumber(); } diff --git a/paddle/fluid/operators/fused/mkldnn/fusion_gru_mkldnn_op.cc b/paddle/fluid/operators/fused/mkldnn/fusion_gru_mkldnn_op.cc index 00f0f94175e434a6db9dc74b4cd08345da8103f9..806c883228035c09308c4b0e8db0b81739e5244d 100644 --- a/paddle/fluid/operators/fused/mkldnn/fusion_gru_mkldnn_op.cc +++ b/paddle/fluid/operators/fused/mkldnn/fusion_gru_mkldnn_op.cc @@ -15,6 +15,7 @@ limitations under the License. */ #include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/operators/fused/fusion_gru_op.h" #include "paddle/fluid/operators/fused/mkldnn/fusion_rnn_mkldnn.h" +#include "paddle/phi/backends/onednn/onednn_reuse.h" #include "paddle/phi/core/expect.h" namespace paddle { @@ -24,13 +25,14 @@ using phi::OneDNNContext; using phi::funcs::OneDNNGetDataType; using phi::funcs::OneDNNMemDesc; using phi::funcs::RNNReorderType; +using OneDNNMemoryFormat = dnnl::memory::format_tag; template class GRUMKLDNNHandler : public RNNMKLDNNHandler { public: GRUMKLDNNHandler(const paddle::framework::ExecutionContext& ctx, const OneDNNContext& dev_ctx, - const dnnl::engine mkldnn_engine, + const dnnl::engine onednn_engine, platform::Place cpu_place, const phi::DenseTensor* input, const phi::DenseTensor* weight_h, @@ -44,7 +46,7 @@ class GRUMKLDNNHandler : public RNNMKLDNNHandler { : RNNMKLDNNHandler( ctx, dev_ctx, - mkldnn_engine, + onednn_engine, ctx.GetPlace(), input, weight_h, @@ -256,7 +258,7 @@ class FusionGRUMKLDNNKernel : public framework::OpKernel { template void RunKernel(const framework::ExecutionContext& ctx) const { auto& dev_ctx = ctx.template device_context(); - const auto& mkldnn_engine = dev_ctx.GetEngine(); + const auto& onednn_engine = dev_ctx.GetEngine(); // Get Tensors const auto* input = ctx.Input("X"); @@ -294,7 +296,7 @@ class FusionGRUMKLDNNKernel : public framework::OpKernel { GRUMKLDNNHandler handler( ctx, dev_ctx, - mkldnn_engine, + onednn_engine, ctx.GetPlace(), input, weight_h, @@ -379,7 +381,7 @@ class FusionGRUMKLDNNKernel : public framework::OpKernel { namespace ops = paddle::operators; REGISTER_OP_KERNEL(fusion_gru, MKLDNN, - paddle::platform::CPUPlace, + phi::CPUPlace, ops::FusionGRUMKLDNNKernel, ops::FusionGRUMKLDNNKernel, ops::FusionGRUMKLDNNKernel); diff --git a/paddle/fluid/operators/fused/mkldnn/fusion_lstm_mkldnn_op.cc b/paddle/fluid/operators/fused/mkldnn/fusion_lstm_mkldnn_op.cc index 68ab4f55a5e56e03697b017a4866e7337660e1ff..bda5eab2d725f58d319be6bab8ae6f6e0987516e 100644 --- a/paddle/fluid/operators/fused/mkldnn/fusion_lstm_mkldnn_op.cc +++ b/paddle/fluid/operators/fused/mkldnn/fusion_lstm_mkldnn_op.cc @@ -24,6 +24,7 @@ using phi::OneDNNContext; using phi::funcs::OneDNNGetDataType; using phi::funcs::OneDNNMemDesc; using phi::funcs::RNNReorderType; +using OneDNNMemoryFormat = dnnl::memory::format_tag; template class LSTMMKLDNNHandler @@ -31,7 +32,7 @@ class LSTMMKLDNNHandler public: LSTMMKLDNNHandler(const paddle::framework::ExecutionContext& 
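`GetNumCachedObjects` above fetches the CPU oneDNN context from the global device-context pool keyed by a `phi::CPUPlace`. A sketch of that lookup; the `phi::OneDNNContext` cast target is an assumption, since the template argument was stripped from the hunk, but it matches the type the surrounding files already use:

```cpp
// Assumes the Paddle headers this tester already includes.
int CachedObjects() {
  phi::CPUPlace place;
  auto* onednn_ctx = dynamic_cast<phi::OneDNNContext*>(
      paddle::platform::DeviceContextPool::Instance().Get(place));
  return onednn_ctx ? onednn_ctx->GetCachedObjectsNumber() : 0;
}
```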
ctx, const OneDNNContext& dev_ctx, - const dnnl::engine mkldnn_engine, + const dnnl::engine onednn_engine, platform::Place cpu_place, const phi::DenseTensor* input, const phi::DenseTensor* weight_h, @@ -46,7 +47,7 @@ class LSTMMKLDNNHandler : RNNMKLDNNHandler( ctx, dev_ctx, - mkldnn_engine, + onednn_engine, ctx.GetPlace(), input, weight_h, @@ -338,7 +339,7 @@ class FusionLSTMMKLDNNKernel : public framework::OpKernel { template void RunKernel(const framework::ExecutionContext& ctx) const { auto& dev_ctx = ctx.template device_context(); - const auto& mkldnn_engine = dev_ctx.GetEngine(); + const auto& onednn_engine = dev_ctx.GetEngine(); // Get Tensors const auto* input = ctx.Input("X"); @@ -379,7 +380,7 @@ class FusionLSTMMKLDNNKernel : public framework::OpKernel { LSTMMKLDNNHandler handler( ctx, dev_ctx, - mkldnn_engine, + onednn_engine, ctx.GetPlace(), input, weight_h, @@ -474,7 +475,7 @@ class FusionLSTMMKLDNNKernel : public framework::OpKernel { namespace ops = paddle::operators; REGISTER_OP_KERNEL(fusion_lstm, MKLDNN, - paddle::platform::CPUPlace, + phi::CPUPlace, ops::FusionLSTMMKLDNNKernel, ops::FusionLSTMMKLDNNKernel, ops::FusionLSTMMKLDNNKernel); diff --git a/paddle/fluid/operators/fused/mkldnn/fusion_rnn_mkldnn.h b/paddle/fluid/operators/fused/mkldnn/fusion_rnn_mkldnn.h index 51b2d9c99562588cc7a8b145480309f9a3bfa257..b45af1b4bc9ad6610862bfd6e9cb12dc778fac52 100644 --- a/paddle/fluid/operators/fused/mkldnn/fusion_rnn_mkldnn.h +++ b/paddle/fluid/operators/fused/mkldnn/fusion_rnn_mkldnn.h @@ -14,7 +14,8 @@ limitations under the License. */ #pragma once -#include "paddle/fluid/platform/mkldnn_reuse.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/phi/backends/onednn/onednn_reuse.h" namespace paddle { namespace operators { @@ -22,13 +23,14 @@ namespace operators { using phi::funcs::CreateKey; using phi::funcs::OneDNNGetDataType; using phi::funcs::RNNReorderType; +using OneDNNMemoryFormat = dnnl::memory::format_tag; template class RNNMKLDNNHandler : public phi::funcs::OneDNNHandlerT { public: RNNMKLDNNHandler(const paddle::framework::ExecutionContext& ctx, const phi::OneDNNContext& dev_ctx, - const dnnl::engine mkldnn_engine, + const dnnl::engine onednn_engine, platform::Place cpu_place, const phi::DenseTensor* input, const phi::DenseTensor* weight_h, diff --git a/paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc b/paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc index a0cc21892597d1f3465bb4d88213570537d404af..3ce6d18bde4c93d77924eb36c58d5bbc62c4a375 100644 --- a/paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc +++ b/paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc @@ -18,10 +18,10 @@ limitations under the License. 
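After dropping `mkldnn_reuse.h`, the fusion_gru, fusion_lstm, fusion_rnn, and multi_gru files each add the same local alias so existing code keeps compiling; it is purely a naming convenience over the oneDNN type:

```cpp
#include "dnnl.hpp"

// Alias added in each RNN file after the mkldnn_reuse.h include was removed.
using OneDNNMemoryFormat = dnnl::memory::format_tag;
```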
*/ #include "dnnl.hpp" // NOLINT #include "paddle/fluid/framework/mixed_vector.h" +#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/operators/fused/multi_gru_op.h" -#include "paddle/fluid/platform/errors.h" -#include "paddle/fluid/platform/mkldnn_reuse.h" +#include "paddle/phi/backends/onednn/onednn_reuse.h" namespace paddle { namespace operators { @@ -31,6 +31,7 @@ using phi::funcs::OneDNNGetDataType; using phi::funcs::OneDNNMemDesc; using Direction = dnnl::rnn_direction; using phi::OneDNNContext; +using OneDNNMemoryFormat = dnnl::memory::format_tag; namespace { @@ -721,6 +722,6 @@ class MultiGRUMKLDNNKernel : public framework::OpKernel { namespace ops = paddle::operators; REGISTER_OP_KERNEL(multi_gru, MKLDNN, - paddle::platform::CPUPlace, + phi::CPUPlace, ops::MultiGRUMKLDNNKernel, ops::MultiGRUMKLDNNKernel); diff --git a/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc index 146ee52fc62ff735e6238138c278235999e092b9..6ed5aefd66d1b97e3b9ca11e33190665bb24f247 100644 --- a/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc @@ -14,11 +14,10 @@ limitations under the License. */ #include "paddle/fluid/operators/dequantize_op.h" -#include "paddle/fluid/framework/data_layout_transform.h" #include "paddle/fluid/framework/tensor.h" -#include "paddle/fluid/platform/errors.h" #include "paddle/fluid/platform/mkldnn_helper.h" -#include "paddle/fluid/platform/mkldnn_reuse.h" +#include "paddle/phi/backends/onednn/onednn_reuse.h" +#include "paddle/phi/core/errors.h" namespace paddle { namespace operators { @@ -39,11 +38,11 @@ class DeQuantOpKernel : public framework::OpKernel { auto* out = ctx.Output("Output"); PADDLE_ENFORCE(quantization_scale != 0.0f, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Dequantization scale must be different than 0.0f")); PADDLE_ENFORCE(quantization_shift <= 255 && quantization_shift >= 0, - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "Dequantization shift must be lower or equal to ", "255 and greater or equal to 0, but got %f", quantization_shift)); @@ -91,7 +90,7 @@ namespace ops = paddle::operators; REGISTER_OP_KERNEL(dequantize, MKLDNN, - ::paddle::platform::CPUPlace, + ::phi::CPUPlace, ops::DeQuantOpKernel, ops::DeQuantOpKernel, ops::DeQuantOpKernel); diff --git a/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc index fa376cd45e9d57fd81dea5125023e8ce3face9d4..31b4ca34a2d210e9a354140c57ddf3ba4940605c 100644 --- a/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc @@ -14,9 +14,10 @@ limitations under the License. 
*/ #include +#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/fc_op.h" #include "paddle/fluid/platform/mkldnn_helper.h" -#include "paddle/fluid/platform/mkldnn_reuse.h" +#include "paddle/phi/backends/onednn/onednn_reuse.h" namespace paddle { namespace operators { @@ -51,10 +52,10 @@ class FCMKLDNNHandler const phi::DenseTensor* bias, phi::DenseTensor* out, const int in_num_col_dims, - dnnl::engine mkldnn_engine, + dnnl::engine onednn_engine, platform::Place cpu_place) : phi::funcs::OneDNNHandlerNoCachingT( - mkldnn_engine, cpu_place), + onednn_engine, cpu_place), dev_ctx_(dev_ctx) { this->memory_key_ = ctx.InputName("W"); @@ -122,7 +123,7 @@ class FCMKLDNNHandler post_operations.append_eltwise( activation_scale, dnnl::algorithm::eltwise_relu, 0.0f, 0.0f); } - platform::AppendActivation(ctx, post_operations, activation_scale); + AppendActivation(ctx, post_operations, activation_scale); if (ctx.HasAttr("fused_output_scale")) { float scale_alpha = ctx.Attr("fused_output_scale"); @@ -154,6 +155,59 @@ class FCMKLDNNHandler } } + void AppendActivation(const ExecutionContext& ctx, + dnnl::post_ops& post_ops, // NOLINT + float activation_scale = 1.0f) { + const auto invalid_attribute = + ctx.HasAttr("fuse_activation") + ? ctx.Attr("fuse_activation").empty() + : true; + if (invalid_attribute) return; + + const auto fuse_activation = ctx.Attr("fuse_activation"); + const auto fuse_alpha = + ctx.HasAttr("fuse_alpha") ? ctx.Attr("fuse_alpha") : 0.0f; + const auto fuse_beta = + ctx.HasAttr("fuse_beta") ? ctx.Attr("fuse_beta") : 0.0f; + + if (fuse_activation == "hard_sigmoid") { + post_ops.append_eltwise(activation_scale, + dnnl::algorithm::eltwise_linear, + fuse_alpha, + fuse_beta); + post_ops.append_eltwise( + activation_scale, dnnl::algorithm::eltwise_clip, 0.0f, 1.0f); + } else { + const std::unordered_map activation_map = { + {"abs", dnnl::algorithm::eltwise_abs}, + {"clip", dnnl::algorithm::eltwise_clip}, + {"gelu", dnnl::algorithm::eltwise_gelu_erf}, + {"gelu_erf", dnnl::algorithm::eltwise_gelu_erf}, + {"gelu_tanh", dnnl::algorithm::eltwise_gelu_tanh}, + {"hard_swish", dnnl::algorithm::eltwise_hardswish}, + {"leaky_relu", dnnl::algorithm::eltwise_relu}, + {"mish", dnnl::algorithm::eltwise_mish}, + {"relu", dnnl::algorithm::eltwise_relu}, + {"relu6", dnnl::algorithm::eltwise_bounded_relu}, + {"sigmoid", dnnl::algorithm::eltwise_logistic}, + {"sqrt", dnnl::algorithm::eltwise_sqrt}, + {"swish", dnnl::algorithm::eltwise_swish}, + {"tanh", dnnl::algorithm::eltwise_tanh}}; + + const auto& activation_type = activation_map.find(fuse_activation); + + PADDLE_ENFORCE_NE( + activation_type, + activation_map.end(), + platform::errors::InvalidArgument( + "Activation '%s' not found in oneDNN algorithms mapper", + fuse_activation)); + + post_ops.append_eltwise( + activation_scale, activation_type->second, fuse_alpha, fuse_beta); + } + } + // Correct output scale, to take into account scaling of input and weights // Since the data that comes out of input and weight multiplication is // scaled with its own scales, this data needs to be divided by @@ -396,10 +450,76 @@ class FCMKLDNNKernel : public framework::OpKernel { } } + void SetOutMemDescWithUnsqueeze2FuseSupport( + const framework::ExecutionContext& ctx, + phi::DenseTensor* out, + const dnnl::memory::desc& out_md) const { + const std::vector& fused_unsqueeze2_axes = + ctx.Attr>("fused_unsqueeze2_axes"); + const std::vector& op_tz = out_md.dims(); + std::vector unsqueezed_op_tz( + op_tz.size() + fused_unsqueeze2_axes.size(), 0); 
+ // Correct output scale, to take into account scaling of input and weights // Since the data that comes out of input and weight multiplication is // scaled with its own scales, this data needs to be divided by // @@ -396,10 +450,76 @@ class FCMKLDNNKernel : public framework::OpKernel { } } + void SetOutMemDescWithUnsqueeze2FuseSupport( + const framework::ExecutionContext& ctx, + phi::DenseTensor* out, + const dnnl::memory::desc& out_md) const { + const std::vector<int>& fused_unsqueeze2_axes = + ctx.Attr<std::vector<int>>("fused_unsqueeze2_axes"); + const std::vector<int64_t>& op_tz = out_md.dims(); + std::vector<int64_t> unsqueezed_op_tz( + op_tz.size() + fused_unsqueeze2_axes.size(), 0); + + for (const auto& axis : fused_unsqueeze2_axes) { + int positive_axis = axis < 0 ? unsqueezed_op_tz.size() + axis : axis; + unsqueezed_op_tz[positive_axis] = 1; + } + + int j = 0; + for (size_t i = 0; i < unsqueezed_op_tz.size(); ++i) { + if (unsqueezed_op_tz[i] == 0) { + unsqueezed_op_tz[i] = op_tz[j++]; + } + } + out->set_mem_desc(out_md.reshape(unsqueezed_op_tz)); + out->Resize(phi::make_ddim(unsqueezed_op_tz)); + } + + void SetOutMemDescWithReshape2FuseSupport( + const framework::ExecutionContext& ctx, + phi::DenseTensor* out, + const dnnl::memory::desc& out_md) const { + std::vector<int64_t> fused_reshape2_shape( + ctx.Attr<std::vector<int>>("fused_reshape2_shape").begin(), + ctx.Attr<std::vector<int>>("fused_reshape2_shape").end()); + + const int out_shape_numel = out->numel(); + const int new_shape_numel = std::accumulate(fused_reshape2_shape.begin(), + fused_reshape2_shape.end(), + 1, + std::multiplies<int64_t>()); + + for (size_t i = 0; i < fused_reshape2_shape.size(); ++i) { + if (fused_reshape2_shape[i] == -1) { + fused_reshape2_shape[i] = -out_shape_numel / new_shape_numel; + break; + } + } + + out->set_mem_desc(out_md.reshape(fused_reshape2_shape)); + out->Resize(phi::make_ddim(fused_reshape2_shape)); + } + + void SetOutMemDescWithLogicalLayoutFusesSupport( + const framework::ExecutionContext& ctx, + phi::DenseTensor* out, + const dnnl::memory::desc& out_md) const { + if (ctx.HasAttr("fused_unsqueeze2_axes")) { + SetOutMemDescWithUnsqueeze2FuseSupport(ctx, out, out_md); + } else if (ctx.HasAttr("fused_reshape2_shape")) { + SetOutMemDescWithReshape2FuseSupport(ctx, out, out_md); + } else if (ctx.HasAttr("fused_squeeze2_axes")) { + out->set_mem_desc(out_md); + out->Resize(phi::make_ddim(out_md.dims())); + } else { + out->set_mem_desc(out_md); + } + } + template void RunKernel(const framework::ExecutionContext& ctx) const { const auto& dev_ctx = ctx.template device_context<phi::OneDNNContext>(); - const auto& mkldnn_engine = dev_ctx.GetEngine(); + const auto& onednn_engine = dev_ctx.GetEngine(); const auto* x = ctx.Input<phi::DenseTensor>("Input"); const auto* weights = ctx.Input<phi::DenseTensor>("W"); @@ -433,7 +553,7 @@ class FCMKLDNNKernel : public framework::OpKernel { inner_product_cache->inner_product_p); src_memory_p = std::make_shared<dnnl::memory>(inner_product_cache->src_mem); - PrepareSrcMem(fc_p, src_memory_p, x, mkldnn_engine); + PrepareSrcMem(fc_p, src_memory_p, x, onednn_engine); weights_memory_p = std::make_shared<dnnl::memory>(inner_product_cache->weights_mem); @@ -463,7 +583,7 @@ class FCMKLDNNKernel : public framework::OpKernel { bias, out, in_col_dims, - mkldnn_engine, + onednn_engine, ctx.GetPlace()); src_memory_p = handler.AcquireSrcMemoryWithReorder(x); @@ -504,7 +624,7 @@ class FCMKLDNNKernel : public framework::OpKernel { dev_ctx.SetBlob(cache_key, ip_cache); } - platform::SetOutMemDescWithLogicalLayoutFusesSupport( + SetOutMemDescWithLogicalLayoutFusesSupport( ctx, out, dst_memory_p->get_desc().reshape(phi::vectorize(out->dims()))); @@ -541,7 +661,7 @@ namespace ops = paddle::operators; REGISTER_OP_KERNEL(fc, MKLDNN, - ::paddle::platform::CPUPlace, + ::phi::CPUPlace, ops::FCMKLDNNKernel, ops::FCMKLDNNKernel, ops::FCMKLDNNKernel,
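Editor's note: the three SetOutMemDesc... helpers moved onto the kernel above are pure shape arithmetic, so they can be exercised without oneDNN at all. A self-contained sketch of the two non-trivial ones, with hypothetical names: the unsqueeze variant scatters 1s at the normalized fused axes and fills the remaining slots with the original dims in order; the reshape variant resolves a single -1 entry from the element count, mirroring the -out_shape_numel / new_shape_numel trick above.

```cpp
#include <cassert>
#include <cstdint>
#include <functional>
#include <numeric>
#include <vector>

// Insert size-1 dims at `axes` (negative axes count from the end of the
// *output* rank), keeping the original dims in order.
std::vector<int64_t> Unsqueeze(const std::vector<int64_t>& dims,
                               const std::vector<int>& axes) {
  std::vector<int64_t> out(dims.size() + axes.size(), 0);
  for (int axis : axes) {
    int pos = axis < 0 ? static_cast<int>(out.size()) + axis : axis;
    out[pos] = 1;
  }
  size_t j = 0;
  for (auto& d : out)
    if (d == 0) d = dims[j++];
  return out;
}

// Resolve one -1 entry in a requested shape from the total element count.
std::vector<int64_t> ResolveReshape(std::vector<int64_t> shape, int64_t numel) {
  int64_t known = std::accumulate(shape.begin(), shape.end(), int64_t{1},
                                  std::multiplies<int64_t>());
  for (auto& d : shape)
    if (d == -1) { d = -numel / known; break; }  // `known` is negative here
  return shape;
}

int main() {
  assert((Unsqueeze({2, 3}, {0, -1}) == std::vector<int64_t>{1, 2, 3, 1}));
  assert((ResolveReshape({4, -1}, 12) == std::vector<int64_t>{4, 3}));
}
```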
diff --git a/paddle/fluid/operators/mkldnn/interpolate_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/interpolate_mkldnn_op.cc index a4f97c865414f24222bdc315e48cbb5faba989aa..908d0b74816d2f7e3de799370e01e9180af86ce9 100644 --- a/paddle/fluid/operators/mkldnn/interpolate_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/interpolate_mkldnn_op.cc @@ -12,9 +12,8 @@ See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/framework/data_layout_transform.h" #include "paddle/fluid/operators/interpolate_op.h" -#include "paddle/fluid/platform/mkldnn_reuse.h" +#include "paddle/phi/backends/onednn/onednn_reuse.h" namespace paddle { namespace operators { @@ -25,6 +24,7 @@ using dnnl::reorder; using dnnl::resampling_forward; using dnnl::stream; using phi::DataLayout; +using OneDNNMemoryFormat = dnnl::memory::format_tag; template <typename T> class InterpolateOneDNNHandler @@ -131,7 +131,7 @@ class InterpolateOneDNNKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { const auto& dev_ctx = ctx.template device_context<phi::OneDNNContext>(); - const auto& mkldnn_engine = dev_ctx.GetEngine(); + const auto& onednn_engine = dev_ctx.GetEngine(); const auto* x = ctx.Input<phi::DenseTensor>("X"); auto* out = ctx.Output<phi::DenseTensor>("Out"); @@ -146,7 +146,7 @@ class InterpolateOneDNNKernel : public framework::OpKernel { out->Resize(dim_out); InterpolateOneDNNHandler<T> handler( - algo, mkldnn_engine, ctx.GetPlace(), x, out); + algo, onednn_engine, ctx.GetPlace(), x, out); auto src_memory_p = handler.AcquireSrcMemory(x); auto dst_memory_p = handler.AcquireDstMemory(out); @@ -170,11 +170,11 @@ namespace ops = paddle::operators; REGISTER_OP_KERNEL(nearest_interp, MKLDNN, - ::paddle::platform::CPUPlace, + ::phi::CPUPlace, ops::InterpolateOneDNNKernel, ops::InterpolateOneDNNKernel, ops::InterpolateOneDNNKernel); REGISTER_OP_KERNEL(bilinear_interp, MKLDNN, - ::paddle::platform::CPUPlace, + ::phi::CPUPlace, ops::InterpolateOneDNNKernel);
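Editor's note: aside from the engine rename, the interpolate kernel above is a thin wrapper over oneDNN's resampling primitive. For reference, a minimal standalone nearest-neighbor upsample with that primitive; this assumes the oneDNN 2.x API (the desc-based constructor), and the shapes are arbitrary:

```cpp
#include "dnnl.hpp"

int main() {
  dnnl::engine eng(dnnl::engine::kind::cpu, 0);
  dnnl::stream strm(eng);

  // 1x1x2x2 -> 1x1x4x4 nearest upsample.
  dnnl::memory::desc src_md({1, 1, 2, 2}, dnnl::memory::data_type::f32,
                            dnnl::memory::format_tag::nchw);
  dnnl::memory::desc dst_md({1, 1, 4, 4}, dnnl::memory::data_type::f32,
                            dnnl::memory::format_tag::nchw);

  auto desc = dnnl::resampling_forward::desc(
      dnnl::prop_kind::forward_inference,
      dnnl::algorithm::resampling_nearest, src_md, dst_md);
  auto pd = dnnl::resampling_forward::primitive_desc(desc, eng);

  dnnl::memory src(pd.src_desc(), eng), dst(pd.dst_desc(), eng);
  dnnl::resampling_forward(pd).execute(
      strm, {{DNNL_ARG_SRC, src}, {DNNL_ARG_DST, dst}});
  strm.wait();
}
```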
diff --git a/paddle/fluid/operators/mkldnn/layer_norm_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/layer_norm_mkldnn_op.cc index df3804ab766ddc617fe65d7fb41d7a3e104a0a44..c41a2aabeef50df1816b4f3d6b7a9926a96339c5 100644 --- a/paddle/fluid/operators/mkldnn/layer_norm_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/layer_norm_mkldnn_op.cc @@ -13,7 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/platform/mkldnn_reuse.h" + +#include "paddle/phi/backends/onednn/onednn_reuse.h" #include "paddle/phi/common/data_type.h" namespace paddle { @@ -99,7 +100,7 @@ class LayerNormMKLDNNOpKernel : public paddle::framework::OpKernel { const bool is_test = ctx.Attr<bool>("is_test"); auto& dev_ctx = ctx.template device_context<phi::OneDNNContext>(); - const auto& mkldnn_engine = dev_ctx.GetEngine(); + const auto& onednn_engine = dev_ctx.GetEngine(); auto src_tz = phi::vectorize(x->dims()); PADDLE_ENFORCE_EQ(begin_norm_axis, @@ -117,7 +118,7 @@ class LayerNormMKLDNNOpKernel : public paddle::framework::OpKernel { } LayerNormOneDNNHandler<T> handler( - src_tz, epsilon, flags, is_test, x, mkldnn_engine, ctx.GetPlace()); + src_tz, epsilon, flags, is_test, x, onednn_engine, ctx.GetPlace()); auto src_memory = handler.AcquireSrcMemory(x); auto dst_memory = handler.AcquireDstMemory(out); @@ -159,6 +160,6 @@ class LayerNormMKLDNNOpKernel : public paddle::framework::OpKernel { namespace ops = paddle::operators; REGISTER_OP_KERNEL(layer_norm, MKLDNN, - ::paddle::platform::CPUPlace, + ::phi::CPUPlace, ops::LayerNormMKLDNNOpKernel, ops::LayerNormMKLDNNOpKernel); diff --git a/paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc index e10e868c4684104b48bf4aa1c7b45bd0141ad469..74c13c07275056af8564c3ee77f111f2c89a7fd8 100644 --- a/paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc @@ -12,7 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/platform/mkldnn_reuse.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/phi/backends/onednn/onednn_reuse.h" namespace paddle { namespace operators { @@ -25,13 +26,13 @@ class LRNOneDNNHandler OneDNNHandlerNoCachingT<T, dnnl::lrn_forward, dnnl::lrn_backward> { public: LRNOneDNNHandler(const framework::ExecutionContext& ctx, - const dnnl::engine mkldnn_engine, + const dnnl::engine onednn_engine, platform::Place cpu_place, const phi::DenseTensor* input) : phi::funcs:: OneDNNHandlerNoCachingT<T, dnnl::lrn_forward, dnnl::lrn_backward>( - mkldnn_engine, cpu_place) { + onednn_engine, cpu_place) { const int n = ctx.Attr<int>("n"); // MKL-DNN implements LRN in a caffe way: // http://caffe.berkeleyvision.org/tutorial/layers/lrn.html @@ -56,14 +57,14 @@ class LRNOneDNNHandler } LRNOneDNNHandler(const framework::ExecutionContext& ctx, - const dnnl::engine mkldnn_engine, + const dnnl::engine onednn_engine, platform::Place cpu_place, const phi::DenseTensor* in_x, const phi::DenseTensor* out_grad, phi::DenseTensor* in_x_grad) : phi::funcs:: OneDNNHandlerNoCachingT<T, dnnl::lrn_forward, dnnl::lrn_backward>( - mkldnn_engine, cpu_place) { + onednn_engine, cpu_place) { PADDLE_ENFORCE_EQ( ctx.Attr<bool>("is_test"), false, @@ -125,13 +126,13 @@ class LRNMKLDNNOpKernel : public paddle::framework::OpKernel { paddle::platform::errors::PreconditionNotMet( "Operator DNNL LRN must use CPUPlace")); auto& dev_ctx = ctx.template device_context<phi::OneDNNContext>(); - const auto& mkldnn_engine = dev_ctx.GetEngine(); + const auto& onednn_engine = dev_ctx.GetEngine(); auto x = ctx.Input<phi::DenseTensor>("X"); auto out = ctx.Output<phi::DenseTensor>("Out"); auto mid = ctx.Output<phi::DenseTensor>("MidOut"); - LRNOneDNNHandler<T> handler(ctx, mkldnn_engine, ctx.GetPlace(), x); + LRNOneDNNHandler<T> handler(ctx, onednn_engine, ctx.GetPlace(), x); auto src_memory = handler.AcquireSrcMemory(x); auto dst_memory = handler.AcquireDstMemory(out); @@ -179,10 +180,10 @@ class LRNMKLDNNGradOpKernel : public
paddle::framework::OpKernel { auto in_x_grad = ctx.Output<phi::DenseTensor>(framework::GradVarName("X")); auto& dev_ctx = ctx.template device_context<phi::OneDNNContext>(); - const auto& mkldnn_engine = dev_ctx.GetEngine(); + const auto& onednn_engine = dev_ctx.GetEngine(); LRNOneDNNHandler<T> handler( - ctx, mkldnn_engine, ctx.GetPlace(), in_x, out_grad, in_x_grad); + ctx, onednn_engine, ctx.GetPlace(), in_x, out_grad, in_x_grad); auto src_memory = handler.AcquireSrcMemory(in_x); auto workspace = handler.AcquireBackwardWorkspaceMemory(mid); @@ -207,11 +208,8 @@ class LRNMKLDNNGradOpKernel : public paddle::framework::OpKernel { namespace ops = paddle::operators; -REGISTER_OP_KERNEL(lrn, - MKLDNN, - paddle::platform::CPUPlace, - ops::LRNMKLDNNOpKernel); +REGISTER_OP_KERNEL(lrn, MKLDNN, phi::CPUPlace, ops::LRNMKLDNNOpKernel); REGISTER_OP_KERNEL(lrn_grad, MKLDNN, - paddle::platform::CPUPlace, + phi::CPUPlace, ops::LRNMKLDNNGradOpKernel); diff --git a/paddle/fluid/operators/mkldnn/matmul_v2_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/matmul_v2_mkldnn_op.cc index c2556b6bfc41dfa4f8efbbd17edd717ea13a5151..7bf66bae93cc3e0659dce090415bac35c357e462 100644 --- a/paddle/fluid/operators/mkldnn/matmul_v2_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/matmul_v2_mkldnn_op.cc @@ -14,14 +14,13 @@ limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/tensor.h" -#include "paddle/fluid/platform/mkldnn_reuse.h" +#include "paddle/phi/backends/onednn/onednn_reuse.h" #include "paddle/phi/kernels/funcs/blas/blas.h" namespace { using dnnl::memory; using paddle::framework::ExecutionContext; using paddle::framework::GradVarName; -using paddle::platform::MatMulV2MKLDNNHandler; using phi::OneDNNContext; using phi::vectorize; using phi::funcs::OneDNNGetDataType; @@ -82,6 +81,239 @@ phi::DDim GetDimForInput(const ExecutionContext &ctx, std::string input_name) { return input_dims; } +template <typename XT, typename YT, typename OT> +class MatMulV2MKLDNNHandler + : public phi::funcs::OneDNNHandlerNoCachingT<XT, dnnl::matmul> { + public: + MatMulV2MKLDNNHandler(const ExecutionContext &ctx, + const dnnl::engine engine, + paddle::platform::Place cpu_place, + const std::vector<int64_t> &x_org_dims, + bool trans_x, + const std::vector<int64_t> &y_org_dims, + bool trans_y, + bool is_output_fused, + const std::vector<int64_t> &x_strides_override, + const std::vector<int64_t> &y_strides_override) + : phi::funcs::OneDNNHandlerNoCachingT<XT, dnnl::matmul>(engine, + cpu_place) { + // M X K * K X N + std::vector<int64_t> x_dims(x_org_dims); + std::vector<int64_t> y_dims(y_org_dims); + + const int MB_idx = x_dims.size() - 3; + const int H_idx = x_dims.size() - 2; + const int W_idx = x_dims.size() - 1; + + if (trans_x) std::swap(x_dims[H_idx], x_dims[W_idx]); + if (trans_y) std::swap(y_dims[H_idx], y_dims[W_idx]); + + const memory::dim M = x_dims[H_idx]; + const memory::dim K = x_dims[W_idx]; + const memory::dim N = y_dims[W_idx]; + + std::vector<memory::dim> x_strides(x_dims.size() - 3, 1); + std::vector<memory::dim> y_strides(x_dims.size() - 3, 1); + std::vector<memory::dim> out_strides(x_dims.size() - 3, 1); + std::vector<memory::dim> out_ddims(x_dims.size() - 3, 1); + + x_strides.reserve(x_dims.size()); + y_strides.reserve(x_dims.size()); + out_strides.reserve(x_dims.size()); + + if (!x_strides_override.empty()) { + x_strides = x_strides_override; + } else { + if (!trans_x) { + x_strides.insert(x_strides.end(), {M * K, K, 1}); + } else { + x_strides.insert(x_strides.end(), {M * K, 1, M}); + } + } + + if (!y_strides_override.empty()) { + y_strides = y_strides_override; + } else { + if (!trans_y) { + y_strides.insert(y_strides.end(), {N * K, N, 1}); + } else {
+ y_strides.insert(y_strides.end(), {N * K, 1, K}); + } + } + + out_strides.insert(out_strides.end(), {M * N, N, 1}); + out_ddims.insert(out_ddims.end(), + {std::max(x_dims[MB_idx], y_dims[MB_idx]), M, N}); + + for (int i = x_dims.size() - 4; i >= 0; --i) { + out_ddims[i] = std::max(x_dims[i], y_dims[i]); + if (x_strides_override.empty()) { + x_strides[i] = x_dims[i + 1] * x_strides[i + 1]; + } + if (y_strides_override.empty()) { + y_strides[i] = y_dims[i + 1] * y_strides[i + 1]; + } + out_strides[i] = out_ddims[i + 1] * out_strides[i + 1]; + } + + // TODO(jczaja): Why not for int8?? + if (!phi::funcs::is_int8<OT>() && is_output_fused) { + out_strides = FakeTransposeStrides(out_ddims); + } + + auto x_md = + memory::desc(x_dims, phi::funcs::OneDNNGetDataType<XT>(), x_strides); + auto y_md = + memory::desc(y_dims, phi::funcs::OneDNNGetDataType<YT>(), y_strides); + auto out_md = memory::desc( + out_ddims, phi::funcs::OneDNNGetDataType<OT>(), out_strides); + + const dnnl::primitive_attr matmul_attrs = CreateMatmulAttrs(ctx); + + this->AcquireForwardPrimitiveDescriptor(matmul_attrs, x_md, y_md, out_md); + } + + void AppendActivation(const ExecutionContext &ctx, + dnnl::post_ops &post_ops, // NOLINT + float activation_scale = 1.0f) { + const auto invalid_attribute = + ctx.HasAttr("fuse_activation") + ? ctx.Attr<std::string>("fuse_activation").empty() + : true; + if (invalid_attribute) return; + + const auto fuse_activation = ctx.Attr<std::string>("fuse_activation"); + const auto fuse_alpha = + ctx.HasAttr("fuse_alpha") ? ctx.Attr<float>("fuse_alpha") : 0.0f; + const auto fuse_beta = + ctx.HasAttr("fuse_beta") ? ctx.Attr<float>("fuse_beta") : 0.0f; + + if (fuse_activation == "hard_sigmoid") { + post_ops.append_eltwise(activation_scale, + dnnl::algorithm::eltwise_linear, + fuse_alpha, + fuse_beta); + post_ops.append_eltwise( + activation_scale, dnnl::algorithm::eltwise_clip, 0.0f, 1.0f); + } else { + const std::unordered_map<std::string, dnnl::algorithm> activation_map = { + {"abs", dnnl::algorithm::eltwise_abs}, + {"clip", dnnl::algorithm::eltwise_clip}, + {"gelu", dnnl::algorithm::eltwise_gelu_erf}, + {"gelu_erf", dnnl::algorithm::eltwise_gelu_erf}, + {"gelu_tanh", dnnl::algorithm::eltwise_gelu_tanh}, + {"hard_swish", dnnl::algorithm::eltwise_hardswish}, + {"leaky_relu", dnnl::algorithm::eltwise_relu}, + {"mish", dnnl::algorithm::eltwise_mish}, + {"relu", dnnl::algorithm::eltwise_relu}, + {"relu6", dnnl::algorithm::eltwise_bounded_relu}, + {"sigmoid", dnnl::algorithm::eltwise_logistic}, + {"sqrt", dnnl::algorithm::eltwise_sqrt}, + {"swish", dnnl::algorithm::eltwise_swish}, + {"tanh", dnnl::algorithm::eltwise_tanh}}; + + const auto &activation_type = activation_map.find(fuse_activation); + + PADDLE_ENFORCE_NE( + activation_type, + activation_map.end(), + phi::errors::InvalidArgument( + "Activation '%s' not found in oneDNN algorithms mapper", + fuse_activation)); + + post_ops.append_eltwise( + activation_scale, activation_type->second, fuse_alpha, fuse_beta); + } + }
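Editor's note: the constructor above never physically transposes an operand: trans_x/trans_y only swap the logical dims and emit swapped strides, so the matmul reads the data in place, and batch dims are broadcast by taking the per-dimension max. A tiny sketch of the 3-D stride rule (helper name is mine):

```cpp
#include <cassert>
#include <cstdint>
#include <vector>

// Row-major strides for an MxK matmul operand. trans=false: element (i,j)
// lives at i*cols + j, strides {rows*cols, cols, 1}. trans=true: the data is
// stored cols-by-rows, so logical (i,j) lives at j*rows + i, strides
// {rows*cols, 1, rows} -- no copy needed.
std::vector<int64_t> OperandStrides(int64_t rows, int64_t cols, bool trans) {
  return trans ? std::vector<int64_t>{rows * cols, 1, rows}
               : std::vector<int64_t>{rows * cols, cols, 1};
}

int main() {
  assert((OperandStrides(2, 3, false) == std::vector<int64_t>{6, 3, 1}));
  assert((OperandStrides(2, 3, true) == std::vector<int64_t>{6, 1, 2}));
}
```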
+ + float ComputeOutputScale(const ExecutionContext &ctx) { + float alpha = ctx.HasAttr("alpha") ? ctx.Attr<float>("alpha") : 1.0f; + if (ctx.HasAttr("Scale_x") && ctx.HasAttr("Scale_y") && + ctx.HasAttr("Scale_out")) { + float scale_x = ctx.Attr<float>("Scale_x"); + float scale_y = ctx.Attr<float>("Scale_y"); + bool force_fp32_out = ctx.HasAttr("force_fp32_output") + ? ctx.Attr<bool>("force_fp32_output") + : false; + float scale_out = force_fp32_out ? 1.f : ctx.Attr<float>("Scale_out"); + alpha *= scale_out / (scale_x * scale_y); + } + return alpha; + } + + dnnl::primitive_attr CreateMatmulAttrs(const ExecutionContext &ctx) { + dnnl::primitive_attr matmul_attrs; + dnnl::post_ops post_operations; + + float scale_out = ComputeOutputScale(ctx); + if (scale_out != 1.0f) { + matmul_attrs.set_output_scales(0, {scale_out}); + } + + if (ctx.HasInput("ResidualData")) { + auto *residual_data = ctx.Input<phi::DenseTensor>("ResidualData"); + auto residual_data_tz = phi::vectorize(residual_data->dims()); + auto residual_data_md = memory::desc(residual_data_tz, + phi::funcs::OneDNNGetDataType<OT>(), + dnnl::memory::format_tag::any); + post_operations.append_binary(dnnl::algorithm::binary_add, + residual_data_md); + if (ctx.HasAttr("Scale_in_eltwise")) { + float sum_scale = scale_out / ctx.Attr<float>("Scale_in_eltwise"); + post_operations.append_sum(sum_scale); + } + } + + AppendActivation(ctx, post_operations); + + if (ctx.HasAttr("fused_output_scale")) { + float scale_alpha = ctx.Attr<float>("fused_output_scale"); + post_operations.append_eltwise( + 1.0, dnnl::algorithm::eltwise_linear, scale_alpha, 0.0f); + } + + matmul_attrs.set_post_ops(post_operations); + return matmul_attrs; + } + + std::vector<int64_t> FakeTransposeStrides( + const std::vector<int64_t> &matmul_out_dims) const { + // fuse matmul_v2 + transpose + reshape guarantees that output is 4D and + // transpose axis are: {0, 2, 1, 3} + std::vector<int64_t> transpose_axis = {0, 2, 1, 3}; + std::vector<int64_t> fake_strides(transpose_axis.size()); + int ndims = static_cast<int>(transpose_axis.size()); + + int total_stride = 1; + + for (int i = ndims - 1; i >= 0; --i) { + fake_strides[transpose_axis[i]] = total_stride; + total_stride *= matmul_out_dims[transpose_axis[i]]; + } + + return fake_strides; + }
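Editor's note: FakeTransposeStrides above lets the fused matmul_v2 + transpose(0, 2, 1, 3) + reshape pattern write its output directly in the post-transpose layout, by assigning strides as if the dims were already permuted. A worked check of that arithmetic (standalone sketch, name is mine):

```cpp
#include <cassert>
#include <cstdint>
#include <vector>

// Strides that make a tensor of shape `dims` physically laid out as its
// {0, 2, 1, 3} transpose, mirroring FakeTransposeStrides above.
std::vector<int64_t> TransposedStrides(const std::vector<int64_t>& dims) {
  const std::vector<int64_t> axis = {0, 2, 1, 3};
  std::vector<int64_t> strides(4);
  int64_t total = 1;
  for (int i = 3; i >= 0; --i) {
    strides[axis[i]] = total;
    total *= dims[axis[i]];
  }
  return strides;
}

int main() {
  // Shape {2, 3, 4, 5}: the transposed shape is {2, 4, 3, 5}, whose
  // contiguous strides {60, 15, 5, 1} attach to the original axes in order
  // {0, 2, 1, 3}, giving {60, 5, 15, 1}.
  assert((TransposedStrides({2, 3, 4, 5}) ==
          std::vector<int64_t>{60, 5, 15, 1}));
}
```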
+ + std::shared_ptr<dnnl::memory> AcquireWeightsMemory(const phi::DenseTensor *input) { + const YT *input_data = input->data<YT>(); + return this->AcquireMemoryFromPrimitive( + this->fwd_pd_->weights_desc(), + phi::funcs::to_void_cast<YT>(input_data)); + } + + std::shared_ptr<dnnl::memory> AcquireDstMemory(phi::DenseTensor *output) { + // We cannot use the base AcquireDstMemory, as it makes an allocation + // request based on the DST memory primitive size. This is fine in general, + // but in MatMul the primitive covers only one batch of data and the + // pointer is shifted for every new batch. Hence the phi::DenseTensor size + // is bigger than the dst memory primitive size, so we would request less + // memory than is actually there and trigger an assertion. So, as there is + // no 'any' format here, we can leave the default size of phi::DenseTensor + // as computed in ComputeInferShape. + OT *ptr = output->mutable_data<OT>(this->place_); + return this->AcquireMemoryFromPrimitive(this->fwd_pd_->dst_desc(), ptr); + } +}; + template <typename XT, typename YT, typename OT> class MatMulMKLDNNHandler : public phi::funcs::OneDNNHandlerNoCachingT<XT, dnnl::matmul> { @@ -696,7 +928,7 @@ class MatMulGradMKLDNNKernel : public paddle::framework::OpKernel { REGISTER_OP_KERNEL(matmul, MKLDNN, - ::paddle::platform::CPUPlace, + ::phi::CPUPlace, MatMulMKLDNNKernel, MatMulMKLDNNKernel, MatMulMKLDNNKernel, @@ -704,6 +936,6 @@ REGISTER_OP_KERNEL(matmul, REGISTER_OP_KERNEL(matmul_grad, MKLDNN, - ::paddle::platform::CPUPlace, + ::phi::CPUPlace, MatMulGradMKLDNNKernel, MatMulGradMKLDNNKernel); diff --git a/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc index c23f247c9d212963226b07b08e0abfb0fc497924..d78b8b6e1887514aa552433750d66ea2078e4944 100644 --- a/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc @@ -14,10 +14,10 @@ limitations under the License. */ #include "paddle/fluid/operators/quantize_op.h" -#include "paddle/fluid/framework/data_layout_transform.h" +#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/platform/mkldnn_helper.h" -#include "paddle/fluid/platform/mkldnn_reuse.h" +#include "paddle/phi/backends/onednn/onednn_reuse.h" namespace paddle { namespace operators { @@ -106,5 +106,5 @@ namespace ops = paddle::operators; REGISTER_OP_KERNEL(quantize, MKLDNN, - ::paddle::platform::CPUPlace, + ::phi::CPUPlace, ops::QuantOpKernel); diff --git a/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc index a9408ad38e3a192e180ed46b4cbee4064f898a44..0d6708988c75f4c1c516184289a20f9dfd26b1f7 100644 --- a/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc @@ -14,7 +14,6 @@ limitations under the License. */ #include // NOLINT #include "dnnl.hpp" // NOLINT -#include "paddle/fluid/framework/data_layout_transform.h" #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/operators/requantize_op.h" #include "paddle/phi/backends/onednn/onednn_helper.h" @@ -115,7 +114,7 @@ namespace ops = paddle::operators; REGISTER_OP_KERNEL(requantize, MKLDNN, - ::paddle::platform::CPUPlace, + ::phi::CPUPlace, ops::ReQuantOpKernel, ops::ReQuantOpKernel, ops::ReQuantOpKernel); diff --git a/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc index 5a540b802e60bdeaa3d25d368835e265976dc9e7..6a6c4df4a5da491368c9bce6bed97dba94b8c62c 100644 --- a/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc @@ -12,9 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
*/ +#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/flatten_op.h" #include "paddle/fluid/operators/squeeze_op.h" -#include "paddle/fluid/platform/mkldnn_reuse.h" +#include "paddle/phi/backends/onednn/onednn_reuse.h" namespace { enum class ReshapeKernelOpName { @@ -357,7 +358,7 @@ namespace ops = paddle::operators; REGISTER_OP_KERNEL( squeeze, MKLDNN, - paddle::platform::CPUPlace, + phi::CPUPlace, ops::ReshapeMKLDNNKernel, ops::ReshapeMKLDNNKernel); @@ -365,7 +366,7 @@ REGISTER_OP_KERNEL( REGISTER_OP_KERNEL( squeeze_grad, MKLDNN, - paddle::platform::CPUPlace, + phi::CPUPlace, ops::ReshapeGradMKLDNNKernel, ops::ReshapeGradMKLDNNKernel); @@ -373,7 +374,7 @@ REGISTER_OP_KERNEL( REGISTER_OP_KERNEL( reshape, MKLDNN, - paddle::platform::CPUPlace, + phi::CPUPlace, ops::ReshapeMKLDNNKernel, ops::ReshapeMKLDNNKernel); @@ -381,7 +382,7 @@ REGISTER_OP_KERNEL( REGISTER_OP_KERNEL( reshape_grad, MKLDNN, - paddle::platform::CPUPlace, + phi::CPUPlace, ops::ReshapeGradMKLDNNKernel, ops::ReshapeGradMKLDNNKernel); @@ -389,7 +390,7 @@ REGISTER_OP_KERNEL( REGISTER_OP_KERNEL( reshape2_grad, MKLDNN, - paddle::platform::CPUPlace, + phi::CPUPlace, ops::ReshapeGradMKLDNNKernel, ops::ReshapeGradMKLDNNKernel); @@ -397,7 +398,7 @@ REGISTER_OP_KERNEL( REGISTER_OP_KERNEL( flatten, MKLDNN, - paddle::platform::CPUPlace, + phi::CPUPlace, ops::ReshapeMKLDNNKernel, ops::ReshapeMKLDNNKernel); @@ -405,7 +406,7 @@ REGISTER_OP_KERNEL( REGISTER_OP_KERNEL( flatten_grad, MKLDNN, - paddle::platform::CPUPlace, + phi::CPUPlace, ops::ReshapeGradMKLDNNKernel, ops::ReshapeGradMKLDNNKernel); @@ -413,7 +414,7 @@ REGISTER_OP_KERNEL( REGISTER_OP_KERNEL( flatten2, MKLDNN, - paddle::platform::CPUPlace, + phi::CPUPlace, ops::ReshapeMKLDNNKernel, ops::ReshapeMKLDNNKernel); @@ -421,7 +422,7 @@ REGISTER_OP_KERNEL( REGISTER_OP_KERNEL( flatten2_grad, MKLDNN, - paddle::platform::CPUPlace, + phi::CPUPlace, ops::ReshapeGradMKLDNNKernel, ops::ReshapeGradMKLDNNKernel); diff --git a/paddle/fluid/operators/mkldnn/shuffle_channel_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/shuffle_channel_mkldnn_op.cc index 36ea07dfdd710a9b154089b437876c0a2c4e8178..783ef8bc50b436195e682047fc755ea9244accb3 100644 --- a/paddle/fluid/operators/mkldnn/shuffle_channel_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/shuffle_channel_mkldnn_op.cc @@ -12,7 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "paddle/fluid/platform/mkldnn_reuse.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/phi/backends/onednn/onednn_reuse.h" namespace paddle { namespace operators { @@ -38,7 +39,7 @@ class ShuffleChannelMKLDNNKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { const auto& dev_ctx = ctx.template device_context<phi::OneDNNContext>(); - const auto& mkldnn_engine = dev_ctx.GetEngine(); + const auto& onednn_engine = dev_ctx.GetEngine(); const auto* x = ctx.Input<phi::DenseTensor>("X"); auto* out = ctx.Output<phi::DenseTensor>("Out"); @@ -47,7 +48,7 @@ class ShuffleChannelMKLDNNKernel : public framework::OpKernel { const int group = x->dims()[1] / ctx.Attr<int>("group"); ShuffleChannelMKLDNNHandler<T> handler( - x, group, mkldnn_engine, ctx.GetPlace()); + x, group, onednn_engine, ctx.GetPlace()); auto src_memory_p = handler.AcquireSrcMemory(x); auto dst_memory_p = handler.AcquireDstMemory(out); @@ -69,6 +70,6 @@ class ShuffleChannelMKLDNNKernel : public framework::OpKernel { namespace ops = paddle::operators; REGISTER_OP_KERNEL(shuffle_channel, MKLDNN, - paddle::platform::CPUPlace, + phi::CPUPlace, ops::ShuffleChannelMKLDNNKernel, ops::ShuffleChannelMKLDNNKernel); diff --git a/paddle/fluid/operators/mkldnn/test_mkldnn_caching.cc b/paddle/fluid/operators/mkldnn/test_mkldnn_caching.cc index cbf0b918e6d727706a403d290c5def54e5dc8704..24be9e518d37a5d6ff201ef1d65a0ba7ec45b03a 100644 --- a/paddle/fluid/operators/mkldnn/test_mkldnn_caching.cc +++ b/paddle/fluid/operators/mkldnn/test_mkldnn_caching.cc @@ -22,9 +22,8 @@ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/scope.h" -#include "paddle/fluid/platform/device_context.h" -#include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/platform/place.h" +#include "paddle/phi/common/place.h" +#include "paddle/phi/core/enforce.h" #include "paddle/phi/core/kernel_registry.h" USE_OP_ITSELF(elementwise_add); @@ -51,7 +50,7 @@ class CacheTester { CacheTester() { // Clear oneDNN cache auto &pool = platform::DeviceContextPool::Instance(); - platform::CPUPlace place; + phi::CPUPlace place; onednn_dev_ctx_ = dynamic_cast<phi::OneDNNContext *>(pool.Get(place)); onednn_dev_ctx_->ResetBlobMap(nullptr); } @@ -140,7 +139,7 @@ void RunOperator(const platform::Place &place, TEST(test_conv2d_reuse_cache, cpu_place) { framework::DDim dims({1, 16, 32, 64}); - platform::CPUPlace p; + phi::CPUPlace p; CacheTester ct; RunOperator(p, "conv2d", dims, "input_signal"); RunOperator(p, "conv2d", dims, "input_signal"); @@ -152,7 +151,7 @@ TEST(test_conv2d_reuse_cache, cpu_place) { TEST(test_conv2d_noreuse_cache, cpu_place) { framework::DDim dims({1, 16, 32, 64}); - platform::CPUPlace p; + phi::CPUPlace p; CacheTester ct; RunOperator(p, "conv2d", dims, "input_signal"); RunOperator(p, "conv2d", dims, "input_signal2"); diff --git a/paddle/fluid/operators/mkldnn/test_mkldnn_op_inplace.cc b/paddle/fluid/operators/mkldnn/test_mkldnn_op_inplace.cc index 2c8ef7f0981dd1d107a402e497d8edd7a7b787d7..4beb314fe6b76b3db9ebb99aa5cc631df5b7734f 100644 --- a/paddle/fluid/operators/mkldnn/test_mkldnn_op_inplace.cc +++ b/paddle/fluid/operators/mkldnn/test_mkldnn_op_inplace.cc @@ -22,9 +22,8 @@ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/scope.h" -#include "paddle/fluid/platform/device_context.h" -#include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/platform/place.h" +#include "paddle/phi/common/place.h" +#include
"paddle/phi/core/enforce.h" #include "paddle/phi/core/kernel_registry.h" USE_OP_ITSELF(elementwise_add); @@ -137,13 +136,13 @@ bool TestMain(const platform::Place &place, TEST(test_softmax_inplace, cpu_place) { framework::DDim dims({32, 64}); - platform::CPUPlace p; + phi::CPUPlace p; ASSERT_TRUE(TestMain(p, "softmax", dims, 1)); } TEST(test_relu_inplace, cpu_place) { framework::DDim dims({1, 12, 20, 20}); - platform::CPUPlace p; + phi::CPUPlace p; ASSERT_TRUE(TestMain(p, "relu", dims, 1)); } diff --git a/paddle/fluid/operators/mkldnn/test_mkldnn_op_nhwc.cc b/paddle/fluid/operators/mkldnn/test_mkldnn_op_nhwc.cc index 6b371e2317c52dd8328ae00ac3feb8c2d6bb3806..456c6831080520173861ac5c33849833c275e4b7 100644 --- a/paddle/fluid/operators/mkldnn/test_mkldnn_op_nhwc.cc +++ b/paddle/fluid/operators/mkldnn/test_mkldnn_op_nhwc.cc @@ -22,9 +22,8 @@ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/scope.h" -#include "paddle/fluid/platform/device_context.h" -#include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/platform/place.h" +#include "paddle/phi/common/place.h" +#include "paddle/phi/core/enforce.h" #include "paddle/phi/core/kernel_registry.h" USE_OP_ITSELF(pool2d); @@ -53,7 +52,7 @@ struct InputVars { TEST(test_pool2d_transpose_nhwc, cpu_place) { framework::DDim dims({1, 4, 8, 512}); // NHWC shape framework::DDim expected_dims({1, 7, 512, 3}); // NHWC expected shape - platform::CPUPlace p; + phi::CPUPlace p; framework::Scope scope; InputVars input_name = {"x", scope.Var("x")->GetMutable()}; @@ -109,7 +108,7 @@ TEST(test_pool2d_transpose_nhwc, cpu_place) { TEST(test_pool2d_relu_relu_nhwc, cpu_place) { framework::DDim dims({1, 4, 8, 512}); // NHWC shape framework::DDim expected_dims({1, 512, 3, 7}); // NCHW expected shape - platform::CPUPlace p; + phi::CPUPlace p; framework::Scope scope; InputVars input_name = {"x", scope.Var("x")->GetMutable()}; @@ -172,7 +171,7 @@ TEST(test_pool2d_relu_relu_nhwc, cpu_place) { TEST(test_pool2d_shape_nhwc, cpu_place) { framework::DDim dims({1, 4, 8, 512}); // NHWC shape std::vector expected_dims{1, 3, 7, 512}; // NHWC expected shape - platform::CPUPlace p; + phi::CPUPlace p; framework::Scope scope; InputVars input_name = {"x", scope.Var("x")->GetMutable()}; @@ -227,7 +226,7 @@ TEST(test_pool2d_shape_nhwc, cpu_place) { TEST(test_pool2d_crop_nhwc, cpu_place) { framework::DDim dims({1, 4, 8, 512}); // NHWC shape framework::DDim expected_dims({1, 3, 7, 512}); // NCHW expected shape - platform::CPUPlace p; + phi::CPUPlace p; framework::Scope scope; InputVars input_name = {"x", scope.Var("x")->GetMutable()}; diff --git a/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc index aa8c84879139ac09688f878868285328f58436b4..8b947b4679b0b3e8eae643ad9b425c4ab56ec168 100644 --- a/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc @@ -12,10 +12,9 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "paddle/fluid/framework/data_layout_transform.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/memory/malloc.h" -#include "paddle/fluid/platform/mkldnn_reuse.h" +#include "paddle/phi/backends/onednn/onednn_reuse.h" namespace paddle { namespace operators { @@ -166,10 +165,10 @@ namespace ops = paddle::operators; REGISTER_OP_KERNEL(transpose, MKLDNN, - ::paddle::platform::CPUPlace, + ::phi::CPUPlace, ops::TransposeMKLDNNOpKernel); REGISTER_OP_KERNEL(transpose_grad, MKLDNN, - ::paddle::platform::CPUPlace, + ::phi::CPUPlace, ops::TransposeMKLDNNGradOpKernel); diff --git a/paddle/fluid/operators/pool_op.h b/paddle/fluid/operators/pool_op.h index fd2c0ce15b4615bf789fd6a69d55c990da5cb29e..9bb7572c103aeebf595781f5334ca8153ae6d4cd 100644 --- a/paddle/fluid/operators/pool_op.h +++ b/paddle/fluid/operators/pool_op.h @@ -12,9 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -// NOTE(Ruibiao): Difficult to remove code from this header file because too -// many files rely on it through "mkldnn_reuse.h" - #pragma once #include "paddle/fluid/framework/op_registry.h" diff --git a/paddle/fluid/platform/mkldnn_helper.h b/paddle/fluid/platform/mkldnn_helper.h index a7f15dc1297928529eae7f026b94ac16bcb3662b..86a8a24e84673ca78682527f0e88b743b8120a10 100644 --- a/paddle/fluid/platform/mkldnn_helper.h +++ b/paddle/fluid/platform/mkldnn_helper.h @@ -23,11 +23,10 @@ limitations under the License. */ #include "dnnl.hpp" // NOLINT #include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/platform/place.h" #include "paddle/phi/backends/onednn/onednn_helper.h" +#include "paddle/phi/common/place.h" namespace paddle { #ifdef PADDLE_WITH_MKLDNN -using OneDNNMemoryFormat = dnnl::memory::format_tag; using phi::OneDNNContext; #endif namespace platform { diff --git a/paddle/fluid/platform/mkldnn_reuse.h b/paddle/fluid/platform/mkldnn_reuse.h deleted file mode 100644 index 0142fa2afd13de6e5faa3c2b537df05d0a7bd59b..0000000000000000000000000000000000000000 --- a/paddle/fluid/platform/mkldnn_reuse.h +++ /dev/null @@ -1,336 +0,0 @@ -/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ -#pragma once - -#include -#include -#include -#include -#include -#include - -#include "paddle/fluid/framework/data_layout_transform.h" -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/operators/pool_op.h" -#include "paddle/fluid/platform/mkldnn_helper.h" -#include "paddle/fluid/platform/place.h" -#include "paddle/phi/backends/onednn/onednn_reuse.h" - -namespace paddle { -namespace platform { - -using memory = dnnl::memory; - -static void AppendActivation(const framework::ExecutionContext& ctx, - dnnl::post_ops& post_ops, // NOLINT - float activation_scale = 1.0f) { - const auto invalid_attribute = - ctx.HasAttr("fuse_activation") - ? 
ctx.Attr("fuse_activation").empty() - : true; - if (invalid_attribute) return; - - const auto fuse_activation = ctx.Attr("fuse_activation"); - const auto fuse_alpha = - ctx.HasAttr("fuse_alpha") ? ctx.Attr("fuse_alpha") : 0.0f; - const auto fuse_beta = - ctx.HasAttr("fuse_beta") ? ctx.Attr("fuse_beta") : 0.0f; - - if (fuse_activation == "hard_sigmoid") { - post_ops.append_eltwise(activation_scale, - dnnl::algorithm::eltwise_linear, - fuse_alpha, - fuse_beta); - post_ops.append_eltwise( - activation_scale, dnnl::algorithm::eltwise_clip, 0.0f, 1.0f); - } else { - const std::unordered_map activation_map = { - {"abs", dnnl::algorithm::eltwise_abs}, - {"clip", dnnl::algorithm::eltwise_clip}, - {"gelu", dnnl::algorithm::eltwise_gelu_erf}, - {"gelu_erf", dnnl::algorithm::eltwise_gelu_erf}, - {"gelu_tanh", dnnl::algorithm::eltwise_gelu_tanh}, - {"hard_swish", dnnl::algorithm::eltwise_hardswish}, - {"leaky_relu", dnnl::algorithm::eltwise_relu}, - {"mish", dnnl::algorithm::eltwise_mish}, - {"relu", dnnl::algorithm::eltwise_relu}, - {"relu6", dnnl::algorithm::eltwise_bounded_relu}, - {"sigmoid", dnnl::algorithm::eltwise_logistic}, - {"sqrt", dnnl::algorithm::eltwise_sqrt}, - {"swish", dnnl::algorithm::eltwise_swish}, - {"tanh", dnnl::algorithm::eltwise_tanh}}; - - const auto& activation_type = activation_map.find(fuse_activation); - - PADDLE_ENFORCE_NE( - activation_type, - activation_map.end(), - platform::errors::InvalidArgument( - "Activation '%s' not found in oneDNN algorithms mapper", - fuse_activation)); - - post_ops.append_eltwise( - activation_scale, activation_type->second, fuse_alpha, fuse_beta); - } -} - -static void SetOutMemDescWithUnsqueeze2FuseSupport( - const framework::ExecutionContext& ctx, - phi::DenseTensor* out, - const dnnl::memory::desc& out_md) { - const std::vector& fused_unsqueeze2_axes = - ctx.Attr>("fused_unsqueeze2_axes"); - const std::vector& op_tz = out_md.dims(); - std::vector unsqueezed_op_tz( - op_tz.size() + fused_unsqueeze2_axes.size(), 0); - - for (const auto& axis : fused_unsqueeze2_axes) { - int positive_axis = axis < 0 ? 
unsqueezed_op_tz.size() + axis : axis; - unsqueezed_op_tz[positive_axis] = 1; - } - - int j = 0; - for (size_t i = 0; i < unsqueezed_op_tz.size(); ++i) { - if (unsqueezed_op_tz[i] == 0) { - unsqueezed_op_tz[i] = op_tz[j++]; - } - } - out->set_mem_desc(out_md.reshape(unsqueezed_op_tz)); - out->Resize(phi::make_ddim(unsqueezed_op_tz)); -} - -static void SetOutMemDescWithReshape2FuseSupport( - const framework::ExecutionContext& ctx, - phi::DenseTensor* out, - const dnnl::memory::desc& out_md) { - std::vector fused_reshape2_shape( - ctx.Attr>("fused_reshape2_shape").begin(), - ctx.Attr>("fused_reshape2_shape").end()); - - const int out_shape_numel = out->numel(); - const int new_shape_numel = std::accumulate(fused_reshape2_shape.begin(), - fused_reshape2_shape.end(), - 1, - std::multiplies()); - - for (size_t i = 0; i < fused_reshape2_shape.size(); ++i) { - if (fused_reshape2_shape[i] == -1) { - fused_reshape2_shape[i] = -out_shape_numel / new_shape_numel; - break; - } - } - - out->set_mem_desc(out_md.reshape(fused_reshape2_shape)); - out->Resize(phi::make_ddim(fused_reshape2_shape)); -} - -static void SetOutMemDescWithLogicalLayoutFusesSupport( - const framework::ExecutionContext& ctx, - phi::DenseTensor* out, - const dnnl::memory::desc& out_md) { - if (ctx.HasAttr("fused_unsqueeze2_axes")) { - SetOutMemDescWithUnsqueeze2FuseSupport(ctx, out, out_md); - } else if (ctx.HasAttr("fused_reshape2_shape")) { - SetOutMemDescWithReshape2FuseSupport(ctx, out, out_md); - } else if (ctx.HasAttr("fused_squeeze2_axes")) { - out->set_mem_desc(out_md); - out->Resize(phi::make_ddim(out_md.dims())); - } else { - out->set_mem_desc(out_md); - } -} - -template -class MatMulV2MKLDNNHandler - : public phi::funcs::OneDNNHandlerNoCachingT { - public: - MatMulV2MKLDNNHandler(const framework::ExecutionContext& ctx, - const dnnl::engine engine, - paddle::platform::Place cpu_place, - const std::vector& x_org_dims, - bool trans_x, - const std::vector& y_org_dims, - bool trans_y, - bool is_output_fused, - const std::vector& x_strides_override, - const std::vector& y_strides_override) - : phi::funcs::OneDNNHandlerNoCachingT(engine, - cpu_place) { - // M X K * K X N - std::vector x_dims(x_org_dims); - std::vector y_dims(y_org_dims); - - const int MB_idx = x_dims.size() - 3; - const int H_idx = x_dims.size() - 2; - const int W_idx = x_dims.size() - 1; - - if (trans_x) std::swap(x_dims[H_idx], x_dims[W_idx]); - if (trans_y) std::swap(y_dims[H_idx], y_dims[W_idx]); - - const memory::dim M = x_dims[H_idx]; - const memory::dim K = x_dims[W_idx]; - const memory::dim N = y_dims[W_idx]; - - std::vector x_strides(x_dims.size() - 3, 1); - std::vector y_strides(x_dims.size() - 3, 1); - std::vector out_strides(x_dims.size() - 3, 1); - std::vector out_ddims(x_dims.size() - 3, 1); - - x_strides.reserve(x_dims.size()); - y_strides.reserve(x_dims.size()); - out_strides.reserve(x_dims.size()); - - if (!x_strides_override.empty()) { - x_strides = x_strides_override; - } else { - if (!trans_x) { - x_strides.insert(x_strides.end(), {M * K, K, 1}); - } else { - x_strides.insert(x_strides.end(), {M * K, 1, M}); - } - } - - if (!y_strides_override.empty()) { - y_strides = y_strides_override; - } else { - if (!trans_y) { - y_strides.insert(y_strides.end(), {N * K, N, 1}); - } else { - y_strides.insert(y_strides.end(), {N * K, 1, K}); - } - } - - out_strides.insert(out_strides.end(), {M * N, N, 1}); - out_ddims.insert(out_ddims.end(), - {std::max(x_dims[MB_idx], y_dims[MB_idx]), M, N}); - - for (int i = x_dims.size() - 4; i >= 0; --i) { - 
out_ddims[i] = std::max(x_dims[i], y_dims[i]); - if (x_strides_override.empty()) { - x_strides[i] = x_dims[i + 1] * x_strides[i + 1]; - } - if (y_strides_override.empty()) { - y_strides[i] = y_dims[i + 1] * y_strides[i + 1]; - } - out_strides[i] = out_ddims[i + 1] * out_strides[i + 1]; - } - - // TODO(jczaja): Why not for int8?? - if (!phi::funcs::is_int8() && is_output_fused) { - out_strides = FakeTransposeStrides(out_ddims); - } - - auto x_md = - memory::desc(x_dims, phi::funcs::OneDNNGetDataType(), x_strides); - auto y_md = - memory::desc(y_dims, phi::funcs::OneDNNGetDataType(), y_strides); - auto out_md = memory::desc( - out_ddims, phi::funcs::OneDNNGetDataType(), out_strides); - - const dnnl::primitive_attr matmul_attrs = CreateMatmulAttrs(ctx); - - this->AcquireForwardPrimitiveDescriptor(matmul_attrs, x_md, y_md, out_md); - } - - float ComputeOutputScale(const framework::ExecutionContext& ctx) { - float alpha = ctx.HasAttr("alpha") ? ctx.Attr("alpha") : 1.0f; - if (ctx.HasAttr("Scale_x") && ctx.HasAttr("Scale_y") && - ctx.HasAttr("Scale_out")) { - float scale_x = ctx.Attr("Scale_x"); - float scale_y = ctx.Attr("Scale_y"); - bool force_fp32_out = ctx.HasAttr("force_fp32_output") - ? ctx.Attr("force_fp32_output") - : false; - float scale_out = force_fp32_out ? 1.f : ctx.Attr("Scale_out"); - alpha *= scale_out / (scale_x * scale_y); - } - return alpha; - } - - dnnl::primitive_attr CreateMatmulAttrs( - const framework::ExecutionContext& ctx) { - dnnl::primitive_attr matmul_attrs; - dnnl::post_ops post_operations; - - float scale_out = ComputeOutputScale(ctx); - if (scale_out != 1.0f) { - matmul_attrs.set_output_scales(0, {scale_out}); - } - - if (ctx.HasInput("ResidualData")) { - auto* residual_data = ctx.Input("ResidualData"); - auto residual_data_tz = phi::vectorize(residual_data->dims()); - auto residual_data_md = memory::desc(residual_data_tz, - phi::funcs::OneDNNGetDataType(), - dnnl::memory::format_tag::any); - post_operations.append_binary(dnnl::algorithm::binary_add, - residual_data_md); - if (ctx.HasAttr("Scale_in_eltwise")) { - float sum_scale = scale_out / ctx.Attr("Scale_in_eltwise"); - post_operations.append_sum(sum_scale); - } - } - - AppendActivation(ctx, post_operations); - - if (ctx.HasAttr("fused_output_scale")) { - float scale_alpha = ctx.Attr("fused_output_scale"); - post_operations.append_eltwise( - 1.0, dnnl::algorithm::eltwise_linear, scale_alpha, 0.0f); - } - - matmul_attrs.set_post_ops(post_operations); - return matmul_attrs; - } - - std::vector FakeTransposeStrides( - const std::vector& matmul_out_dims) const { - // fuse matmul_v2 + transpose + reshape guarantees that output is 4D and - // transpose axis are: {0, 2, 1, 3} - std::vector transpose_axis = {0, 2, 1, 3}; - std::vector fake_strides(transpose_axis.size()); - int ndims = static_cast(transpose_axis.size()); - - int total_stride = 1; - - for (int i = ndims - 1; i >= 0; --i) { - fake_strides[transpose_axis[i]] = total_stride; - total_stride *= matmul_out_dims[transpose_axis[i]]; - } - - return fake_strides; - } - - std::shared_ptr AcquireWeightsMemory(const phi::DenseTensor* input) { - const YT* input_data = input->data(); - return this->AcquireMemoryFromPrimitive( - this->fwd_pd_->weights_desc(), - phi::funcs::to_void_cast(input_data)); - } - - std::shared_ptr AcquireDstMemory(phi::DenseTensor* output) { - // We cannot use base AcquireDstMemory as it makes an allocation request - // base on DST memory primitive size. 
This is fine in general, but in MatMul - // we have primitive that covers only one batch of Data and then shift - // pointer for every new batch. Hence phi::DenseTensor size is bigger that - // dst memory primitive size. So would we request less memory that is there - // and it triggers an assertion. So as there is no 'any' format here we can - // leave default size of phi::DenseTensor as computed in ComputeInferShape - OT* ptr = output->mutable_data(this->place_); - return this->AcquireMemoryFromPrimitive(this->fwd_pd_->dst_desc(), ptr); - } -}; - -} // namespace platform -} // namespace paddle diff --git a/paddle/phi/kernels/onednn/conv_function.h b/paddle/phi/kernels/onednn/conv_function.h index 4b3c4d58895cc0b59d98f84495a13693667010d3..eaa2f41d64e54088bf4e0818bb070b2de7f71c7d 100644 --- a/paddle/phi/kernels/onednn/conv_function.h +++ b/paddle/phi/kernels/onednn/conv_function.h @@ -178,7 +178,7 @@ void ComputeINT8(const OneDNNContext& dev_ctx, const std::string& unique_name = dev_ctx.GetInputsName("Input")[0] + dev_ctx.GetInputsName("Filter")[0]; PD_VISIT_FLOAT_AND_INT8_TYPES( - filter->dtype(), "ConvMKLDNNHandlerT", ([&] { + filter->dtype(), "ConvOneDNNHandlerT", ([&] { onednn::ConvOneDNNHandlerT handler(dev_ctx, onednn_engine, dev_ctx.GetPlace(), diff --git a/paddle/phi/kernels/onednn/conv_handler.h b/paddle/phi/kernels/onednn/conv_handler.h index 0e99113594cbaec0420e088bb6abe27b4a3866d3..113d54ce57f2fbc1aa1d16a463bec7c654fe2eb6 100644 --- a/paddle/phi/kernels/onednn/conv_handler.h +++ b/paddle/phi/kernels/onednn/conv_handler.h @@ -40,7 +40,7 @@ class ConvOneDNNHandlerT dnnl::convolution_backward_weights> { public: ConvOneDNNHandlerT(const OneDNNContext& dev_ctx, - const dnnl::engine mkldnn_engine, + const dnnl::engine onednn_engine, Place cpu_place, const phi::DenseTensor* input, const phi::DenseTensor* filter, @@ -63,7 +63,7 @@ class ConvOneDNNHandlerT dnnl::convolution_backward_data, dnnl::convolution_backward_weights>( dev_ctx, - mkldnn_engine, + onednn_engine, cpu_place, funcs::CreateKey( dev_ctx, phi::vectorize(input->dims()), unique_name)) {