Unverified commit 4d5a5533 authored by Sławomir Siwek, committed by GitHub

[PHI decoupling] Remove fluid imports from MKLDNN code (#48981)

* fix wrong handler name

* mkldnn_engine -> onednn_engine

* remove fluid/errors.h imports

* remove fluid/enforce.h imports

* remove note and unnecessary import

* remove fluid/pretty_log.h imports

* remove fluid/place.h imports

* remove fluid/data_layout_transform.h imports

* remove fluid/device_context.h imports

* remove mkldnn_helper code

* remove fluid/mkldnn_reuse.h imports

* pretty_log import
Parent 32633c8e
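Every hunk below applies the same mechanical mapping: `paddle/fluid/platform/{enforce,errors,place}.h` become `paddle/phi/core/enforce.h`, `paddle/phi/core/errors.h`, and `paddle/phi/common/place.h`; `paddle/fluid/platform/mkldnn_reuse.h` becomes `paddle/phi/backends/onednn/onednn_reuse.h`; `paddle/fluid/string/pretty_log.h` becomes `paddle/utils/string/pretty_log.h`; and `platform::` symbols are replaced by their `phi::` equivalents. A minimal sketch of the pattern, assuming only the substitutions visible in this diff (`ExamplePass` and its body are illustrative, not part of the commit):

// Illustrative only: a hypothetical pass body after the fluid -> phi migration.
#include "paddle/phi/common/place.h"      // was paddle/fluid/platform/place.h
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/enforce.h"      // was paddle/fluid/platform/enforce.h

void ExamplePass(paddle::framework::ir::Graph* graph) {
  // platform::errors::* -> phi::errors::*
  PADDLE_ENFORCE_NOT_NULL(graph,
                          phi::errors::InvalidArgument(
                              "Pointer to graph argument cannot be NULL."));
  // paddle::platform::CPUPlace -> phi::CPUPlace
  phi::CPUPlace place;
  phi::DenseTensor tensor;
  tensor.Resize({1});
  float* data = tensor.mutable_data<float>(place);
  data[0] = 1.0f;
}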
@@ -16,8 +16,8 @@
 #include "paddle/fluid/framework/ir/graph_pattern_detector.h"
 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/platform/enforce.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/phi/core/enforce.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 namespace framework {
...
@@ -19,7 +19,6 @@
 #include "paddle/fluid/framework/op_desc.h"
 #include "paddle/fluid/framework/op_version_registry.h"
 #include "paddle/fluid/framework/program_desc.h"
-#include "paddle/fluid/platform/errors.h"
 namespace paddle {
 namespace framework {
...
@@ -28,7 +28,7 @@ namespace ir {
 void ComputePropagateScalesMkldnnPass::GetTensorFromVector(
     const std::vector<float>& data_v, phi::DenseTensor* tensor) const {
   const int size = static_cast<int>(data_v.size());
-  auto* data = tensor->mutable_data<float>({size}, platform::CPUPlace());
+  auto* data = tensor->mutable_data<float>({size}, phi::CPUPlace());
   for (int i = 0; i < size; i++) {
     data[i] = data_v[i];
   }
@@ -123,7 +123,7 @@ void ComputePropagateScalesMkldnnPass::ComputeVarScales(
       std::vector<int64_t> reshape_dims = {dims[0], volume};
       tmp_tensor.Resize(phi::make_ddim(reshape_dims));
       auto* weight_data = weight_tensor->data<float>();
-      auto* tmp_data = tmp_tensor.mutable_data<float>(platform::CPUPlace());
+      auto* tmp_data = tmp_tensor.mutable_data<float>(phi::CPUPlace());
       for (int i = 0; i < weight_tensor->numel(); i++) {
         tmp_data[i] = std::abs(weight_data[i]);
       }
@@ -365,7 +365,7 @@ void ComputePropagateScalesMkldnnPass::UpdateScaleOpInOutScales(
     auto pair = iter->second;
     const auto tensor = pair.second;
     tmp_tensor.Resize(tensor.dims());
-    auto* data = tmp_tensor.mutable_data<float>(platform::CPUPlace());
+    auto* data = tmp_tensor.mutable_data<float>(phi::CPUPlace());
     auto* src_data = tensor.data<float>();
     for (int i = 0; i < tensor.numel(); i++) {
       if (out_iter != var_quant_scales->end()) {
...
@@ -17,7 +17,7 @@
 #include "paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.h"
 #include "paddle/fluid/framework/naive_executor.h"
-#include "paddle/fluid/platform/place.h"
+#include "paddle/phi/common/place.h"
 namespace paddle {
 namespace framework {
@@ -119,7 +119,7 @@ class ComputePropagateScalesMkldnnPassTest : public testing::Test {
       const ProgramDesc& prog,
       Scope* scope,
       const std::initializer_list<std::string>& variable_names) {
-    auto place = paddle::platform::CPUPlace();
+    auto place = phi::CPUPlace();
     NaiveExecutor exe{place};
     exe.CreateVariables(prog, 0, true, scope);
@@ -148,19 +148,19 @@ class ComputePropagateScalesMkldnnPassTest : public testing::Test {
     auto* wx_tensor = wx_var->GetMutable<phi::DenseTensor>();
     wx_tensor->Resize(phi::make_dim(wx.size(), wx[0].size()));
     for (size_t i = 0; i < wx.size(); i++)
-      std::copy(begin(wx[i]),
-                end(wx[i]),
-                wx_tensor->mutable_data<float>(platform::CPUPlace()) +
-                    i * wx[0].size());
+      std::copy(
+          begin(wx[i]),
+          end(wx[i]),
+          wx_tensor->mutable_data<float>(phi::CPUPlace()) + i * wx[0].size());
     auto* wh_var = scope.FindVar(wh_var_names);
     auto* wh_tensor = wh_var->GetMutable<phi::DenseTensor>();
     wh_tensor->Resize(phi::make_dim(wh.size(), wh[0].size()));
     for (size_t i = 0; i < wh.size(); i++)
-      std::copy(begin(wh[i]),
-                end(wh[i]),
-                wh_tensor->mutable_data<float>(platform::CPUPlace()) +
-                    i * wh[0].size());
+      std::copy(
+          begin(wh[i]),
+          end(wh[i]),
+          wh_tensor->mutable_data<float>(phi::CPUPlace()) + i * wh[0].size());
     if (type == "gru") {
       ComputeGruWeightScales(
           graph, &scope, wx_name, wh_name, &var_quant_scales);
@@ -283,7 +283,7 @@ TEST_F(ComputePropagateScalesMkldnnPassTest, get_scales_function) {
   var_tensor.Resize(phi::make_dim(values.size(), 1));
   std::copy(begin(values),
             end(values),
-            var_tensor.mutable_data<float>(platform::CPUPlace()));
+            var_tensor.mutable_data<float>(phi::CPUPlace()));
   std::vector<float> results = GetScales(&var_tensor, 0);
   ASSERT_EQ(results.size(), std::size_t(1));
@@ -310,7 +310,7 @@ TEST_F(ComputePropagateScalesMkldnnPassTest, compute_var_scales) {
   weight_tensor->Resize(phi::make_dim(1, values.size()));
   std::copy(begin(values),
             end(values),
-            weight_tensor->mutable_data<float>(platform::CPUPlace()));
+            weight_tensor->mutable_data<float>(phi::CPUPlace()));
   auto max_val = *std::max_element(values.begin(), values.end());
@@ -338,7 +338,7 @@ TEST_F(ComputePropagateScalesMkldnnPassTest, update_relu_output_scales) {
   StringPairMap var_quant_scales;
   for (auto& var_name : conv_variable_names) {
     phi::DenseTensor tensor;
-    auto* data = tensor.mutable_data<float>({1}, platform::CPUPlace());
+    auto* data = tensor.mutable_data<float>({1}, phi::CPUPlace());
     data[0] = 10;
     auto pair = std::make_pair(false, tensor);
     var_quant_scales.insert(std::make_pair(var_name, pair));
...
@@ -15,8 +15,8 @@
 #include "paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.h"
 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/platform/mkldnn_reuse.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/phi/backends/onednn/onednn_reuse.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 namespace framework {
...
@@ -80,7 +80,7 @@ void recompute_bias_and_weights(const Scope* scope,
       ac_bias_tensor.data<float>(), ac_bias_tensor.numel(), 1);
   EigenVectorArrayMap eltwise_y_in_array(
-      eltwise_y_in_tensor->mutable_data<float>(platform::CPUPlace()),
+      eltwise_y_in_tensor->mutable_data<float>(phi::CPUPlace()),
       eltwise_y_in_tensor->numel(),
       1);
@@ -91,7 +91,7 @@ void recompute_bias_and_weights(const Scope* scope,
       scope->FindVar(conv_weight->Name())->GetMutable<phi::DenseTensor>();
   auto weights_shape = weights->dims();
   auto weights_shape_2d = phi::flatten_to_2d(weights_shape, 1);
-  auto* weights_data = weights->mutable_data<float>(platform::CPUPlace());
+  auto* weights_data = weights->mutable_data<float>(phi::CPUPlace());
   EigenMatrixArrayMap weights_array_2d(
       weights_data, weights_shape_2d[0], weights_shape_2d[1]);
@@ -233,7 +233,7 @@ void ConvAffineChannelFusePass::ApplyImpl(ir::Graph* graph) const {
   auto* eltwise_y_in_tensor =
       scope->Var(eltwise_y_in_node->Name())->GetMutable<phi::DenseTensor>();
   eltwise_y_in_tensor->Resize(ac_bias_tensor->dims());
-  std::fill_n(eltwise_y_in_tensor->mutable_data<float>(platform::CPUPlace()),
+  std::fill_n(eltwise_y_in_tensor->mutable_data<float>(phi::CPUPlace()),
               eltwise_y_in_tensor->numel(),
               0.0f);
...
@@ -19,8 +19,8 @@
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/platform/enforce.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/phi/core/enforce.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 namespace framework {
@@ -263,7 +263,7 @@ phi::DenseTensor tensor_apply_eltwise(const phi::DenseTensor& vec_a,
   vec_y.Resize(vec_a.dims());
   const float* a = vec_a.data<float>();
   const float* b = vec_b.data<float>();
-  float* y = vec_y.mutable_data<float>(platform::CPUPlace());
+  float* y = vec_y.mutable_data<float>(phi::CPUPlace());
   for (int i = 0; i < vec_a.numel(); i++) {
     y[i] = f(a[i], b[i]);
   }
...
@@ -19,7 +19,7 @@
 #include "paddle/fluid/framework/op_proto_maker.h"
 #include "paddle/fluid/framework/op_version_registry.h"
 #include "paddle/fluid/imperative/type_defs.h"
-#include "paddle/fluid/platform/place.h"
+#include "paddle/phi/common/place.h"
 namespace paddle {
 namespace framework {
@@ -112,7 +112,7 @@ void InitTensorHolder(Scope* scope,
 void MainTest(bool convWithExistingBias) {
   auto prog = BuildProgramDesc(convWithExistingBias);
   std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
-  auto place = paddle::platform::CPUPlace();
+  auto place = phi::CPUPlace();
   NaiveExecutor exe{place};
   Scope scope;
   // Init scope, as it is used in pass
...
@@ -16,7 +16,7 @@
 #include "paddle/fluid/framework/ir/graph_traits.h"
 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 namespace framework {
...
@@ -18,7 +18,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/ir/graph_pattern_detector.h"
 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 namespace framework {
...
@@ -17,7 +17,6 @@
 #include "paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.h"
 #include "paddle/fluid/framework/naive_executor.h"
 #include "paddle/fluid/imperative/type_defs.h"
-#include "paddle/fluid/platform/place.h"
 namespace paddle {
 namespace framework {
@@ -68,23 +67,16 @@ void SetOp(ProgramDesc* prog,
 static const std::initializer_list<std::string> variable_names{
     "z", "a", "b", "c", "d", "e", "f", "g", "h", "i"};
-void PreparePass(std::unique_ptr<ir::Graph>& graph,
-                 int* original_nodes_num,
-                 int* current_nodes_num) {
-  auto pass = PassRegistry::Instance().Get("cpu_bfloat16_pass");
-  *original_nodes_num = graph->Nodes().size();
-  graph.reset(pass->Apply(graph.release()));
-  *current_nodes_num = graph->Nodes().size();
-}
 void MainTest(const ProgramDesc& prog,
               const int& quant_count,
               const int& dequant_count,
               const int& added_nodes_count) {
   auto graph = std::make_unique<ir::Graph>(prog);
-  int original_nodes_num, current_nodes_num;
-  PreparePass(graph, &original_nodes_num, &current_nodes_num);
+  auto pass = PassRegistry::Instance().Get("cpu_bfloat16_pass");
+  int original_nodes_num = graph->Nodes().size();
+  graph.reset(pass->Apply(graph.release()));
+  int current_nodes_num = graph->Nodes().size();
   int quantize_nodes_count = 0;
   int dequantize_nodes_count = 0;
...
@@ -19,7 +19,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/ir/graph_pattern_detector.h"
 #include "paddle/fluid/platform/mkldnn_helper.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 namespace framework {
...
@@ -20,7 +20,7 @@
 #include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h"
 #include "paddle/fluid/platform/mkldnn_helper.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 namespace framework {
@@ -1204,8 +1204,7 @@ void CPUQuantizePass::QuantizeMultiGru(Graph* graph) const {
     auto* w_scale_tensor_dst =
         scope->Var(w_scale_node->Name())->GetMutable<phi::DenseTensor>();
     w_scale_tensor_dst->Resize(scale_tensor_src.dims());
-    auto* dst_data =
-        w_scale_tensor_dst->mutable_data<float>(platform::CPUPlace());
+    auto* dst_data = w_scale_tensor_dst->mutable_data<float>(phi::CPUPlace());
     EigenVectorArrayMapFloat eigen_tensor_dst{dst_data,
                                               w_scale_tensor_dst->numel()};
     eigen_tensor_dst =
...
@@ -19,7 +19,7 @@
 #include "paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h"  // NOLINT
 #include "paddle/fluid/framework/naive_executor.h"
 #include "paddle/fluid/imperative/type_defs.h"
-#include "paddle/fluid/platform/place.h"
+#include "paddle/phi/common/place.h"
 namespace paddle {
 namespace framework {
@@ -146,7 +146,7 @@ void PreparePass(std::unique_ptr<ir::Graph>* graph,
                  int* current_nodes_num,
                  std::string var_without_scale = "",
                  std::string var_signed = "") {
-  auto place = paddle::platform::CPUPlace();
+  auto place = phi::CPUPlace();
   NaiveExecutor exe{place};
   Scope scope;
   exe.CreateVariables(prog, 0, true, &scope);
...
@@ -18,9 +18,9 @@
 #include <string>
 #include <vector>
-#include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/mkldnn_helper.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/phi/core/enforce.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 namespace framework {
...
@@ -16,7 +16,7 @@
 #include "paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.h"
 #include "paddle/fluid/framework/naive_executor.h"
-#include "paddle/fluid/platform/place.h"
+#include "paddle/phi/common/place.h"
 namespace paddle {
 namespace framework {
@@ -722,7 +722,7 @@ void InitTensorHolder(Scope* scope,
 }
 void PrepareGraph(std::unique_ptr<ir::Graph>* graph, const ProgramDesc& prog) {
-  auto place = paddle::platform::CPUPlace();
+  auto place = phi::CPUPlace();
   NaiveExecutor exe{place};
   Scope scope;
   exe.CreateVariables(prog, 0, true, &scope);
...
@@ -16,9 +16,9 @@
 #include "paddle/fluid/framework/ir/graph_pattern_detector.h"
 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/platform/enforce.h"
-#include "paddle/fluid/platform/mkldnn_reuse.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/phi/backends/onednn/onednn_reuse.h"
+#include "paddle/phi/core/enforce.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 namespace framework {
...
@@ -15,8 +15,8 @@
 #include "paddle/fluid/framework/ir/mkldnn/fc_act_mkldnn_fuse_pass.h"
 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/platform/mkldnn_reuse.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/phi/backends/onednn/onednn_reuse.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 namespace framework {
...
@@ -16,7 +16,7 @@
 #include "paddle/fluid/framework/ir/graph_traits.h"
 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 namespace framework {
...
@@ -14,8 +14,8 @@
 #include "paddle/fluid/framework/ir/mkldnn/fc_mkldnn_pass.h"
-#include "paddle/fluid/platform/enforce.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/phi/core/enforce.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 namespace framework {
...
@@ -15,8 +15,8 @@
 #include "paddle/fluid/framework/ir/mkldnn/int8_scale_calculation_mkldnn_pass.h"
 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/mkldnn_helper.h"
+#include "paddle/phi/core/enforce.h"
 namespace paddle {
 namespace framework {
...
@@ -17,7 +17,7 @@
 #include <string>
 #include <vector>
-#include "paddle/fluid/platform/enforce.h"
+#include "paddle/phi/core/enforce.h"
 namespace paddle {
 namespace framework {
...
@@ -15,8 +15,8 @@
 #include "paddle/fluid/framework/ir/mkldnn/layer_norm_onednn_optimization_pass.h"
 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/platform/mkldnn_reuse.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/phi/backends/onednn/onednn_reuse.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 namespace framework {
@@ -77,7 +77,7 @@ void LayerNormOneDNNOptimizationPass::ApplyImpl(Graph *graph) const {
     scale_shift_tensor->Resize(phi::make_ddim({channels * 2}));
-    memcpy(scale_shift_tensor->mutable_data<float>(platform::CPUPlace()),
+    memcpy(scale_shift_tensor->mutable_data<float>(phi::CPUPlace()),
           ln_scale_tensor->data<float>(),
           channels * sizeof(float));
...
@@ -15,8 +15,8 @@
 #include "paddle/fluid/framework/ir/mkldnn/matmul_activation_mkldnn_fuse_pass.h"
 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/platform/mkldnn_reuse.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/phi/backends/onednn/onednn_reuse.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 namespace framework {
...
@@ -16,7 +16,7 @@
 #include "paddle/fluid/framework/ir/graph_traits.h"
 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 namespace framework {
...
@@ -13,9 +13,9 @@
 // limitations under the License.
 #include "paddle/fluid/framework/ir/mkldnn/matmul_transpose_reshape_mkldnn_fuse_pass.h"
-#include <paddle/fluid/string/pretty_log.h>
 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/platform/enforce.h"
+#include "paddle/phi/core/enforce.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 namespace framework {
...
@@ -23,7 +23,7 @@
 #include "paddle/fluid/framework/ir/pass_tester_helper.h"
 #include "paddle/fluid/framework/naive_executor.h"
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/platform/place.h"
+#include "paddle/phi/common/place.h"
 #include "paddle/phi/core/kernel_registry.h"
 PD_DECLARE_KERNEL(conv2d_transpose, CPU, ALL_LAYOUT);
@@ -195,7 +195,7 @@ class MKLDNNConvBatchNormPassTest {
   void FillTensorWithRandomData(phi::DenseTensor* tnsr,
                                 float lowb,
                                 float upb,
-                                platform::CPUPlace place) {
+                                phi::CPUPlace place) {
     float* ptr = tnsr->mutable_data<float>(place);
     // Initialize input data
     std::uniform_real_distribution<float> dist(static_cast<float>(lowb),
@@ -219,7 +219,7 @@ class MKLDNNConvBatchNormPassTest {
     std::unique_ptr<ir::Graph> graph(new ir::Graph(base_prog));
     Scope scope;
-    auto place = paddle::platform::CPUPlace();
+    auto place = phi::CPUPlace();
     NaiveExecutor exe{place};
     auto pass = PassRegistry::Instance().Get(
...
@@ -140,7 +140,7 @@ static void GetInfoFromTheFirstOp(ir::Graph* graph,
         op_desc->GetAttr(vector_name));
     phi::DenseTensor tensor;
     const int size = static_cast<int>(scales_vector.size());
-    auto data = tensor.mutable_data<double>({size}, platform::CPUPlace());
+    auto data = tensor.mutable_data<double>({size}, phi::CPUPlace());
     std::copy(scales_vector.begin(), scales_vector.end(), data);
     auto pair = std::make_pair(is_unsigned, tensor);
     info_map->insert(std::make_pair(var_name, pair));
...
@@ -18,8 +18,8 @@
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/ir/graph_pattern_detector.h"
-#include "paddle/fluid/platform/errors.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/phi/core/errors.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 namespace framework {
@@ -44,12 +44,11 @@ std::vector<std::string> JoinInputs(Node* op1,
 void MultiGRUFusePass::ApplyImpl(ir::Graph* graph) const {
   VLOG(3) << "Fusing two concatenated multi_gru ops.";
   PADDLE_ENFORCE_NOT_NULL(graph,
-                          platform::errors::InvalidArgument(
+                          phi::errors::InvalidArgument(
                               "Pointer to graph argument cannot be NULL."));
   FusePassBase::Init(name_scope_, graph);
   PADDLE_ENFORCE_NOT_NULL(
-      param_scope(),
-      platform::errors::InvalidArgument("Scope cannot be nullptr."));
+      param_scope(), phi::errors::InvalidArgument("Scope cannot be nullptr."));
   GraphPatternDetector gpd;
   patterns::TwoFusionGruConcat pattern{gpd.mutable_pattern(), name_scope_};
...
@@ -21,9 +21,9 @@
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/ir/graph_pattern_detector.h"
-#include "paddle/fluid/platform/errors.h"
 #include "paddle/fluid/platform/mkldnn_helper.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/phi/core/errors.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 namespace framework {
@@ -48,12 +48,11 @@ std::vector<std::string> JoinInputs(Node* op1,
 void MultiGruSeqFusePass::ApplyImpl(ir::Graph* graph) const {
   VLOG(3) << "Fusing two consecutive multi_gru ops.";
   PADDLE_ENFORCE_NOT_NULL(graph,
-                          platform::errors::InvalidArgument(
+                          phi::errors::InvalidArgument(
                               "Pointer to graph argument cannot be NULL."));
   FusePassBase::Init(name_scope_, graph);
   PADDLE_ENFORCE_NOT_NULL(
-      param_scope(),
-      platform::errors::InvalidArgument("Scope cannot be nullptr."));
+      param_scope(), phi::errors::InvalidArgument("Scope cannot be nullptr."));
   GraphPatternDetector gpd;
   patterns::MultiGruSeq pattern{gpd.mutable_pattern(), name_scope_};
...
@@ -15,8 +15,8 @@
 #include "paddle/fluid/framework/ir/mkldnn/operator_reshape2_onednn_fuse_pass.h"
 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/platform/mkldnn_reuse.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/phi/backends/onednn/onednn_reuse.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 namespace framework {
...
@@ -15,8 +15,8 @@
 #include "paddle/fluid/framework/ir/mkldnn/operator_scale_onednn_fuse_pass.h"
 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/platform/mkldnn_reuse.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/phi/backends/onednn/onednn_reuse.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 namespace framework {
...
@@ -15,8 +15,8 @@
 #include "paddle/fluid/framework/ir/mkldnn/operator_unsqueeze2_onednn_fuse_pass.h"
 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/platform/mkldnn_reuse.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/phi/backends/onednn/onednn_reuse.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 namespace framework {
...
@@ -16,7 +16,7 @@
 #include "paddle/fluid/framework/op_version_registry.h"
 #include "paddle/fluid/platform/mkldnn_helper.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 namespace framework {
...
@@ -16,7 +16,7 @@
 #include "paddle/fluid/framework/ir/mkldnn/params_quantization_mkldnn_pass.h"  // NOLINT
 #include "paddle/fluid/imperative/type_defs.h"
-#include "paddle/fluid/platform/place.h"
+#include "paddle/phi/common/place.h"
 namespace paddle {
 namespace framework {
...
@@ -430,8 +430,8 @@ void QuantDequantMkldnnPass::TransposeWeight(phi::DenseTensor* input) const {
   phi::DenseTensor trans_tensor;
   trans_tensor.Resize(out_dims);
-  float* trans_data = trans_tensor.mutable_data<float>(platform::CPUPlace());
-  float* in_data = input->mutable_data<float>(platform::CPUPlace());
+  float* trans_data = trans_tensor.mutable_data<float>(phi::CPUPlace());
+  float* in_data = input->mutable_data<float>(phi::CPUPlace());
   for (int64_t out_idx = 0; out_idx < count; ++out_idx) {
     int64_t in_idx = 0;
@@ -493,8 +493,7 @@ void QuantDequantMkldnnPass::ConvertFromINT8ToFP32(
     weight_tensor->clear();  // clear int weight
     weight_tensor->Resize(phi::make_ddim(phi::vectorize(weight_dims)));
-    auto* new_weight_data =
-        weight_tensor->mutable_data<float>(platform::CPUPlace());
+    auto* new_weight_data = weight_tensor->mutable_data<float>(phi::CPUPlace());
     memcpy(new_weight_data,
            weight_data.data(),
            weight_tensor->numel() * sizeof(float));
@@ -536,8 +535,7 @@ void QuantDequantMkldnnPass::ConvertFromINT8ToFP32(
   }
   weight_tensor->clear();  // clear int weight
   weight_tensor->Resize(phi::make_ddim(phi::vectorize(weight_dims)));
-  auto* new_weight_data =
-      weight_tensor->mutable_data<float>(platform::CPUPlace());
+  auto* new_weight_data = weight_tensor->mutable_data<float>(phi::CPUPlace());
   memcpy(new_weight_data,
          weight_data.data(),
          weight_tensor->numel() * sizeof(float));
@@ -582,8 +580,7 @@ void QuantDequantMkldnnPass::DequantizeOpWeights(
         weight_var_name,
         op_desc->Type()));
   auto* weight_tensor = var->GetMutable<phi::DenseTensor>();
-  float* fp32_weight_data =
-      weight_tensor->mutable_data<float>(platform::CPUPlace());
+  float* fp32_weight_data = weight_tensor->mutable_data<float>(phi::CPUPlace());
   ConvertFromINT8ToFP32(
       scales, weight_tensor, nullptr, fp32_weight_data, weight_var_name);
 }
@@ -628,7 +625,7 @@ void QuantDequantMkldnnPass::DequantizeOpWeightsFromONNXFormat(
       op_desc->Type()));
   auto* weight_tensor = var->GetMutable<phi::DenseTensor>();
   int8_t* int8_weight_data =
-      weight_tensor->mutable_data<int8_t>(platform::CPUPlace());
+      weight_tensor->mutable_data<int8_t>(phi::CPUPlace());
   ConvertFromINT8ToFP32(
       scales, weight_tensor, int8_weight_data, nullptr, weight_var_name);
...
@@ -14,8 +14,8 @@
 #include "paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass.h"
 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/platform/enforce.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/phi/core/enforce.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 namespace framework {
...
@@ -26,7 +26,7 @@ void AddVarToScope(Scope* param_scope,
                    const DDim& dims) {
   auto* tensor = param_scope->Var(name)->GetMutable<phi::DenseTensor>();
   tensor->Resize(dims);
-  tensor->mutable_data<float>(platform::CPUPlace());
+  tensor->mutable_data<float>(phi::CPUPlace());
 }
 Scope* CreateParamScope() {
...
@@ -20,7 +20,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/ir/graph_pattern_detector.h"
 #include "paddle/fluid/framework/op_version_registry.h"
 #include "paddle/fluid/platform/mkldnn_helper.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 namespace framework {
...
@@ -28,7 +28,7 @@ void AddVarToScope(Scope* param_scope,
                    const DDim& dims) {
   auto* tensor = param_scope->Var(name)->GetMutable<phi::DenseTensor>();
   tensor->Resize(dims);
-  tensor->mutable_data<float>(platform::CPUPlace());
+  tensor->mutable_data<float>(phi::CPUPlace());
 }
 Scope* CreateParamScope() {
...
@@ -16,9 +16,9 @@
 #include "paddle/fluid/framework/ir/graph_pattern_detector.h"
 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/platform/enforce.h"
-#include "paddle/fluid/platform/mkldnn_reuse.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/phi/backends/onednn/onednn_reuse.h"
+#include "paddle/phi/core/enforce.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 namespace framework {
...
@@ -13,8 +13,8 @@
 // limitations under the License.
 #include "paddle/fluid/framework/ir/mkldnn/squeeze2_transpose2_onednn_fuse_pass.h"
 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/platform/mkldnn_reuse.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/phi/backends/onednn/onednn_reuse.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 namespace framework {
...
@@ -30,14 +30,14 @@
 #include "paddle/fluid/inference/analysis/analyzer.h"
 #include "paddle/fluid/inference/api/analysis_predictor.h"
 #include "paddle/fluid/platform/mkldnn_helper.h"
-#include "paddle/fluid/platform/place.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/phi/common/place.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 using framework::Variable;
 using framework::ir::Graph;
-using platform::CPUPlace;
+using phi::CPUPlace;
 using ConstEigenVectorArrayMap =
     Eigen::Map<const Eigen::Array<float, Eigen::Dynamic, 1>>;
 using EigenMatrixDoubleArray =
...
@@ -111,7 +111,7 @@ TEST_F(MkldnnQuantizerTest, histogram_inverted_min_max) {
   var_tensor.Resize(phi::make_dim(values.size()));
   std::copy(begin(values),
             end(values),
-            var_tensor.mutable_data<float>(platform::CPUPlace()));
+            var_tensor.mutable_data<float>(phi::CPUPlace()));
   ASSERT_THROW(Histogram(var_tensor, max_val, min_val, 3),
                platform::EnforceNotMet);
@@ -127,7 +127,7 @@ TEST_F(MkldnnQuantizerTest, histogram_non_negative_to_3) {
   var_tensor.Resize(phi::make_dim(values.size()));
   std::copy(begin(values),
             end(values),
-            var_tensor.mutable_data<float>(platform::CPUPlace()));
+            var_tensor.mutable_data<float>(phi::CPUPlace()));
   std::vector<int> histogram;
   float bin_width;
@@ -151,7 +151,7 @@ TEST_F(MkldnnQuantizerTest, histogram_positive_and_negative_to_3) {
   var_tensor.Resize(phi::make_dim(values.size()));
   std::copy(begin(values),
             end(values),
-            var_tensor.mutable_data<float>(platform::CPUPlace()));
+            var_tensor.mutable_data<float>(phi::CPUPlace()));
   std::vector<int> histogram;
   float bin_width;
@@ -175,7 +175,7 @@ TEST_F(MkldnnQuantizerTest, histogram_zero_bins) {
   var_tensor.Resize(phi::make_dim(values.size()));
   std::copy(begin(values),
             end(values),
-            var_tensor.mutable_data<float>(platform::CPUPlace()));
+            var_tensor.mutable_data<float>(phi::CPUPlace()));
   ASSERT_THROW(Histogram(var_tensor, min_val, max_val, 0),
                platform::EnforceNotMet);
@@ -188,7 +188,7 @@ TEST_F(MkldnnQuantizerTest, histogram_empty) {
   // zero tensor
   phi::DenseTensor var_tensor;
   var_tensor.Resize({0});
-  var_tensor.mutable_data<double>(platform::CPUPlace());
+  var_tensor.mutable_data<double>(phi::CPUPlace());
   ASSERT_THROW(Histogram(var_tensor, -1, 1, 1), platform::EnforceNotMet);
 }
@@ -200,7 +200,7 @@ TEST_F(MkldnnQuantizerTest, kl_scaling_factor_signed) {
   var_tensor.Resize(phi::make_dim(values.size()));
   std::copy(begin(values),
             end(values),
-            var_tensor.mutable_data<float>(platform::CPUPlace()));
+            var_tensor.mutable_data<float>(phi::CPUPlace()));
   bool is_unsigned;
   phi::DenseTensor lod_tensor;
@@ -220,7 +220,7 @@ TEST_F(MkldnnQuantizerTest, max_scaling_factor_signed) {
   var_tensor.Resize(phi::make_dim(values.size()));
   std::copy(begin(values),
             end(values),
-            var_tensor.mutable_data<float>(platform::CPUPlace()));
+            var_tensor.mutable_data<float>(phi::CPUPlace()));
   bool is_unsigned;
   phi::DenseTensor lod_tensor;
@@ -240,7 +240,7 @@ TEST_F(MkldnnQuantizerTest, max_scaling_factor_unsigned) {
   var_tensor.Resize(phi::make_dim(values.size()));
   std::copy(begin(values),
             end(values),
-            var_tensor.mutable_data<float>(platform::CPUPlace()));
+            var_tensor.mutable_data<float>(phi::CPUPlace()));
   bool is_unsigned;
   phi::DenseTensor lod_tensor;
@@ -260,10 +260,10 @@ TEST_F(MkldnnQuantizerTest, max_scaling_factor_chwise_unsigned) {
   phi::DenseTensor var_tensor;
   var_tensor.Resize(phi::make_dim(channels, 1, 1, values.size()));
   for (int i = 0; i < channels; i++)
-    std::copy(begin(values),
-              end(values),
-              var_tensor.mutable_data<float>(platform::CPUPlace()) +
-                  i * values.size());
+    std::copy(
+        begin(values),
+        end(values),
+        var_tensor.mutable_data<float>(phi::CPUPlace()) + i * values.size());
   bool is_unsigned;
   phi::DenseTensor lod_tensor;
@@ -284,7 +284,7 @@ TEST_F(MkldnnQuantizerTest, kl_scaling_factor_unsigned) {
   var_tensor.Resize(phi::make_dim(values.size()));
   std::copy(begin(values),
             end(values),
-            var_tensor.mutable_data<float>(platform::CPUPlace()));
+            var_tensor.mutable_data<float>(phi::CPUPlace()));
   bool is_unsigned;
   phi::DenseTensor lod_tensor;
@@ -312,14 +312,14 @@ TEST_F(MkldnnQuantizerTest, max_ch_gru_scaling_factor) {
     std::copy(
         begin(wx[i]),
        end(wx[i]),
-        wx_tensor.mutable_data<float>(platform::CPUPlace()) + i * wx[0].size());
+        wx_tensor.mutable_data<float>(phi::CPUPlace()) + i * wx[0].size());
   wh_tensor.Resize(phi::make_dim(wh.size(), wh[0].size()));
   for (size_t i = 0; i < wh.size(); i++)
     std::copy(
        begin(wh[i]),
        end(wh[i]),
-        wh_tensor.mutable_data<float>(platform::CPUPlace()) + i * wh[0].size());
+        wh_tensor.mutable_data<float>(phi::CPUPlace()) + i * wh[0].size());
   bool is_unsigned;
   std::tie(is_unsigned, lod_tensor) =
@@ -342,14 +342,14 @@ TEST_F(MkldnnQuantizerTest, max_ch_lstm_scaling_factor) {
     std::copy(
        begin(wx[i]),
        end(wx[i]),
-        wx_tensor.mutable_data<float>(platform::CPUPlace()) + i * wx[0].size());
+        wx_tensor.mutable_data<float>(phi::CPUPlace()) + i * wx[0].size());
   wh_tensor.Resize(phi::make_dim(wh.size(), wh[0].size()));
   for (size_t i = 0; i < wh.size(); i++)
     std::copy(
        begin(wh[i]),
        end(wh[i]),
-        wh_tensor.mutable_data<float>(platform::CPUPlace()) + i * wh[0].size());
+        wh_tensor.mutable_data<float>(phi::CPUPlace()) + i * wh[0].size());
   bool is_unsigned;
   std::tie(is_unsigned, lod_tensor) =
...
@@ -18,8 +18,7 @@ limitations under the License. */
 #include <iostream>
 #include "paddle/fluid/inference/tests/api/tester_helper.h"
-#include "paddle/fluid/platform/device_context.h"
-#include "paddle/fluid/platform/place.h"
+#include "paddle/phi/common/place.h"
 DEFINE_string(infer_shape, "", "data shape file");
 DEFINE_int32(sample, 20, "number of sample");
@@ -78,7 +77,7 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs,
 #ifdef PADDLE_WITH_MKLDNN
 int GetNumCachedObjects(void) {
   auto &pool = platform::DeviceContextPool::Instance();
-  platform::CPUPlace place;
+  phi::CPUPlace place;
   auto onednn_dev_ctx = dynamic_cast<phi::OneDNNContext *>(pool.Get(place));
   return onednn_dev_ctx->GetCachedObjectsNumber();
 }
...
@@ -15,6 +15,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/convert_utils.h"
 #include "paddle/fluid/operators/fused/fusion_gru_op.h"
 #include "paddle/fluid/operators/fused/mkldnn/fusion_rnn_mkldnn.h"
+#include "paddle/phi/backends/onednn/onednn_reuse.h"
 #include "paddle/phi/core/expect.h"
 namespace paddle {
@@ -24,13 +25,14 @@ using phi::OneDNNContext;
 using phi::funcs::OneDNNGetDataType;
 using phi::funcs::OneDNNMemDesc;
 using phi::funcs::RNNReorderType;
+using OneDNNMemoryFormat = dnnl::memory::format_tag;
 template <typename T, typename T_out = T>
 class GRUMKLDNNHandler : public RNNMKLDNNHandler<T, dnnl::gru_forward, T_out> {
  public:
   GRUMKLDNNHandler(const paddle::framework::ExecutionContext& ctx,
                    const OneDNNContext& dev_ctx,
-                   const dnnl::engine mkldnn_engine,
+                   const dnnl::engine onednn_engine,
                    platform::Place cpu_place,
                    const phi::DenseTensor* input,
                    const phi::DenseTensor* weight_h,
@@ -44,7 +46,7 @@ class GRUMKLDNNHandler : public RNNMKLDNNHandler<T, dnnl::gru_forward, T_out> {
       : RNNMKLDNNHandler<T, dnnl::gru_forward, T_out>(
             ctx,
             dev_ctx,
-            mkldnn_engine,
+            onednn_engine,
             ctx.GetPlace(),
             input,
             weight_h,
@@ -256,7 +258,7 @@ class FusionGRUMKLDNNKernel : public framework::OpKernel<T> {
   template <typename Tout = T>
   void RunKernel(const framework::ExecutionContext& ctx) const {
     auto& dev_ctx = ctx.template device_context<OneDNNContext>();
-    const auto& mkldnn_engine = dev_ctx.GetEngine();
+    const auto& onednn_engine = dev_ctx.GetEngine();
     // Get Tensors
     const auto* input = ctx.Input<phi::DenseTensor>("X");
@@ -294,7 +296,7 @@ class FusionGRUMKLDNNKernel : public framework::OpKernel<T> {
     GRUMKLDNNHandler<T, Tout> handler(
         ctx,
         dev_ctx,
-        mkldnn_engine,
+        onednn_engine,
         ctx.GetPlace(),
         input,
         weight_h,
@@ -379,7 +381,7 @@ class FusionGRUMKLDNNKernel : public framework::OpKernel<T> {
 namespace ops = paddle::operators;
 REGISTER_OP_KERNEL(fusion_gru,
                    MKLDNN,
-                   paddle::platform::CPUPlace,
+                   phi::CPUPlace,
                    ops::FusionGRUMKLDNNKernel<float>,
                    ops::FusionGRUMKLDNNKernel<paddle::platform::bfloat16>,
                    ops::FusionGRUMKLDNNKernel<uint8_t>);
...
@@ -24,6 +24,7 @@ using phi::OneDNNContext;
 using phi::funcs::OneDNNGetDataType;
 using phi::funcs::OneDNNMemDesc;
 using phi::funcs::RNNReorderType;
+using OneDNNMemoryFormat = dnnl::memory::format_tag;
 template <typename T, typename T_out = T>
 class LSTMMKLDNNHandler
@@ -31,7 +32,7 @@ class LSTMMKLDNNHandler
  public:
   LSTMMKLDNNHandler(const paddle::framework::ExecutionContext& ctx,
                     const OneDNNContext& dev_ctx,
-                    const dnnl::engine mkldnn_engine,
+                    const dnnl::engine onednn_engine,
                     platform::Place cpu_place,
                     const phi::DenseTensor* input,
                     const phi::DenseTensor* weight_h,
@@ -46,7 +47,7 @@ class LSTMMKLDNNHandler
       : RNNMKLDNNHandler<T, dnnl::lstm_forward, T_out>(
             ctx,
             dev_ctx,
-            mkldnn_engine,
+            onednn_engine,
             ctx.GetPlace(),
             input,
             weight_h,
@@ -338,7 +339,7 @@ class FusionLSTMMKLDNNKernel : public framework::OpKernel<T> {
   template <typename Tout = T>
   void RunKernel(const framework::ExecutionContext& ctx) const {
     auto& dev_ctx = ctx.template device_context<OneDNNContext>();
-    const auto& mkldnn_engine = dev_ctx.GetEngine();
+    const auto& onednn_engine = dev_ctx.GetEngine();
     // Get Tensors
     const auto* input = ctx.Input<phi::DenseTensor>("X");
@@ -379,7 +380,7 @@ class FusionLSTMMKLDNNKernel : public framework::OpKernel<T> {
     LSTMMKLDNNHandler<T, Tout> handler(
         ctx,
         dev_ctx,
-        mkldnn_engine,
+        onednn_engine,
         ctx.GetPlace(),
         input,
         weight_h,
@@ -474,7 +475,7 @@ class FusionLSTMMKLDNNKernel : public framework::OpKernel<T> {
 namespace ops = paddle::operators;
 REGISTER_OP_KERNEL(fusion_lstm,
                    MKLDNN,
-                   paddle::platform::CPUPlace,
+                   phi::CPUPlace,
                    ops::FusionLSTMMKLDNNKernel<float>,
                    ops::FusionLSTMMKLDNNKernel<paddle::platform::bfloat16>,
                    ops::FusionLSTMMKLDNNKernel<uint8_t>);
...
...@@ -14,7 +14,8 @@ limitations under the License. */ ...@@ -14,7 +14,8 @@ limitations under the License. */
#pragma once #pragma once
#include "paddle/fluid/platform/mkldnn_reuse.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/phi/backends/onednn/onednn_reuse.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -22,13 +23,14 @@ namespace operators { ...@@ -22,13 +23,14 @@ namespace operators {
using phi::funcs::CreateKey; using phi::funcs::CreateKey;
using phi::funcs::OneDNNGetDataType; using phi::funcs::OneDNNGetDataType;
using phi::funcs::RNNReorderType; using phi::funcs::RNNReorderType;
using OneDNNMemoryFormat = dnnl::memory::format_tag;
template <typename T, typename T_alg, typename T_out = T> template <typename T, typename T_alg, typename T_out = T>
class RNNMKLDNNHandler : public phi::funcs::OneDNNHandlerT<T, T_alg> { class RNNMKLDNNHandler : public phi::funcs::OneDNNHandlerT<T, T_alg> {
public: public:
RNNMKLDNNHandler(const paddle::framework::ExecutionContext& ctx, RNNMKLDNNHandler(const paddle::framework::ExecutionContext& ctx,
const phi::OneDNNContext& dev_ctx, const phi::OneDNNContext& dev_ctx,
const dnnl::engine mkldnn_engine, const dnnl::engine onednn_engine,
platform::Place cpu_place, platform::Place cpu_place,
const phi::DenseTensor* input, const phi::DenseTensor* input,
const phi::DenseTensor* weight_h, const phi::DenseTensor* weight_h,
......
...@@ -18,10 +18,10 @@ limitations under the License. */ ...@@ -18,10 +18,10 @@ limitations under the License. */
#include "dnnl.hpp" // NOLINT #include "dnnl.hpp" // NOLINT
#include "paddle/fluid/framework/mixed_vector.h" #include "paddle/fluid/framework/mixed_vector.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/operators/fused/multi_gru_op.h" #include "paddle/fluid/operators/fused/multi_gru_op.h"
#include "paddle/fluid/platform/errors.h" #include "paddle/phi/backends/onednn/onednn_reuse.h"
#include "paddle/fluid/platform/mkldnn_reuse.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -31,6 +31,7 @@ using phi::funcs::OneDNNGetDataType; ...@@ -31,6 +31,7 @@ using phi::funcs::OneDNNGetDataType;
using phi::funcs::OneDNNMemDesc; using phi::funcs::OneDNNMemDesc;
using Direction = dnnl::rnn_direction; using Direction = dnnl::rnn_direction;
using phi::OneDNNContext; using phi::OneDNNContext;
using OneDNNMemoryFormat = dnnl::memory::format_tag;
namespace { namespace {
...@@ -721,6 +722,6 @@ class MultiGRUMKLDNNKernel : public framework::OpKernel<T> { ...@@ -721,6 +722,6 @@ class MultiGRUMKLDNNKernel : public framework::OpKernel<T> {
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OP_KERNEL(multi_gru, REGISTER_OP_KERNEL(multi_gru,
MKLDNN, MKLDNN,
paddle::platform::CPUPlace, phi::CPUPlace,
ops::MultiGRUMKLDNNKernel<float>, ops::MultiGRUMKLDNNKernel<float>,
ops::MultiGRUMKLDNNKernel<uint8_t>); ops::MultiGRUMKLDNNKernel<uint8_t>);
...@@ -14,11 +14,10 @@ limitations under the License. */ ...@@ -14,11 +14,10 @@ limitations under the License. */
#include "paddle/fluid/operators/dequantize_op.h" #include "paddle/fluid/operators/dequantize_op.h"
#include "paddle/fluid/framework/data_layout_transform.h"
#include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/errors.h"
#include "paddle/fluid/platform/mkldnn_helper.h" #include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/fluid/platform/mkldnn_reuse.h" #include "paddle/phi/backends/onednn/onednn_reuse.h"
#include "paddle/phi/core/errors.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -39,11 +38,11 @@ class DeQuantOpKernel : public framework::OpKernel<T> { ...@@ -39,11 +38,11 @@ class DeQuantOpKernel : public framework::OpKernel<T> {
auto* out = ctx.Output<phi::DenseTensor>("Output"); auto* out = ctx.Output<phi::DenseTensor>("Output");
PADDLE_ENFORCE(quantization_scale != 0.0f, PADDLE_ENFORCE(quantization_scale != 0.0f,
platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"Dequantization scale must be different than 0.0f")); "Dequantization scale must be different than 0.0f"));
PADDLE_ENFORCE(quantization_shift <= 255 && quantization_shift >= 0, PADDLE_ENFORCE(quantization_shift <= 255 && quantization_shift >= 0,
platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"Dequantization shift must be lower or equal to ", "Dequantization shift must be lower or equal to ",
"255 and greater or equal to 0, but got %f", "255 and greater or equal to 0, but got %f",
quantization_shift)); quantization_shift));
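Per element, the scale/shift pair validated above acts as the inverse of the quantize op. A scalar model of the reorder this kernel performs, under the assumption that quantization computed q = round(x * scale + shift):
// Assumed semantics, for illustration only: undo the shift, then the scale.
float Dequantize(uint8_t q, float scale, float shift) {
  return (static_cast<float>(q) - shift) / scale;
}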
...@@ -91,7 +90,7 @@ namespace ops = paddle::operators; ...@@ -91,7 +90,7 @@ namespace ops = paddle::operators;
REGISTER_OP_KERNEL(dequantize, REGISTER_OP_KERNEL(dequantize,
MKLDNN, MKLDNN,
::paddle::platform::CPUPlace, ::phi::CPUPlace,
ops::DeQuantOpKernel<uint8_t>, ops::DeQuantOpKernel<uint8_t>,
ops::DeQuantOpKernel<int8_t>, ops::DeQuantOpKernel<int8_t>,
ops::DeQuantOpKernel<paddle::platform::bfloat16>); ops::DeQuantOpKernel<paddle::platform::bfloat16>);
...@@ -14,9 +14,10 @@ limitations under the License. */ ...@@ -14,9 +14,10 @@ limitations under the License. */
#include <memory> #include <memory>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/fc_op.h" #include "paddle/fluid/operators/fc_op.h"
#include "paddle/fluid/platform/mkldnn_helper.h" #include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/fluid/platform/mkldnn_reuse.h" #include "paddle/phi/backends/onednn/onednn_reuse.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -51,10 +52,10 @@ class FCMKLDNNHandler ...@@ -51,10 +52,10 @@ class FCMKLDNNHandler
const phi::DenseTensor* bias, const phi::DenseTensor* bias,
phi::DenseTensor* out, phi::DenseTensor* out,
const int in_num_col_dims, const int in_num_col_dims,
dnnl::engine mkldnn_engine, dnnl::engine onednn_engine,
platform::Place cpu_place) platform::Place cpu_place)
: phi::funcs::OneDNNHandlerNoCachingT<T_in, dnnl::inner_product_forward>( : phi::funcs::OneDNNHandlerNoCachingT<T_in, dnnl::inner_product_forward>(
mkldnn_engine, cpu_place), onednn_engine, cpu_place),
dev_ctx_(dev_ctx) { dev_ctx_(dev_ctx) {
this->memory_key_ = ctx.InputName("W"); this->memory_key_ = ctx.InputName("W");
...@@ -122,7 +123,7 @@ class FCMKLDNNHandler ...@@ -122,7 +123,7 @@ class FCMKLDNNHandler
post_operations.append_eltwise( post_operations.append_eltwise(
activation_scale, dnnl::algorithm::eltwise_relu, 0.0f, 0.0f); activation_scale, dnnl::algorithm::eltwise_relu, 0.0f, 0.0f);
} }
platform::AppendActivation(ctx, post_operations, activation_scale); AppendActivation(ctx, post_operations, activation_scale);
if (ctx.HasAttr("fused_output_scale")) { if (ctx.HasAttr("fused_output_scale")) {
float scale_alpha = ctx.Attr<float>("fused_output_scale"); float scale_alpha = ctx.Attr<float>("fused_output_scale");
...@@ -154,6 +155,59 @@ class FCMKLDNNHandler ...@@ -154,6 +155,59 @@ class FCMKLDNNHandler
} }
} }
void AppendActivation(const ExecutionContext& ctx,
dnnl::post_ops& post_ops, // NOLINT
float activation_scale = 1.0f) {
const auto invalid_attribute =
ctx.HasAttr("fuse_activation")
? ctx.Attr<std::string>("fuse_activation").empty()
: true;
if (invalid_attribute) return;
const auto fuse_activation = ctx.Attr<std::string>("fuse_activation");
const auto fuse_alpha =
ctx.HasAttr("fuse_alpha") ? ctx.Attr<float>("fuse_alpha") : 0.0f;
const auto fuse_beta =
ctx.HasAttr("fuse_beta") ? ctx.Attr<float>("fuse_beta") : 0.0f;
if (fuse_activation == "hard_sigmoid") {
post_ops.append_eltwise(activation_scale,
dnnl::algorithm::eltwise_linear,
fuse_alpha,
fuse_beta);
post_ops.append_eltwise(
activation_scale, dnnl::algorithm::eltwise_clip, 0.0f, 1.0f);
} else {
const std::unordered_map<std::string, dnnl::algorithm> activation_map = {
{"abs", dnnl::algorithm::eltwise_abs},
{"clip", dnnl::algorithm::eltwise_clip},
{"gelu", dnnl::algorithm::eltwise_gelu_erf},
{"gelu_erf", dnnl::algorithm::eltwise_gelu_erf},
{"gelu_tanh", dnnl::algorithm::eltwise_gelu_tanh},
{"hard_swish", dnnl::algorithm::eltwise_hardswish},
{"leaky_relu", dnnl::algorithm::eltwise_relu},
{"mish", dnnl::algorithm::eltwise_mish},
{"relu", dnnl::algorithm::eltwise_relu},
{"relu6", dnnl::algorithm::eltwise_bounded_relu},
{"sigmoid", dnnl::algorithm::eltwise_logistic},
{"sqrt", dnnl::algorithm::eltwise_sqrt},
{"swish", dnnl::algorithm::eltwise_swish},
{"tanh", dnnl::algorithm::eltwise_tanh}};
const auto& activation_type = activation_map.find(fuse_activation);
PADDLE_ENFORCE_NE(
activation_type,
activation_map.end(),
phi::errors::InvalidArgument(
"Activation '%s' not found in oneDNN algorithms mapper",
fuse_activation));
post_ops.append_eltwise(
activation_scale, activation_type->second, fuse_alpha, fuse_beta);
}
}
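AppendActivation only fills the dnnl::post_ops list (note how hard_sigmoid, absent from oneDNN, is composed from eltwise_linear followed by eltwise_clip to [0, 1]); the caller still has to attach the list to the primitive attributes. The underlying oneDNN pattern, shown standalone:
// Plain oneDNN post-ops usage, independent of this handler:
dnnl::post_ops ops;
ops.append_eltwise(/*scale=*/1.0f, dnnl::algorithm::eltwise_relu,
                   /*alpha=*/0.0f, /*beta=*/0.0f);
dnnl::primitive_attr attr;
attr.set_post_ops(ops);  // pass attr when creating the primitive descriptor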
// Correct output scale, to take into account scaling of input and weights // Correct output scale, to take into account scaling of input and weights
// Since the data that comes out of input and weight multiplication is // Since the data that comes out of input and weight multiplication is
// scaled with its own scales, this data needs to be divided by // scaled with its own scales, this data needs to be divided by
...@@ -396,10 +450,76 @@ class FCMKLDNNKernel : public framework::OpKernel<T_in> { ...@@ -396,10 +450,76 @@ class FCMKLDNNKernel : public framework::OpKernel<T_in> {
} }
} }
void SetOutMemDescWithUnsqueeze2FuseSupport(
const framework::ExecutionContext& ctx,
phi::DenseTensor* out,
const dnnl::memory::desc& out_md) const {
const std::vector<int>& fused_unsqueeze2_axes =
ctx.Attr<std::vector<int>>("fused_unsqueeze2_axes");
const std::vector<int64_t>& op_tz = out_md.dims();
std::vector<int64_t> unsqueezed_op_tz(
op_tz.size() + fused_unsqueeze2_axes.size(), 0);
for (const auto& axis : fused_unsqueeze2_axes) {
int positive_axis = axis < 0 ? unsqueezed_op_tz.size() + axis : axis;
unsqueezed_op_tz[positive_axis] = 1;
}
int j = 0;
for (size_t i = 0; i < unsqueezed_op_tz.size(); ++i) {
if (unsqueezed_op_tz[i] == 0) {
unsqueezed_op_tz[i] = op_tz[j++];
}
}
out->set_mem_desc(out_md.reshape(unsqueezed_op_tz));
out->Resize(phi::make_ddim(unsqueezed_op_tz));
}
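The axis-filling loop above can be replayed standalone; with hypothetical dims {6, 7} and fused_unsqueeze2_axes {0, 2} it produces {1, 6, 1, 7}:
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  std::vector<int64_t> op_tz = {6, 7};  // original output dims
  std::vector<int> axes = {0, 2};       // fused_unsqueeze2_axes
  std::vector<int64_t> out(op_tz.size() + axes.size(), 0);
  for (int axis : axes)                 // mark the unsqueezed positions
    out[axis < 0 ? out.size() + axis : axis] = 1;
  int j = 0;
  for (size_t i = 0; i < out.size(); ++i)  // fill the rest in input order
    if (out[i] == 0) out[i] = op_tz[j++];
  for (int64_t d : out) std::printf("%lld ", static_cast<long long>(d));  // 1 6 1 7
  return 0;
}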
void SetOutMemDescWithReshape2FuseSupport(
const framework::ExecutionContext& ctx,
phi::DenseTensor* out,
const dnnl::memory::desc& out_md) const {
std::vector<int64_t> fused_reshape2_shape(
ctx.Attr<std::vector<int>>("fused_reshape2_shape").begin(),
ctx.Attr<std::vector<int>>("fused_reshape2_shape").end());
const int out_shape_numel = out->numel();
const int new_shape_numel = std::accumulate(fused_reshape2_shape.begin(),
fused_reshape2_shape.end(),
1,
std::multiplies<int64_t>());
for (size_t i = 0; i < fused_reshape2_shape.size(); ++i) {
if (fused_reshape2_shape[i] == -1) {
fused_reshape2_shape[i] = -out_shape_numel / new_shape_numel;
break;
}
}
out->set_mem_desc(out_md.reshape(fused_reshape2_shape));
out->Resize(phi::make_ddim(fused_reshape2_shape));
}
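A worked example of the sign trick above: with a hypothetical out->numel() of 12 and fused_reshape2_shape = {-1, 4}, new_shape_numel is (-1) * 4 = -4, so the -1 entry becomes -12 / -4 = 3 and the final shape is {3, 4}. The double negation is what folds the -1 placeholder out of the product.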
void SetOutMemDescWithLogicalLayoutFusesSupport(
const framework::ExecutionContext& ctx,
phi::DenseTensor* out,
const dnnl::memory::desc& out_md) const {
if (ctx.HasAttr("fused_unsqueeze2_axes")) {
SetOutMemDescWithUnsqueeze2FuseSupport(ctx, out, out_md);
} else if (ctx.HasAttr("fused_reshape2_shape")) {
SetOutMemDescWithReshape2FuseSupport(ctx, out, out_md);
} else if (ctx.HasAttr("fused_squeeze2_axes")) {
out->set_mem_desc(out_md);
out->Resize(phi::make_ddim(out_md.dims()));
} else {
out->set_mem_desc(out_md);
}
}
template <typename T_out, typename T_w> template <typename T_out, typename T_w>
void RunKernel(const framework::ExecutionContext& ctx) const { void RunKernel(const framework::ExecutionContext& ctx) const {
const auto& dev_ctx = ctx.template device_context<OneDNNContext>(); const auto& dev_ctx = ctx.template device_context<OneDNNContext>();
const auto& mkldnn_engine = dev_ctx.GetEngine(); const auto& onednn_engine = dev_ctx.GetEngine();
const auto* x = ctx.Input<phi::DenseTensor>("Input"); const auto* x = ctx.Input<phi::DenseTensor>("Input");
const auto* weights = ctx.Input<phi::DenseTensor>("W"); const auto* weights = ctx.Input<phi::DenseTensor>("W");
...@@ -433,7 +553,7 @@ class FCMKLDNNKernel : public framework::OpKernel<T_in> { ...@@ -433,7 +553,7 @@ class FCMKLDNNKernel : public framework::OpKernel<T_in> {
inner_product_cache->inner_product_p); inner_product_cache->inner_product_p);
src_memory_p = src_memory_p =
std::make_shared<dnnl::memory>(inner_product_cache->src_mem); std::make_shared<dnnl::memory>(inner_product_cache->src_mem);
PrepareSrcMem(fc_p, src_memory_p, x, mkldnn_engine); PrepareSrcMem(fc_p, src_memory_p, x, onednn_engine);
weights_memory_p = weights_memory_p =
std::make_shared<dnnl::memory>(inner_product_cache->weights_mem); std::make_shared<dnnl::memory>(inner_product_cache->weights_mem);
...@@ -463,7 +583,7 @@ class FCMKLDNNKernel : public framework::OpKernel<T_in> { ...@@ -463,7 +583,7 @@ class FCMKLDNNKernel : public framework::OpKernel<T_in> {
bias, bias,
out, out,
in_col_dims, in_col_dims,
mkldnn_engine, onednn_engine,
ctx.GetPlace()); ctx.GetPlace());
src_memory_p = handler.AcquireSrcMemoryWithReorder(x); src_memory_p = handler.AcquireSrcMemoryWithReorder(x);
...@@ -504,7 +624,7 @@ class FCMKLDNNKernel : public framework::OpKernel<T_in> { ...@@ -504,7 +624,7 @@ class FCMKLDNNKernel : public framework::OpKernel<T_in> {
dev_ctx.SetBlob(cache_key, ip_cache); dev_ctx.SetBlob(cache_key, ip_cache);
} }
platform::SetOutMemDescWithLogicalLayoutFusesSupport( SetOutMemDescWithLogicalLayoutFusesSupport(
ctx, ctx,
out, out,
dst_memory_p->get_desc().reshape(phi::vectorize(out->dims()))); dst_memory_p->get_desc().reshape(phi::vectorize(out->dims())));
...@@ -541,7 +661,7 @@ namespace ops = paddle::operators; ...@@ -541,7 +661,7 @@ namespace ops = paddle::operators;
REGISTER_OP_KERNEL(fc, REGISTER_OP_KERNEL(fc,
MKLDNN, MKLDNN,
::paddle::platform::CPUPlace, ::phi::CPUPlace,
ops::FCMKLDNNKernel<float>, ops::FCMKLDNNKernel<float>,
ops::FCMKLDNNKernel<paddle::platform::bfloat16>, ops::FCMKLDNNKernel<paddle::platform::bfloat16>,
ops::FCMKLDNNKernel<uint8_t>, ops::FCMKLDNNKernel<uint8_t>,
......
...@@ -12,9 +12,8 @@ ...@@ -12,9 +12,8 @@
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/framework/data_layout_transform.h"
#include "paddle/fluid/operators/interpolate_op.h" #include "paddle/fluid/operators/interpolate_op.h"
#include "paddle/fluid/platform/mkldnn_reuse.h" #include "paddle/phi/backends/onednn/onednn_reuse.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -25,6 +24,7 @@ using dnnl::reorder; ...@@ -25,6 +24,7 @@ using dnnl::reorder;
using dnnl::resampling_forward; using dnnl::resampling_forward;
using dnnl::stream; using dnnl::stream;
using phi::DataLayout; using phi::DataLayout;
using OneDNNMemoryFormat = dnnl::memory::format_tag;
template <typename T = float> template <typename T = float>
class InterpolateOneDNNHandler class InterpolateOneDNNHandler
...@@ -131,7 +131,7 @@ class InterpolateOneDNNKernel : public framework::OpKernel<T> { ...@@ -131,7 +131,7 @@ class InterpolateOneDNNKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
const auto& dev_ctx = ctx.template device_context<phi::OneDNNContext>(); const auto& dev_ctx = ctx.template device_context<phi::OneDNNContext>();
const auto& mkldnn_engine = dev_ctx.GetEngine(); const auto& onednn_engine = dev_ctx.GetEngine();
const auto* x = ctx.Input<phi::DenseTensor>("X"); const auto* x = ctx.Input<phi::DenseTensor>("X");
auto* out = ctx.Output<phi::DenseTensor>("Out"); auto* out = ctx.Output<phi::DenseTensor>("Out");
...@@ -146,7 +146,7 @@ class InterpolateOneDNNKernel : public framework::OpKernel<T> { ...@@ -146,7 +146,7 @@ class InterpolateOneDNNKernel : public framework::OpKernel<T> {
out->Resize(dim_out); out->Resize(dim_out);
InterpolateOneDNNHandler<T> handler( InterpolateOneDNNHandler<T> handler(
algo, mkldnn_engine, ctx.GetPlace(), x, out); algo, onednn_engine, ctx.GetPlace(), x, out);
auto src_memory_p = handler.AcquireSrcMemory(x); auto src_memory_p = handler.AcquireSrcMemory(x);
auto dst_memory_p = handler.AcquireDstMemory(out); auto dst_memory_p = handler.AcquireDstMemory(out);
...@@ -170,11 +170,11 @@ namespace ops = paddle::operators; ...@@ -170,11 +170,11 @@ namespace ops = paddle::operators;
REGISTER_OP_KERNEL(nearest_interp, REGISTER_OP_KERNEL(nearest_interp,
MKLDNN, MKLDNN,
::paddle::platform::CPUPlace, ::phi::CPUPlace,
ops::InterpolateOneDNNKernel<float>, ops::InterpolateOneDNNKernel<float>,
ops::InterpolateOneDNNKernel<int8_t>, ops::InterpolateOneDNNKernel<int8_t>,
ops::InterpolateOneDNNKernel<uint8_t>); ops::InterpolateOneDNNKernel<uint8_t>);
REGISTER_OP_KERNEL(bilinear_interp, REGISTER_OP_KERNEL(bilinear_interp,
MKLDNN, MKLDNN,
::paddle::platform::CPUPlace, ::phi::CPUPlace,
ops::InterpolateOneDNNKernel<float>); ops::InterpolateOneDNNKernel<float>);
...@@ -13,7 +13,8 @@ See the License for the specific language governing permissions and ...@@ -13,7 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/mkldnn_reuse.h"
#include "paddle/phi/backends/onednn/onednn_reuse.h"
#include "paddle/phi/common/data_type.h" #include "paddle/phi/common/data_type.h"
namespace paddle { namespace paddle {
...@@ -99,7 +100,7 @@ class LayerNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -99,7 +100,7 @@ class LayerNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
const bool is_test = ctx.Attr<bool>("is_test"); const bool is_test = ctx.Attr<bool>("is_test");
auto& dev_ctx = ctx.template device_context<phi::OneDNNContext>(); auto& dev_ctx = ctx.template device_context<phi::OneDNNContext>();
const auto& mkldnn_engine = dev_ctx.GetEngine(); const auto& onednn_engine = dev_ctx.GetEngine();
auto src_tz = phi::vectorize(x->dims()); auto src_tz = phi::vectorize(x->dims());
PADDLE_ENFORCE_EQ(begin_norm_axis, PADDLE_ENFORCE_EQ(begin_norm_axis,
...@@ -117,7 +118,7 @@ class LayerNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -117,7 +118,7 @@ class LayerNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
} }
LayerNormOneDNNHandler<T> handler( LayerNormOneDNNHandler<T> handler(
src_tz, epsilon, flags, is_test, x, mkldnn_engine, ctx.GetPlace()); src_tz, epsilon, flags, is_test, x, onednn_engine, ctx.GetPlace());
auto src_memory = handler.AcquireSrcMemory(x); auto src_memory = handler.AcquireSrcMemory(x);
auto dst_memory = handler.AcquireDstMemory(out); auto dst_memory = handler.AcquireDstMemory(out);
...@@ -159,6 +160,6 @@ class LayerNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -159,6 +160,6 @@ class LayerNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OP_KERNEL(layer_norm, REGISTER_OP_KERNEL(layer_norm,
MKLDNN, MKLDNN,
::paddle::platform::CPUPlace, ::phi::CPUPlace,
ops::LayerNormMKLDNNOpKernel<float>, ops::LayerNormMKLDNNOpKernel<float>,
ops::LayerNormMKLDNNOpKernel<paddle::platform::bfloat16>); ops::LayerNormMKLDNNOpKernel<paddle::platform::bfloat16>);
...@@ -12,7 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/platform/mkldnn_reuse.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/phi/backends/onednn/onednn_reuse.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -25,13 +26,13 @@ class LRNOneDNNHandler ...@@ -25,13 +26,13 @@ class LRNOneDNNHandler
OneDNNHandlerNoCachingT<T, dnnl::lrn_forward, dnnl::lrn_backward> { OneDNNHandlerNoCachingT<T, dnnl::lrn_forward, dnnl::lrn_backward> {
public: public:
LRNOneDNNHandler(const framework::ExecutionContext& ctx, LRNOneDNNHandler(const framework::ExecutionContext& ctx,
const dnnl::engine mkldnn_engine, const dnnl::engine onednn_engine,
platform::Place cpu_place, platform::Place cpu_place,
const phi::DenseTensor* input) const phi::DenseTensor* input)
: phi::funcs:: : phi::funcs::
OneDNNHandlerNoCachingT<T, dnnl::lrn_forward, dnnl::lrn_backward>( OneDNNHandlerNoCachingT<T, dnnl::lrn_forward, dnnl::lrn_backward>(
mkldnn_engine, cpu_place) { onednn_engine, cpu_place) {
const int n = ctx.Attr<int>("n"); const int n = ctx.Attr<int>("n");
// MKL-DNN implements LRN in a caffe way: // MKL-DNN implements LRN in a caffe way:
// http://caffe.berkeleyvision.org/tutorial/layers/lrn.html // http://caffe.berkeleyvision.org/tutorial/layers/lrn.html
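In the caffe formulation, out_i = in_i / (k + (alpha / n) * sum_j in_j^2)^beta, i.e. the squared sum is averaged over the window of size n, whereas PaddlePaddle's LRN applies alpha to the raw sum; the handler therefore has to pass alpha * n to oneDNN so the two definitions agree (the compensation line itself falls outside this hunk).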
...@@ -56,14 +57,14 @@ class LRNOneDNNHandler ...@@ -56,14 +57,14 @@ class LRNOneDNNHandler
} }
LRNOneDNNHandler(const framework::ExecutionContext& ctx, LRNOneDNNHandler(const framework::ExecutionContext& ctx,
const dnnl::engine mkldnn_engine, const dnnl::engine onednn_engine,
platform::Place cpu_place, platform::Place cpu_place,
const phi::DenseTensor* in_x, const phi::DenseTensor* in_x,
const phi::DenseTensor* out_grad, const phi::DenseTensor* out_grad,
phi::DenseTensor* in_x_grad) phi::DenseTensor* in_x_grad)
: phi::funcs:: : phi::funcs::
OneDNNHandlerNoCachingT<T, dnnl::lrn_forward, dnnl::lrn_backward>( OneDNNHandlerNoCachingT<T, dnnl::lrn_forward, dnnl::lrn_backward>(
mkldnn_engine, cpu_place) { onednn_engine, cpu_place) {
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
ctx.Attr<bool>("is_test"), ctx.Attr<bool>("is_test"),
false, false,
...@@ -125,13 +126,13 @@ class LRNMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -125,13 +126,13 @@ class LRNMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
paddle::platform::errors::PreconditionNotMet( paddle::platform::errors::PreconditionNotMet(
"Operator DNNL LRN must use CPUPlace")); "Operator DNNL LRN must use CPUPlace"));
auto& dev_ctx = ctx.template device_context<OneDNNContext>(); auto& dev_ctx = ctx.template device_context<OneDNNContext>();
const auto& mkldnn_engine = dev_ctx.GetEngine(); const auto& onednn_engine = dev_ctx.GetEngine();
auto x = ctx.Input<phi::DenseTensor>("X"); auto x = ctx.Input<phi::DenseTensor>("X");
auto out = ctx.Output<phi::DenseTensor>("Out"); auto out = ctx.Output<phi::DenseTensor>("Out");
auto mid = ctx.Output<phi::DenseTensor>("MidOut"); auto mid = ctx.Output<phi::DenseTensor>("MidOut");
LRNOneDNNHandler<T> handler(ctx, mkldnn_engine, ctx.GetPlace(), x); LRNOneDNNHandler<T> handler(ctx, onednn_engine, ctx.GetPlace(), x);
auto src_memory = handler.AcquireSrcMemory(x); auto src_memory = handler.AcquireSrcMemory(x);
auto dst_memory = handler.AcquireDstMemory(out); auto dst_memory = handler.AcquireDstMemory(out);
...@@ -179,10 +180,10 @@ class LRNMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> { ...@@ -179,10 +180,10 @@ class LRNMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
auto in_x_grad = ctx.Output<phi::DenseTensor>(framework::GradVarName("X")); auto in_x_grad = ctx.Output<phi::DenseTensor>(framework::GradVarName("X"));
auto& dev_ctx = ctx.template device_context<OneDNNContext>(); auto& dev_ctx = ctx.template device_context<OneDNNContext>();
const auto& mkldnn_engine = dev_ctx.GetEngine(); const auto& onednn_engine = dev_ctx.GetEngine();
LRNOneDNNHandler<T> handler( LRNOneDNNHandler<T> handler(
ctx, mkldnn_engine, ctx.GetPlace(), in_x, out_grad, in_x_grad); ctx, onednn_engine, ctx.GetPlace(), in_x, out_grad, in_x_grad);
auto src_memory = handler.AcquireSrcMemory(in_x); auto src_memory = handler.AcquireSrcMemory(in_x);
auto workspace = handler.AcquireBackwardWorkspaceMemory(mid); auto workspace = handler.AcquireBackwardWorkspaceMemory(mid);
...@@ -207,11 +208,8 @@ class LRNMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> { ...@@ -207,11 +208,8 @@ class LRNMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OP_KERNEL(lrn, REGISTER_OP_KERNEL(lrn, MKLDNN, phi::CPUPlace, ops::LRNMKLDNNOpKernel<float>);
MKLDNN,
paddle::platform::CPUPlace,
ops::LRNMKLDNNOpKernel<float>);
REGISTER_OP_KERNEL(lrn_grad, REGISTER_OP_KERNEL(lrn_grad,
MKLDNN, MKLDNN,
paddle::platform::CPUPlace, phi::CPUPlace,
ops::LRNMKLDNNGradOpKernel<float>); ops::LRNMKLDNNGradOpKernel<float>);
...@@ -14,14 +14,13 @@ limitations under the License. */ ...@@ -14,14 +14,13 @@ limitations under the License. */
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/mkldnn_reuse.h" #include "paddle/phi/backends/onednn/onednn_reuse.h"
#include "paddle/phi/kernels/funcs/blas/blas.h" #include "paddle/phi/kernels/funcs/blas/blas.h"
namespace { namespace {
using dnnl::memory; using dnnl::memory;
using paddle::framework::ExecutionContext; using paddle::framework::ExecutionContext;
using paddle::framework::GradVarName; using paddle::framework::GradVarName;
using paddle::platform::MatMulV2MKLDNNHandler;
using phi::OneDNNContext; using phi::OneDNNContext;
using phi::vectorize; using phi::vectorize;
using phi::funcs::OneDNNGetDataType; using phi::funcs::OneDNNGetDataType;
...@@ -82,6 +81,239 @@ phi::DDim GetDimForInput(const ExecutionContext &ctx, std::string input_name) { ...@@ -82,6 +81,239 @@ phi::DDim GetDimForInput(const ExecutionContext &ctx, std::string input_name) {
return input_dims; return input_dims;
} }
template <typename XT, typename YT, typename OT>
class MatMulV2MKLDNNHandler
: public phi::funcs::OneDNNHandlerNoCachingT<XT, dnnl::matmul> {
public:
MatMulV2MKLDNNHandler(const ExecutionContext &ctx,
const dnnl::engine engine,
paddle::platform::Place cpu_place,
const std::vector<int64_t> &x_org_dims,
bool trans_x,
const std::vector<int64_t> &y_org_dims,
bool trans_y,
bool is_output_fused,
const std::vector<int64_t> &x_strides_override,
const std::vector<int64_t> &y_strides_override)
: phi::funcs::OneDNNHandlerNoCachingT<XT, dnnl::matmul>(engine,
cpu_place) {
// M X K * K X N
std::vector<int64_t> x_dims(x_org_dims);
std::vector<int64_t> y_dims(y_org_dims);
const int MB_idx = x_dims.size() - 3;
const int H_idx = x_dims.size() - 2;
const int W_idx = x_dims.size() - 1;
if (trans_x) std::swap(x_dims[H_idx], x_dims[W_idx]);
if (trans_y) std::swap(y_dims[H_idx], y_dims[W_idx]);
const memory::dim M = x_dims[H_idx];
const memory::dim K = x_dims[W_idx];
const memory::dim N = y_dims[W_idx];
std::vector<int64_t> x_strides(x_dims.size() - 3, 1);
std::vector<int64_t> y_strides(x_dims.size() - 3, 1);
std::vector<int64_t> out_strides(x_dims.size() - 3, 1);
std::vector<int64_t> out_ddims(x_dims.size() - 3, 1);
x_strides.reserve(x_dims.size());
y_strides.reserve(x_dims.size());
out_strides.reserve(x_dims.size());
if (!x_strides_override.empty()) {
x_strides = x_strides_override;
} else {
if (!trans_x) {
x_strides.insert(x_strides.end(), {M * K, K, 1});
} else {
x_strides.insert(x_strides.end(), {M * K, 1, M});
}
}
if (!y_strides_override.empty()) {
y_strides = y_strides_override;
} else {
if (!trans_y) {
y_strides.insert(y_strides.end(), {N * K, N, 1});
} else {
y_strides.insert(y_strides.end(), {N * K, 1, K});
}
}
out_strides.insert(out_strides.end(), {M * N, N, 1});
out_ddims.insert(out_ddims.end(),
{std::max(x_dims[MB_idx], y_dims[MB_idx]), M, N});
for (int i = x_dims.size() - 4; i >= 0; --i) {
out_ddims[i] = std::max(x_dims[i], y_dims[i]);
if (x_strides_override.empty()) {
x_strides[i] = x_dims[i + 1] * x_strides[i + 1];
}
if (y_strides_override.empty()) {
y_strides[i] = y_dims[i + 1] * y_strides[i + 1];
}
out_strides[i] = out_ddims[i + 1] * out_strides[i + 1];
}
// TODO(jczaja): Why not for int8??
if (!phi::funcs::is_int8<OT>() && is_output_fused) {
out_strides = FakeTransposeStrides(out_ddims);
}
auto x_md =
memory::desc(x_dims, phi::funcs::OneDNNGetDataType<XT>(), x_strides);
auto y_md =
memory::desc(y_dims, phi::funcs::OneDNNGetDataType<YT>(), y_strides);
auto out_md = memory::desc(
out_ddims, phi::funcs::OneDNNGetDataType<OT>(), out_strides);
const dnnl::primitive_attr matmul_attrs = CreateMatmulAttrs(ctx);
this->AcquireForwardPrimitiveDescriptor(matmul_attrs, x_md, y_md, out_md);
}
void AppendActivation(const ExecutionContext &ctx,
dnnl::post_ops &post_ops, // NOLINT
float activation_scale = 1.0f) {
const auto invalid_attribute =
ctx.HasAttr("fuse_activation")
? ctx.Attr<std::string>("fuse_activation").empty()
: true;
if (invalid_attribute) return;
const auto fuse_activation = ctx.Attr<std::string>("fuse_activation");
const auto fuse_alpha =
ctx.HasAttr("fuse_alpha") ? ctx.Attr<float>("fuse_alpha") : 0.0f;
const auto fuse_beta =
ctx.HasAttr("fuse_beta") ? ctx.Attr<float>("fuse_beta") : 0.0f;
if (fuse_activation == "hard_sigmoid") {
post_ops.append_eltwise(activation_scale,
dnnl::algorithm::eltwise_linear,
fuse_alpha,
fuse_beta);
post_ops.append_eltwise(
activation_scale, dnnl::algorithm::eltwise_clip, 0.0f, 1.0f);
} else {
const std::unordered_map<std::string, dnnl::algorithm> activation_map = {
{"abs", dnnl::algorithm::eltwise_abs},
{"clip", dnnl::algorithm::eltwise_clip},
{"gelu", dnnl::algorithm::eltwise_gelu_erf},
{"gelu_erf", dnnl::algorithm::eltwise_gelu_erf},
{"gelu_tanh", dnnl::algorithm::eltwise_gelu_tanh},
{"hard_swish", dnnl::algorithm::eltwise_hardswish},
{"leaky_relu", dnnl::algorithm::eltwise_relu},
{"mish", dnnl::algorithm::eltwise_mish},
{"relu", dnnl::algorithm::eltwise_relu},
{"relu6", dnnl::algorithm::eltwise_bounded_relu},
{"sigmoid", dnnl::algorithm::eltwise_logistic},
{"sqrt", dnnl::algorithm::eltwise_sqrt},
{"swish", dnnl::algorithm::eltwise_swish},
{"tanh", dnnl::algorithm::eltwise_tanh}};
const auto &activation_type = activation_map.find(fuse_activation);
PADDLE_ENFORCE_NE(
activation_type,
activation_map.end(),
phi::errors::InvalidArgument(
"Activation '%s' not found in oneDNN algorithms mapper",
fuse_activation));
post_ops.append_eltwise(
activation_scale, activation_type->second, fuse_alpha, fuse_beta);
}
}
float ComputeOutputScale(const ExecutionContext &ctx) {
float alpha = ctx.HasAttr("alpha") ? ctx.Attr<float>("alpha") : 1.0f;
if (ctx.HasAttr("Scale_x") && ctx.HasAttr("Scale_y") &&
ctx.HasAttr("Scale_out")) {
float scale_x = ctx.Attr<float>("Scale_x");
float scale_y = ctx.Attr<float>("Scale_y");
bool force_fp32_out = ctx.HasAttr("force_fp32_output")
? ctx.Attr<bool>("force_fp32_output")
: false;
float scale_out = force_fp32_out ? 1.f : ctx.Attr<float>("Scale_out");
alpha *= scale_out / (scale_x * scale_y);
}
return alpha;
}
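Worked example with hypothetical attribute values: Scale_x = 0.5, Scale_y = 2.0, Scale_out = 4.0 and alpha = 1.0 give an output scale of 4.0 / (0.5 * 2.0) = 4.0; with force_fp32_output set, scale_out is pinned to 1.0 and the result drops to 1.0.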
dnnl::primitive_attr CreateMatmulAttrs(const ExecutionContext &ctx) {
dnnl::primitive_attr matmul_attrs;
dnnl::post_ops post_operations;
float scale_out = ComputeOutputScale(ctx);
if (scale_out != 1.0f) {
matmul_attrs.set_output_scales(0, {scale_out});
}
if (ctx.HasInput("ResidualData")) {
auto *residual_data = ctx.Input<phi::DenseTensor>("ResidualData");
auto residual_data_tz = phi::vectorize(residual_data->dims());
auto residual_data_md = memory::desc(residual_data_tz,
phi::funcs::OneDNNGetDataType<OT>(),
dnnl::memory::format_tag::any);
post_operations.append_binary(dnnl::algorithm::binary_add,
residual_data_md);
if (ctx.HasAttr("Scale_in_eltwise")) {
float sum_scale = scale_out / ctx.Attr<float>("Scale_in_eltwise");
post_operations.append_sum(sum_scale);
}
}
AppendActivation(ctx, post_operations);
if (ctx.HasAttr("fused_output_scale")) {
float scale_alpha = ctx.Attr<float>("fused_output_scale");
post_operations.append_eltwise(
1.0, dnnl::algorithm::eltwise_linear, scale_alpha, 0.0f);
}
matmul_attrs.set_post_ops(post_operations);
return matmul_attrs;
}
std::vector<int64_t> FakeTransposeStrides(
const std::vector<int64_t> &matmul_out_dims) const {
// fuse matmul_v2 + transpose + reshape guarantees that the output is 4D and
// the transpose axes are: {0, 2, 1, 3}
std::vector<int64_t> transpose_axis = {0, 2, 1, 3};
std::vector<int64_t> fake_strides(transpose_axis.size());
int ndims = static_cast<int>(transpose_axis.size());
int total_stride = 1;
for (int i = ndims - 1; i >= 0; --i) {
fake_strides[transpose_axis[i]] = total_stride;
total_stride *= matmul_out_dims[transpose_axis[i]];
}
return fake_strides;
}
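For hypothetical matmul_out_dims = {2, 12, 128, 64}, the loop yields fake_strides = {98304, 64, 768, 1}: exactly the strides of a contiguous {2, 128, 12, 64} buffer addressed through the pre-transpose shape, so the fused transpose needs no extra copy.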
std::shared_ptr<memory> AcquireWeightsMemory(const phi::DenseTensor *input) {
const YT *input_data = input->data<YT>();
return this->AcquireMemoryFromPrimitive(
this->fwd_pd_->weights_desc(),
phi::funcs::to_void_cast<YT>(input_data));
}
std::shared_ptr<dnnl::memory> AcquireDstMemory(phi::DenseTensor *output) {
// We cannot use the base AcquireDstMemory as it makes an allocation request
// based on the DST memory primitive size. This is fine in general, but in
// MatMul we have a primitive that covers only one batch of data and then
// shifts the pointer for every new batch. Hence the phi::DenseTensor size is
// bigger than the dst memory primitive size, so we would request less memory
// than is there, which triggers an assertion. As there is no 'any' format
// here, we can leave the default size of phi::DenseTensor as computed in
// ComputeInferShape
OT *ptr = output->mutable_data<OT>(this->place_);
return this->AcquireMemoryFromPrimitive(this->fwd_pd_->dst_desc(), ptr);
}
};
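For orientation, a hedged sketch of how such a handler is typically driven from a kernel body; the Acquire* names come from the OneDNNHandlerNoCachingT base plus the class above, and the surrounding variables (x, y, out, x_dims, y_dims, onednn_engine) are assumed:
// Assumed usage pattern, not a verbatim excerpt from this PR:
MatMulV2MKLDNNHandler<float, float, float> handler(
    ctx, onednn_engine, ctx.GetPlace(), x_dims, trans_x, y_dims, trans_y,
    /*is_output_fused=*/false, /*x_strides_override=*/{},
    /*y_strides_override=*/{});
auto src_memory_p = handler.AcquireSrcMemory(x);
auto weights_memory_p = handler.AcquireWeightsMemory(y);
auto dst_memory_p = handler.AcquireDstMemory(out);
auto matmul_p = handler.AcquireForwardPrimitive();
auto& astream = OneDNNContext::tls().get_stream();
matmul_p->execute(astream,
                  {{DNNL_ARG_SRC, *src_memory_p},
                   {DNNL_ARG_WEIGHTS, *weights_memory_p},
                   {DNNL_ARG_DST, *dst_memory_p}});
astream.wait();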
template <typename XT, typename YT, typename OT> template <typename XT, typename YT, typename OT>
class MatMulMKLDNNHandler class MatMulMKLDNNHandler
: public phi::funcs::OneDNNHandlerNoCachingT<XT, dnnl::matmul> { : public phi::funcs::OneDNNHandlerNoCachingT<XT, dnnl::matmul> {
...@@ -696,7 +928,7 @@ class MatMulGradMKLDNNKernel : public paddle::framework::OpKernel<T> { ...@@ -696,7 +928,7 @@ class MatMulGradMKLDNNKernel : public paddle::framework::OpKernel<T> {
REGISTER_OP_KERNEL(matmul, REGISTER_OP_KERNEL(matmul,
MKLDNN, MKLDNN,
::paddle::platform::CPUPlace, ::phi::CPUPlace,
MatMulMKLDNNKernel<float>, MatMulMKLDNNKernel<float>,
MatMulMKLDNNKernel<paddle::platform::bfloat16>, MatMulMKLDNNKernel<paddle::platform::bfloat16>,
MatMulMKLDNNKernel<int8_t>, MatMulMKLDNNKernel<int8_t>,
...@@ -704,6 +936,6 @@ REGISTER_OP_KERNEL(matmul, ...@@ -704,6 +936,6 @@ REGISTER_OP_KERNEL(matmul,
REGISTER_OP_KERNEL(matmul_grad, REGISTER_OP_KERNEL(matmul_grad,
MKLDNN, MKLDNN,
::paddle::platform::CPUPlace, ::phi::CPUPlace,
MatMulGradMKLDNNKernel<float>, MatMulGradMKLDNNKernel<float>,
MatMulGradMKLDNNKernel<paddle::platform::bfloat16>); MatMulGradMKLDNNKernel<paddle::platform::bfloat16>);
...@@ -14,10 +14,10 @@ limitations under the License. */ ...@@ -14,10 +14,10 @@ limitations under the License. */
#include "paddle/fluid/operators/quantize_op.h" #include "paddle/fluid/operators/quantize_op.h"
#include "paddle/fluid/framework/data_layout_transform.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/mkldnn_helper.h" #include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/fluid/platform/mkldnn_reuse.h" #include "paddle/phi/backends/onednn/onednn_reuse.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -106,5 +106,5 @@ namespace ops = paddle::operators; ...@@ -106,5 +106,5 @@ namespace ops = paddle::operators;
REGISTER_OP_KERNEL(quantize, REGISTER_OP_KERNEL(quantize,
MKLDNN, MKLDNN,
::paddle::platform::CPUPlace, ::phi::CPUPlace,
ops::QuantOpKernel<float>); ops::QuantOpKernel<float>);
...@@ -14,7 +14,6 @@ limitations under the License. */ ...@@ -14,7 +14,6 @@ limitations under the License. */
#include <iterator> // NOLINT #include <iterator> // NOLINT
#include "dnnl.hpp" // NOLINT #include "dnnl.hpp" // NOLINT
#include "paddle/fluid/framework/data_layout_transform.h"
#include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/operators/requantize_op.h" #include "paddle/fluid/operators/requantize_op.h"
#include "paddle/phi/backends/onednn/onednn_helper.h" #include "paddle/phi/backends/onednn/onednn_helper.h"
...@@ -115,7 +114,7 @@ namespace ops = paddle::operators; ...@@ -115,7 +114,7 @@ namespace ops = paddle::operators;
REGISTER_OP_KERNEL(requantize, REGISTER_OP_KERNEL(requantize,
MKLDNN, MKLDNN,
::paddle::platform::CPUPlace, ::phi::CPUPlace,
ops::ReQuantOpKernel<int8_t>, ops::ReQuantOpKernel<int8_t>,
ops::ReQuantOpKernel<uint8_t>, ops::ReQuantOpKernel<uint8_t>,
ops::ReQuantOpKernel<paddle::platform::bfloat16>); ops::ReQuantOpKernel<paddle::platform::bfloat16>);
...@@ -12,9 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,9 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/flatten_op.h" #include "paddle/fluid/operators/flatten_op.h"
#include "paddle/fluid/operators/squeeze_op.h" #include "paddle/fluid/operators/squeeze_op.h"
#include "paddle/fluid/platform/mkldnn_reuse.h" #include "paddle/phi/backends/onednn/onednn_reuse.h"
namespace { namespace {
enum class ReshapeKernelOpName { enum class ReshapeKernelOpName {
...@@ -357,7 +358,7 @@ namespace ops = paddle::operators; ...@@ -357,7 +358,7 @@ namespace ops = paddle::operators;
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
squeeze, squeeze,
MKLDNN, MKLDNN,
paddle::platform::CPUPlace, phi::CPUPlace,
ops::ReshapeMKLDNNKernel<float, ReshapeKernelOpName::squeeze>, ops::ReshapeMKLDNNKernel<float, ReshapeKernelOpName::squeeze>,
ops::ReshapeMKLDNNKernel<paddle::platform::bfloat16, ops::ReshapeMKLDNNKernel<paddle::platform::bfloat16,
ReshapeKernelOpName::squeeze>); ReshapeKernelOpName::squeeze>);
...@@ -365,7 +366,7 @@ REGISTER_OP_KERNEL( ...@@ -365,7 +366,7 @@ REGISTER_OP_KERNEL(
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
squeeze_grad, squeeze_grad,
MKLDNN, MKLDNN,
paddle::platform::CPUPlace, phi::CPUPlace,
ops::ReshapeGradMKLDNNKernel<float, ReshapeKernelOpName::squeeze>, ops::ReshapeGradMKLDNNKernel<float, ReshapeKernelOpName::squeeze>,
ops::ReshapeGradMKLDNNKernel<paddle::platform::bfloat16, ops::ReshapeGradMKLDNNKernel<paddle::platform::bfloat16,
ReshapeKernelOpName::squeeze>); ReshapeKernelOpName::squeeze>);
...@@ -373,7 +374,7 @@ REGISTER_OP_KERNEL( ...@@ -373,7 +374,7 @@ REGISTER_OP_KERNEL(
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
reshape, reshape,
MKLDNN, MKLDNN,
paddle::platform::CPUPlace, phi::CPUPlace,
ops::ReshapeMKLDNNKernel<float, ReshapeKernelOpName::reshape>, ops::ReshapeMKLDNNKernel<float, ReshapeKernelOpName::reshape>,
ops::ReshapeMKLDNNKernel<paddle::platform::bfloat16, ops::ReshapeMKLDNNKernel<paddle::platform::bfloat16,
ReshapeKernelOpName::reshape>); ReshapeKernelOpName::reshape>);
...@@ -381,7 +382,7 @@ REGISTER_OP_KERNEL( ...@@ -381,7 +382,7 @@ REGISTER_OP_KERNEL(
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
reshape_grad, reshape_grad,
MKLDNN, MKLDNN,
paddle::platform::CPUPlace, phi::CPUPlace,
ops::ReshapeGradMKLDNNKernel<float, ReshapeKernelOpName::reshape>, ops::ReshapeGradMKLDNNKernel<float, ReshapeKernelOpName::reshape>,
ops::ReshapeGradMKLDNNKernel<paddle::platform::bfloat16, ops::ReshapeGradMKLDNNKernel<paddle::platform::bfloat16,
ReshapeKernelOpName::reshape>); ReshapeKernelOpName::reshape>);
...@@ -389,7 +390,7 @@ REGISTER_OP_KERNEL( ...@@ -389,7 +390,7 @@ REGISTER_OP_KERNEL(
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
reshape2_grad, reshape2_grad,
MKLDNN, MKLDNN,
paddle::platform::CPUPlace, phi::CPUPlace,
ops::ReshapeGradMKLDNNKernel<float, ReshapeKernelOpName::reshape2>, ops::ReshapeGradMKLDNNKernel<float, ReshapeKernelOpName::reshape2>,
ops::ReshapeGradMKLDNNKernel<paddle::platform::bfloat16, ops::ReshapeGradMKLDNNKernel<paddle::platform::bfloat16,
ReshapeKernelOpName::reshape2>); ReshapeKernelOpName::reshape2>);
...@@ -397,7 +398,7 @@ REGISTER_OP_KERNEL( ...@@ -397,7 +398,7 @@ REGISTER_OP_KERNEL(
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
flatten, flatten,
MKLDNN, MKLDNN,
paddle::platform::CPUPlace, phi::CPUPlace,
ops::ReshapeMKLDNNKernel<float, ReshapeKernelOpName::flatten>, ops::ReshapeMKLDNNKernel<float, ReshapeKernelOpName::flatten>,
ops::ReshapeMKLDNNKernel<paddle::platform::bfloat16, ops::ReshapeMKLDNNKernel<paddle::platform::bfloat16,
ReshapeKernelOpName::flatten>); ReshapeKernelOpName::flatten>);
...@@ -405,7 +406,7 @@ REGISTER_OP_KERNEL( ...@@ -405,7 +406,7 @@ REGISTER_OP_KERNEL(
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
flatten_grad, flatten_grad,
MKLDNN, MKLDNN,
paddle::platform::CPUPlace, phi::CPUPlace,
ops::ReshapeGradMKLDNNKernel<float, ReshapeKernelOpName::flatten>, ops::ReshapeGradMKLDNNKernel<float, ReshapeKernelOpName::flatten>,
ops::ReshapeGradMKLDNNKernel<paddle::platform::bfloat16, ops::ReshapeGradMKLDNNKernel<paddle::platform::bfloat16,
ReshapeKernelOpName::flatten>); ReshapeKernelOpName::flatten>);
...@@ -413,7 +414,7 @@ REGISTER_OP_KERNEL( ...@@ -413,7 +414,7 @@ REGISTER_OP_KERNEL(
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
flatten2, flatten2,
MKLDNN, MKLDNN,
paddle::platform::CPUPlace, phi::CPUPlace,
ops::ReshapeMKLDNNKernel<float, ReshapeKernelOpName::flatten2>, ops::ReshapeMKLDNNKernel<float, ReshapeKernelOpName::flatten2>,
ops::ReshapeMKLDNNKernel<paddle::platform::bfloat16, ops::ReshapeMKLDNNKernel<paddle::platform::bfloat16,
ReshapeKernelOpName::flatten2>); ReshapeKernelOpName::flatten2>);
...@@ -421,7 +422,7 @@ REGISTER_OP_KERNEL( ...@@ -421,7 +422,7 @@ REGISTER_OP_KERNEL(
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
flatten2_grad, flatten2_grad,
MKLDNN, MKLDNN,
paddle::platform::CPUPlace, phi::CPUPlace,
ops::ReshapeGradMKLDNNKernel<float, ReshapeKernelOpName::flatten2>, ops::ReshapeGradMKLDNNKernel<float, ReshapeKernelOpName::flatten2>,
ops::ReshapeGradMKLDNNKernel<paddle::platform::bfloat16, ops::ReshapeGradMKLDNNKernel<paddle::platform::bfloat16,
ReshapeKernelOpName::flatten2>); ReshapeKernelOpName::flatten2>);
...@@ -12,7 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/platform/mkldnn_reuse.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/phi/backends/onednn/onednn_reuse.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -38,7 +39,7 @@ class ShuffleChannelMKLDNNKernel : public framework::OpKernel<T> { ...@@ -38,7 +39,7 @@ class ShuffleChannelMKLDNNKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
const auto& dev_ctx = ctx.template device_context<phi::OneDNNContext>(); const auto& dev_ctx = ctx.template device_context<phi::OneDNNContext>();
const auto& mkldnn_engine = dev_ctx.GetEngine(); const auto& onednn_engine = dev_ctx.GetEngine();
const auto* x = ctx.Input<phi::DenseTensor>("X"); const auto* x = ctx.Input<phi::DenseTensor>("X");
auto* out = ctx.Output<phi::DenseTensor>("Out"); auto* out = ctx.Output<phi::DenseTensor>("Out");
...@@ -47,7 +48,7 @@ class ShuffleChannelMKLDNNKernel : public framework::OpKernel<T> { ...@@ -47,7 +48,7 @@ class ShuffleChannelMKLDNNKernel : public framework::OpKernel<T> {
const int group = x->dims()[1] / ctx.Attr<int>("group"); const int group = x->dims()[1] / ctx.Attr<int>("group");
ShuffleChannelMKLDNNHandler<T> handler( ShuffleChannelMKLDNNHandler<T> handler(
x, group, mkldnn_engine, ctx.GetPlace()); x, group, onednn_engine, ctx.GetPlace());
auto src_memory_p = handler.AcquireSrcMemory(x); auto src_memory_p = handler.AcquireSrcMemory(x);
auto dst_memory_p = handler.AcquireDstMemory(out); auto dst_memory_p = handler.AcquireDstMemory(out);
...@@ -69,6 +70,6 @@ class ShuffleChannelMKLDNNKernel : public framework::OpKernel<T> { ...@@ -69,6 +70,6 @@ class ShuffleChannelMKLDNNKernel : public framework::OpKernel<T> {
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OP_KERNEL(shuffle_channel, REGISTER_OP_KERNEL(shuffle_channel,
MKLDNN, MKLDNN,
paddle::platform::CPUPlace, phi::CPUPlace,
ops::ShuffleChannelMKLDNNKernel<float>, ops::ShuffleChannelMKLDNNKernel<float>,
ops::ShuffleChannelMKLDNNKernel<paddle::platform::bfloat16>); ops::ShuffleChannelMKLDNNKernel<paddle::platform::bfloat16>);
...@@ -22,9 +22,8 @@ ...@@ -22,9 +22,8 @@
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/platform/device_context.h" #include "paddle/phi/common/place.h"
#include "paddle/fluid/platform/enforce.h" #include "paddle/phi/core/enforce.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/kernel_registry.h"
USE_OP_ITSELF(elementwise_add); USE_OP_ITSELF(elementwise_add);
...@@ -51,7 +50,7 @@ class CacheTester { ...@@ -51,7 +50,7 @@ class CacheTester {
CacheTester() { CacheTester() {
// Clear oneDNN cache // Clear oneDNN cache
auto &pool = platform::DeviceContextPool::Instance(); auto &pool = platform::DeviceContextPool::Instance();
platform::CPUPlace place; phi::CPUPlace place;
onednn_dev_ctx_ = dynamic_cast<phi::OneDNNContext *>(pool.Get(place)); onednn_dev_ctx_ = dynamic_cast<phi::OneDNNContext *>(pool.Get(place));
onednn_dev_ctx_->ResetBlobMap(nullptr); onednn_dev_ctx_->ResetBlobMap(nullptr);
} }
...@@ -140,7 +139,7 @@ void RunOperator(const platform::Place &place, ...@@ -140,7 +139,7 @@ void RunOperator(const platform::Place &place,
TEST(test_conv2d_reuse_cache, cpu_place) { TEST(test_conv2d_reuse_cache, cpu_place) {
framework::DDim dims({1, 16, 32, 64}); framework::DDim dims({1, 16, 32, 64});
platform::CPUPlace p; phi::CPUPlace p;
CacheTester ct; CacheTester ct;
RunOperator<float>(p, "conv2d", dims, "input_signal"); RunOperator<float>(p, "conv2d", dims, "input_signal");
RunOperator<float>(p, "conv2d", dims, "input_signal"); RunOperator<float>(p, "conv2d", dims, "input_signal");
...@@ -152,7 +151,7 @@ TEST(test_conv2d_reuse_cache, cpu_place) { ...@@ -152,7 +151,7 @@ TEST(test_conv2d_reuse_cache, cpu_place) {
TEST(test_conv2d_noreuse_cache, cpu_place) { TEST(test_conv2d_noreuse_cache, cpu_place) {
framework::DDim dims({1, 16, 32, 64}); framework::DDim dims({1, 16, 32, 64});
platform::CPUPlace p; phi::CPUPlace p;
CacheTester ct; CacheTester ct;
RunOperator<float>(p, "conv2d", dims, "input_signal"); RunOperator<float>(p, "conv2d", dims, "input_signal");
RunOperator<float>(p, "conv2d", dims, "input_signal2"); RunOperator<float>(p, "conv2d", dims, "input_signal2");
......
...@@ -22,9 +22,8 @@ ...@@ -22,9 +22,8 @@
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/platform/device_context.h" #include "paddle/phi/common/place.h"
#include "paddle/fluid/platform/enforce.h" #include "paddle/phi/core/enforce.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/kernel_registry.h"
USE_OP_ITSELF(elementwise_add); USE_OP_ITSELF(elementwise_add);
...@@ -137,13 +136,13 @@ bool TestMain(const platform::Place &place, ...@@ -137,13 +136,13 @@ bool TestMain(const platform::Place &place,
TEST(test_softmax_inplace, cpu_place) { TEST(test_softmax_inplace, cpu_place) {
framework::DDim dims({32, 64}); framework::DDim dims({32, 64});
platform::CPUPlace p; phi::CPUPlace p;
ASSERT_TRUE(TestMain<float>(p, "softmax", dims, 1)); ASSERT_TRUE(TestMain<float>(p, "softmax", dims, 1));
} }
TEST(test_relu_inplace, cpu_place) { TEST(test_relu_inplace, cpu_place) {
framework::DDim dims({1, 12, 20, 20}); framework::DDim dims({1, 12, 20, 20});
platform::CPUPlace p; phi::CPUPlace p;
ASSERT_TRUE(TestMain<float>(p, "relu", dims, 1)); ASSERT_TRUE(TestMain<float>(p, "relu", dims, 1));
} }
......
...@@ -22,9 +22,8 @@ ...@@ -22,9 +22,8 @@
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/platform/device_context.h" #include "paddle/phi/common/place.h"
#include "paddle/fluid/platform/enforce.h" #include "paddle/phi/core/enforce.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/kernel_registry.h"
USE_OP_ITSELF(pool2d); USE_OP_ITSELF(pool2d);
...@@ -53,7 +52,7 @@ struct InputVars { ...@@ -53,7 +52,7 @@ struct InputVars {
TEST(test_pool2d_transpose_nhwc, cpu_place) { TEST(test_pool2d_transpose_nhwc, cpu_place) {
framework::DDim dims({1, 4, 8, 512}); // NHWC shape framework::DDim dims({1, 4, 8, 512}); // NHWC shape
framework::DDim expected_dims({1, 7, 512, 3}); // NHWC expected shape framework::DDim expected_dims({1, 7, 512, 3}); // NHWC expected shape
platform::CPUPlace p; phi::CPUPlace p;
framework::Scope scope; framework::Scope scope;
InputVars input_name = {"x", scope.Var("x")->GetMutable<phi::DenseTensor>()}; InputVars input_name = {"x", scope.Var("x")->GetMutable<phi::DenseTensor>()};
...@@ -109,7 +108,7 @@ TEST(test_pool2d_transpose_nhwc, cpu_place) { ...@@ -109,7 +108,7 @@ TEST(test_pool2d_transpose_nhwc, cpu_place) {
TEST(test_pool2d_relu_relu_nhwc, cpu_place) { TEST(test_pool2d_relu_relu_nhwc, cpu_place) {
framework::DDim dims({1, 4, 8, 512}); // NHWC shape framework::DDim dims({1, 4, 8, 512}); // NHWC shape
framework::DDim expected_dims({1, 512, 3, 7}); // NCHW expected shape framework::DDim expected_dims({1, 512, 3, 7}); // NCHW expected shape
platform::CPUPlace p; phi::CPUPlace p;
framework::Scope scope; framework::Scope scope;
InputVars input_name = {"x", scope.Var("x")->GetMutable<phi::DenseTensor>()}; InputVars input_name = {"x", scope.Var("x")->GetMutable<phi::DenseTensor>()};
...@@ -172,7 +171,7 @@ TEST(test_pool2d_relu_relu_nhwc, cpu_place) { ...@@ -172,7 +171,7 @@ TEST(test_pool2d_relu_relu_nhwc, cpu_place) {
TEST(test_pool2d_shape_nhwc, cpu_place) { TEST(test_pool2d_shape_nhwc, cpu_place) {
framework::DDim dims({1, 4, 8, 512}); // NHWC shape framework::DDim dims({1, 4, 8, 512}); // NHWC shape
std::vector<int32_t> expected_dims{1, 3, 7, 512}; // NHWC expected shape std::vector<int32_t> expected_dims{1, 3, 7, 512}; // NHWC expected shape
platform::CPUPlace p; phi::CPUPlace p;
framework::Scope scope; framework::Scope scope;
InputVars input_name = {"x", scope.Var("x")->GetMutable<phi::DenseTensor>()}; InputVars input_name = {"x", scope.Var("x")->GetMutable<phi::DenseTensor>()};
...@@ -227,7 +226,7 @@ TEST(test_pool2d_shape_nhwc, cpu_place) { ...@@ -227,7 +226,7 @@ TEST(test_pool2d_shape_nhwc, cpu_place) {
TEST(test_pool2d_crop_nhwc, cpu_place) { TEST(test_pool2d_crop_nhwc, cpu_place) {
framework::DDim dims({1, 4, 8, 512}); // NHWC shape framework::DDim dims({1, 4, 8, 512}); // NHWC shape
framework::DDim expected_dims({1, 3, 7, 512}); // NCHW expected shape framework::DDim expected_dims({1, 3, 7, 512}); // NCHW expected shape
platform::CPUPlace p; phi::CPUPlace p;
framework::Scope scope; framework::Scope scope;
InputVars input_name = {"x", scope.Var("x")->GetMutable<phi::DenseTensor>()}; InputVars input_name = {"x", scope.Var("x")->GetMutable<phi::DenseTensor>()};
......
...@@ -12,10 +12,9 @@ ...@@ -12,10 +12,9 @@
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/framework/data_layout_transform.h"
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/memory/malloc.h" #include "paddle/fluid/memory/malloc.h"
#include "paddle/fluid/platform/mkldnn_reuse.h" #include "paddle/phi/backends/onednn/onednn_reuse.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -166,10 +165,10 @@ namespace ops = paddle::operators; ...@@ -166,10 +165,10 @@ namespace ops = paddle::operators;
REGISTER_OP_KERNEL(transpose, REGISTER_OP_KERNEL(transpose,
MKLDNN, MKLDNN,
::paddle::platform::CPUPlace, ::phi::CPUPlace,
ops::TransposeMKLDNNOpKernel<float>); ops::TransposeMKLDNNOpKernel<float>);
REGISTER_OP_KERNEL(transpose_grad, REGISTER_OP_KERNEL(transpose_grad,
MKLDNN, MKLDNN,
::paddle::platform::CPUPlace, ::phi::CPUPlace,
ops::TransposeMKLDNNGradOpKernel<float>); ops::TransposeMKLDNNGradOpKernel<float>);
...@@ -12,9 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,9 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
// NOTE(Ruibiao): Difficult to remove code from this header file because too
// many files rely on it through "mkldnn_reuse.h"
#pragma once #pragma once
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
......
...@@ -23,11 +23,10 @@ limitations under the License. */ ...@@ -23,11 +23,10 @@ limitations under the License. */
#include "dnnl.hpp" // NOLINT #include "dnnl.hpp" // NOLINT
#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/phi/backends/onednn/onednn_helper.h" #include "paddle/phi/backends/onednn/onednn_helper.h"
#include "paddle/phi/common/place.h"
namespace paddle { namespace paddle {
#ifdef PADDLE_WITH_MKLDNN #ifdef PADDLE_WITH_MKLDNN
using OneDNNMemoryFormat = dnnl::memory::format_tag;
using phi::OneDNNContext; using phi::OneDNNContext;
#endif #endif
namespace platform { namespace platform {
......
/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <algorithm>
#include <functional>
#include <memory>
#include <numeric>
#include <sstream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/data_layout_transform.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/operators/pool_op.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/phi/backends/onednn/onednn_reuse.h"
namespace paddle {
namespace platform {
using memory = dnnl::memory;
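// Translates the operator's "fuse_activation" attribute into oneDNN eltwise
// post-ops, so the activation runs inside the fused primitive instead of as
// a separate kernel.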
static void AppendActivation(const framework::ExecutionContext& ctx,
dnnl::post_ops& post_ops, // NOLINT
float activation_scale = 1.0f) {
const auto invalid_attribute =
ctx.HasAttr("fuse_activation")
? ctx.Attr<std::string>("fuse_activation").empty()
: true;
if (invalid_attribute) return;
const auto fuse_activation = ctx.Attr<std::string>("fuse_activation");
const auto fuse_alpha =
ctx.HasAttr("fuse_alpha") ? ctx.Attr<float>("fuse_alpha") : 0.0f;
const auto fuse_beta =
ctx.HasAttr("fuse_beta") ? ctx.Attr<float>("fuse_beta") : 0.0f;
if (fuse_activation == "hard_sigmoid") {
post_ops.append_eltwise(activation_scale,
dnnl::algorithm::eltwise_linear,
fuse_alpha,
fuse_beta);
post_ops.append_eltwise(
activation_scale, dnnl::algorithm::eltwise_clip, 0.0f, 1.0f);
} else {
const std::unordered_map<std::string, dnnl::algorithm> activation_map = {
{"abs", dnnl::algorithm::eltwise_abs},
{"clip", dnnl::algorithm::eltwise_clip},
{"gelu", dnnl::algorithm::eltwise_gelu_erf},
{"gelu_erf", dnnl::algorithm::eltwise_gelu_erf},
{"gelu_tanh", dnnl::algorithm::eltwise_gelu_tanh},
{"hard_swish", dnnl::algorithm::eltwise_hardswish},
{"leaky_relu", dnnl::algorithm::eltwise_relu},
{"mish", dnnl::algorithm::eltwise_mish},
{"relu", dnnl::algorithm::eltwise_relu},
{"relu6", dnnl::algorithm::eltwise_bounded_relu},
{"sigmoid", dnnl::algorithm::eltwise_logistic},
{"sqrt", dnnl::algorithm::eltwise_sqrt},
{"swish", dnnl::algorithm::eltwise_swish},
{"tanh", dnnl::algorithm::eltwise_tanh}};
const auto& activation_type = activation_map.find(fuse_activation);
PADDLE_ENFORCE_NE(
activation_type,
activation_map.end(),
platform::errors::InvalidArgument(
"Activation '%s' not found in oneDNN algorithms mapper",
fuse_activation));
post_ops.append_eltwise(
activation_scale, activation_type->second, fuse_alpha, fuse_beta);
}
}
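// Illustrative note (not part of the original file): for
// fuse_activation == "hard_sigmoid" with hypothetical fuse_alpha = 0.2f and
// fuse_beta = 0.5f, the two post-ops above compute
// y = clip(0.2f * x + 0.5f, 0.0f, 1.0f); every other supported name maps to
// a single eltwise entry from activation_map, e.g. "relu6" becomes
// dnnl::algorithm::eltwise_bounded_relu with fuse_alpha as the upper bound.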
static void SetOutMemDescWithUnsqueeze2FuseSupport(
const framework::ExecutionContext& ctx,
phi::DenseTensor* out,
const dnnl::memory::desc& out_md) {
const std::vector<int>& fused_unsqueeze2_axes =
ctx.Attr<std::vector<int>>("fused_unsqueeze2_axes");
const std::vector<int64_t>& op_tz = out_md.dims();
std::vector<int64_t> unsqueezed_op_tz(
op_tz.size() + fused_unsqueeze2_axes.size(), 0);
for (const auto& axis : fused_unsqueeze2_axes) {
int positive_axis = axis < 0 ? unsqueezed_op_tz.size() + axis : axis;
unsqueezed_op_tz[positive_axis] = 1;
}
int j = 0;
for (size_t i = 0; i < unsqueezed_op_tz.size(); ++i) {
if (unsqueezed_op_tz[i] == 0) {
unsqueezed_op_tz[i] = op_tz[j++];
}
}
out->set_mem_desc(out_md.reshape(unsqueezed_op_tz));
out->Resize(phi::make_ddim(unsqueezed_op_tz));
}
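// Worked example with illustrative values: out_md.dims() = {6, 7} and
// fused_unsqueeze2_axes = {0, 2} mark unsqueezed_op_tz = {1, 0, 1, 0} in the
// first loop, and the second loop fills the zeros in order, giving
// {1, 6, 1, 7}; the memory descriptor is reshaped in place and no data moves.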
static void SetOutMemDescWithReshape2FuseSupport(
const framework::ExecutionContext& ctx,
phi::DenseTensor* out,
const dnnl::memory::desc& out_md) {
std::vector<int64_t> fused_reshape2_shape(
ctx.Attr<std::vector<int>>("fused_reshape2_shape").begin(),
ctx.Attr<std::vector<int>>("fused_reshape2_shape").end());
const int out_shape_numel = out->numel();
const int new_shape_numel = std::accumulate(fused_reshape2_shape.begin(),
fused_reshape2_shape.end(),
1,
std::multiplies<int64_t>());
for (size_t i = 0; i < fused_reshape2_shape.size(); ++i) {
if (fused_reshape2_shape[i] == -1) {
fused_reshape2_shape[i] = -out_shape_numel / new_shape_numel;
break;
}
}
out->set_mem_desc(out_md.reshape(fused_reshape2_shape));
out->Resize(phi::make_ddim(fused_reshape2_shape));
}
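// Worked example with illustrative values: out->numel() = 24 and
// fused_reshape2_shape = {-1, 4} give new_shape_numel = -4, so the -1 entry
// is replaced by -24 / -4 = 6 and the output is resized to {6, 4}.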
static void SetOutMemDescWithLogicalLayoutFusesSupport(
const framework::ExecutionContext& ctx,
phi::DenseTensor* out,
const dnnl::memory::desc& out_md) {
if (ctx.HasAttr("fused_unsqueeze2_axes")) {
SetOutMemDescWithUnsqueeze2FuseSupport(ctx, out, out_md);
} else if (ctx.HasAttr("fused_reshape2_shape")) {
SetOutMemDescWithReshape2FuseSupport(ctx, out, out_md);
} else if (ctx.HasAttr("fused_squeeze2_axes")) {
out->set_mem_desc(out_md);
out->Resize(phi::make_ddim(out_md.dims()));
} else {
out->set_mem_desc(out_md);
}
}
template <typename XT, typename YT, typename OT>
class MatMulV2MKLDNNHandler
: public phi::funcs::OneDNNHandlerNoCachingT<XT, dnnl::matmul> {
public:
MatMulV2MKLDNNHandler(const framework::ExecutionContext& ctx,
const dnnl::engine engine,
paddle::platform::Place cpu_place,
const std::vector<int64_t>& x_org_dims,
bool trans_x,
const std::vector<int64_t>& y_org_dims,
bool trans_y,
bool is_output_fused,
const std::vector<int64_t>& x_strides_override,
const std::vector<int64_t>& y_strides_override)
: phi::funcs::OneDNNHandlerNoCachingT<XT, dnnl::matmul>(engine,
cpu_place) {
// M X K * K X N
std::vector<int64_t> x_dims(x_org_dims);
std::vector<int64_t> y_dims(y_org_dims);
const int MB_idx = x_dims.size() - 3;
const int H_idx = x_dims.size() - 2;
const int W_idx = x_dims.size() - 1;
if (trans_x) std::swap(x_dims[H_idx], x_dims[W_idx]);
if (trans_y) std::swap(y_dims[H_idx], y_dims[W_idx]);
const memory::dim M = x_dims[H_idx];
const memory::dim K = x_dims[W_idx];
const memory::dim N = y_dims[W_idx];
std::vector<int64_t> x_strides(x_dims.size() - 3, 1);
std::vector<int64_t> y_strides(x_dims.size() - 3, 1);
std::vector<int64_t> out_strides(x_dims.size() - 3, 1);
std::vector<int64_t> out_ddims(x_dims.size() - 3, 1);
x_strides.reserve(x_dims.size());
y_strides.reserve(x_dims.size());
out_strides.reserve(x_dims.size());
if (!x_strides_override.empty()) {
x_strides = x_strides_override;
} else {
if (!trans_x) {
x_strides.insert(x_strides.end(), {M * K, K, 1});
} else {
x_strides.insert(x_strides.end(), {M * K, 1, M});
}
}
if (!y_strides_override.empty()) {
y_strides = y_strides_override;
} else {
if (!trans_y) {
y_strides.insert(y_strides.end(), {N * K, N, 1});
} else {
y_strides.insert(y_strides.end(), {N * K, 1, K});
}
}
out_strides.insert(out_strides.end(), {M * N, N, 1});
out_ddims.insert(out_ddims.end(),
{std::max(x_dims[MB_idx], y_dims[MB_idx]), M, N});
for (int i = x_dims.size() - 4; i >= 0; --i) {
out_ddims[i] = std::max(x_dims[i], y_dims[i]);
if (x_strides_override.empty()) {
x_strides[i] = x_dims[i + 1] * x_strides[i + 1];
}
if (y_strides_override.empty()) {
y_strides[i] = y_dims[i + 1] * y_strides[i + 1];
}
out_strides[i] = out_ddims[i + 1] * out_strides[i + 1];
}
// TODO(jczaja): Why not for int8??
if (!phi::funcs::is_int8<OT>() && is_output_fused) {
out_strides = FakeTransposeStrides(out_ddims);
}
auto x_md =
memory::desc(x_dims, phi::funcs::OneDNNGetDataType<XT>(), x_strides);
auto y_md =
memory::desc(y_dims, phi::funcs::OneDNNGetDataType<YT>(), y_strides);
auto out_md = memory::desc(
out_ddims, phi::funcs::OneDNNGetDataType<OT>(), out_strides);
const dnnl::primitive_attr matmul_attrs = CreateMatmulAttrs(ctx);
this->AcquireForwardPrimitiveDescriptor(matmul_attrs, x_md, y_md, out_md);
}
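// A minimal sketch of the stride bookkeeping above for plain 3D inputs
// (illustrative values): x_org_dims = {2, 3, 4}, y_org_dims = {2, 4, 5} and
// trans_x = trans_y = false give M = 3, K = 4, N = 5,
// x_strides = {12, 4, 1}, y_strides = {20, 5, 1}, out_ddims = {2, 3, 5} and
// out_strides = {15, 5, 1}; the broadcast loop is skipped because there are
// no dimensions in front of the batch dimension.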
float ComputeOutputScale(const framework::ExecutionContext& ctx) {
float alpha = ctx.HasAttr("alpha") ? ctx.Attr<float>("alpha") : 1.0f;
if (ctx.HasAttr("Scale_x") && ctx.HasAttr("Scale_y") &&
ctx.HasAttr("Scale_out")) {
float scale_x = ctx.Attr<float>("Scale_x");
float scale_y = ctx.Attr<float>("Scale_y");
bool force_fp32_out = ctx.HasAttr("force_fp32_output")
? ctx.Attr<bool>("force_fp32_output")
: false;
float scale_out = force_fp32_out ? 1.f : ctx.Attr<float>("Scale_out");
alpha *= scale_out / (scale_x * scale_y);
}
return alpha;
}
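// Example arithmetic (illustrative values): alpha = 1.0f, Scale_x = 0.5f,
// Scale_y = 0.5f, Scale_out = 1.0f and force_fp32_output = false return
// 1.0f * 1.0f / (0.5f * 0.5f) = 4.0f, which CreateMatmulAttrs installs as
// the primitive's output scale.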
dnnl::primitive_attr CreateMatmulAttrs(
const framework::ExecutionContext& ctx) {
dnnl::primitive_attr matmul_attrs;
dnnl::post_ops post_operations;
float scale_out = ComputeOutputScale(ctx);
if (scale_out != 1.0f) {
matmul_attrs.set_output_scales(0, {scale_out});
}
if (ctx.HasInput("ResidualData")) {
auto* residual_data = ctx.Input<phi::DenseTensor>("ResidualData");
auto residual_data_tz = phi::vectorize(residual_data->dims());
auto residual_data_md = memory::desc(residual_data_tz,
phi::funcs::OneDNNGetDataType<OT>(),
dnnl::memory::format_tag::any);
post_operations.append_binary(dnnl::algorithm::binary_add,
residual_data_md);
if (ctx.HasAttr("Scale_in_eltwise")) {
float sum_scale = scale_out / ctx.Attr<float>("Scale_in_eltwise");
post_operations.append_sum(sum_scale);
}
}
AppendActivation(ctx, post_operations);
if (ctx.HasAttr("fused_output_scale")) {
float scale_alpha = ctx.Attr<float>("fused_output_scale");
post_operations.append_eltwise(
1.0, dnnl::algorithm::eltwise_linear, scale_alpha, 0.0f);
}
matmul_attrs.set_post_ops(post_operations);
return matmul_attrs;
}
std::vector<int64_t> FakeTransposeStrides(
const std::vector<int64_t>& matmul_out_dims) const {
// fusing matmul_v2 + transpose + reshape guarantees that the output is 4D
// and the transpose axes are {0, 2, 1, 3}
std::vector<int64_t> transpose_axis = {0, 2, 1, 3};
std::vector<int64_t> fake_strides(transpose_axis.size());
int ndims = static_cast<int>(transpose_axis.size());
int total_stride = 1;
for (int i = ndims - 1; i >= 0; --i) {
fake_strides[transpose_axis[i]] = total_stride;
total_stride *= matmul_out_dims[transpose_axis[i]];
}
return fake_strides;
}
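// Worked example (illustrative values): matmul_out_dims = {2, 12, 128, 64}
// with the fixed axes {0, 2, 1, 3} yields fake_strides = {98304, 64, 768, 1},
// i.e. memory is laid out as if the tensor had already been permuted to
// {2, 128, 12, 64}.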
std::shared_ptr<memory> AcquireWeightsMemory(const phi::DenseTensor* input) {
const YT* input_data = input->data<YT>();
return this->AcquireMemoryFromPrimitive(
this->fwd_pd_->weights_desc(),
phi::funcs::to_void_cast<YT>(input_data));
}
std::shared_ptr<dnnl::memory> AcquireDstMemory(phi::DenseTensor* output) {
// We cannot use the base AcquireDstMemory, as it makes an allocation request
// based on the DST memory primitive size. This is fine in general, but in
// MatMul we have a primitive that covers only one batch of data and then
// shifts the pointer for every new batch. Hence the phi::DenseTensor size is
// bigger than the dst memory primitive size, so we would request less memory
// than is actually there, which triggers an assertion. As there is no 'any'
// format here, we can leave the default size of phi::DenseTensor as computed
// in ComputeInferShape
OT* ptr = output->mutable_data<OT>(this->place_);
return this->AcquireMemoryFromPrimitive(this->fwd_pd_->dst_desc(), ptr);
}
};
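// Hypothetical usage sketch, assuming kernel-side names (engine, place, dims
// and transpose flags would come from the ExecutionContext):
//
//   MatMulV2MKLDNNHandler<float, float, float> handler(
//       ctx, onednn_engine, ctx.GetPlace(), x_dims, trans_x, y_dims, trans_y,
//       /*is_output_fused=*/false, /*x_strides_override=*/{},
//       /*y_strides_override=*/{});
//   auto src_mem = handler.AcquireSrcMemory(x);
//   auto weights_mem = handler.AcquireWeightsMemory(y);
//   auto dst_mem = handler.AcquireDstMemory(out);
//   auto matmul_p = handler.AcquireForwardPrimitive();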
} // namespace platform
} // namespace paddle
...@@ -178,7 +178,7 @@ void ComputeINT8(const OneDNNContext& dev_ctx, ...@@ -178,7 +178,7 @@ void ComputeINT8(const OneDNNContext& dev_ctx,
const std::string& unique_name = const std::string& unique_name =
dev_ctx.GetInputsName("Input")[0] + dev_ctx.GetInputsName("Filter")[0]; dev_ctx.GetInputsName("Input")[0] + dev_ctx.GetInputsName("Filter")[0];
PD_VISIT_FLOAT_AND_INT8_TYPES( PD_VISIT_FLOAT_AND_INT8_TYPES(
filter->dtype(), "ConvMKLDNNHandlerT", ([&] { filter->dtype(), "ConvOneDNNHandlerT", ([&] {
onednn::ConvOneDNNHandlerT<T, data_t, T_out> handler(dev_ctx, onednn::ConvOneDNNHandlerT<T, data_t, T_out> handler(dev_ctx,
onednn_engine, onednn_engine,
dev_ctx.GetPlace(), dev_ctx.GetPlace(),
......
...@@ -40,7 +40,7 @@ class ConvOneDNNHandlerT ...@@ -40,7 +40,7 @@ class ConvOneDNNHandlerT
dnnl::convolution_backward_weights> { dnnl::convolution_backward_weights> {
public: public:
ConvOneDNNHandlerT(const OneDNNContext& dev_ctx, ConvOneDNNHandlerT(const OneDNNContext& dev_ctx,
const dnnl::engine mkldnn_engine, const dnnl::engine onednn_engine,
Place cpu_place, Place cpu_place,
const phi::DenseTensor* input, const phi::DenseTensor* input,
const phi::DenseTensor* filter, const phi::DenseTensor* filter,
...@@ -63,7 +63,7 @@ class ConvOneDNNHandlerT ...@@ -63,7 +63,7 @@ class ConvOneDNNHandlerT
dnnl::convolution_backward_data, dnnl::convolution_backward_data,
dnnl::convolution_backward_weights>( dnnl::convolution_backward_weights>(
dev_ctx, dev_ctx,
mkldnn_engine, onednn_engine,
cpu_place, cpu_place,
funcs::CreateKey( funcs::CreateKey(
dev_ctx, phi::vectorize(input->dims()), unique_name)) { dev_ctx, phi::vectorize(input->dims()), unique_name)) {
......