Unverified Commit 4d5a5533 authored by Sławomir Siwek, committed by GitHub

[PHI decoupling] Remove fluid imports from MKLDNN code (#48981)

* fix wrong handler name

* mkldnn_engine -> onednn_engine

* remove fluid/errors.h imports

* remove fluid/enforce.h imports

* remove note and unnecessary import

* remove fluid/pretty_log.h imports

* remove fluid/place.h imports

* remove fluid/data_layout_transform.h imports

* remove fluid/device_context.h imports

* remove mkldnn_helper code

* remove fluid/mkldnn_reuse.h imports

* pretty_log import
Parent 32633c8e
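
In short, the commit mechanically swaps fluid-layer headers and symbols for their decoupled PHI/utils counterparts. A minimal before/after sketch of the recurring pattern (the grouping is illustrative; every replacement below appears in the hunks that follow):

    // Before: fluid-layer dependencies.
    #include "paddle/fluid/platform/enforce.h"
    #include "paddle/fluid/platform/errors.h"
    #include "paddle/fluid/platform/place.h"
    #include "paddle/fluid/string/pretty_log.h"
    auto place = paddle::platform::CPUPlace();

    // After: the PHI equivalents used throughout this commit.
    #include "paddle/phi/common/place.h"
    #include "paddle/phi/core/enforce.h"
    #include "paddle/phi/core/errors.h"
    #include "paddle/utils/string/pretty_log.h"
    auto place = phi::CPUPlace();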
......@@ -16,8 +16,8 @@
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/string/pretty_log.h"
#include "paddle/phi/core/enforce.h"
#include "paddle/utils/string/pretty_log.h"
namespace paddle {
namespace framework {
......
......@@ -19,7 +19,6 @@
#include "paddle/fluid/framework/op_desc.h"
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/platform/errors.h"
namespace paddle {
namespace framework {
......
......@@ -28,7 +28,7 @@ namespace ir {
void ComputePropagateScalesMkldnnPass::GetTensorFromVector(
const std::vector<float>& data_v, phi::DenseTensor* tensor) const {
const int size = static_cast<int>(data_v.size());
auto* data = tensor->mutable_data<float>({size}, platform::CPUPlace());
auto* data = tensor->mutable_data<float>({size}, phi::CPUPlace());
for (int i = 0; i < size; i++) {
data[i] = data_v[i];
}
......@@ -123,7 +123,7 @@ void ComputePropagateScalesMkldnnPass::ComputeVarScales(
std::vector<int64_t> reshape_dims = {dims[0], volume};
tmp_tensor.Resize(phi::make_ddim(reshape_dims));
auto* weight_data = weight_tensor->data<float>();
auto* tmp_data = tmp_tensor.mutable_data<float>(platform::CPUPlace());
auto* tmp_data = tmp_tensor.mutable_data<float>(phi::CPUPlace());
for (int i = 0; i < weight_tensor->numel(); i++) {
tmp_data[i] = std::abs(weight_data[i]);
}
......@@ -365,7 +365,7 @@ void ComputePropagateScalesMkldnnPass::UpdateScaleOpInOutScales(
auto pair = iter->second;
const auto tensor = pair.second;
tmp_tensor.Resize(tensor.dims());
auto* data = tmp_tensor.mutable_data<float>(platform::CPUPlace());
auto* data = tmp_tensor.mutable_data<float>(phi::CPUPlace());
auto* src_data = tensor.data<float>();
for (int i = 0; i < tensor.numel(); i++) {
if (out_iter != var_quant_scales->end()) {
......
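
The hunks above show the same mechanical substitution: phi::CPUPlace is a drop-in replacement for platform::CPUPlace wherever a tensor's CPU buffer is requested. A minimal sketch of the allocation pattern (tensor and size names are illustrative):

    phi::DenseTensor scratch;
    const int size = 8;  // illustrative element count
    scratch.Resize(phi::make_ddim({size}));
    // mutable_data allocates on the given place and returns a typed pointer.
    float* buf = scratch.mutable_data<float>(phi::CPUPlace());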
......@@ -17,7 +17,7 @@
#include "paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.h"
#include "paddle/fluid/framework/naive_executor.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/phi/common/place.h"
namespace paddle {
namespace framework {
......@@ -119,7 +119,7 @@ class ComputePropagateScalesMkldnnPassTest : public testing::Test {
const ProgramDesc& prog,
Scope* scope,
const std::initializer_list<std::string>& variable_names) {
auto place = paddle::platform::CPUPlace();
auto place = phi::CPUPlace();
NaiveExecutor exe{place};
exe.CreateVariables(prog, 0, true, scope);
......@@ -148,19 +148,19 @@ class ComputePropagateScalesMkldnnPassTest : public testing::Test {
auto* wx_tensor = wx_var->GetMutable<phi::DenseTensor>();
wx_tensor->Resize(phi::make_dim(wx.size(), wx[0].size()));
for (size_t i = 0; i < wx.size(); i++)
std::copy(begin(wx[i]),
std::copy(
begin(wx[i]),
end(wx[i]),
wx_tensor->mutable_data<float>(platform::CPUPlace()) +
i * wx[0].size());
wx_tensor->mutable_data<float>(phi::CPUPlace()) + i * wx[0].size());
auto* wh_var = scope.FindVar(wh_var_names);
auto* wh_tensor = wh_var->GetMutable<phi::DenseTensor>();
wh_tensor->Resize(phi::make_dim(wh.size(), wh[0].size()));
for (size_t i = 0; i < wh.size(); i++)
std::copy(begin(wh[i]),
std::copy(
begin(wh[i]),
end(wh[i]),
wh_tensor->mutable_data<float>(platform::CPUPlace()) +
i * wh[0].size());
wh_tensor->mutable_data<float>(phi::CPUPlace()) + i * wh[0].size());
if (type == "gru") {
ComputeGruWeightScales(
graph, &scope, wx_name, wh_name, &var_quant_scales);
......@@ -283,7 +283,7 @@ TEST_F(ComputePropagateScalesMkldnnPassTest, get_scales_function) {
var_tensor.Resize(phi::make_dim(values.size(), 1));
std::copy(begin(values),
end(values),
var_tensor.mutable_data<float>(platform::CPUPlace()));
var_tensor.mutable_data<float>(phi::CPUPlace()));
std::vector<float> results = GetScales(&var_tensor, 0);
ASSERT_EQ(results.size(), std::size_t(1));
......@@ -310,7 +310,7 @@ TEST_F(ComputePropagateScalesMkldnnPassTest, compute_var_scales) {
weight_tensor->Resize(phi::make_dim(1, values.size()));
std::copy(begin(values),
end(values),
weight_tensor->mutable_data<float>(platform::CPUPlace()));
weight_tensor->mutable_data<float>(phi::CPUPlace()));
auto max_val = *std::max_element(values.begin(), values.end());
......@@ -338,7 +338,7 @@ TEST_F(ComputePropagateScalesMkldnnPassTest, update_relu_output_scales) {
StringPairMap var_quant_scales;
for (auto& var_name : conv_variable_names) {
phi::DenseTensor tensor;
auto* data = tensor.mutable_data<float>({1}, platform::CPUPlace());
auto* data = tensor.mutable_data<float>({1}, phi::CPUPlace());
data[0] = 10;
auto pair = std::make_pair(false, tensor);
var_quant_scales.insert(std::make_pair(var_name, pair));
......
......@@ -15,8 +15,8 @@
#include "paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.h"
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/platform/mkldnn_reuse.h"
#include "paddle/fluid/string/pretty_log.h"
#include "paddle/phi/backends/onednn/onednn_reuse.h"
#include "paddle/utils/string/pretty_log.h"
namespace paddle {
namespace framework {
......
......@@ -80,7 +80,7 @@ void recompute_bias_and_weights(const Scope* scope,
ac_bias_tensor.data<float>(), ac_bias_tensor.numel(), 1);
EigenVectorArrayMap eltwise_y_in_array(
eltwise_y_in_tensor->mutable_data<float>(platform::CPUPlace()),
eltwise_y_in_tensor->mutable_data<float>(phi::CPUPlace()),
eltwise_y_in_tensor->numel(),
1);
......@@ -91,7 +91,7 @@ void recompute_bias_and_weights(const Scope* scope,
scope->FindVar(conv_weight->Name())->GetMutable<phi::DenseTensor>();
auto weights_shape = weights->dims();
auto weights_shape_2d = phi::flatten_to_2d(weights_shape, 1);
auto* weights_data = weights->mutable_data<float>(platform::CPUPlace());
auto* weights_data = weights->mutable_data<float>(phi::CPUPlace());
EigenMatrixArrayMap weights_array_2d(
weights_data, weights_shape_2d[0], weights_shape_2d[1]);
......@@ -233,7 +233,7 @@ void ConvAffineChannelFusePass::ApplyImpl(ir::Graph* graph) const {
auto* eltwise_y_in_tensor =
scope->Var(eltwise_y_in_node->Name())->GetMutable<phi::DenseTensor>();
eltwise_y_in_tensor->Resize(ac_bias_tensor->dims());
std::fill_n(eltwise_y_in_tensor->mutable_data<float>(platform::CPUPlace()),
std::fill_n(eltwise_y_in_tensor->mutable_data<float>(phi::CPUPlace()),
eltwise_y_in_tensor->numel(),
0.0f);
......
......@@ -19,8 +19,8 @@
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/string/pretty_log.h"
#include "paddle/phi/core/enforce.h"
#include "paddle/utils/string/pretty_log.h"
namespace paddle {
namespace framework {
......@@ -263,7 +263,7 @@ phi::DenseTensor tensor_apply_eltwise(const phi::DenseTensor& vec_a,
vec_y.Resize(vec_a.dims());
const float* a = vec_a.data<float>();
const float* b = vec_b.data<float>();
float* y = vec_y.mutable_data<float>(platform::CPUPlace());
float* y = vec_y.mutable_data<float>(phi::CPUPlace());
for (int i = 0; i < vec_a.numel(); i++) {
y[i] = f(a[i], b[i]);
}
......
......@@ -19,7 +19,7 @@
#include "paddle/fluid/framework/op_proto_maker.h"
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/imperative/type_defs.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/phi/common/place.h"
namespace paddle {
namespace framework {
......@@ -112,7 +112,7 @@ void InitTensorHolder(Scope* scope,
void MainTest(bool convWithExistingBias) {
auto prog = BuildProgramDesc(convWithExistingBias);
std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
auto place = paddle::platform::CPUPlace();
auto place = phi::CPUPlace();
NaiveExecutor exe{place};
Scope scope;
// Init scope, as it is used in pass
......
......@@ -16,7 +16,7 @@
#include "paddle/fluid/framework/ir/graph_traits.h"
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/string/pretty_log.h"
#include "paddle/utils/string/pretty_log.h"
namespace paddle {
namespace framework {
......
......@@ -18,7 +18,7 @@ limitations under the License. */
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/string/pretty_log.h"
#include "paddle/utils/string/pretty_log.h"
namespace paddle {
namespace framework {
......
......@@ -17,7 +17,6 @@
#include "paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.h"
#include "paddle/fluid/framework/naive_executor.h"
#include "paddle/fluid/imperative/type_defs.h"
#include "paddle/fluid/platform/place.h"
namespace paddle {
namespace framework {
......@@ -68,23 +67,16 @@ void SetOp(ProgramDesc* prog,
static const std::initializer_list<std::string> variable_names{
"z", "a", "b", "c", "d", "e", "f", "g", "h", "i"};
void PreparePass(std::unique_ptr<ir::Graph>& graph,
int* original_nodes_num,
int* current_nodes_num) {
auto pass = PassRegistry::Instance().Get("cpu_bfloat16_pass");
*original_nodes_num = graph->Nodes().size();
graph.reset(pass->Apply(graph.release()));
*current_nodes_num = graph->Nodes().size();
}
void MainTest(const ProgramDesc& prog,
const int& quant_count,
const int& dequant_count,
const int& added_nodes_count) {
auto graph = std::make_unique<ir::Graph>(prog);
int original_nodes_num, current_nodes_num;
PreparePass(graph, &original_nodes_num, &current_nodes_num);
auto pass = PassRegistry::Instance().Get("cpu_bfloat16_pass");
int original_nodes_num = graph->Nodes().size();
graph.reset(pass->Apply(graph.release()));
int current_nodes_num = graph->Nodes().size();
int quantize_nodes_count = 0;
int dequantize_nodes_count = 0;
......
......@@ -19,7 +19,7 @@ limitations under the License. */
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/fluid/string/pretty_log.h"
#include "paddle/utils/string/pretty_log.h"
namespace paddle {
namespace framework {
......
......@@ -20,7 +20,7 @@
#include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/fluid/string/pretty_log.h"
#include "paddle/utils/string/pretty_log.h"
namespace paddle {
namespace framework {
......@@ -1204,8 +1204,7 @@ void CPUQuantizePass::QuantizeMultiGru(Graph* graph) const {
auto* w_scale_tensor_dst =
scope->Var(w_scale_node->Name())->GetMutable<phi::DenseTensor>();
w_scale_tensor_dst->Resize(scale_tensor_src.dims());
auto* dst_data =
w_scale_tensor_dst->mutable_data<float>(platform::CPUPlace());
auto* dst_data = w_scale_tensor_dst->mutable_data<float>(phi::CPUPlace());
EigenVectorArrayMapFloat eigen_tensor_dst{dst_data,
w_scale_tensor_dst->numel()};
eigen_tensor_dst =
......
......@@ -19,7 +19,7 @@
#include "paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h" // NOLINT
#include "paddle/fluid/framework/naive_executor.h"
#include "paddle/fluid/imperative/type_defs.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/phi/common/place.h"
namespace paddle {
namespace framework {
......@@ -146,7 +146,7 @@ void PreparePass(std::unique_ptr<ir::Graph>* graph,
int* current_nodes_num,
std::string var_without_scale = "",
std::string var_signed = "") {
auto place = paddle::platform::CPUPlace();
auto place = phi::CPUPlace();
NaiveExecutor exe{place};
Scope scope;
exe.CreateVariables(prog, 0, true, &scope);
......
......@@ -18,9 +18,9 @@
#include <string>
#include <vector>
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/fluid/string/pretty_log.h"
#include "paddle/phi/core/enforce.h"
#include "paddle/utils/string/pretty_log.h"
namespace paddle {
namespace framework {
......
......@@ -16,7 +16,7 @@
#include "paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.h"
#include "paddle/fluid/framework/naive_executor.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/phi/common/place.h"
namespace paddle {
namespace framework {
......@@ -722,7 +722,7 @@ void InitTensorHolder(Scope* scope,
}
void PrepareGraph(std::unique_ptr<ir::Graph>* graph, const ProgramDesc& prog) {
auto place = paddle::platform::CPUPlace();
auto place = phi::CPUPlace();
NaiveExecutor exe{place};
Scope scope;
exe.CreateVariables(prog, 0, true, &scope);
......
......@@ -16,9 +16,9 @@
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/mkldnn_reuse.h"
#include "paddle/fluid/string/pretty_log.h"
#include "paddle/phi/backends/onednn/onednn_reuse.h"
#include "paddle/phi/core/enforce.h"
#include "paddle/utils/string/pretty_log.h"
namespace paddle {
namespace framework {
......
......@@ -15,8 +15,8 @@
#include "paddle/fluid/framework/ir/mkldnn/fc_act_mkldnn_fuse_pass.h"
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/platform/mkldnn_reuse.h"
#include "paddle/fluid/string/pretty_log.h"
#include "paddle/phi/backends/onednn/onednn_reuse.h"
#include "paddle/utils/string/pretty_log.h"
namespace paddle {
namespace framework {
......
......@@ -16,7 +16,7 @@
#include "paddle/fluid/framework/ir/graph_traits.h"
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/string/pretty_log.h"
#include "paddle/utils/string/pretty_log.h"
namespace paddle {
namespace framework {
......
......@@ -14,8 +14,8 @@
#include "paddle/fluid/framework/ir/mkldnn/fc_mkldnn_pass.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/string/pretty_log.h"
#include "paddle/phi/core/enforce.h"
#include "paddle/utils/string/pretty_log.h"
namespace paddle {
namespace framework {
......
......@@ -15,8 +15,8 @@
#include "paddle/fluid/framework/ir/mkldnn/int8_scale_calculation_mkldnn_pass.h"
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/phi/core/enforce.h"
namespace paddle {
namespace framework {
......
......@@ -17,7 +17,7 @@
#include <string>
#include <vector>
#include "paddle/fluid/platform/enforce.h"
#include "paddle/phi/core/enforce.h"
namespace paddle {
namespace framework {
......
......@@ -15,8 +15,8 @@
#include "paddle/fluid/framework/ir/mkldnn/layer_norm_onednn_optimization_pass.h"
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/platform/mkldnn_reuse.h"
#include "paddle/fluid/string/pretty_log.h"
#include "paddle/phi/backends/onednn/onednn_reuse.h"
#include "paddle/utils/string/pretty_log.h"
namespace paddle {
namespace framework {
......@@ -77,7 +77,7 @@ void LayerNormOneDNNOptimizationPass::ApplyImpl(Graph *graph) const {
scale_shift_tensor->Resize(phi::make_ddim({channels * 2}));
memcpy(scale_shift_tensor->mutable_data<float>(platform::CPUPlace()),
memcpy(scale_shift_tensor->mutable_data<float>(phi::CPUPlace()),
ln_scale_tensor->data<float>(),
channels * sizeof(float));
......
......@@ -15,8 +15,8 @@
#include "paddle/fluid/framework/ir/mkldnn/matmul_activation_mkldnn_fuse_pass.h"
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/platform/mkldnn_reuse.h"
#include "paddle/fluid/string/pretty_log.h"
#include "paddle/phi/backends/onednn/onednn_reuse.h"
#include "paddle/utils/string/pretty_log.h"
namespace paddle {
namespace framework {
......
......@@ -16,7 +16,7 @@
#include "paddle/fluid/framework/ir/graph_traits.h"
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/string/pretty_log.h"
#include "paddle/utils/string/pretty_log.h"
namespace paddle {
namespace framework {
......
......@@ -13,9 +13,9 @@
// limitations under the License.
#include "paddle/fluid/framework/ir/mkldnn/matmul_transpose_reshape_mkldnn_fuse_pass.h"
#include <paddle/fluid/string/pretty_log.h>
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/phi/core/enforce.h"
#include "paddle/utils/string/pretty_log.h"
namespace paddle {
namespace framework {
......
......@@ -23,7 +23,7 @@
#include "paddle/fluid/framework/ir/pass_tester_helper.h"
#include "paddle/fluid/framework/naive_executor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/kernel_registry.h"
PD_DECLARE_KERNEL(conv2d_transpose, CPU, ALL_LAYOUT);
......@@ -195,7 +195,7 @@ class MKLDNNConvBatchNormPassTest {
void FillTensorWithRandomData(phi::DenseTensor* tnsr,
float lowb,
float upb,
platform::CPUPlace place) {
phi::CPUPlace place) {
float* ptr = tnsr->mutable_data<float>(place);
// Initialize input data
std::uniform_real_distribution<float> dist(static_cast<float>(lowb),
......@@ -219,7 +219,7 @@ class MKLDNNConvBatchNormPassTest {
std::unique_ptr<ir::Graph> graph(new ir::Graph(base_prog));
Scope scope;
auto place = paddle::platform::CPUPlace();
auto place = phi::CPUPlace();
NaiveExecutor exe{place};
auto pass = PassRegistry::Instance().Get(
......
......@@ -140,7 +140,7 @@ static void GetInfoFromTheFirstOp(ir::Graph* graph,
op_desc->GetAttr(vector_name));
phi::DenseTensor tensor;
const int size = static_cast<int>(scales_vector.size());
auto data = tensor.mutable_data<double>({size}, platform::CPUPlace());
auto data = tensor.mutable_data<double>({size}, phi::CPUPlace());
std::copy(scales_vector.begin(), scales_vector.end(), data);
auto pair = std::make_pair(is_unsigned, tensor);
info_map->insert(std::make_pair(var_name, pair));
......
......@@ -18,8 +18,8 @@
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/platform/errors.h"
#include "paddle/fluid/string/pretty_log.h"
#include "paddle/phi/core/errors.h"
#include "paddle/utils/string/pretty_log.h"
namespace paddle {
namespace framework {
......@@ -44,12 +44,11 @@ std::vector<std::string> JoinInputs(Node* op1,
void MultiGRUFusePass::ApplyImpl(ir::Graph* graph) const {
VLOG(3) << "Fusing two concatenated multi_gru ops.";
PADDLE_ENFORCE_NOT_NULL(graph,
platform::errors::InvalidArgument(
phi::errors::InvalidArgument(
"Pointer to graph argument cannot be NULL."));
FusePassBase::Init(name_scope_, graph);
PADDLE_ENFORCE_NOT_NULL(
param_scope(),
platform::errors::InvalidArgument("Scope cannot be nullptr."));
param_scope(), phi::errors::InvalidArgument("Scope cannot be nullptr."));
GraphPatternDetector gpd;
patterns::TwoFusionGruConcat pattern{gpd.mutable_pattern(), name_scope_};
......
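
These enforce-macro call sites show the error-namespace half of the decoupling: the message factories formerly reached through platform::errors are now used as phi::errors with unchanged signatures. A minimal sketch of the pattern from this pass:

    PADDLE_ENFORCE_NOT_NULL(
        graph,
        phi::errors::InvalidArgument(
            "Pointer to graph argument cannot be NULL."));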
......@@ -21,9 +21,9 @@
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/platform/errors.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/fluid/string/pretty_log.h"
#include "paddle/phi/core/errors.h"
#include "paddle/utils/string/pretty_log.h"
namespace paddle {
namespace framework {
......@@ -48,12 +48,11 @@ std::vector<std::string> JoinInputs(Node* op1,
void MultiGruSeqFusePass::ApplyImpl(ir::Graph* graph) const {
VLOG(3) << "Fusing two consecutive multi_gru ops.";
PADDLE_ENFORCE_NOT_NULL(graph,
platform::errors::InvalidArgument(
phi::errors::InvalidArgument(
"Pointer to graph argument cannot be NULL."));
FusePassBase::Init(name_scope_, graph);
PADDLE_ENFORCE_NOT_NULL(
param_scope(),
platform::errors::InvalidArgument("Scope cannot be nullptr."));
param_scope(), phi::errors::InvalidArgument("Scope cannot be nullptr."));
GraphPatternDetector gpd;
patterns::MultiGruSeq pattern{gpd.mutable_pattern(), name_scope_};
......
......@@ -15,8 +15,8 @@
#include "paddle/fluid/framework/ir/mkldnn/operator_reshape2_onednn_fuse_pass.h"
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/platform/mkldnn_reuse.h"
#include "paddle/fluid/string/pretty_log.h"
#include "paddle/phi/backends/onednn/onednn_reuse.h"
#include "paddle/utils/string/pretty_log.h"
namespace paddle {
namespace framework {
......
......@@ -15,8 +15,8 @@
#include "paddle/fluid/framework/ir/mkldnn/operator_scale_onednn_fuse_pass.h"
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/platform/mkldnn_reuse.h"
#include "paddle/fluid/string/pretty_log.h"
#include "paddle/phi/backends/onednn/onednn_reuse.h"
#include "paddle/utils/string/pretty_log.h"
namespace paddle {
namespace framework {
......
......@@ -15,8 +15,8 @@
#include "paddle/fluid/framework/ir/mkldnn/operator_unsqueeze2_onednn_fuse_pass.h"
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/platform/mkldnn_reuse.h"
#include "paddle/fluid/string/pretty_log.h"
#include "paddle/phi/backends/onednn/onednn_reuse.h"
#include "paddle/utils/string/pretty_log.h"
namespace paddle {
namespace framework {
......
......@@ -16,7 +16,7 @@
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/fluid/string/pretty_log.h"
#include "paddle/utils/string/pretty_log.h"
namespace paddle {
namespace framework {
......
......@@ -16,7 +16,7 @@
#include "paddle/fluid/framework/ir/mkldnn/params_quantization_mkldnn_pass.h" // NOLINT
#include "paddle/fluid/imperative/type_defs.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/phi/common/place.h"
namespace paddle {
namespace framework {
......
......@@ -430,8 +430,8 @@ void QuantDequantMkldnnPass::TransposeWeight(phi::DenseTensor* input) const {
phi::DenseTensor trans_tensor;
trans_tensor.Resize(out_dims);
float* trans_data = trans_tensor.mutable_data<float>(platform::CPUPlace());
float* in_data = input->mutable_data<float>(platform::CPUPlace());
float* trans_data = trans_tensor.mutable_data<float>(phi::CPUPlace());
float* in_data = input->mutable_data<float>(phi::CPUPlace());
for (int64_t out_idx = 0; out_idx < count; ++out_idx) {
int64_t in_idx = 0;
......@@ -493,8 +493,7 @@ void QuantDequantMkldnnPass::ConvertFromINT8ToFP32(
weight_tensor->clear(); // clear int weight
weight_tensor->Resize(phi::make_ddim(phi::vectorize(weight_dims)));
auto* new_weight_data =
weight_tensor->mutable_data<float>(platform::CPUPlace());
auto* new_weight_data = weight_tensor->mutable_data<float>(phi::CPUPlace());
memcpy(new_weight_data,
weight_data.data(),
weight_tensor->numel() * sizeof(float));
......@@ -536,8 +535,7 @@ void QuantDequantMkldnnPass::ConvertFromINT8ToFP32(
}
weight_tensor->clear(); // clear int weight
weight_tensor->Resize(phi::make_ddim(phi::vectorize(weight_dims)));
auto* new_weight_data =
weight_tensor->mutable_data<float>(platform::CPUPlace());
auto* new_weight_data = weight_tensor->mutable_data<float>(phi::CPUPlace());
memcpy(new_weight_data,
weight_data.data(),
weight_tensor->numel() * sizeof(float));
......@@ -582,8 +580,7 @@ void QuantDequantMkldnnPass::DequantizeOpWeights(
weight_var_name,
op_desc->Type()));
auto* weight_tensor = var->GetMutable<phi::DenseTensor>();
float* fp32_weight_data =
weight_tensor->mutable_data<float>(platform::CPUPlace());
float* fp32_weight_data = weight_tensor->mutable_data<float>(phi::CPUPlace());
ConvertFromINT8ToFP32(
scales, weight_tensor, nullptr, fp32_weight_data, weight_var_name);
}
......@@ -628,7 +625,7 @@ void QuantDequantMkldnnPass::DequantizeOpWeightsFromONNXFormat(
op_desc->Type()));
auto* weight_tensor = var->GetMutable<phi::DenseTensor>();
int8_t* int8_weight_data =
weight_tensor->mutable_data<int8_t>(platform::CPUPlace());
weight_tensor->mutable_data<int8_t>(phi::CPUPlace());
ConvertFromINT8ToFP32(
scales, weight_tensor, int8_weight_data, nullptr, weight_var_name);
......
......@@ -14,8 +14,8 @@
#include "paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass.h"
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/string/pretty_log.h"
#include "paddle/phi/core/enforce.h"
#include "paddle/utils/string/pretty_log.h"
namespace paddle {
namespace framework {
......
......@@ -26,7 +26,7 @@ void AddVarToScope(Scope* param_scope,
const DDim& dims) {
auto* tensor = param_scope->Var(name)->GetMutable<phi::DenseTensor>();
tensor->Resize(dims);
tensor->mutable_data<float>(platform::CPUPlace());
tensor->mutable_data<float>(phi::CPUPlace());
}
Scope* CreateParamScope() {
......
......@@ -20,7 +20,7 @@ limitations under the License. */
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/fluid/string/pretty_log.h"
#include "paddle/utils/string/pretty_log.h"
namespace paddle {
namespace framework {
......
......@@ -28,7 +28,7 @@ void AddVarToScope(Scope* param_scope,
const DDim& dims) {
auto* tensor = param_scope->Var(name)->GetMutable<phi::DenseTensor>();
tensor->Resize(dims);
tensor->mutable_data<float>(platform::CPUPlace());
tensor->mutable_data<float>(phi::CPUPlace());
}
Scope* CreateParamScope() {
......
......@@ -16,9 +16,9 @@
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/mkldnn_reuse.h"
#include "paddle/fluid/string/pretty_log.h"
#include "paddle/phi/backends/onednn/onednn_reuse.h"
#include "paddle/phi/core/enforce.h"
#include "paddle/utils/string/pretty_log.h"
namespace paddle {
namespace framework {
......
......@@ -13,8 +13,8 @@
// limitations under the License.
#include "paddle/fluid/framework/ir/mkldnn/squeeze2_transpose2_onednn_fuse_pass.h"
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/platform/mkldnn_reuse.h"
#include "paddle/fluid/string/pretty_log.h"
#include "paddle/phi/backends/onednn/onednn_reuse.h"
#include "paddle/utils/string/pretty_log.h"
namespace paddle {
namespace framework {
......
......@@ -30,14 +30,14 @@
#include "paddle/fluid/inference/analysis/analyzer.h"
#include "paddle/fluid/inference/api/analysis_predictor.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/string/pretty_log.h"
#include "paddle/phi/common/place.h"
#include "paddle/utils/string/pretty_log.h"
namespace paddle {
using framework::Variable;
using framework::ir::Graph;
using platform::CPUPlace;
using phi::CPUPlace;
using ConstEigenVectorArrayMap =
Eigen::Map<const Eigen::Array<float, Eigen::Dynamic, 1>>;
using EigenMatrixDoubleArray =
......
......@@ -111,7 +111,7 @@ TEST_F(MkldnnQuantizerTest, histogram_inverted_min_max) {
var_tensor.Resize(phi::make_dim(values.size()));
std::copy(begin(values),
end(values),
var_tensor.mutable_data<float>(platform::CPUPlace()));
var_tensor.mutable_data<float>(phi::CPUPlace()));
ASSERT_THROW(Histogram(var_tensor, max_val, min_val, 3),
platform::EnforceNotMet);
......@@ -127,7 +127,7 @@ TEST_F(MkldnnQuantizerTest, histogram_non_negative_to_3) {
var_tensor.Resize(phi::make_dim(values.size()));
std::copy(begin(values),
end(values),
var_tensor.mutable_data<float>(platform::CPUPlace()));
var_tensor.mutable_data<float>(phi::CPUPlace()));
std::vector<int> histogram;
float bin_width;
......@@ -151,7 +151,7 @@ TEST_F(MkldnnQuantizerTest, histogram_positive_and_negative_to_3) {
var_tensor.Resize(phi::make_dim(values.size()));
std::copy(begin(values),
end(values),
var_tensor.mutable_data<float>(platform::CPUPlace()));
var_tensor.mutable_data<float>(phi::CPUPlace()));
std::vector<int> histogram;
float bin_width;
......@@ -175,7 +175,7 @@ TEST_F(MkldnnQuantizerTest, histogram_zero_bins) {
var_tensor.Resize(phi::make_dim(values.size()));
std::copy(begin(values),
end(values),
var_tensor.mutable_data<float>(platform::CPUPlace()));
var_tensor.mutable_data<float>(phi::CPUPlace()));
ASSERT_THROW(Histogram(var_tensor, min_val, max_val, 0),
platform::EnforceNotMet);
......@@ -188,7 +188,7 @@ TEST_F(MkldnnQuantizerTest, histogram_empty) {
// zero tensor
phi::DenseTensor var_tensor;
var_tensor.Resize({0});
var_tensor.mutable_data<double>(platform::CPUPlace());
var_tensor.mutable_data<double>(phi::CPUPlace());
ASSERT_THROW(Histogram(var_tensor, -1, 1, 1), platform::EnforceNotMet);
}
......@@ -200,7 +200,7 @@ TEST_F(MkldnnQuantizerTest, kl_scaling_factor_signed) {
var_tensor.Resize(phi::make_dim(values.size()));
std::copy(begin(values),
end(values),
var_tensor.mutable_data<float>(platform::CPUPlace()));
var_tensor.mutable_data<float>(phi::CPUPlace()));
bool is_unsigned;
phi::DenseTensor lod_tensor;
......@@ -220,7 +220,7 @@ TEST_F(MkldnnQuantizerTest, max_scaling_factor_signed) {
var_tensor.Resize(phi::make_dim(values.size()));
std::copy(begin(values),
end(values),
var_tensor.mutable_data<float>(platform::CPUPlace()));
var_tensor.mutable_data<float>(phi::CPUPlace()));
bool is_unsigned;
phi::DenseTensor lod_tensor;
......@@ -240,7 +240,7 @@ TEST_F(MkldnnQuantizerTest, max_scaling_factor_unsigned) {
var_tensor.Resize(phi::make_dim(values.size()));
std::copy(begin(values),
end(values),
var_tensor.mutable_data<float>(platform::CPUPlace()));
var_tensor.mutable_data<float>(phi::CPUPlace()));
bool is_unsigned;
phi::DenseTensor lod_tensor;
......@@ -260,10 +260,10 @@ TEST_F(MkldnnQuantizerTest, max_scaling_factor_chwise_unsigned) {
phi::DenseTensor var_tensor;
var_tensor.Resize(phi::make_dim(channels, 1, 1, values.size()));
for (int i = 0; i < channels; i++)
std::copy(begin(values),
std::copy(
begin(values),
end(values),
var_tensor.mutable_data<float>(platform::CPUPlace()) +
i * values.size());
var_tensor.mutable_data<float>(phi::CPUPlace()) + i * values.size());
bool is_unsigned;
phi::DenseTensor lod_tensor;
......@@ -284,7 +284,7 @@ TEST_F(MkldnnQuantizerTest, kl_scaling_factor_unsigned) {
var_tensor.Resize(phi::make_dim(values.size()));
std::copy(begin(values),
end(values),
var_tensor.mutable_data<float>(platform::CPUPlace()));
var_tensor.mutable_data<float>(phi::CPUPlace()));
bool is_unsigned;
phi::DenseTensor lod_tensor;
......@@ -312,14 +312,14 @@ TEST_F(MkldnnQuantizerTest, max_ch_gru_scaling_factor) {
std::copy(
begin(wx[i]),
end(wx[i]),
wx_tensor.mutable_data<float>(platform::CPUPlace()) + i * wx[0].size());
wx_tensor.mutable_data<float>(phi::CPUPlace()) + i * wx[0].size());
wh_tensor.Resize(phi::make_dim(wh.size(), wh[0].size()));
for (size_t i = 0; i < wh.size(); i++)
std::copy(
begin(wh[i]),
end(wh[i]),
wh_tensor.mutable_data<float>(platform::CPUPlace()) + i * wh[0].size());
wh_tensor.mutable_data<float>(phi::CPUPlace()) + i * wh[0].size());
bool is_unsigned;
std::tie(is_unsigned, lod_tensor) =
......@@ -342,14 +342,14 @@ TEST_F(MkldnnQuantizerTest, max_ch_lstm_scaling_factor) {
std::copy(
begin(wx[i]),
end(wx[i]),
wx_tensor.mutable_data<float>(platform::CPUPlace()) + i * wx[0].size());
wx_tensor.mutable_data<float>(phi::CPUPlace()) + i * wx[0].size());
wh_tensor.Resize(phi::make_dim(wh.size(), wh[0].size()));
for (size_t i = 0; i < wh.size(); i++)
std::copy(
begin(wh[i]),
end(wh[i]),
wh_tensor.mutable_data<float>(platform::CPUPlace()) + i * wh[0].size());
wh_tensor.mutable_data<float>(phi::CPUPlace()) + i * wh[0].size());
bool is_unsigned;
std::tie(is_unsigned, lod_tensor) =
......
......@@ -18,8 +18,7 @@ limitations under the License. */
#include <iostream>
#include "paddle/fluid/inference/tests/api/tester_helper.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/phi/common/place.h"
DEFINE_string(infer_shape, "", "data shape file");
DEFINE_int32(sample, 20, "number of sample");
......@@ -78,7 +77,7 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs,
#ifdef PADDLE_WITH_MKLDNN
int GetNumCachedObjects(void) {
auto &pool = platform::DeviceContextPool::Instance();
platform::CPUPlace place;
phi::CPUPlace place;
auto onednn_dev_ctx = dynamic_cast<phi::OneDNNContext *>(pool.Get(place));
return onednn_dev_ctx->GetCachedObjectsNumber();
}
......
......@@ -15,6 +15,7 @@ limitations under the License. */
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/operators/fused/fusion_gru_op.h"
#include "paddle/fluid/operators/fused/mkldnn/fusion_rnn_mkldnn.h"
#include "paddle/phi/backends/onednn/onednn_reuse.h"
#include "paddle/phi/core/expect.h"
namespace paddle {
......@@ -24,13 +25,14 @@ using phi::OneDNNContext;
using phi::funcs::OneDNNGetDataType;
using phi::funcs::OneDNNMemDesc;
using phi::funcs::RNNReorderType;
using OneDNNMemoryFormat = dnnl::memory::format_tag;
template <typename T, typename T_out = T>
class GRUMKLDNNHandler : public RNNMKLDNNHandler<T, dnnl::gru_forward, T_out> {
public:
GRUMKLDNNHandler(const paddle::framework::ExecutionContext& ctx,
const OneDNNContext& dev_ctx,
const dnnl::engine mkldnn_engine,
const dnnl::engine onednn_engine,
platform::Place cpu_place,
const phi::DenseTensor* input,
const phi::DenseTensor* weight_h,
......@@ -44,7 +46,7 @@ class GRUMKLDNNHandler : public RNNMKLDNNHandler<T, dnnl::gru_forward, T_out> {
: RNNMKLDNNHandler<T, dnnl::gru_forward, T_out>(
ctx,
dev_ctx,
mkldnn_engine,
onednn_engine,
ctx.GetPlace(),
input,
weight_h,
......@@ -256,7 +258,7 @@ class FusionGRUMKLDNNKernel : public framework::OpKernel<T> {
template <typename Tout = T>
void RunKernel(const framework::ExecutionContext& ctx) const {
auto& dev_ctx = ctx.template device_context<OneDNNContext>();
const auto& mkldnn_engine = dev_ctx.GetEngine();
const auto& onednn_engine = dev_ctx.GetEngine();
// Get Tensors
const auto* input = ctx.Input<phi::DenseTensor>("X");
......@@ -294,7 +296,7 @@ class FusionGRUMKLDNNKernel : public framework::OpKernel<T> {
GRUMKLDNNHandler<T, Tout> handler(
ctx,
dev_ctx,
mkldnn_engine,
onednn_engine,
ctx.GetPlace(),
input,
weight_h,
......@@ -379,7 +381,7 @@ class FusionGRUMKLDNNKernel : public framework::OpKernel<T> {
namespace ops = paddle::operators;
REGISTER_OP_KERNEL(fusion_gru,
MKLDNN,
paddle::platform::CPUPlace,
phi::CPUPlace,
ops::FusionGRUMKLDNNKernel<float>,
ops::FusionGRUMKLDNNKernel<paddle::platform::bfloat16>,
ops::FusionGRUMKLDNNKernel<uint8_t>);
......@@ -24,6 +24,7 @@ using phi::OneDNNContext;
using phi::funcs::OneDNNGetDataType;
using phi::funcs::OneDNNMemDesc;
using phi::funcs::RNNReorderType;
using OneDNNMemoryFormat = dnnl::memory::format_tag;
template <typename T, typename T_out = T>
class LSTMMKLDNNHandler
......@@ -31,7 +32,7 @@ class LSTMMKLDNNHandler
public:
LSTMMKLDNNHandler(const paddle::framework::ExecutionContext& ctx,
const OneDNNContext& dev_ctx,
const dnnl::engine mkldnn_engine,
const dnnl::engine onednn_engine,
platform::Place cpu_place,
const phi::DenseTensor* input,
const phi::DenseTensor* weight_h,
......@@ -46,7 +47,7 @@ class LSTMMKLDNNHandler
: RNNMKLDNNHandler<T, dnnl::lstm_forward, T_out>(
ctx,
dev_ctx,
mkldnn_engine,
onednn_engine,
ctx.GetPlace(),
input,
weight_h,
......@@ -338,7 +339,7 @@ class FusionLSTMMKLDNNKernel : public framework::OpKernel<T> {
template <typename Tout = T>
void RunKernel(const framework::ExecutionContext& ctx) const {
auto& dev_ctx = ctx.template device_context<OneDNNContext>();
const auto& mkldnn_engine = dev_ctx.GetEngine();
const auto& onednn_engine = dev_ctx.GetEngine();
// Get Tensors
const auto* input = ctx.Input<phi::DenseTensor>("X");
......@@ -379,7 +380,7 @@ class FusionLSTMMKLDNNKernel : public framework::OpKernel<T> {
LSTMMKLDNNHandler<T, Tout> handler(
ctx,
dev_ctx,
mkldnn_engine,
onednn_engine,
ctx.GetPlace(),
input,
weight_h,
......@@ -474,7 +475,7 @@ class FusionLSTMMKLDNNKernel : public framework::OpKernel<T> {
namespace ops = paddle::operators;
REGISTER_OP_KERNEL(fusion_lstm,
MKLDNN,
paddle::platform::CPUPlace,
phi::CPUPlace,
ops::FusionLSTMMKLDNNKernel<float>,
ops::FusionLSTMMKLDNNKernel<paddle::platform::bfloat16>,
ops::FusionLSTMMKLDNNKernel<uint8_t>);
......@@ -14,7 +14,8 @@ limitations under the License. */
#pragma once
#include "paddle/fluid/platform/mkldnn_reuse.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/phi/backends/onednn/onednn_reuse.h"
namespace paddle {
namespace operators {
......@@ -22,13 +23,14 @@ namespace operators {
using phi::funcs::CreateKey;
using phi::funcs::OneDNNGetDataType;
using phi::funcs::RNNReorderType;
using OneDNNMemoryFormat = dnnl::memory::format_tag;
template <typename T, typename T_alg, typename T_out = T>
class RNNMKLDNNHandler : public phi::funcs::OneDNNHandlerT<T, T_alg> {
public:
RNNMKLDNNHandler(const paddle::framework::ExecutionContext& ctx,
const phi::OneDNNContext& dev_ctx,
const dnnl::engine mkldnn_engine,
const dnnl::engine onednn_engine,
platform::Place cpu_place,
const phi::DenseTensor* input,
const phi::DenseTensor* weight_h,
......
......@@ -18,10 +18,10 @@ limitations under the License. */
#include "dnnl.hpp" // NOLINT
#include "paddle/fluid/framework/mixed_vector.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/operators/fused/multi_gru_op.h"
#include "paddle/fluid/platform/errors.h"
#include "paddle/fluid/platform/mkldnn_reuse.h"
#include "paddle/phi/backends/onednn/onednn_reuse.h"
namespace paddle {
namespace operators {
......@@ -31,6 +31,7 @@ using phi::funcs::OneDNNGetDataType;
using phi::funcs::OneDNNMemDesc;
using Direction = dnnl::rnn_direction;
using phi::OneDNNContext;
using OneDNNMemoryFormat = dnnl::memory::format_tag;
namespace {
......@@ -721,6 +722,6 @@ class MultiGRUMKLDNNKernel : public framework::OpKernel<T> {
namespace ops = paddle::operators;
REGISTER_OP_KERNEL(multi_gru,
MKLDNN,
paddle::platform::CPUPlace,
phi::CPUPlace,
ops::MultiGRUMKLDNNKernel<float>,
ops::MultiGRUMKLDNNKernel<uint8_t>);
......@@ -14,11 +14,10 @@ limitations under the License. */
#include "paddle/fluid/operators/dequantize_op.h"
#include "paddle/fluid/framework/data_layout_transform.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/errors.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/fluid/platform/mkldnn_reuse.h"
#include "paddle/phi/backends/onednn/onednn_reuse.h"
#include "paddle/phi/core/errors.h"
namespace paddle {
namespace operators {
......@@ -39,11 +38,11 @@ class DeQuantOpKernel : public framework::OpKernel<T> {
auto* out = ctx.Output<phi::DenseTensor>("Output");
PADDLE_ENFORCE(quantization_scale != 0.0f,
platform::errors::InvalidArgument(
phi::errors::InvalidArgument(
"Dequantization scale must be different than 0.0f"));
PADDLE_ENFORCE(quantization_shift <= 255 && quantization_shift >= 0,
platform::errors::InvalidArgument(
phi::errors::InvalidArgument(
"Dequantization shift must be lower or equal to ",
"255 and greater or equal to 0, but got %f",
quantization_shift));
......@@ -91,7 +90,7 @@ namespace ops = paddle::operators;
REGISTER_OP_KERNEL(dequantize,
MKLDNN,
::paddle::platform::CPUPlace,
::phi::CPUPlace,
ops::DeQuantOpKernel<uint8_t>,
ops::DeQuantOpKernel<int8_t>,
ops::DeQuantOpKernel<paddle::platform::bfloat16>);
......@@ -14,9 +14,10 @@ limitations under the License. */
#include <memory>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/fc_op.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/fluid/platform/mkldnn_reuse.h"
#include "paddle/phi/backends/onednn/onednn_reuse.h"
namespace paddle {
namespace operators {
......@@ -51,10 +52,10 @@ class FCMKLDNNHandler
const phi::DenseTensor* bias,
phi::DenseTensor* out,
const int in_num_col_dims,
dnnl::engine mkldnn_engine,
dnnl::engine onednn_engine,
platform::Place cpu_place)
: phi::funcs::OneDNNHandlerNoCachingT<T_in, dnnl::inner_product_forward>(
mkldnn_engine, cpu_place),
onednn_engine, cpu_place),
dev_ctx_(dev_ctx) {
this->memory_key_ = ctx.InputName("W");
......@@ -122,7 +123,7 @@ class FCMKLDNNHandler
post_operations.append_eltwise(
activation_scale, dnnl::algorithm::eltwise_relu, 0.0f, 0.0f);
}
platform::AppendActivation(ctx, post_operations, activation_scale);
AppendActivation(ctx, post_operations, activation_scale);
if (ctx.HasAttr("fused_output_scale")) {
float scale_alpha = ctx.Attr<float>("fused_output_scale");
......@@ -154,6 +155,59 @@ class FCMKLDNNHandler
}
}
void AppendActivation(const ExecutionContext& ctx,
dnnl::post_ops& post_ops, // NOLINT
float activation_scale = 1.0f) {
const auto invalid_attribute =
ctx.HasAttr("fuse_activation")
? ctx.Attr<std::string>("fuse_activation").empty()
: true;
if (invalid_attribute) return;
const auto fuse_activation = ctx.Attr<std::string>("fuse_activation");
const auto fuse_alpha =
ctx.HasAttr("fuse_alpha") ? ctx.Attr<float>("fuse_alpha") : 0.0f;
const auto fuse_beta =
ctx.HasAttr("fuse_beta") ? ctx.Attr<float>("fuse_beta") : 0.0f;
if (fuse_activation == "hard_sigmoid") {
post_ops.append_eltwise(activation_scale,
dnnl::algorithm::eltwise_linear,
fuse_alpha,
fuse_beta);
post_ops.append_eltwise(
activation_scale, dnnl::algorithm::eltwise_clip, 0.0f, 1.0f);
} else {
const std::unordered_map<std::string, dnnl::algorithm> activation_map = {
{"abs", dnnl::algorithm::eltwise_abs},
{"clip", dnnl::algorithm::eltwise_clip},
{"gelu", dnnl::algorithm::eltwise_gelu_erf},
{"gelu_erf", dnnl::algorithm::eltwise_gelu_erf},
{"gelu_tanh", dnnl::algorithm::eltwise_gelu_tanh},
{"hard_swish", dnnl::algorithm::eltwise_hardswish},
{"leaky_relu", dnnl::algorithm::eltwise_relu},
{"mish", dnnl::algorithm::eltwise_mish},
{"relu", dnnl::algorithm::eltwise_relu},
{"relu6", dnnl::algorithm::eltwise_bounded_relu},
{"sigmoid", dnnl::algorithm::eltwise_logistic},
{"sqrt", dnnl::algorithm::eltwise_sqrt},
{"swish", dnnl::algorithm::eltwise_swish},
{"tanh", dnnl::algorithm::eltwise_tanh}};
const auto& activation_type = activation_map.find(fuse_activation);
PADDLE_ENFORCE_NE(
activation_type,
activation_map.end(),
platform::errors::InvalidArgument(
"Activation '%s' not found in oneDNN algorithms mapper",
fuse_activation));
post_ops.append_eltwise(
activation_scale, activation_type->second, fuse_alpha, fuse_beta);
}
}
// Correct output scale, to take into account scaling of input and weights
// Since the data that comes out of input and weight multiplication is
// scaled with its own scales, this data needs to be divided by
......@@ -396,10 +450,76 @@ class FCMKLDNNKernel : public framework::OpKernel<T_in> {
}
}
void SetOutMemDescWithUnsqueeze2FuseSupport(
const framework::ExecutionContext& ctx,
phi::DenseTensor* out,
const dnnl::memory::desc& out_md) const {
const std::vector<int>& fused_unsqueeze2_axes =
ctx.Attr<std::vector<int>>("fused_unsqueeze2_axes");
const std::vector<int64_t>& op_tz = out_md.dims();
std::vector<int64_t> unsqueezed_op_tz(
op_tz.size() + fused_unsqueeze2_axes.size(), 0);
for (const auto& axis : fused_unsqueeze2_axes) {
int positive_axis = axis < 0 ? unsqueezed_op_tz.size() + axis : axis;
unsqueezed_op_tz[positive_axis] = 1;
}
int j = 0;
for (size_t i = 0; i < unsqueezed_op_tz.size(); ++i) {
if (unsqueezed_op_tz[i] == 0) {
unsqueezed_op_tz[i] = op_tz[j++];
}
}
out->set_mem_desc(out_md.reshape(unsqueezed_op_tz));
out->Resize(phi::make_ddim(unsqueezed_op_tz));
}
void SetOutMemDescWithReshape2FuseSupport(
const framework::ExecutionContext& ctx,
phi::DenseTensor* out,
const dnnl::memory::desc& out_md) const {
std::vector<int64_t> fused_reshape2_shape(
ctx.Attr<std::vector<int>>("fused_reshape2_shape").begin(),
ctx.Attr<std::vector<int>>("fused_reshape2_shape").end());
const int out_shape_numel = out->numel();
const int new_shape_numel = std::accumulate(fused_reshape2_shape.begin(),
fused_reshape2_shape.end(),
1,
std::multiplies<int64_t>());
for (size_t i = 0; i < fused_reshape2_shape.size(); ++i) {
if (fused_reshape2_shape[i] == -1) {
fused_reshape2_shape[i] = -out_shape_numel / new_shape_numel;
break;
}
}
out->set_mem_desc(out_md.reshape(fused_reshape2_shape));
out->Resize(phi::make_ddim(fused_reshape2_shape));
}
void SetOutMemDescWithLogicalLayoutFusesSupport(
const framework::ExecutionContext& ctx,
phi::DenseTensor* out,
const dnnl::memory::desc& out_md) const {
if (ctx.HasAttr("fused_unsqueeze2_axes")) {
SetOutMemDescWithUnsqueeze2FuseSupport(ctx, out, out_md);
} else if (ctx.HasAttr("fused_reshape2_shape")) {
SetOutMemDescWithReshape2FuseSupport(ctx, out, out_md);
} else if (ctx.HasAttr("fused_squeeze2_axes")) {
out->set_mem_desc(out_md);
out->Resize(phi::make_ddim(out_md.dims()));
} else {
out->set_mem_desc(out_md);
}
}
template <typename T_out, typename T_w>
void RunKernel(const framework::ExecutionContext& ctx) const {
const auto& dev_ctx = ctx.template device_context<OneDNNContext>();
const auto& mkldnn_engine = dev_ctx.GetEngine();
const auto& onednn_engine = dev_ctx.GetEngine();
const auto* x = ctx.Input<phi::DenseTensor>("Input");
const auto* weights = ctx.Input<phi::DenseTensor>("W");
......@@ -433,7 +553,7 @@ class FCMKLDNNKernel : public framework::OpKernel<T_in> {
inner_product_cache->inner_product_p);
src_memory_p =
std::make_shared<dnnl::memory>(inner_product_cache->src_mem);
PrepareSrcMem(fc_p, src_memory_p, x, mkldnn_engine);
PrepareSrcMem(fc_p, src_memory_p, x, onednn_engine);
weights_memory_p =
std::make_shared<dnnl::memory>(inner_product_cache->weights_mem);
......@@ -463,7 +583,7 @@ class FCMKLDNNKernel : public framework::OpKernel<T_in> {
bias,
out,
in_col_dims,
mkldnn_engine,
onednn_engine,
ctx.GetPlace());
src_memory_p = handler.AcquireSrcMemoryWithReorder(x);
......@@ -504,7 +624,7 @@ class FCMKLDNNKernel : public framework::OpKernel<T_in> {
dev_ctx.SetBlob(cache_key, ip_cache);
}
platform::SetOutMemDescWithLogicalLayoutFusesSupport(
SetOutMemDescWithLogicalLayoutFusesSupport(
ctx,
out,
dst_memory_p->get_desc().reshape(phi::vectorize(out->dims())));
......@@ -541,7 +661,7 @@ namespace ops = paddle::operators;
REGISTER_OP_KERNEL(fc,
MKLDNN,
::paddle::platform::CPUPlace,
::phi::CPUPlace,
ops::FCMKLDNNKernel<float>,
ops::FCMKLDNNKernel<paddle::platform::bfloat16>,
ops::FCMKLDNNKernel<uint8_t>,
......
......@@ -12,9 +12,8 @@
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/data_layout_transform.h"
#include "paddle/fluid/operators/interpolate_op.h"
#include "paddle/fluid/platform/mkldnn_reuse.h"
#include "paddle/phi/backends/onednn/onednn_reuse.h"
namespace paddle {
namespace operators {
......@@ -25,6 +24,7 @@ using dnnl::reorder;
using dnnl::resampling_forward;
using dnnl::stream;
using phi::DataLayout;
using OneDNNMemoryFormat = dnnl::memory::format_tag;
template <typename T = float>
class InterpolateOneDNNHandler
......@@ -131,7 +131,7 @@ class InterpolateOneDNNKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
const auto& dev_ctx = ctx.template device_context<phi::OneDNNContext>();
const auto& mkldnn_engine = dev_ctx.GetEngine();
const auto& onednn_engine = dev_ctx.GetEngine();
const auto* x = ctx.Input<phi::DenseTensor>("X");
auto* out = ctx.Output<phi::DenseTensor>("Out");
......@@ -146,7 +146,7 @@ class InterpolateOneDNNKernel : public framework::OpKernel<T> {
out->Resize(dim_out);
InterpolateOneDNNHandler<T> handler(
algo, mkldnn_engine, ctx.GetPlace(), x, out);
algo, onednn_engine, ctx.GetPlace(), x, out);
auto src_memory_p = handler.AcquireSrcMemory(x);
auto dst_memory_p = handler.AcquireDstMemory(out);
......@@ -170,11 +170,11 @@ namespace ops = paddle::operators;
REGISTER_OP_KERNEL(nearest_interp,
MKLDNN,
::paddle::platform::CPUPlace,
::phi::CPUPlace,
ops::InterpolateOneDNNKernel<float>,
ops::InterpolateOneDNNKernel<int8_t>,
ops::InterpolateOneDNNKernel<uint8_t>);
REGISTER_OP_KERNEL(bilinear_interp,
MKLDNN,
::paddle::platform::CPUPlace,
::phi::CPUPlace,
ops::InterpolateOneDNNKernel<float>);
......@@ -13,7 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/mkldnn_reuse.h"
#include "paddle/phi/backends/onednn/onednn_reuse.h"
#include "paddle/phi/common/data_type.h"
namespace paddle {
......@@ -99,7 +100,7 @@ class LayerNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
const bool is_test = ctx.Attr<bool>("is_test");
auto& dev_ctx = ctx.template device_context<phi::OneDNNContext>();
const auto& mkldnn_engine = dev_ctx.GetEngine();
const auto& onednn_engine = dev_ctx.GetEngine();
auto src_tz = phi::vectorize(x->dims());
PADDLE_ENFORCE_EQ(begin_norm_axis,
......@@ -117,7 +118,7 @@ class LayerNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
}
LayerNormOneDNNHandler<T> handler(
src_tz, epsilon, flags, is_test, x, mkldnn_engine, ctx.GetPlace());
src_tz, epsilon, flags, is_test, x, onednn_engine, ctx.GetPlace());
auto src_memory = handler.AcquireSrcMemory(x);
auto dst_memory = handler.AcquireDstMemory(out);
......@@ -159,6 +160,6 @@ class LayerNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
namespace ops = paddle::operators;
REGISTER_OP_KERNEL(layer_norm,
MKLDNN,
::paddle::platform::CPUPlace,
::phi::CPUPlace,
ops::LayerNormMKLDNNOpKernel<float>,
ops::LayerNormMKLDNNOpKernel<paddle::platform::bfloat16>);
......@@ -12,7 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/platform/mkldnn_reuse.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/phi/backends/onednn/onednn_reuse.h"
namespace paddle {
namespace operators {
......@@ -25,13 +26,13 @@ class LRNOneDNNHandler
OneDNNHandlerNoCachingT<T, dnnl::lrn_forward, dnnl::lrn_backward> {
public:
LRNOneDNNHandler(const framework::ExecutionContext& ctx,
const dnnl::engine mkldnn_engine,
const dnnl::engine onednn_engine,
platform::Place cpu_place,
const phi::DenseTensor* input)
: phi::funcs::
OneDNNHandlerNoCachingT<T, dnnl::lrn_forward, dnnl::lrn_backward>(
mkldnn_engine, cpu_place) {
onednn_engine, cpu_place) {
const int n = ctx.Attr<int>("n");
// MKL-DNN implements LRN in a caffe way:
// http://caffe.berkeleyvision.org/tutorial/layers/lrn.html
......@@ -56,14 +57,14 @@ class LRNOneDNNHandler
}
LRNOneDNNHandler(const framework::ExecutionContext& ctx,
const dnnl::engine mkldnn_engine,
const dnnl::engine onednn_engine,
platform::Place cpu_place,
const phi::DenseTensor* in_x,
const phi::DenseTensor* out_grad,
phi::DenseTensor* in_x_grad)
: phi::funcs::
OneDNNHandlerNoCachingT<T, dnnl::lrn_forward, dnnl::lrn_backward>(
mkldnn_engine, cpu_place) {
onednn_engine, cpu_place) {
PADDLE_ENFORCE_EQ(
ctx.Attr<bool>("is_test"),
false,
......@@ -125,13 +126,13 @@ class LRNMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
paddle::platform::errors::PreconditionNotMet(
"Operator DNNL LRN must use CPUPlace"));
auto& dev_ctx = ctx.template device_context<OneDNNContext>();
const auto& mkldnn_engine = dev_ctx.GetEngine();
const auto& onednn_engine = dev_ctx.GetEngine();
auto x = ctx.Input<phi::DenseTensor>("X");
auto out = ctx.Output<phi::DenseTensor>("Out");
auto mid = ctx.Output<phi::DenseTensor>("MidOut");
LRNOneDNNHandler<T> handler(ctx, mkldnn_engine, ctx.GetPlace(), x);
LRNOneDNNHandler<T> handler(ctx, onednn_engine, ctx.GetPlace(), x);
auto src_memory = handler.AcquireSrcMemory(x);
auto dst_memory = handler.AcquireDstMemory(out);
......@@ -179,10 +180,10 @@ class LRNMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
auto in_x_grad = ctx.Output<phi::DenseTensor>(framework::GradVarName("X"));
auto& dev_ctx = ctx.template device_context<OneDNNContext>();
const auto& mkldnn_engine = dev_ctx.GetEngine();
const auto& onednn_engine = dev_ctx.GetEngine();
LRNOneDNNHandler<T> handler(
ctx, mkldnn_engine, ctx.GetPlace(), in_x, out_grad, in_x_grad);
ctx, onednn_engine, ctx.GetPlace(), in_x, out_grad, in_x_grad);
auto src_memory = handler.AcquireSrcMemory(in_x);
auto workspace = handler.AcquireBackwardWorkspaceMemory(mid);
......@@ -207,11 +208,8 @@ class LRNMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
namespace ops = paddle::operators;
REGISTER_OP_KERNEL(lrn,
MKLDNN,
paddle::platform::CPUPlace,
ops::LRNMKLDNNOpKernel<float>);
REGISTER_OP_KERNEL(lrn, MKLDNN, phi::CPUPlace, ops::LRNMKLDNNOpKernel<float>);
REGISTER_OP_KERNEL(lrn_grad,
MKLDNN,
paddle::platform::CPUPlace,
phi::CPUPlace,
ops::LRNMKLDNNGradOpKernel<float>);
......@@ -14,14 +14,13 @@ limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/mkldnn_reuse.h"
#include "paddle/phi/backends/onednn/onednn_reuse.h"
#include "paddle/phi/kernels/funcs/blas/blas.h"
namespace {
using dnnl::memory;
using paddle::framework::ExecutionContext;
using paddle::framework::GradVarName;
using paddle::platform::MatMulV2MKLDNNHandler;
using phi::OneDNNContext;
using phi::vectorize;
using phi::funcs::OneDNNGetDataType;
......@@ -82,6 +81,239 @@ phi::DDim GetDimForInput(const ExecutionContext &ctx, std::string input_name) {
return input_dims;
}
template <typename XT, typename YT, typename OT>
class MatMulV2MKLDNNHandler
: public phi::funcs::OneDNNHandlerNoCachingT<XT, dnnl::matmul> {
public:
MatMulV2MKLDNNHandler(const ExecutionContext &ctx,
const dnnl::engine engine,
paddle::platform::Place cpu_place,
const std::vector<int64_t> &x_org_dims,
bool trans_x,
const std::vector<int64_t> &y_org_dims,
bool trans_y,
bool is_output_fused,
const std::vector<int64_t> &x_strides_override,
const std::vector<int64_t> &y_strides_override)
: phi::funcs::OneDNNHandlerNoCachingT<XT, dnnl::matmul>(engine,
cpu_place) {
// M X K * K X N
std::vector<int64_t> x_dims(x_org_dims);
std::vector<int64_t> y_dims(y_org_dims);
const int MB_idx = x_dims.size() - 3;
const int H_idx = x_dims.size() - 2;
const int W_idx = x_dims.size() - 1;
if (trans_x) std::swap(x_dims[H_idx], x_dims[W_idx]);
if (trans_y) std::swap(y_dims[H_idx], y_dims[W_idx]);
const memory::dim M = x_dims[H_idx];
const memory::dim K = x_dims[W_idx];
const memory::dim N = y_dims[W_idx];
std::vector<int64_t> x_strides(x_dims.size() - 3, 1);
std::vector<int64_t> y_strides(x_dims.size() - 3, 1);
std::vector<int64_t> out_strides(x_dims.size() - 3, 1);
std::vector<int64_t> out_ddims(x_dims.size() - 3, 1);
x_strides.reserve(x_dims.size());
y_strides.reserve(x_dims.size());
out_strides.reserve(x_dims.size());
if (!x_strides_override.empty()) {
x_strides = x_strides_override;
} else {
if (!trans_x) {
x_strides.insert(x_strides.end(), {M * K, K, 1});
} else {
x_strides.insert(x_strides.end(), {M * K, 1, M});
}
}
if (!y_strides_override.empty()) {
y_strides = y_strides_override;
} else {
if (!trans_y) {
y_strides.insert(y_strides.end(), {N * K, N, 1});
} else {
y_strides.insert(y_strides.end(), {N * K, 1, K});
}
}
out_strides.insert(out_strides.end(), {M * N, N, 1});
out_ddims.insert(out_ddims.end(),
{std::max(x_dims[MB_idx], y_dims[MB_idx]), M, N});
for (int i = x_dims.size() - 4; i >= 0; --i) {
out_ddims[i] = std::max(x_dims[i], y_dims[i]);
if (x_strides_override.empty()) {
x_strides[i] = x_dims[i + 1] * x_strides[i + 1];
}
if (y_strides_override.empty()) {
y_strides[i] = y_dims[i + 1] * y_strides[i + 1];
}
out_strides[i] = out_ddims[i + 1] * out_strides[i + 1];
}
// TODO(jczaja): Why not for int8??
if (!phi::funcs::is_int8<OT>() && is_output_fused) {
out_strides = FakeTransposeStrides(out_ddims);
}
auto x_md =
memory::desc(x_dims, phi::funcs::OneDNNGetDataType<XT>(), x_strides);
auto y_md =
memory::desc(y_dims, phi::funcs::OneDNNGetDataType<YT>(), y_strides);
auto out_md = memory::desc(
out_ddims, phi::funcs::OneDNNGetDataType<OT>(), out_strides);
const dnnl::primitive_attr matmul_attrs = CreateMatmulAttrs(ctx);
this->AcquireForwardPrimitiveDescriptor(matmul_attrs, x_md, y_md, out_md);
}
void AppendActivation(const ExecutionContext &ctx,
dnnl::post_ops &post_ops, // NOLINT
float activation_scale = 1.0f) {
const auto invalid_attribute =
ctx.HasAttr("fuse_activation")
? ctx.Attr<std::string>("fuse_activation").empty()
: true;
if (invalid_attribute) return;
const auto fuse_activation = ctx.Attr<std::string>("fuse_activation");
const auto fuse_alpha =
ctx.HasAttr("fuse_alpha") ? ctx.Attr<float>("fuse_alpha") : 0.0f;
const auto fuse_beta =
ctx.HasAttr("fuse_beta") ? ctx.Attr<float>("fuse_beta") : 0.0f;
if (fuse_activation == "hard_sigmoid") {
post_ops.append_eltwise(activation_scale,
dnnl::algorithm::eltwise_linear,
fuse_alpha,
fuse_beta);
post_ops.append_eltwise(
activation_scale, dnnl::algorithm::eltwise_clip, 0.0f, 1.0f);
} else {
const std::unordered_map<std::string, dnnl::algorithm> activation_map = {
{"abs", dnnl::algorithm::eltwise_abs},
{"clip", dnnl::algorithm::eltwise_clip},
{"gelu", dnnl::algorithm::eltwise_gelu_erf},
{"gelu_erf", dnnl::algorithm::eltwise_gelu_erf},
{"gelu_tanh", dnnl::algorithm::eltwise_gelu_tanh},
{"hard_swish", dnnl::algorithm::eltwise_hardswish},
{"leaky_relu", dnnl::algorithm::eltwise_relu},
{"mish", dnnl::algorithm::eltwise_mish},
{"relu", dnnl::algorithm::eltwise_relu},
{"relu6", dnnl::algorithm::eltwise_bounded_relu},
{"sigmoid", dnnl::algorithm::eltwise_logistic},
{"sqrt", dnnl::algorithm::eltwise_sqrt},
{"swish", dnnl::algorithm::eltwise_swish},
{"tanh", dnnl::algorithm::eltwise_tanh}};
const auto &activation_type = activation_map.find(fuse_activation);
PADDLE_ENFORCE_NE(
activation_type,
activation_map.end(),
phi::errors::InvalidArgument(
"Activation '%s' not found in oneDNN algorithms mapper",
fuse_activation));
post_ops.append_eltwise(
activation_scale, activation_type->second, fuse_alpha, fuse_beta);
}
}
float ComputeOutputScale(const ExecutionContext &ctx) {
float alpha = ctx.HasAttr("alpha") ? ctx.Attr<float>("alpha") : 1.0f;
if (ctx.HasAttr("Scale_x") && ctx.HasAttr("Scale_y") &&
ctx.HasAttr("Scale_out")) {
float scale_x = ctx.Attr<float>("Scale_x");
float scale_y = ctx.Attr<float>("Scale_y");
bool force_fp32_out = ctx.HasAttr("force_fp32_output")
? ctx.Attr<bool>("force_fp32_output")
: false;
float scale_out = force_fp32_out ? 1.f : ctx.Attr<float>("Scale_out");
alpha *= scale_out / (scale_x * scale_y);
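// e.g. (hypothetical INT8 scales) Scale_x = Scale_y = 127.0f and
// Scale_out = 63.5f fold dequantization of both inputs and requantization
// of the output into the single multiplier alpha *= 63.5f / (127.0f * 127.0f).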
}
return alpha;
}
dnnl::primitive_attr CreateMatmulAttrs(const ExecutionContext &ctx) {
dnnl::primitive_attr matmul_attrs;
dnnl::post_ops post_operations;
float scale_out = ComputeOutputScale(ctx);
if (scale_out != 1.0f) {
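// mask 0 applies one common scale to the whole output tensor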
matmul_attrs.set_output_scales(0, {scale_out});
}
if (ctx.HasInput("ResidualData")) {
auto *residual_data = ctx.Input<phi::DenseTensor>("ResidualData");
auto residual_data_tz = phi::vectorize(residual_data->dims());
auto residual_data_md = memory::desc(residual_data_tz,
phi::funcs::OneDNNGetDataType<OT>(),
dnnl::memory::format_tag::any);
post_operations.append_binary(dnnl::algorithm::binary_add,
residual_data_md);
if (ctx.HasAttr("Scale_in_eltwise")) {
float sum_scale = scale_out / ctx.Attr<float>("Scale_in_eltwise");
post_operations.append_sum(sum_scale);
}
}
AppendActivation(ctx, post_operations);
if (ctx.HasAttr("fused_output_scale")) {
float scale_alpha = ctx.Attr<float>("fused_output_scale");
post_operations.append_eltwise(
1.0, dnnl::algorithm::eltwise_linear, scale_alpha, 0.0f);
}
matmul_attrs.set_post_ops(post_operations);
return matmul_attrs;
}
std::vector<int64_t> FakeTransposeStrides(
const std::vector<int64_t> &matmul_out_dims) const {
// fusing matmul_v2 + transpose + reshape guarantees that the output is 4D
// and the transpose axes are {0, 2, 1, 3}
std::vector<int64_t> transpose_axis = {0, 2, 1, 3};
std::vector<int64_t> fake_strides(transpose_axis.size());
int ndims = static_cast<int>(transpose_axis.size());
int total_stride = 1;
for (int i = ndims - 1; i >= 0; --i) {
fake_strides[transpose_axis[i]] = total_stride;
total_stride *= matmul_out_dims[transpose_axis[i]];
}
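// e.g. a hypothetical matmul_out_dims = {2, 12, 128, 64} gives
// fake_strides = {98304, 64, 768, 1}: the output buffer is written as if
// it had already been transposed to {2, 128, 12, 64}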
return fake_strides;
}
std::shared_ptr<memory> AcquireWeightsMemory(const phi::DenseTensor *input) {
const YT *input_data = input->data<YT>();
return this->AcquireMemoryFromPrimitive(
this->fwd_pd_->weights_desc(),
phi::funcs::to_void_cast<YT>(input_data));
}
std::shared_ptr<dnnl::memory> AcquireDstMemory(phi::DenseTensor *output) {
// We cannot use the base AcquireDstMemory, as it makes an allocation
// request based on the DST memory primitive size. This is fine in general,
// but in MatMul we have a primitive that covers only one batch of data and
// then shifts the pointer for every new batch. Hence the phi::DenseTensor
// size is bigger than the dst memory primitive size, so we would request
// less memory than is there, which triggers an assertion. As there is no
// 'any' format here, we can leave the default size of phi::DenseTensor as
// computed in ComputeInferShape
OT *ptr = output->mutable_data<OT>(this->place_);
return this->AcquireMemoryFromPrimitive(this->fwd_pd_->dst_desc(), ptr);
}
};
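// A minimal usage sketch (assumed caller, not part of this diff): a kernel
// would build the handler, acquire the oneDNN memories and run the primitive.
// Variable names and the IsOutputFused helper are illustrative only.
//
//   MatMulV2MKLDNNHandler<float, float, float> handler(
//       ctx, onednn_engine, ctx.GetPlace(), x_dims, trans_x, y_dims, trans_y,
//       IsOutputFused(ctx), x_strides_override, y_strides_override);
//   auto src_memory_p = handler.AcquireSrcMemory(x);
//   auto weights_memory_p = handler.AcquireWeightsMemory(y);
//   auto dst_memory_p = handler.AcquireDstMemory(out);
//   auto matmul_p = handler.AcquireForwardPrimitive();
//   auto &astream = phi::OneDNNContext::tls().get_stream();
//   matmul_p->execute(astream,
//                     {{DNNL_ARG_SRC, *src_memory_p},
//                      {DNNL_ARG_WEIGHTS, *weights_memory_p},
//                      {DNNL_ARG_DST, *dst_memory_p}});
//   astream.wait();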
template <typename XT, typename YT, typename OT>
class MatMulMKLDNNHandler
: public phi::funcs::OneDNNHandlerNoCachingT<XT, dnnl::matmul> {
@@ -696,7 +928,7 @@ class MatMulGradMKLDNNKernel : public paddle::framework::OpKernel<T> {
REGISTER_OP_KERNEL(matmul,
MKLDNN,
::paddle::platform::CPUPlace,
::phi::CPUPlace,
MatMulMKLDNNKernel<float>,
MatMulMKLDNNKernel<paddle::platform::bfloat16>,
MatMulMKLDNNKernel<int8_t>,
@@ -704,6 +936,6 @@ REGISTER_OP_KERNEL(matmul,
REGISTER_OP_KERNEL(matmul_grad,
MKLDNN,
::paddle::platform::CPUPlace,
::phi::CPUPlace,
MatMulGradMKLDNNKernel<float>,
MatMulGradMKLDNNKernel<paddle::platform::bfloat16>);
@@ -14,10 +14,10 @@ limitations under the License. */
#include "paddle/fluid/operators/quantize_op.h"
#include "paddle/fluid/framework/data_layout_transform.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/fluid/platform/mkldnn_reuse.h"
#include "paddle/phi/backends/onednn/onednn_reuse.h"
namespace paddle {
namespace operators {
@@ -106,5 +106,5 @@ namespace ops = paddle::operators;
REGISTER_OP_KERNEL(quantize,
MKLDNN,
::paddle::platform::CPUPlace,
::phi::CPUPlace,
ops::QuantOpKernel<float>);
@@ -14,7 +14,6 @@ limitations under the License. */
#include <iterator> // NOLINT
#include "dnnl.hpp" // NOLINT
#include "paddle/fluid/framework/data_layout_transform.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/operators/requantize_op.h"
#include "paddle/phi/backends/onednn/onednn_helper.h"
@@ -115,7 +114,7 @@ namespace ops = paddle::operators;
REGISTER_OP_KERNEL(requantize,
MKLDNN,
::paddle::platform::CPUPlace,
::phi::CPUPlace,
ops::ReQuantOpKernel<int8_t>,
ops::ReQuantOpKernel<uint8_t>,
ops::ReQuantOpKernel<paddle::platform::bfloat16>);
@@ -12,9 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/flatten_op.h"
#include "paddle/fluid/operators/squeeze_op.h"
#include "paddle/fluid/platform/mkldnn_reuse.h"
#include "paddle/phi/backends/onednn/onednn_reuse.h"
namespace {
enum class ReshapeKernelOpName {
@@ -357,7 +358,7 @@ namespace ops = paddle::operators;
REGISTER_OP_KERNEL(
squeeze,
MKLDNN,
paddle::platform::CPUPlace,
phi::CPUPlace,
ops::ReshapeMKLDNNKernel<float, ReshapeKernelOpName::squeeze>,
ops::ReshapeMKLDNNKernel<paddle::platform::bfloat16,
ReshapeKernelOpName::squeeze>);
@@ -365,7 +366,7 @@ REGISTER_OP_KERNEL(
REGISTER_OP_KERNEL(
squeeze_grad,
MKLDNN,
paddle::platform::CPUPlace,
phi::CPUPlace,
ops::ReshapeGradMKLDNNKernel<float, ReshapeKernelOpName::squeeze>,
ops::ReshapeGradMKLDNNKernel<paddle::platform::bfloat16,
ReshapeKernelOpName::squeeze>);
@@ -373,7 +374,7 @@ REGISTER_OP_KERNEL(
REGISTER_OP_KERNEL(
reshape,
MKLDNN,
paddle::platform::CPUPlace,
phi::CPUPlace,
ops::ReshapeMKLDNNKernel<float, ReshapeKernelOpName::reshape>,
ops::ReshapeMKLDNNKernel<paddle::platform::bfloat16,
ReshapeKernelOpName::reshape>);
@@ -381,7 +382,7 @@ REGISTER_OP_KERNEL(
REGISTER_OP_KERNEL(
reshape_grad,
MKLDNN,
paddle::platform::CPUPlace,
phi::CPUPlace,
ops::ReshapeGradMKLDNNKernel<float, ReshapeKernelOpName::reshape>,
ops::ReshapeGradMKLDNNKernel<paddle::platform::bfloat16,
ReshapeKernelOpName::reshape>);
@@ -389,7 +390,7 @@ REGISTER_OP_KERNEL(
REGISTER_OP_KERNEL(
reshape2_grad,
MKLDNN,
paddle::platform::CPUPlace,
phi::CPUPlace,
ops::ReshapeGradMKLDNNKernel<float, ReshapeKernelOpName::reshape2>,
ops::ReshapeGradMKLDNNKernel<paddle::platform::bfloat16,
ReshapeKernelOpName::reshape2>);
@@ -397,7 +398,7 @@ REGISTER_OP_KERNEL(
REGISTER_OP_KERNEL(
flatten,
MKLDNN,
paddle::platform::CPUPlace,
phi::CPUPlace,
ops::ReshapeMKLDNNKernel<float, ReshapeKernelOpName::flatten>,
ops::ReshapeMKLDNNKernel<paddle::platform::bfloat16,
ReshapeKernelOpName::flatten>);
@@ -405,7 +406,7 @@ REGISTER_OP_KERNEL(
REGISTER_OP_KERNEL(
flatten_grad,
MKLDNN,
paddle::platform::CPUPlace,
phi::CPUPlace,
ops::ReshapeGradMKLDNNKernel<float, ReshapeKernelOpName::flatten>,
ops::ReshapeGradMKLDNNKernel<paddle::platform::bfloat16,
ReshapeKernelOpName::flatten>);
@@ -413,7 +414,7 @@ REGISTER_OP_KERNEL(
REGISTER_OP_KERNEL(
flatten2,
MKLDNN,
paddle::platform::CPUPlace,
phi::CPUPlace,
ops::ReshapeMKLDNNKernel<float, ReshapeKernelOpName::flatten2>,
ops::ReshapeMKLDNNKernel<paddle::platform::bfloat16,
ReshapeKernelOpName::flatten2>);
@@ -421,7 +422,7 @@ REGISTER_OP_KERNEL(
REGISTER_OP_KERNEL(
flatten2_grad,
MKLDNN,
paddle::platform::CPUPlace,
phi::CPUPlace,
ops::ReshapeGradMKLDNNKernel<float, ReshapeKernelOpName::flatten2>,
ops::ReshapeGradMKLDNNKernel<paddle::platform::bfloat16,
ReshapeKernelOpName::flatten2>);
@@ -12,7 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/platform/mkldnn_reuse.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/phi/backends/onednn/onednn_reuse.h"
namespace paddle {
namespace operators {
@@ -38,7 +39,7 @@ class ShuffleChannelMKLDNNKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
const auto& dev_ctx = ctx.template device_context<phi::OneDNNContext>();
const auto& mkldnn_engine = dev_ctx.GetEngine();
const auto& onednn_engine = dev_ctx.GetEngine();
const auto* x = ctx.Input<phi::DenseTensor>("X");
auto* out = ctx.Output<phi::DenseTensor>("Out");
@@ -47,7 +48,7 @@ class ShuffleChannelMKLDNNKernel : public framework::OpKernel<T> {
const int group = x->dims()[1] / ctx.Attr<int>("group");
ShuffleChannelMKLDNNHandler<T> handler(
x, group, mkldnn_engine, ctx.GetPlace());
x, group, onednn_engine, ctx.GetPlace());
auto src_memory_p = handler.AcquireSrcMemory(x);
auto dst_memory_p = handler.AcquireDstMemory(out);
@@ -69,6 +70,6 @@ class ShuffleChannelMKLDNNKernel : public framework::OpKernel<T> {
namespace ops = paddle::operators;
REGISTER_OP_KERNEL(shuffle_channel,
MKLDNN,
paddle::platform::CPUPlace,
phi::CPUPlace,
ops::ShuffleChannelMKLDNNKernel<float>,
ops::ShuffleChannelMKLDNNKernel<paddle::platform::bfloat16>);
@@ -22,9 +22,8 @@
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/enforce.h"
#include "paddle/phi/core/kernel_registry.h"
USE_OP_ITSELF(elementwise_add);
@@ -51,7 +50,7 @@ class CacheTester {
CacheTester() {
// Clear oneDNN cache
auto &pool = platform::DeviceContextPool::Instance();
platform::CPUPlace place;
phi::CPUPlace place;
onednn_dev_ctx_ = dynamic_cast<phi::OneDNNContext *>(pool.Get(place));
onednn_dev_ctx_->ResetBlobMap(nullptr);
}
@@ -140,7 +139,7 @@ void RunOperator(const platform::Place &place,
TEST(test_conv2d_reuse_cache, cpu_place) {
framework::DDim dims({1, 16, 32, 64});
platform::CPUPlace p;
phi::CPUPlace p;
CacheTester ct;
RunOperator<float>(p, "conv2d", dims, "input_signal");
RunOperator<float>(p, "conv2d", dims, "input_signal");
@@ -152,7 +151,7 @@ TEST(test_conv2d_reuse_cache, cpu_place) {
TEST(test_conv2d_noreuse_cache, cpu_place) {
framework::DDim dims({1, 16, 32, 64});
platform::CPUPlace p;
phi::CPUPlace p;
CacheTester ct;
RunOperator<float>(p, "conv2d", dims, "input_signal");
RunOperator<float>(p, "conv2d", dims, "input_signal2");
@@ -22,9 +22,8 @@
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/enforce.h"
#include "paddle/phi/core/kernel_registry.h"
USE_OP_ITSELF(elementwise_add);
@@ -137,13 +136,13 @@ bool TestMain(const platform::Place &place,
TEST(test_softmax_inplace, cpu_place) {
framework::DDim dims({32, 64});
platform::CPUPlace p;
phi::CPUPlace p;
ASSERT_TRUE(TestMain<float>(p, "softmax", dims, 1));
}
TEST(test_relu_inplace, cpu_place) {
framework::DDim dims({1, 12, 20, 20});
platform::CPUPlace p;
phi::CPUPlace p;
ASSERT_TRUE(TestMain<float>(p, "relu", dims, 1));
}
@@ -22,9 +22,8 @@
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/enforce.h"
#include "paddle/phi/core/kernel_registry.h"
USE_OP_ITSELF(pool2d);
@@ -53,7 +52,7 @@ struct InputVars {
TEST(test_pool2d_transpose_nhwc, cpu_place) {
framework::DDim dims({1, 4, 8, 512}); // NHWC shape
framework::DDim expected_dims({1, 7, 512, 3}); // NHWC expected shape
platform::CPUPlace p;
phi::CPUPlace p;
framework::Scope scope;
InputVars input_name = {"x", scope.Var("x")->GetMutable<phi::DenseTensor>()};
@@ -109,7 +108,7 @@ TEST(test_pool2d_transpose_nhwc, cpu_place) {
TEST(test_pool2d_relu_relu_nhwc, cpu_place) {
framework::DDim dims({1, 4, 8, 512}); // NHWC shape
framework::DDim expected_dims({1, 512, 3, 7}); // NCHW expected shape
platform::CPUPlace p;
phi::CPUPlace p;
framework::Scope scope;
InputVars input_name = {"x", scope.Var("x")->GetMutable<phi::DenseTensor>()};
@@ -172,7 +171,7 @@ TEST(test_pool2d_relu_relu_nhwc, cpu_place) {
TEST(test_pool2d_shape_nhwc, cpu_place) {
framework::DDim dims({1, 4, 8, 512}); // NHWC shape
std::vector<int32_t> expected_dims{1, 3, 7, 512}; // NHWC expected shape
platform::CPUPlace p;
phi::CPUPlace p;
framework::Scope scope;
InputVars input_name = {"x", scope.Var("x")->GetMutable<phi::DenseTensor>()};
@@ -227,7 +226,7 @@ TEST(test_pool2d_shape_nhwc, cpu_place) {
TEST(test_pool2d_crop_nhwc, cpu_place) {
framework::DDim dims({1, 4, 8, 512}); // NHWC shape
framework::DDim expected_dims({1, 3, 7, 512}); // NCHW expected shape
platform::CPUPlace p;
phi::CPUPlace p;
framework::Scope scope;
InputVars input_name = {"x", scope.Var("x")->GetMutable<phi::DenseTensor>()};
@@ -12,10 +12,9 @@
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/data_layout_transform.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/memory/malloc.h"
#include "paddle/fluid/platform/mkldnn_reuse.h"
#include "paddle/phi/backends/onednn/onednn_reuse.h"
namespace paddle {
namespace operators {
@@ -166,10 +165,10 @@ namespace ops = paddle::operators;
REGISTER_OP_KERNEL(transpose,
MKLDNN,
::paddle::platform::CPUPlace,
::phi::CPUPlace,
ops::TransposeMKLDNNOpKernel<float>);
REGISTER_OP_KERNEL(transpose_grad,
MKLDNN,
::paddle::platform::CPUPlace,
::phi::CPUPlace,
ops::TransposeMKLDNNGradOpKernel<float>);
@@ -12,9 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
// NOTE(Ruibiao): Difficult to remove code from this header file because too
// many files rely on it through "mkldnn_reuse.h"
#pragma once
#include "paddle/fluid/framework/op_registry.h"
@@ -23,11 +23,10 @@ limitations under the License. */
#include "dnnl.hpp" // NOLINT
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/phi/backends/onednn/onednn_helper.h"
#include "paddle/phi/common/place.h"
namespace paddle {
#ifdef PADDLE_WITH_MKLDNN
using OneDNNMemoryFormat = dnnl::memory::format_tag;
using phi::OneDNNContext;
#endif
namespace platform {
/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <algorithm>
#include <memory>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/data_layout_transform.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/operators/pool_op.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/phi/backends/onednn/onednn_reuse.h"
namespace paddle {
namespace platform {
using memory = dnnl::memory;
static void AppendActivation(const framework::ExecutionContext& ctx,
dnnl::post_ops& post_ops, // NOLINT
float activation_scale = 1.0f) {
const auto invalid_attribute =
ctx.HasAttr("fuse_activation")
? ctx.Attr<std::string>("fuse_activation").empty()
: true;
if (invalid_attribute) return;
const auto fuse_activation = ctx.Attr<std::string>("fuse_activation");
const auto fuse_alpha =
ctx.HasAttr("fuse_alpha") ? ctx.Attr<float>("fuse_alpha") : 0.0f;
const auto fuse_beta =
ctx.HasAttr("fuse_beta") ? ctx.Attr<float>("fuse_beta") : 0.0f;
if (fuse_activation == "hard_sigmoid") {
post_ops.append_eltwise(activation_scale,
dnnl::algorithm::eltwise_linear,
fuse_alpha,
fuse_beta);
post_ops.append_eltwise(
activation_scale, dnnl::algorithm::eltwise_clip, 0.0f, 1.0f);
} else {
const std::unordered_map<std::string, dnnl::algorithm> activation_map = {
{"abs", dnnl::algorithm::eltwise_abs},
{"clip", dnnl::algorithm::eltwise_clip},
{"gelu", dnnl::algorithm::eltwise_gelu_erf},
{"gelu_erf", dnnl::algorithm::eltwise_gelu_erf},
{"gelu_tanh", dnnl::algorithm::eltwise_gelu_tanh},
{"hard_swish", dnnl::algorithm::eltwise_hardswish},
{"leaky_relu", dnnl::algorithm::eltwise_relu},
{"mish", dnnl::algorithm::eltwise_mish},
{"relu", dnnl::algorithm::eltwise_relu},
{"relu6", dnnl::algorithm::eltwise_bounded_relu},
{"sigmoid", dnnl::algorithm::eltwise_logistic},
{"sqrt", dnnl::algorithm::eltwise_sqrt},
{"swish", dnnl::algorithm::eltwise_swish},
{"tanh", dnnl::algorithm::eltwise_tanh}};
const auto& activation_type = activation_map.find(fuse_activation);
PADDLE_ENFORCE_NE(
activation_type,
activation_map.end(),
platform::errors::InvalidArgument(
"Activation '%s' not found in oneDNN algorithms mapper",
fuse_activation));
post_ops.append_eltwise(
activation_scale, activation_type->second, fuse_alpha, fuse_beta);
}
}
static void SetOutMemDescWithUnsqueeze2FuseSupport(
const framework::ExecutionContext& ctx,
phi::DenseTensor* out,
const dnnl::memory::desc& out_md) {
const std::vector<int>& fused_unsqueeze2_axes =
ctx.Attr<std::vector<int>>("fused_unsqueeze2_axes");
const std::vector<int64_t>& op_tz = out_md.dims();
std::vector<int64_t> unsqueezed_op_tz(
op_tz.size() + fused_unsqueeze2_axes.size(), 0);
for (const auto& axis : fused_unsqueeze2_axes) {
int positive_axis = axis < 0 ? unsqueezed_op_tz.size() + axis : axis;
unsqueezed_op_tz[positive_axis] = 1;
}
int j = 0;
for (size_t i = 0; i < unsqueezed_op_tz.size(); ++i) {
if (unsqueezed_op_tz[i] == 0) {
unsqueezed_op_tz[i] = op_tz[j++];
}
}
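// e.g. (hypothetical) op_tz = {2, 3} with fused_unsqueeze2_axes = {0, 2}
// produces unsqueezed_op_tz = {1, 2, 1, 3}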
out->set_mem_desc(out_md.reshape(unsqueezed_op_tz));
out->Resize(phi::make_ddim(unsqueezed_op_tz));
}
static void SetOutMemDescWithReshape2FuseSupport(
const framework::ExecutionContext& ctx,
phi::DenseTensor* out,
const dnnl::memory::desc& out_md) {
std::vector<int64_t> fused_reshape2_shape(
ctx.Attr<std::vector<int>>("fused_reshape2_shape").begin(),
ctx.Attr<std::vector<int>>("fused_reshape2_shape").end());
const int out_shape_numel = out->numel();
const int new_shape_numel = std::accumulate(fused_reshape2_shape.begin(),
fused_reshape2_shape.end(),
1,
std::multiplies<int64_t>());
for (size_t i = 0; i < fused_reshape2_shape.size(); ++i) {
if (fused_reshape2_shape[i] == -1) {
fused_reshape2_shape[i] = -out_shape_numel / new_shape_numel;
break;
}
}
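// e.g. (hypothetical) out->numel() = 24 with fused_reshape2_shape = {-1, 6}:
// new_shape_numel = -6, so the -1 entry becomes -24 / -6 = 4 and the final
// shape is {4, 6}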
out->set_mem_desc(out_md.reshape(fused_reshape2_shape));
out->Resize(phi::make_ddim(fused_reshape2_shape));
}
static void SetOutMemDescWithLogicalLayoutFusesSupport(
const framework::ExecutionContext& ctx,
phi::DenseTensor* out,
const dnnl::memory::desc& out_md) {
if (ctx.HasAttr("fused_unsqueeze2_axes")) {
SetOutMemDescWithUnsqueeze2FuseSupport(ctx, out, out_md);
} else if (ctx.HasAttr("fused_reshape2_shape")) {
SetOutMemDescWithReshape2FuseSupport(ctx, out, out_md);
} else if (ctx.HasAttr("fused_squeeze2_axes")) {
out->set_mem_desc(out_md);
out->Resize(phi::make_ddim(out_md.dims()));
} else {
out->set_mem_desc(out_md);
}
}
template <typename XT, typename YT, typename OT>
class MatMulV2MKLDNNHandler
: public phi::funcs::OneDNNHandlerNoCachingT<XT, dnnl::matmul> {
public:
MatMulV2MKLDNNHandler(const framework::ExecutionContext& ctx,
const dnnl::engine engine,
paddle::platform::Place cpu_place,
const std::vector<int64_t>& x_org_dims,
bool trans_x,
const std::vector<int64_t>& y_org_dims,
bool trans_y,
bool is_output_fused,
const std::vector<int64_t>& x_strides_override,
const std::vector<int64_t>& y_strides_override)
: phi::funcs::OneDNNHandlerNoCachingT<XT, dnnl::matmul>(engine,
cpu_place) {
// M X K * K X N
std::vector<int64_t> x_dims(x_org_dims);
std::vector<int64_t> y_dims(y_org_dims);
const int MB_idx = x_dims.size() - 3;
const int H_idx = x_dims.size() - 2;
const int W_idx = x_dims.size() - 1;
if (trans_x) std::swap(x_dims[H_idx], x_dims[W_idx]);
if (trans_y) std::swap(y_dims[H_idx], y_dims[W_idx]);
const memory::dim M = x_dims[H_idx];
const memory::dim K = x_dims[W_idx];
const memory::dim N = y_dims[W_idx];
std::vector<int64_t> x_strides(x_dims.size() - 3, 1);
std::vector<int64_t> y_strides(x_dims.size() - 3, 1);
std::vector<int64_t> out_strides(x_dims.size() - 3, 1);
std::vector<int64_t> out_ddims(x_dims.size() - 3, 1);
x_strides.reserve(x_dims.size());
y_strides.reserve(x_dims.size());
out_strides.reserve(x_dims.size());
if (!x_strides_override.empty()) {
x_strides = x_strides_override;
} else {
if (!trans_x) {
x_strides.insert(x_strides.end(), {M * K, K, 1});
} else {
x_strides.insert(x_strides.end(), {M * K, 1, M});
}
}
if (!y_strides_override.empty()) {
y_strides = y_strides_override;
} else {
if (!trans_y) {
y_strides.insert(y_strides.end(), {N * K, N, 1});
} else {
y_strides.insert(y_strides.end(), {N * K, 1, K});
}
}
out_strides.insert(out_strides.end(), {M * N, N, 1});
out_ddims.insert(out_ddims.end(),
{std::max(x_dims[MB_idx], y_dims[MB_idx]), M, N});
for (int i = x_dims.size() - 4; i >= 0; --i) {
out_ddims[i] = std::max(x_dims[i], y_dims[i]);
if (x_strides_override.empty()) {
x_strides[i] = x_dims[i + 1] * x_strides[i + 1];
}
if (y_strides_override.empty()) {
y_strides[i] = y_dims[i + 1] * y_strides[i + 1];
}
out_strides[i] = out_ddims[i + 1] * out_strides[i + 1];
}
// TODO(jczaja): Why not for int8??
if (!phi::funcs::is_int8<OT>() && is_output_fused) {
out_strides = FakeTransposeStrides(out_ddims);
}
auto x_md =
memory::desc(x_dims, phi::funcs::OneDNNGetDataType<XT>(), x_strides);
auto y_md =
memory::desc(y_dims, phi::funcs::OneDNNGetDataType<YT>(), y_strides);
auto out_md = memory::desc(
out_ddims, phi::funcs::OneDNNGetDataType<OT>(), out_strides);
const dnnl::primitive_attr matmul_attrs = CreateMatmulAttrs(ctx);
this->AcquireForwardPrimitiveDescriptor(matmul_attrs, x_md, y_md, out_md);
}
float ComputeOutputScale(const framework::ExecutionContext& ctx) {
float alpha = ctx.HasAttr("alpha") ? ctx.Attr<float>("alpha") : 1.0f;
if (ctx.HasAttr("Scale_x") && ctx.HasAttr("Scale_y") &&
ctx.HasAttr("Scale_out")) {
float scale_x = ctx.Attr<float>("Scale_x");
float scale_y = ctx.Attr<float>("Scale_y");
bool force_fp32_out = ctx.HasAttr("force_fp32_output")
? ctx.Attr<bool>("force_fp32_output")
: false;
float scale_out = force_fp32_out ? 1.f : ctx.Attr<float>("Scale_out");
alpha *= scale_out / (scale_x * scale_y);
}
return alpha;
}
dnnl::primitive_attr CreateMatmulAttrs(
const framework::ExecutionContext& ctx) {
dnnl::primitive_attr matmul_attrs;
dnnl::post_ops post_operations;
float scale_out = ComputeOutputScale(ctx);
if (scale_out != 1.0f) {
matmul_attrs.set_output_scales(0, {scale_out});
}
if (ctx.HasInput("ResidualData")) {
auto* residual_data = ctx.Input<phi::DenseTensor>("ResidualData");
auto residual_data_tz = phi::vectorize(residual_data->dims());
auto residual_data_md = memory::desc(residual_data_tz,
phi::funcs::OneDNNGetDataType<OT>(),
dnnl::memory::format_tag::any);
post_operations.append_binary(dnnl::algorithm::binary_add,
residual_data_md);
if (ctx.HasAttr("Scale_in_eltwise")) {
float sum_scale = scale_out / ctx.Attr<float>("Scale_in_eltwise");
post_operations.append_sum(sum_scale);
}
}
AppendActivation(ctx, post_operations);
if (ctx.HasAttr("fused_output_scale")) {
float scale_alpha = ctx.Attr<float>("fused_output_scale");
post_operations.append_eltwise(
1.0, dnnl::algorithm::eltwise_linear, scale_alpha, 0.0f);
}
matmul_attrs.set_post_ops(post_operations);
return matmul_attrs;
}
std::vector<int64_t> FakeTransposeStrides(
const std::vector<int64_t>& matmul_out_dims) const {
// fusing matmul_v2 + transpose + reshape guarantees that the output is 4D
// and the transpose axes are {0, 2, 1, 3}
std::vector<int64_t> transpose_axis = {0, 2, 1, 3};
std::vector<int64_t> fake_strides(transpose_axis.size());
int ndims = static_cast<int>(transpose_axis.size());
int total_stride = 1;
for (int i = ndims - 1; i >= 0; --i) {
fake_strides[transpose_axis[i]] = total_stride;
total_stride *= matmul_out_dims[transpose_axis[i]];
}
return fake_strides;
}
std::shared_ptr<memory> AcquireWeightsMemory(const phi::DenseTensor* input) {
const YT* input_data = input->data<YT>();
return this->AcquireMemoryFromPrimitive(
this->fwd_pd_->weights_desc(),
phi::funcs::to_void_cast<YT>(input_data));
}
std::shared_ptr<dnnl::memory> AcquireDstMemory(phi::DenseTensor* output) {
// We cannot use the base AcquireDstMemory, as it makes an allocation
// request based on the DST memory primitive size. This is fine in general,
// but in MatMul we have a primitive that covers only one batch of data and
// then shifts the pointer for every new batch. Hence the phi::DenseTensor
// size is bigger than the dst memory primitive size, so we would request
// less memory than is there, which triggers an assertion. As there is no
// 'any' format here, we can leave the default size of phi::DenseTensor as
// computed in ComputeInferShape
OT* ptr = output->mutable_data<OT>(this->place_);
return this->AcquireMemoryFromPrimitive(this->fwd_pd_->dst_desc(), ptr);
}
};
} // namespace platform
} // namespace paddle
@@ -178,7 +178,7 @@ void ComputeINT8(const OneDNNContext& dev_ctx,
const std::string& unique_name =
dev_ctx.GetInputsName("Input")[0] + dev_ctx.GetInputsName("Filter")[0];
PD_VISIT_FLOAT_AND_INT8_TYPES(
filter->dtype(), "ConvMKLDNNHandlerT", ([&] {
filter->dtype(), "ConvOneDNNHandlerT", ([&] {
onednn::ConvOneDNNHandlerT<T, data_t, T_out> handler(dev_ctx,
onednn_engine,
dev_ctx.GetPlace(),
......
@@ -40,7 +40,7 @@ class ConvOneDNNHandlerT
dnnl::convolution_backward_weights> {
public:
ConvOneDNNHandlerT(const OneDNNContext& dev_ctx,
const dnnl::engine mkldnn_engine,
const dnnl::engine onednn_engine,
Place cpu_place,
const phi::DenseTensor* input,
const phi::DenseTensor* filter,
@@ -63,7 +63,7 @@ class ConvOneDNNHandlerT
dnnl::convolution_backward_data,
dnnl::convolution_backward_weights>(
dev_ctx,
mkldnn_engine,
onednn_engine,
cpu_place,
funcs::CreateKey(
dev_ctx, phi::vectorize(input->dims()), unique_name)) {