Unverified commit 985f2a4a, authored by kangguangli, committed by GitHub

Transfer transfer_layout from fluid to phi (#45261)

* remove fluid kernel and activate phi kernel

* fix parameter error

* transfer mkldnn part

* modify header file path

* fix compile error

* transfer special case

* fix lod setting and special case for layout setting

* add testcase and refine code
Parent 9382159d
...@@ -18,6 +18,10 @@
#include "paddle/phi/core/kernel_context.h"
#include "paddle/phi/core/kernel_factory.h"
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/phi/backends/onednn/onednn_context.h"
#endif
namespace paddle {
namespace framework {
namespace interpreter {
...@@ -200,11 +204,27 @@ std::shared_ptr<OperatorBase> TransferLayout(const std::string& var_name,
framework::Scope* local_scope,
bool is_fetch_v2) {
#ifdef PADDLE_WITH_MKLDNN
// NOTE(zhiqiu): hot fix, follow the same logic in DataCopy() in fetch_op.cc
if (in_layout == framework::DataLayout::kMKLDNN &&
var_name == framework::GradVarName("Filter") && is_fetch_v2) {
VLOG(4) << "Match special case(Filter && fetch_v2) " << var_name;
out_layout = framework::DataLayout::kNCHW;
}
if (in_layout == framework::DataLayout::MKLDNN &&
out_layout != framework::DataLayout::MKLDNN) {
auto target_layout = phi::OneDNNContext::tls().get_cur_paddle_data_layout();
VLOG(4) << "TransDataLayoutFromMKLDNN: " << in_layout << "->"
<< target_layout;
if (out_layout == DataLayout::kNCHW &&
var_name == framework::GradVarName("Filter")) {
VLOG(4) << "Match special case(Filter) " << var_name;
target_layout = out_layout;
}
out_layout = target_layout;
}
#endif
// 1. Generate new_var_name and Initialize it
......
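For orientation, the layout selection this hunk adds reduces to a small decision rule. A minimal standalone sketch follows (simplified enum and a hypothetical helper name, not the actual Paddle types or signatures):

enum class Layout { kAnyLayout, kNCHW, kNHWC, kMKLDNN };

// Hypothetical distillation of the branch above: when a tensor leaves the
// MKLDNN domain, the target layout comes from the thread-local "current
// paddle layout" (OneDNNContext::tls()), unless NCHW was requested for the
// Filter gradient, which is kept as requested.
Layout PickTargetLayout(Layout in, Layout requested, bool is_filter_grad,
                        Layout tls_cur_layout) {
  if (in == Layout::kMKLDNN && requested != Layout::kMKLDNN) {
    Layout target = tls_cur_layout;
    if (requested == Layout::kNCHW && is_filter_grad) target = requested;
    return target;
  }
  return requested;
}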
...@@ -16,7 +16,11 @@
#include <string>
#include "paddle/fluid/framework/infershape_utils.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/phi/core/infermeta_utils.h"
#include "paddle/phi/infermeta/unary.h"
namespace paddle {
namespace framework {
...@@ -37,34 +41,6 @@ class TransferLayoutOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext *ctx) const override {
OP_INOUT_CHECK(ctx->HasInputs("X"), "Input", "X", "TransferLayout");
OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "TransferLayout");
auto dst_layout = ctx->Attrs().Get<int>("dst_layout");
auto low_bound = static_cast<int>(framework::DataLayout::kAnyLayout);
auto upper_bound = static_cast<int>(framework::DataLayout::kMKLDNN);
PADDLE_ENFORCE_GE(
dst_layout,
low_bound,
platform::errors::PreconditionNotMet(
"Required dst_layout >= %d, but received dst_layout = %d",
low_bound,
dst_layout));
PADDLE_ENFORCE_LE(
dst_layout,
upper_bound,
platform::errors::PreconditionNotMet(
"Required dst_layout <= %d, but received dst_layout = %d",
upper_bound,
dst_layout));
// TODO(Aurelius84): Out's ddim is different with X because they have
// different layout
ctx->SetOutputDim("Out", ctx->GetInputDim("X"));
ctx->ShareLoD("X", /*->*/ "Out");
}
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext &ctx) const override {
...@@ -142,18 +118,18 @@ class TransferLayoutOpProtoMaker : public framework::OpProtoAndCheckerMaker {
namespace ops = paddle::operators;
namespace plat = paddle::platform;
DECLARE_INFER_SHAPE_FUNCTOR(transfer_layout,
TransferLayoutInferShapeFunctor,
PD_INFER_META(phi::TransferLayoutInferMeta));
REGISTER_OPERATOR(
transfer_layout,
ops::TransferLayoutOp,
ops::TransferLayoutOpProtoMaker,
ops::TransferLayoutInferVarType,
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>,
TransferLayoutInferShapeFunctor);
// dtype is not important
REGISTER_OP_CPU_KERNEL_FUNCTOR(transfer_layout,
float,
ops::TransferLayoutKernel);
REGISTER_OP_VERSION(transfer_layout)
.AddCheckpoint(R"ROC(refine transfer_layout, add src_layout attribute)ROC",
paddle::framework::compatible::OpVersionDesc().NewAttr(
......
...@@ -3649,11 +3649,13 @@ void TraceInferMeta(
}
void TransferLayoutInferMeta(const MetaTensor& x,
DataLayout layout,
int src_layout,
int dst_layout,
MetaTensor* out) {
out->set_dims(x.dims());
out->set_dtype(x.dtype());
out->set_layout(layout);
out->set_layout(static_cast<DataLayout>(dst_layout));
out->share_lod(x);
}
void TransposeInferMeta(const MetaTensor& x,
......
...@@ -519,7 +519,8 @@ void TraceInferMeta(
const MetaTensor& x, int offset, int axis1, int axis2, MetaTensor* out);
void TransferLayoutInferMeta(const MetaTensor& x,
DataLayout layout,
int src_layout,
int dst_layout,
MetaTensor* out);
void TransposeInferMeta(const MetaTensor& x,
......
...@@ -66,7 +66,8 @@ set(COMMON_KERNEL_DEPS
phi_dynload_warpctc
sequence_padding
sequence_scale
fft)
fft
phi_data_layout_transform)
set(COMMON_KERNEL_DEPS
${COMMON_KERNEL_DEPS}
......
...@@ -17,6 +17,11 @@ math_library(segment_pooling)
math_library(sequence2batch)
math_library(matrix_solve DEPS dense_tensor eigen3 blas math_function)
cc_library(
phi_data_layout_transform
SRCS data_layout_transform.cc
DEPS tensor)
if(WITH_GPU OR WITH_ROCM)
if(MKL_FOUND AND WITH_ONEMKL)
math_library(fft spectral_op.cu DEPS dynload_cuda dynload_mklrt
......
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/funcs/data_layout_transform.h"
#include "glog/logging.h"
#include "paddle/fluid/platform/profiler/event_tracing.h"
#include "paddle/phi/backends/all_context.h"
#include "paddle/phi/backends/onednn/onednn_context.h"
#include "paddle/phi/common/bfloat16.h"
#include "paddle/phi/common/layout.h"
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/dense_tensor.h"
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/phi/kernels/funcs/onednn/mkldnn_helper.h"
#include "paddle/phi/kernels/funcs/onednn/mkldnn_reuse.h"
#endif
namespace phi {
namespace funcs {
#ifdef PADDLE_WITH_MKLDNN
void* GetDataFromTensor(const DenseTensor& tensor,
dnnl::memory::data_type type) {
switch (type) {
case dnnl::memory::data_type::f32:
return to_void_cast(tensor.data<float>());
case dnnl::memory::data_type::s8:
return to_void_cast(tensor.data<int8_t>());
case dnnl::memory::data_type::u8:
return to_void_cast(tensor.data<unsigned char>());
case dnnl::memory::data_type::s32:
return to_void_cast(tensor.data<int32_t>());
case dnnl::memory::data_type::bf16:
return to_void_cast(tensor.data<dtype::bfloat16>());
default:
PADDLE_THROW(errors::InvalidArgument("Wrong mkldnn type provided."));
}
}
void innerTransDataLayoutFromMKLDNN(DataLayout in_layout,
DataLayout out_layout,
const DenseTensor& in,
DenseTensor* out,
Place place,
bool always_copy) {
// Set default as NCHW in case not specified
out_layout = out_layout == DataLayout::ANY ? DataLayout::NCHW : out_layout;
auto& pool = DeviceContextPool::Instance();
auto* dev_ctx = dynamic_cast<OneDNNContext*>(pool.Get(place));
auto& cpu_engine = dev_ctx->GetEngine();
auto in_tz = vectorize<int64_t>(in.dims());
auto out_tz = in_tz;
auto in_type = ToMKLDNNDataType(in.dtype());
PADDLE_ENFORCE_NE(
in_type,
MKLDNNDataType::undef,
errors::InvalidArgument("Input tensor type (%s) is not supported.",
in.dtype()));
auto out_format =
MKLDNNFormatForSize(in_tz.size(), ToMKLDNNFormat(out_layout));
dnnl::memory::desc out_mem_desc(out_tz, in_type, out_format);
// The output tensor has the same dims as the input; reorder doesn't change dims
out->set_mem_desc(out_mem_desc);
out->Resize(in.dims());
if ((in.mem_desc() != out->mem_desc()) || always_copy) {
void* in_data = GetDataFromTensor(in, in_type);
ReorderMKLDNNHandler handler(in_tz, in.dtype(), in_type, cpu_engine);
auto reorder_src_memory_p =
handler.AcquireSrcMemory(in.mem_desc(), in_data);
auto reorder_dst_memory_p =
handler.AcquireDstMemory(out, out->mem_desc(), place);
auto reorder_p =
handler.AcquireReorder(reorder_dst_memory_p, reorder_src_memory_p);
auto& astream = OneDNNContext::tls().get_stream();
::paddle::platform::RecordEvent record_reorder(
"ext_reorder",
::paddle::platform::TracerEventType::UserDefined,
2,
::paddle::platform::EventRole::kUniqueOp);
reorder_p->execute(astream, *reorder_src_memory_p, *reorder_dst_memory_p);
astream.wait();
} else {
out->ShareDataWith(in);
}
// For the expected NHWC data format we need to reshape the output tensor,
// as the MKL-DNN description was in NCHW while paddle expects NHWC
MatchShapeToLayout(out, in_layout, out_layout);
out->set_layout(DataLayout::kNCHW);
VLOG(10) << "out->layout: " << out->layout() << " in->dims: " << in.dims()
<< " out->dims: " << out->dims();
// reset format since the out tensor will be fed to a non-MKLDNN OpKernel
out->set_format(MKLDNNMemoryFormat::undef);
}
#endif
} // namespace funcs
} // namespace phi
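The reorder path above ultimately drives a plain oneDNN reorder between two memory descriptors that share dims but differ in physical format. A minimal standalone sketch, using only the raw dnnl API (no Paddle types; assumes oneDNN is available):

#include <vector>
#include "dnnl.hpp"

int main() {
  dnnl::engine eng(dnnl::engine::kind::cpu, 0);
  dnnl::stream stream(eng);

  // Same logical dims (N, C, H, W); only the physical layout differs.
  dnnl::memory::dims dims = {2, 3, 4, 5};
  std::vector<float> src_buf(2 * 3 * 4 * 5, 1.0f), dst_buf(src_buf.size());

  dnnl::memory src({dims, dnnl::memory::data_type::f32,
                    dnnl::memory::format_tag::nchw}, eng, src_buf.data());
  dnnl::memory dst({dims, dnnl::memory::data_type::f32,
                    dnnl::memory::format_tag::nhwc}, eng, dst_buf.data());

  // One primitive: copy src into dst, converting nchw -> nhwc on the way.
  dnnl::reorder(src, dst).execute(stream, src, dst);
  stream.wait();
  return 0;
}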
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#ifdef PADDLE_WITH_MKLDNN
#include "dnnl.hpp" // NOLINT
#endif
#include "paddle/phi/common/data_type.h"
#include "paddle/phi/common/layout.h"
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/dense_tensor.h"
namespace phi {
namespace funcs {
#ifdef PADDLE_WITH_MKLDNN
using MKLDNNDataType = dnnl::memory::data_type;
using MKLDNNMemoryFormat = dnnl::memory::format_tag;
inline MKLDNNMemoryFormat ToMKLDNNFormat(const DataLayout& layout) {
switch (layout) {
case DataLayout::NHWC:
return MKLDNNMemoryFormat::nhwc;
case DataLayout::NCHW:
return MKLDNNMemoryFormat::nchw;
case DataLayout::NCDHW:
return MKLDNNMemoryFormat::ncdhw;
case DataLayout::NDHWC:
return MKLDNNMemoryFormat::ndhwc;
default:
PADDLE_THROW(errors::InvalidArgument(
"Fail to convert layout %s to MKLDNN format.",
::paddle::framework::DataLayoutToString(layout)));
}
}
// Caution: proto::VarType::Type -> phi::DataType after transfer
inline MKLDNNDataType ToMKLDNNDataType(DataType type) {
static std::unordered_map<DataType, MKLDNNDataType> dict{
{DataType::FLOAT32, MKLDNNDataType::f32},
{DataType::INT8, MKLDNNDataType::s8},
{DataType::UINT8, MKLDNNDataType::u8},
{DataType::INT32, MKLDNNDataType::s32},
{DataType::BFLOAT16, MKLDNNDataType::bf16}};
auto iter = dict.find(type);
if (iter != dict.end()) return iter->second;
return MKLDNNDataType::undef;
}
void innerTransDataLayoutFromMKLDNN(DataLayout in_layout,
DataLayout out_layout,
const DenseTensor& in,
DenseTensor* out,
Place place,
bool always_copy = false);
void* GetDataFromTensor(const DenseTensor& tensor, MKLDNNDataType type);
#endif
} // namespace funcs
} // namespace phi
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "dnnl.hpp" // NOLINT
#include "glog/logging.h"
#include "paddle/phi/backends/onednn/onednn_context.h"
#include "paddle/phi/common/layout.h"
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/dense_tensor.h"
namespace phi {
namespace funcs {
using MKLDNNMemoryFormat = dnnl::memory::format_tag;
using MKLDNNDataType = dnnl::memory::data_type;
template <typename Type>
void* to_void_cast(const Type* t) {
return static_cast<void*>(const_cast<Type*>(t));
}
inline MKLDNNMemoryFormat MKLDNNFormatForSize(size_t dims_size,
MKLDNNMemoryFormat data_format) {
if (dims_size == 1) {
return MKLDNNMemoryFormat::x;
} else if (dims_size == 2) {
return MKLDNNMemoryFormat::nc;
} else if (dims_size == 3) {
if (data_format == MKLDNNMemoryFormat::nchw) {
return MKLDNNMemoryFormat::ncw;
} else if (data_format == MKLDNNMemoryFormat::nhwc) {
return MKLDNNMemoryFormat::nwc;
}
} else if (dims_size == 4) {
if (data_format == MKLDNNMemoryFormat::goihw) {
return MKLDNNMemoryFormat::oihw;
}
} else if (dims_size == 5) {
if (data_format == MKLDNNMemoryFormat::goidhw) {
return MKLDNNMemoryFormat::oidhw;
}
if (data_format == MKLDNNMemoryFormat::nchw) {
return MKLDNNMemoryFormat::ncdhw;
} else if (data_format == MKLDNNMemoryFormat::nhwc) {
return MKLDNNMemoryFormat::ndhwc;
}
} else if (dims_size == 6) {
if (data_format == MKLDNNMemoryFormat::nchw) {
return MKLDNNMemoryFormat::abcdef;
}
}
return data_format;
}
inline void MatchShapeToLayout(DenseTensor* tensor_in,
DataLayout from,
DataLayout to) {
auto print_dims = [](const std::vector<int>& dims) {
std::ostringstream oss;
if (!dims.empty()) {
oss << "[";
// Convert all but the last element to avoid a trailing ","
std::copy(
dims.begin(), dims.end() - 1, std::ostream_iterator<int>(oss, ","));
// Now add the last element with no delimiter
oss << dims.back() << "]";
}
return oss.str();
};
// In these data layouts, the channel dimension is either in the 2nd position
// (nChw) or last (nhwC), so for dim==2 the layouts are identical and nothing
// needs to be done. Similarly for dim==1, where only one layout is possible.
if (tensor_in->dims().size() < 3) {
VLOG(3) << "Keeping MKLDNN/NHWC/NDHWC output_shape"
<< print_dims(phi::vectorize<int>(tensor_in->dims()));
return;
}
switch (from) {
case DataLayout::MKLDNN:
if ((to == DataLayout::NHWC) || (to == DataLayout::NDHWC)) {
auto dims = phi::vectorize<int>(tensor_in->dims());
std::rotate(dims.begin() + 1, dims.begin() + 2, dims.end());
tensor_in->Resize(phi::make_ddim(dims));
VLOG(3) << "Rotating Shape from: MKLDNN to: NHWC/NDHWC output_shape"
<< print_dims(dims);
}
break;
case DataLayout::NHWC:
case DataLayout::NDHWC:
if (to == DataLayout::MKLDNN) {
auto dims = phi::vectorize<int>(tensor_in->dims());
std::rotate(dims.begin() + 1, dims.end() - 1, dims.end());
tensor_in->Resize(phi::make_ddim(dims));
VLOG(3) << "Rotating Shape from: NHWC/NDHWC to: MKLDNN output_shape"
<< print_dims(dims);
}
break;
default:
break;
}
}
struct mkldnn_dummy_primitive {
struct primitive_desc {};
struct desc {};
};
inline dnnl::memory::desc MKLDNNMemDesc(const std::vector<int64_t>& dims,
dnnl::memory::data_type data_type,
MKLDNNMemoryFormat format) {
return dnnl::memory::desc({dims}, data_type, format);
}
} // namespace funcs
} // namespace phi
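The shape rotation in MatchShapeToLayout above is easy to sanity-check in isolation. A self-contained sketch of the two std::rotate calls it uses:

#include <algorithm>
#include <cassert>
#include <vector>

int main() {
  std::vector<int> dims = {2, 3, 4, 5};  // logical NCHW dims
  // MKLDNN -> NHWC/NDHWC: move C to the back: {2, 3, 4, 5} -> {2, 4, 5, 3}.
  std::rotate(dims.begin() + 1, dims.begin() + 2, dims.end());
  assert((dims == std::vector<int>{2, 4, 5, 3}));
  // NHWC/NDHWC -> MKLDNN: move C back to position 1: -> {2, 3, 4, 5}.
  std::rotate(dims.begin() + 1, dims.end() - 1, dims.end());
  assert((dims == std::vector<int>{2, 3, 4, 5}));
  return 0;
}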
...@@ -20,11 +20,12 @@ limitations under the License. */
#include <utility>
#include <vector>
#include "paddle/fluid/framework/data_layout_transform.h"
#include "paddle/fluid/platform/profiler/event_tracing.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/phi/backends/onednn/onednn_context.h" #include "paddle/phi/backends/onednn/onednn_context.h"
#include "paddle/phi/common/data_type.h"
#include "paddle/phi/common/place.h" #include "paddle/phi/common/place.h"
#include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/kernels/funcs/onednn/mkldnn_helper.h"
namespace phi {
namespace funcs {
...@@ -33,10 +34,12 @@ using user_function = std::function<std::shared_ptr<float>(const float*)>;
using memory = dnnl::memory;
using Place = phi::Place;
using MKLDNNMemoryFormat = dnnl::memory::format_tag;
template <typename T,
typename TForward,
typename TBackward = paddle::platform::mkldnn_dummy_primitive,
typename TBackward = mkldnn_dummy_primitive,
typename TBackward_params = paddle::platform::mkldnn_dummy_primitive>
typename TBackward_params = mkldnn_dummy_primitive>
class MKLDNNHandlerNoCachingT {
public:
MKLDNNHandlerNoCachingT(dnnl::engine engine, Place cpu_place)
...@@ -62,8 +65,8 @@ class MKLDNNHandlerNoCachingT {
std::shared_ptr<dnnl::memory> AcquireSrcMemory(const DenseTensor* input) {
const T* input_data = input->data<T>();
return this->AcquireMemoryFromPrimitive(
fwd_pd_->src_desc(), paddle::platform::to_void_cast<T>(input_data));
return this->AcquireMemoryFromPrimitive(fwd_pd_->src_desc(),
to_void_cast<T>(input_data));
}
template <typename T_out = T>
...@@ -81,16 +84,15 @@ class MKLDNNHandlerNoCachingT {
template <typename T_out = T>
std::shared_ptr<dnnl::memory> AcquireDstMemory(const DenseTensor* output) {
const T_out* output_data = output->data<T_out>();
return this->AcquireMemoryFromPrimitive(
bwd_pd_->dst_desc(),
paddle::platform::to_void_cast<T_out>(output_data));
return this->AcquireMemoryFromPrimitive(bwd_pd_->dst_desc(),
to_void_cast<T_out>(output_data));
}
std::shared_ptr<dnnl::memory> AcquireDiffDstMemory(
const DenseTensor* diffdst) {
const T* ptr = diffdst->data<T>();
return this->AcquireMemoryFromPrimitive(
bwd_pd_->diff_dst_desc(), paddle::platform::to_void_cast<T>(ptr));
return this->AcquireMemoryFromPrimitive(bwd_pd_->diff_dst_desc(),
to_void_cast<T>(ptr));
}
std::shared_ptr<dnnl::memory> AcquireDiffSrcMemory(DenseTensor* diffsrc) {
...@@ -291,10 +293,110 @@ class ActivationMKLDNNHandler
std::shared_ptr<dnnl::memory> AcquireBackwardSrcMemory(
const DenseTensor* input) {
const T* input_data = input->data<T>();
return this->AcquireMemoryFromPrimitive(
this->bwd_pd_->src_desc(),
paddle::platform::to_void_cast<T>(input_data));
return this->AcquireMemoryFromPrimitive(this->bwd_pd_->src_desc(),
to_void_cast<T>(input_data));
}
};
class ReorderMKLDNNHandler {
public:
ReorderMKLDNNHandler(std::vector<int64_t>& dims, // NOLINT
DataType ptype,
dnnl::memory::data_type dtype,
dnnl::engine engine)
: dims_(dims),
ptype_(ptype),
ptype_dst_(ptype),
dtype_(dtype),
dtype_dst_(dtype),
engine_(engine) {}
ReorderMKLDNNHandler(std::vector<int64_t>& dims, // NOLINT
DataType ptype,
dnnl::memory::data_type dtype,
DataType ptype_dst,
dnnl::memory::data_type dtype_dst,
dnnl::engine engine)
: dims_(dims),
ptype_(ptype),
ptype_dst_(ptype_dst),
dtype_(dtype),
dtype_dst_(dtype_dst),
engine_(engine) {}
std::shared_ptr<dnnl::memory> AcquireSrcMemory(const dnnl::memory::desc& md,
void* ptr) {
return std::make_shared<dnnl::memory>(md, engine_, ptr);
}
std::shared_ptr<dnnl::memory> AcquireSrcMemory(const MKLDNNMemoryFormat& fmt,
void* ptr) {
auto md = dnnl::memory::desc(dims_, dtype_, fmt);
return std::make_shared<dnnl::memory>(md, engine_, ptr);
}
std::shared_ptr<dnnl::memory> AcquireSubmemory(
const std::vector<int64_t>& dims,
const std::vector<int64_t>& offset,
const std::shared_ptr<dnnl::memory>& mem_p) {
auto sub_md = mem_p->get_desc().submemory_desc(dims, {offset});
auto sub_mem_p = std::make_shared<dnnl::memory>(
sub_md, engine_, mem_p->get_data_handle());
return sub_mem_p;
}
std::shared_ptr<dnnl::memory> AcquireDstMemory(DenseTensor* output,
const MKLDNNMemoryFormat& fmt,
Place place) {
auto dst_md = MKLDNNMemDesc(dims_, dtype_dst_, fmt);
auto dst_data = output->mutable_data(place, ptype_dst_, dst_md.get_size());
return std::make_shared<dnnl::memory>(dst_md, engine_, dst_data);
} }
std::shared_ptr<dnnl::memory> AcquireDstMemory(
DenseTensor* output, const dnnl::memory::desc& src_md, Place place) {
if (ptype_dst_ == ptype_) {
auto dst_data =
output->mutable_data(place, ptype_dst_, src_md.get_size());
return std::make_shared<dnnl::memory>(src_md, engine_, dst_data);
} else {
auto dst_md = src_md;
dst_md.data.data_type = static_cast<dnnl_data_type_t>(dtype_dst_);
auto dst_data =
output->mutable_data(place, ptype_dst_, dst_md.get_size());
return std::make_shared<dnnl::memory>(dst_md, engine_, dst_data);
}
}
std::shared_ptr<dnnl::memory> AcquireDstMemory(
DenseTensor* output,
const std::vector<int64_t>& dims,
const MKLDNNMemoryFormat& fmt,
Place place) {
auto dst_md = MKLDNNMemDesc(dims, dtype_dst_, fmt);
auto dst_data = output->mutable_data(place, ptype_dst_, dst_md.get_size());
return std::make_shared<dnnl::memory>(dst_md, engine_, dst_data);
}
std::shared_ptr<dnnl::reorder> AcquireReorder(
std::shared_ptr<dnnl::memory> dst_memory_p,
std::shared_ptr<dnnl::memory> src_memory_p) {
return std::make_shared<dnnl::reorder>(*(src_memory_p), *(dst_memory_p));
}
std::shared_ptr<dnnl::reorder> AcquireReorder(
std::shared_ptr<dnnl::memory> dst_memory_p,
std::shared_ptr<dnnl::memory> src_memory_p,
const dnnl::primitive_attr& attrs) {
return std::make_shared<dnnl::reorder>(
*(src_memory_p), *(dst_memory_p), attrs);
}
private:
std::vector<int64_t> dims_;
DataType ptype_, ptype_dst_;
dnnl::memory::data_type dtype_, dtype_dst_;
dnnl::engine engine_;
};
} // namespace funcs
......
...@@ -14,11 +14,17 @@ limitations under the License. */
#include "paddle/phi/kernels/transfer_layout_kernel.h"
#include <sstream>
#include <string>
#include "paddle/phi/backends/all_context.h" #include "paddle/phi/backends/all_context.h"
#include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/visit_type.h" #include "paddle/phi/core/visit_type.h"
#include "paddle/phi/kernels/funcs/data_layout_transform.h"
#include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/funcs/math_function.h"
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/phi/kernels/funcs/onednn/mkldnn_helper.h"
#endif
namespace phi {
std::vector<int> GetAxis(const DataLayout& from, const DataLayout& to) {
...@@ -46,7 +52,7 @@ void CastDataLayout(const Context& dev_ctx,
}
template <typename Context>
void TransferLayoutKernel(const Context& dev_ctx,
void TransferLayoutGeneral(const Context& dev_ctx,
const DenseTensor& x,
DataLayout dst_layout,
DenseTensor* out) {
...@@ -60,16 +66,110 @@ void TransferLayoutKernel(const Context& dev_ctx,
dst_dim[i] = src_dim[axis[i]];
}
out->ResizeAndAllocate(phi::make_ddim(dst_dim));
out->Resize(phi::make_ddim(dst_dim));
dev_ctx.Alloc(out, x.dtype());
PD_VISIT_ALL_TYPES(x.dtype(), "CastDataLayout", ([&] {
CastDataLayout<data_t, Context>(dev_ctx, x, axis, out);
}));
}
#ifdef PADDLE_WITH_MKLDNN
template <typename Context>
void TransferLayoutMKLDNN(const Context& dev_ctx,
const DenseTensor& x,
DataLayout src_layout,
DataLayout dst_layout,
DenseTensor* out) {
auto print_tensor_meta = [](const DenseTensor& x) {
std::ostringstream oss;
oss << "[";
oss << "layout:" << x.layout() << " ,";
oss << "dims:" << x.dims() << " ,";
if (x.IsInitialized()) oss << "place:" << x.place();
oss << "]";
return oss.str();
};
VLOG(10) << " x: " << print_tensor_meta(x);
VLOG(10) << " out: " << print_tensor_meta(*out) << " " << out;
// NOTE(zhiqiu): to handle the special case in ApplyDataTransform() in
// data_transfer.cc
if (!x.IsInitialized() && src_layout == DataLayout::MKLDNN &&
dst_layout == DataLayout::NHWC) {
VLOG(4) << src_layout << "->" << dst_layout << " " << x.layout();
out->Resize(x.dims());
out->set_layout(dst_layout);
funcs::MatchShapeToLayout(out, src_layout, dst_layout);
return;
}
if (src_layout != DataLayout::MKLDNN && dst_layout == DataLayout::MKLDNN) {
// Case1 - transform from Non-MKLDNN OPKernel to MKLDNN OPKernel
// Just set layout/format. No real transform occurs
auto out_format = funcs::MKLDNNFormatForSize(
x.dims().size(), funcs::ToMKLDNNFormat(src_layout));
out->ShareDataWith(x);
// For NHWC data we need to reshape the tensor, as MKL-DNN
// expects the dims described in NHWC order
if (src_layout == DataLayout::NHWC) {
VLOG(4) << "NHWC";
funcs::MatchShapeToLayout(out, src_layout, dst_layout);
OneDNNContext::tls().set_cur_paddle_data_layout(src_layout);
}
out->set_layout(DataLayout::MKLDNN);
out->set_format(out_format);
} else if (src_layout == DataLayout::MKLDNN &&
dst_layout != DataLayout::MKLDNN) {
// Case2 - transform from MKLDNN OPKernel to Non-MKLDNN OPKernel
// Do transform via MKLDNN lib
funcs::innerTransDataLayoutFromMKLDNN(
src_layout, dst_layout, x, out, dev_ctx.GetPlace());
} else if (src_layout == DataLayout::MKLDNN &&
dst_layout == DataLayout::MKLDNN) {
PADDLE_ENFORCE_NE(
src_layout,
dst_layout,
errors::PreconditionNotMet(
"No layout transform needed between two MKLDNN OPKernels."));
} else {
TransferLayoutGeneral<Context>(dev_ctx, x, dst_layout, out);
}
}
#endif
template <typename Context>
void TransferLayoutKernel(const Context& dev_ctx,
const DenseTensor& x,
int src_layout,
int dst_layout,
DenseTensor* out) {
PADDLE_ENFORCE_NE(src_layout,
dst_layout,
errors::PreconditionNotMet(
"No layout transform needed between same layout."));
VLOG(10) << "TransDataLayout from " << static_cast<DataLayout>(src_layout)
<< " -> " << static_cast<DataLayout>(dst_layout);
#ifdef PADDLE_WITH_MKLDNN
TransferLayoutMKLDNN<Context>(dev_ctx,
x,
static_cast<DataLayout>(src_layout),
static_cast<DataLayout>(dst_layout),
out);
#else
TransferLayoutGeneral<Context>(
dev_ctx, x, static_cast<DataLayout>(dst_layout), out);
#endif
}
} // namespace phi
PD_REGISTER_GENERAL_KERNEL(phi_transfer_layout,
PD_REGISTER_GENERAL_KERNEL(transfer_layout,
CPU,
ALL_LAYOUT,
phi::TransferLayoutKernel<phi::CPUContext>,
......
...@@ -23,7 +23,8 @@ namespace phi {
template <typename Context>
void TransferLayoutKernel(const Context& dev_ctx,
const DenseTensor& x,
DataLayout dst_layout,
int src_layout,
int dst_layout,
DenseTensor* out);
template <typename Context>
...@@ -32,7 +33,11 @@ DenseTensor TransferLayout(const Context& dev_ctx,
DataLayout dst_layout) {
phi::DenseTensor dense_out =
phi::Empty(dev_ctx, {x.dtype(), x.dims(), dst_layout});
TransferLayoutKernel<Context>(dev_ctx, x, dst_layout, &dense_out);
TransferLayoutKernel<Context>(dev_ctx,
x,
static_cast<int>(x.layout()),
static_cast<int>(dst_layout),
&dense_out);
return dense_out;
}
......
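A minimal usage sketch of the dev API declared above (it mirrors the unit test added at the end of this diff; the helper name ToNHWC is made up for illustration):

#include "paddle/phi/backends/all_context.h"
#include "paddle/phi/common/layout.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/kernels/transfer_layout_kernel.h"

// Hypothetical helper: produce the NHWC counterpart of a tensor through the
// phi dev API shown above; TransferLayout allocates the output and forwards
// the source layout taken from x itself.
phi::DenseTensor ToNHWC(const phi::CPUContext& dev_ctx,
                        const phi::DenseTensor& x) {
  return phi::TransferLayout(dev_ctx, x, phi::DataLayout::NHWC);
}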
...@@ -134,3 +134,8 @@ cc_test(
test_memcpy_dev_api
SRCS test_memcpy_dev_api.cc
DEPS phi phi_api_utils)
cc_test(
test_transfer_layout_dev_api
SRCS test_transfer_layout_dev_api.cc
DEPS phi phi_api_utils)
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gtest/gtest.h>
#include <memory>
#include "paddle/phi/api/lib/utils/allocator.h"
#include "paddle/phi/backends/all_context.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/device_context.h"
#include "paddle/phi/infermeta/unary.h"
#include "paddle/phi/kernels/transfer_layout_kernel.h"
namespace phi {
namespace tests {
#ifdef PADDLE_WITH_MKLDNN
TEST(DEV_API, transfer_layout) {
// 1. create tensor
const int n = 2;
const int c = 3;
const int h = 4;
const int w = 5;
DenseTensor x;
MetaTensor meta_x(&x);
meta_x.set_dtype(DataType::FLOAT32);
meta_x.set_layout(DataLayout::MKLDNN);
meta_x.set_dims(make_ddim({n, c, h, w}));
DenseTensor out;
// 2. test API
auto& pool = phi::DeviceContextPool::Instance();
auto place = phi::CPUPlace();
auto* dev_ctx = static_cast<const phi::CPUContext*>(pool.GetByPlace(place));
MetaTensor meta_out(&out);
TransferLayoutInferMeta(x,
static_cast<int>(x.layout()),
static_cast<int>(DataLayout::NHWC),
&meta_out);
TransferLayoutKernel<CPUContext>(*dev_ctx,
x,
static_cast<int>(x.layout()),
static_cast<int>(DataLayout::NHWC),
&out);
// 3. check result
ASSERT_EQ(out.dims(), make_ddim({n, h, w, c}));
ASSERT_EQ(out.dims().size(), 4);
ASSERT_EQ(out.meta().dtype, DataType::FLOAT32);
ASSERT_EQ(out.meta().layout, DataLayout::NHWC);
}
#endif
} // namespace tests
} // namespace phi