Unverified commit c923e6c9, authored by Chen Weihang, committed by GitHub

Adapting device-specific Extra Attributes for the PHI kernel (#46342)

* add extra attr property set

* add type_info for all context

* add onednn context to all context

* fix context compile error

* simplify conv kernel args

* pass runtime attr into dev_ctx

* fix macro error

* clear conv_grad_kernel extra args

* merge conv_grad_grad into conv_grad

* clear conv2d_grad_grad extra attrs

* clear yaml and eager extra attr

* fix conv1d error

* change to thread local

* fix npu compile failed

* try to fix windows compile failed

* add conv2d onednn phi kernel

* fix ci bugs (#36)

* fix compile bugs (#38)

* fix extra input transform bug (#39)

* support dynamic created attr (#40)

* reset extra info gen code

* rm conv_grad_grad kernel

* reimpl pass attr adapting

* add int attr support

* remove vector inputnames creating

* fix map at error

* Update paddle/phi/kernels/onednn/conv_grad_kernel.cc
Co-authored-by: Sławomir Siwek <slawomir.siwek@intel.com>

* remove useless extra attrs

* replace mkldnn_engine by onednn_engine
Co-authored-by: YuanRisheng <yuanrisheng@baidu.com>
Co-authored-by: Sławomir Siwek <slawomir.siwek@intel.com>
Parent: f82d7e3c
@@ -24,10 +24,7 @@ paddle::experimental::Tensor conv2d_ad_func(
     const paddle::experimental::Tensor& filter,
     std::vector<int> strides,
     std::vector<int> paddings,
-    std::string paddding_algorithm,
-    int groups,
+    std::string padding_algorithm,
     std::vector<int> dilations,
-    std::string data_format,
-    bool use_addto,
-    int workspace_size_MB,
-    bool exhaustive_search);
+    int groups,
+    std::string data_format);
@@ -29,13 +29,10 @@ paddle::experimental::Tensor conv2d_ad_func(
     const paddle::experimental::Tensor& filter,
     std::vector<int> strides,
     std::vector<int> paddings,
-    std::string paddding_algorithm,
-    int groups,
+    std::string padding_algorithm,
     std::vector<int> dilations,
-    std::string data_format,
-    bool use_addto,
-    int workspace_size_MB,
-    bool exhaustive_search) {
+    int groups,
+    std::string data_format) {
   // Dygraph Record Event
   paddle::platform::RecordEvent dygraph_entrance_record_event(
       "conv2d dygraph", paddle::platform::TracerEventType::Operator, 1);
@@ -64,13 +61,10 @@ paddle::experimental::Tensor conv2d_ad_func(
         new_filter,
         strides,
         paddings,
-        paddding_algorithm,
-        groups,
+        padding_algorithm,
         dilations,
-        data_format,
-        use_addto,
-        workspace_size_MB,
-        exhaustive_search);
+        groups,
+        data_format);
   }
 }
@@ -92,13 +86,10 @@ paddle::experimental::Tensor conv2d_ad_func(
         filter,
         strides,
         paddings,
-        paddding_algorithm,
-        groups,
+        padding_algorithm,
         dilations,
-        data_format,
-        use_addto,
-        workspace_size_MB,
-        exhaustive_search);
+        groups,
+        data_format);
     transformer->SetOutTensorLayout(&out);
     if (need_tune) {
       egr::Controller::Instance().EnableLayoutAutoTune();
@@ -119,13 +110,10 @@ paddle::experimental::Tensor conv2d_ad_func(
       filter,
       strides,
       paddings,
-      paddding_algorithm,
-      groups,
+      padding_algorithm,
       dilations,
-      data_format,
-      use_addto,
-      workspace_size_MB,
-      exhaustive_search);
+      groups,
+      data_format);
   // Check NaN and Inf if needed
   if (FLAGS_check_nan_inf) {
     egr::CheckTensorHasNanOrInf("conv2d", api_result);
@@ -157,13 +145,10 @@ paddle::experimental::Tensor conv2d_ad_func(
     // SetAttributes if needed
     grad_node->SetAttributestrides(strides);
     grad_node->SetAttributepaddings(paddings);
-    grad_node->SetAttributepaddding_algorithm(paddding_algorithm);
+    grad_node->SetAttributepadding_algorithm(padding_algorithm);
     grad_node->SetAttributegroups(groups);
     grad_node->SetAttributedilations(dilations);
     grad_node->SetAttributedata_format(data_format);
-    grad_node->SetAttributeuse_addto(use_addto);
-    grad_node->SetAttributeworkspace_size_MB(workspace_size_MB);
-    grad_node->SetAttributeexhaustive_search(exhaustive_search);
     // Set TensorWrappers for Forward Inputs if needed
     grad_node->SetTensorWrapperinput(input);
     grad_node->SetTensorWrapperfilter(filter);
......
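For orientation, this is what an eager-mode call looks like against the slimmed signature above; a sketch only (tensor setup elided, attribute values chosen for illustration):

    // The device-specific extras (use_addto, workspace_size_MB,
    // exhaustive_search) no longer appear at the call site; note that
    // `dilations` now precedes `groups`.
    paddle::experimental::Tensor out =
        conv2d_ad_func(input,
                       filter,
                       {1, 1},      // strides
                       {0, 0},      // paddings
                       "EXPLICIT",  // padding_algorithm
                       {1, 1},      // dilations
                       1,           // groups
                       "NCHW");     // data_format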
@@ -46,13 +46,10 @@ Conv2dGradNodeFinal::operator()(
   auto& grad_out = hooked_grads[0][0];
   auto& strides = this->strides_;
   auto& paddings = this->paddings_;
-  auto& paddding_algorithm = this->paddding_algorithm_;
+  auto& padding_algorithm = this->padding_algorithm_;
   auto& groups = this->groups_;
   auto& dilations = this->dilations_;
   auto& data_format = this->data_format_;
-  auto& use_addto = this->use_addto_;
-  auto& workspace_size_MB = this->workspace_size_MB_;
-  auto& exhaustive_search = this->exhaustive_search_;
   // Prepare Grad function call
   const auto& out_metas = OutputMeta();
@@ -87,13 +84,10 @@ Conv2dGradNodeFinal::operator()(
       grad_out,
       strides,
       paddings,
-      paddding_algorithm,
-      groups,
+      padding_algorithm,
       dilations,
+      groups,
       data_format,
-      use_addto,
-      workspace_size_MB,
-      exhaustive_search,
       api_output_0,
       api_output_1);
   // Check NaN and Inf id needed
@@ -134,13 +128,10 @@ Conv2dGradNodeFinal::operator()(
     // SetAttributes if needed
     grad_node->SetAttributestrides(strides);
     grad_node->SetAttributepaddings(paddings);
-    grad_node->SetAttributepaddding_algorithm(paddding_algorithm);
+    grad_node->SetAttributepadding_algorithm(padding_algorithm);
     grad_node->SetAttributegroups(groups);
     grad_node->SetAttributedilations(dilations);
     grad_node->SetAttributedata_format(data_format);
-    grad_node->SetAttributeuse_addto(use_addto);
-    grad_node->SetAttributeworkspace_size_MB(workspace_size_MB);
-    grad_node->SetAttributeexhaustive_search(exhaustive_search);
     // Set TensorWrappers for Forward Inputs if needed
     grad_node->SetTensorWrapperinput(input);
     grad_node->SetTensorWrapperfilter(filter);
@@ -215,13 +206,10 @@ Conv2dDoubleGradNodeFinal::operator()(
   auto& strides = this->strides_;
   auto& paddings = this->paddings_;
-  auto& paddding_algorithm = this->paddding_algorithm_;
+  auto& padding_algorithm = this->padding_algorithm_;
   auto& groups = this->groups_;
   auto& dilations = this->dilations_;
   auto& data_format = this->data_format_;
-  auto& use_addto = this->use_addto_;
-  auto& workspace_size_MB = this->workspace_size_MB_;
-  auto& exhaustive_search = this->exhaustive_search_;
   // Prepare Grad function call
   const auto& out_metas = OutputMeta();
@@ -261,13 +249,10 @@ Conv2dDoubleGradNodeFinal::operator()(
       grad_filter_grad_optional,
       strides,
       paddings,
-      paddding_algorithm,
-      groups,
+      padding_algorithm,
       dilations,
+      groups,
       data_format,
-      use_addto,
-      workspace_size_MB,
-      exhaustive_search,
       api_output_0,
       api_output_1,
       api_output_2);
......
@@ -63,8 +63,8 @@ class Conv2dGradNodeFinal : public egr::GradNodeBase {
   void SetAttributepaddings(const std::vector<int>& paddings) {
     paddings_ = paddings;
   }
-  void SetAttributepaddding_algorithm(const std::string& paddding_algorithm) {
-    paddding_algorithm_ = paddding_algorithm;
+  void SetAttributepadding_algorithm(const std::string& padding_algorithm) {
+    padding_algorithm_ = padding_algorithm;
   }
   void SetAttributegroups(const int& groups) { groups_ = groups; }
   void SetAttributedilations(const std::vector<int>& dilations) {
@@ -73,13 +73,6 @@ class Conv2dGradNodeFinal : public egr::GradNodeBase {
   void SetAttributedata_format(const std::string& data_format) {
     data_format_ = data_format;
   }
-  void SetAttributeuse_addto(const bool& use_addto) { use_addto_ = use_addto; }
-  void SetAttributeworkspace_size_MB(const int& workspace_size_MB) {
-    workspace_size_MB_ = workspace_size_MB;
-  }
-  void SetAttributeexhaustive_search(const bool& exhaustive_search) {
-    exhaustive_search_ = exhaustive_search;
-  }

  private:
   // TensorWrappers
@@ -89,13 +82,10 @@ class Conv2dGradNodeFinal : public egr::GradNodeBase {
   // Attributes
   std::vector<int> strides_;
   std::vector<int> paddings_;
-  std::string paddding_algorithm_;
+  std::string padding_algorithm_;
   int groups_;
   std::vector<int> dilations_;
   std::string data_format_;
-  bool use_addto_;
-  int workspace_size_MB_;
-  bool exhaustive_search_;
 };

 class Conv2dDoubleGradNodeFinal : public egr::GradNodeBase {
@@ -146,8 +136,8 @@ class Conv2dDoubleGradNodeFinal : public egr::GradNodeBase {
   void SetAttributepaddings(const std::vector<int>& paddings) {
     paddings_ = paddings;
   }
-  void SetAttributepaddding_algorithm(const std::string& paddding_algorithm) {
-    paddding_algorithm_ = paddding_algorithm;
+  void SetAttributepadding_algorithm(const std::string& padding_algorithm) {
+    padding_algorithm_ = padding_algorithm;
   }
   void SetAttributegroups(const int& groups) { groups_ = groups; }
   void SetAttributedilations(const std::vector<int>& dilations) {
@@ -156,13 +146,6 @@ class Conv2dDoubleGradNodeFinal : public egr::GradNodeBase {
   void SetAttributedata_format(const std::string& data_format) {
     data_format_ = data_format;
   }
-  void SetAttributeuse_addto(const bool& use_addto) { use_addto_ = use_addto; }
-  void SetAttributeworkspace_size_MB(const int& workspace_size_MB) {
-    workspace_size_MB_ = workspace_size_MB;
-  }
-  void SetAttributeexhaustive_search(const bool& exhaustive_search) {
-    exhaustive_search_ = exhaustive_search;
-  }

  private:
   // TensorWrappers
@@ -173,13 +156,10 @@ class Conv2dDoubleGradNodeFinal : public egr::GradNodeBase {
   // Attributes
   std::vector<int> strides_;
   std::vector<int> paddings_;
-  std::string paddding_algorithm_;
+  std::string padding_algorithm_;
   int groups_;
   std::vector<int> dilations_;
   std::string data_format_;
-  bool use_addto_;
-  int workspace_size_MB_;
-  bool exhaustive_search_;
 };

 class AddNGradNodeFinal : public egr::GradNodeBase {
......
@@ -32,8 +32,8 @@
 #include <valarray>
 #include <vector>

-#include "paddle/fluid/framework/expect.h"
 #include "paddle/fluid/platform/enforce.h"
+#include "paddle/phi/core/expect.h"

 namespace paddle {
 namespace framework {
......
@@ -30,7 +30,7 @@
 #include <utility>
 #include <vector>

-#include "paddle/fluid/framework/expect.h"
+#include "paddle/phi/core/expect.h"

 namespace paddle {
 namespace framework {
......
@@ -28,6 +28,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/unused_var_check.h"
 #include "paddle/fluid/framework/var_type.h"
 #include "paddle/fluid/operators/isfinite_op.h"
+#include "paddle/fluid/operators/ops_extra_info.h"
 #include "paddle/fluid/platform/device/device_wrapper.h"
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/profiler.h"
@@ -2269,7 +2270,8 @@ Scope* OperatorWithKernel::PrepareData(
     }

     std::unique_ptr<OpKernelType> new_expected_kernel_key = nullptr;
-    if (run_phi_kernel_ && in_def->backend != phi::Backend::ALL_BACKEND) {
+    if (run_phi_kernel_ && in_def != nullptr &&
+        in_def->backend != phi::Backend::ALL_BACKEND) {
       auto tensor_backend = phi::TransToPhiBackend(tensor_in->place());
       if ((in_def->backend != tensor_backend &&
            (in_def->backend != phi::Backend::GPUDNN ||
@@ -2388,7 +2390,6 @@ Scope* OperatorWithKernel::PrepareData(
             input_names.size(),
             input_defs.size()));
     for (size_t i = 0; i < input_defs.size(); ++i) {
-      const auto& input_defs = phi_kernel_->args_def().input_defs();
       auto& in_def = input_defs.at(i);
       std::string input_name = input_names[i];
       auto iter = ctx->inputs.find(input_name);
@@ -2400,6 +2401,22 @@ Scope* OperatorWithKernel::PrepareData(
           no_buffer_ins && no_buffer_ins->count(input_name) > 0;
       prepare_input_data(input_name, &ins_vector, &in_def, should_skip_input);
     }
+#ifdef PADDLE_WITH_MKLDNN
+    // For input that is Extra, only MKLDNN will use Extra Inputs
+    auto& extra_input_names =
+        paddle::operators::ExtraInfoUtils::Instance().GetExtraInputNamesMap(
+            Type());
+    for (const auto& input_name : extra_input_names) {
+      auto iter = ctx->inputs.find(input_name);
+      if (iter == ctx->inputs.end()) {
+        continue;
+      }
+      bool should_skip_input =
+          no_buffer_ins && no_buffer_ins->count(input_name) > 0;
+      std::vector<Variable*>& input_vars = iter->second;
+      prepare_input_data(input_name, &input_vars, nullptr, should_skip_input);
+    }
+#endif
   } else {
     for (auto& var_name_item : Inputs()) {
       bool should_skip_input =
@@ -2699,6 +2716,65 @@ phi::KernelSignature OperatorWithKernel::GetExpectedPhiKernelArgs(
   return (*arg_map_fn_)(arg_mapping_ctx);
 }

+static void SetDnnAttrIntoDeviceContext(
+    phi::DeviceContext* dev_ctx,
+    const Attribute& attr,
+    const std::string& attr_name,
+    const operators::ExtraAttrPropertySet& attr_propertys) {
+#ifdef PADDLE_WITH_MKLDNN
+  if (phi::OneDNNContext::classof(dev_ctx) &&
+      attr_propertys.Support(operators::ExtraAttrProperty::ONEDNN)) {
+    VLOG(4) << "Runtime attr `" << attr_name
+            << "` is passed to OneDNNContext.";
+    phi::OneDNNContext* one_dnn_ctx =
+        static_cast<phi::OneDNNContext*>(dev_ctx);
+    switch (AttrTypeID(attr)) {
+      case proto::AttrType::FLOAT:
+        one_dnn_ctx->SetDnnAttr(attr_name, PADDLE_GET_CONST(float, attr));
+        break;
+      case proto::AttrType::INT:
+        one_dnn_ctx->SetDnnAttr(attr_name, PADDLE_GET_CONST(int, attr));
+        break;
+      case proto::AttrType::STRING:
+        one_dnn_ctx->SetDnnAttr(attr_name,
+                                PADDLE_GET_CONST(std::string, attr));
+        break;
+      case proto::AttrType::INTS:
+        one_dnn_ctx->SetDnnAttr(attr_name,
+                                PADDLE_GET_CONST(std::vector<int>, attr));
+        break;
+      case proto::AttrType::FLOATS:
+        one_dnn_ctx->SetDnnAttr(attr_name,
+                                PADDLE_GET_CONST(std::vector<float>, attr));
+        break;
+      case proto::AttrType::BOOLEAN:
+        one_dnn_ctx->SetDnnAttr(attr_name, PADDLE_GET_CONST(bool, attr));
+        break;
+      default:
+        PADDLE_THROW(platform::errors::Unimplemented(
+            "Unsupported Attribute value type `%s` for phi.",
+            platform::demangle(attr.type().name())));
+    }
+  }
+#endif
+#ifdef PADDLE_WITH_CUDA
+  if (phi::GPUContext::classof(dev_ctx) &&
+      attr_propertys.Support(operators::ExtraAttrProperty::GPUDNN)) {
+    VLOG(4) << "Runtime attr `" << attr_name
+            << "` is passed to GPUDNNContext.";
+    phi::GPUContext* gpu_dnn_ctx = static_cast<phi::GPUContext*>(dev_ctx);
+    switch (AttrTypeID(attr)) {
+      case proto::AttrType::INT:
+        gpu_dnn_ctx->SetDnnAttr(attr_name, PADDLE_GET_CONST(int, attr));
+        break;
+      case proto::AttrType::BOOLEAN:
+        gpu_dnn_ctx->SetDnnAttr(attr_name, PADDLE_GET_CONST(bool, attr));
+        break;
+      default:
+        PADDLE_THROW(platform::errors::Unimplemented(
+            "Unsupported Attribute value type `%s` for phi.",
+            platform::demangle(attr.type().name())));
+    }
+  }
+#endif
+}
+
 void OperatorWithKernel::BuildPhiKernelContext(
     const RuntimeContext& ctx,
     platform::DeviceContext* dev_ctx,
@@ -2713,6 +2789,15 @@ void OperatorWithKernel::BuildPhiKernelContext(
   auto attr_defs = phi_kernel_->args_def().attribute_defs();
   auto output_defs = phi_kernel_->args_def().output_defs();

+#if defined(PADDLE_WITH_MKLDNN)
+  if (phi::OneDNNContext::classof(dev_ctx)) {
+    // OneDNN holds this op's input and output names; initialize them here.
+    phi::OneDNNContext* one_dnn_ctx =
+        static_cast<phi::OneDNNContext*>(dev_ctx);
+    one_dnn_ctx->SetInputsName(Inputs());
+    one_dnn_ctx->SetOutputsName(Outputs());
+  }
+#endif
+
   PADDLE_ENFORCE_EQ(input_names.size(),
                     input_defs.size(),
                     platform::errors::InvalidArgument(
@@ -2992,6 +3077,7 @@ void OperatorWithKernel::BuildPhiKernelContext(
         } break;
         default: {
           if (attr_iter == Attrs().end()) {
+            // TODO(chenweihang): remove this backup searching later
             attr_iter = RuntimeAttrs().find(attr_names[i]);
             PADDLE_ENFORCE_NE(attr_iter,
                               RuntimeAttrs().end(),
@@ -3075,6 +3161,63 @@ void OperatorWithKernel::BuildPhiKernelContext(
     }
   }
   VLOG(4) << "Done attributes";

+// For compatibility with Ops that carry extra attrs for a specific backend
+#if defined(PADDLE_WITH_MKLDNN) || defined(PADDLE_WITH_CUDA)
+  auto& runtime_attrs = RuntimeAttrs();
+  for (const auto& attr_iter : runtime_attrs) {
+    auto& attr_name = attr_iter.first;
+    auto& attr = attr_iter.second;
+    auto attr_propertys = paddle::operators::GetExtraAttrPropertys(attr_name);
+    SetDnnAttrIntoDeviceContext(dev_ctx, attr, attr_name, attr_propertys);
+  }
+  // TODO(chenweihang): Since the pass will still `SetAttr` in the OpDesc,
+  // we try to add these Attrs to the RuntimeAttrs, but these OpDesc will lose
+  // the RuntimeAttrs information in the process of converting the Graph to
+  // the Program, so additional record configuration would need to be
+  // introduced, which increases the cost of development and understanding.
+  // For the time being we therefore still read the attributes set by these
+  // passes from Attrs. In the future, it is necessary to clarify the
+  // positioning of RuntimeAttrs and expand related functions.
+  auto& attrs = Attrs();
+  for (const auto& attr_iter : attrs) {
+    auto& attr_name = attr_iter.first;
+    auto& attr = attr_iter.second;
+    auto attr_propertys = paddle::operators::GetExtraAttrPropertys(attr_name);
+    SetDnnAttrIntoDeviceContext(dev_ctx, attr, attr_name, attr_propertys);
+  }
+  VLOG(4) << "Done runtime attributes";
+#endif
+
+// For compatibility with Ops that carry extra inputs for the onednn backend
+#ifdef PADDLE_WITH_MKLDNN
+  if (phi::OneDNNContext::classof(dev_ctx)) {
+    phi::OneDNNContext* one_dnn_ctx =
+        static_cast<phi::OneDNNContext*>(dev_ctx);
+    auto& extra_input_names =
+        paddle::operators::ExtraInfoUtils::Instance().GetExtraInputNamesMap(
+            Type());
+    for (const auto& input_name : extra_input_names) {
+      auto it = ctx.inputs.find(input_name);
+      if (it == ctx.inputs.end() || it->second.size() == 0) {
+        one_dnn_ctx->SetDnnInput(input_name, nullptr);
+      } else {
+        auto ins_vector = it->second;
+        PADDLE_ENFORCE_EQ(
+            ins_vector.size(),
+            1UL,
+            phi::errors::InvalidArgument(
+                "OneDNN's extra input only allows one input tensor."));
+        auto* var = ins_vector[0];
+        PADDLE_ENFORCE_EQ(var->IsType<phi::DenseTensor>(),
+                          true,
+                          phi::errors::InvalidArgument(
+                              "OneDNN's extra input only can be DenseTensor."));
+        one_dnn_ctx->SetDnnInput(input_name, &(var->Get<phi::DenseTensor>()));
+      }
+    }
+  }
+  VLOG(4) << "Done runtime extra inputs";
+#endif
 }

 }  // namespace framework
......
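Downstream of this plumbing, a PHI kernel reads the stashed attribute back from its device context instead of taking it as a parameter. A minimal sketch, assuming OneDNNContext exposes HasDnnAttr/GetDnnAttr counterparts to the SetDnnAttr calls above (the kernel name here is illustrative only):

    void SomeOneDNNKernel(const phi::OneDNNContext& dev_ctx /* , ... */) {
      // Fall back to a default when the extra attr was not forwarded.
      const bool fuse_relu =
          dev_ctx.HasDnnAttr("fuse_relu")
              ? PADDLE_GET_CONST(bool, dev_ctx.GetDnnAttr("fuse_relu"))
              : false;
      (void)fuse_relu;  // would configure the oneDNN primitive descriptor
    }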
@@ -13,9 +13,9 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include "paddle/fluid/framework/convert_utils.h"
-#include "paddle/fluid/framework/expect.h"
 #include "paddle/fluid/operators/fused/fusion_gru_op.h"
 #include "paddle/fluid/operators/fused/mkldnn/fusion_rnn_mkldnn.h"
+#include "paddle/phi/core/expect.h"

 namespace paddle {
 namespace operators {
......
@@ -13,9 +13,9 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include "paddle/fluid/framework/convert_utils.h"
-#include "paddle/fluid/framework/expect.h"
 #include "paddle/fluid/operators/fused/fusion_lstm_op.h"
 #include "paddle/fluid/operators/fused/mkldnn/fusion_rnn_mkldnn.h"
+#include "paddle/phi/core/expect.h"

 namespace paddle {
 namespace operators {
......
@@ -14,11 +14,11 @@

 #include <tuple>

-#include "paddle/fluid/framework/expect.h"
 #include "paddle/fluid/operators/conv_op.h"
 #include "paddle/fluid/platform/cpu_info.h"
 #include "paddle/fluid/platform/mkldnn_helper.h"
 #include "paddle/fluid/platform/mkldnn_reuse.h"
+#include "paddle/phi/core/expect.h"
 #include "paddle/phi/core/visit_type.h"
@@ -1184,20 +1184,6 @@ class ConvMKLDNNGradOpKernel : public framework::OpKernel<T> {

 namespace ops = paddle::operators;

-REGISTER_OP_KERNEL(conv2d,
-                   MKLDNN,
-                   ::paddle::platform::CPUPlace,
-                   ops::ConvMKLDNNOpKernel<float>,
-                   ops::ConvMKLDNNOpKernel<paddle::platform::bfloat16>,
-                   ops::ConvMKLDNNOpKernel<uint8_t>,
-                   ops::ConvMKLDNNOpKernel<int8_t>);
-
-REGISTER_OP_KERNEL(conv2d_grad,
-                   MKLDNN,
-                   ::paddle::platform::CPUPlace,
-                   ops::ConvMKLDNNGradOpKernel<float>,
-                   ops::ConvMKLDNNGradOpKernel<paddle::platform::bfloat16>);
-
 REGISTER_OP_KERNEL(depthwise_conv2d,
                    MKLDNN,
                    ::paddle::platform::CPUPlace,
......
@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include "paddle/fluid/framework/expect.h"
 #include "paddle/fluid/platform/mkldnn_reuse.h"
+#include "paddle/phi/core/expect.h"

 namespace paddle {
 namespace operators {
......
@@ -36,7 +36,7 @@ PD_DECLARE_KERNEL(relu, OneDNN, ONEDNN);
 USE_OP_ITSELF(softmax);
 USE_OP_DEVICE_KERNEL(softmax, MKLDNN);
 USE_OP_ITSELF(conv2d);
-USE_OP_DEVICE_KERNEL(conv2d, MKLDNN);
+PD_DECLARE_KERNEL(conv2d, OneDNN, ONEDNN);

 namespace paddle {
 namespace operators {
......
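With the fluid registrations for conv2d/conv2d_grad deleted above, the oneDNN kernels are instead registered on the PHI side (the commit message mentions a new conv2d onednn phi kernel). Roughly along these lines; the functor name and dtype list are assumptions for illustration, not copied from the patch:

    PD_REGISTER_KERNEL(conv2d,           // op name
                       OneDNN,           // backend
                       ONEDNN,           // layout
                       phi::ConvKernel,  // assumed kernel functor
                       float,
                       phi::dtype::bfloat16,
                       uint8_t,
                       int8_t) {}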
@@ -14,11 +14,137 @@

 #pragma once

+#include <string>
+#include <unordered_map>
+#include <vector>
+
 #include "paddle/fluid/framework/attribute.h"

 namespace paddle {
 namespace operators {

+// This file is to be compatible with the bad design and
+// implementation of fluid in the past
+
+// Many operators in fluid have extra attributes, which are generally added
+// to implement some specific kernel selection and to meet the specialization
+// needs of a specific operation library like mkldnn or cudnn
+enum class ExtraAttrProperty : uint8_t {
+  // The attributes that are no longer used by any scene
+  DEPRECATED = 0,
+  // The attributes used for framework execution scheduling,
+  // such as `use_mkldnn`, `use_cudnn`, no need to save
+  SCHEDULE,
+  // The attributes for ONEDNN only, can be saved in OneDNNContext
+  ONEDNN,
+  // The attributes for GPUDNN only, can be saved in GPUContext
+  GPUDNN,
+  // Add necessary properties as needed
+};
+
+class ExtraAttrPropertySet final {
+ public:
+  constexpr ExtraAttrPropertySet() : bitset_(0) {}
+  constexpr ExtraAttrPropertySet(ExtraAttrProperty e)  // NOLINT
+      : bitset_(e == ExtraAttrProperty::DEPRECATED
+                    ? 0
+                    : 1ULL << (static_cast<uint8_t>(e) - 1)) {}
+
+  inline uint64_t bitset() const { return bitset_; }
+
+  bool inline Support(ExtraAttrProperty e) const {
+    // DEPRECATED ExtraAttr always return false
+    return static_cast<bool>(bitset_ & ExtraAttrPropertySet(e).bitset());
+  }
+  bool IsEmpty() const { return bitset_ == 0; }
+
+  ExtraAttrPropertySet operator|(const ExtraAttrPropertySet& other) const {
+    return ExtraAttrPropertySet(bitset_ | other.bitset());
+  }
+  ExtraAttrPropertySet operator&(const ExtraAttrPropertySet& other) const {
+    return ExtraAttrPropertySet(bitset_ & other.bitset());
+  }
+  ExtraAttrPropertySet operator-(const ExtraAttrPropertySet& other) const {
+    return ExtraAttrPropertySet(bitset_ & ~other.bitset());
+  }
+  ExtraAttrPropertySet operator^(const ExtraAttrPropertySet& other) const {
+    return ExtraAttrPropertySet(bitset_ ^ other.bitset());
+  }
+
+  bool operator==(const ExtraAttrPropertySet& other) const {
+    return bitset_ == other.bitset();
+  }
+
+ private:
+  constexpr ExtraAttrPropertySet(uint64_t bitset) : bitset_(bitset) {}
+  uint64_t bitset_;
+};
+
+const std::unordered_map<std::string, ExtraAttrPropertySet>
+    extra_attr_properties = {
+        // DEPRECATED attributes
+        {"use_quantizer", ExtraAttrProperty::DEPRECATED},
+        // SCHEDULE attributes
+        {"use_cudnn", ExtraAttrProperty::SCHEDULE},
+        {"use_mkldnn", ExtraAttrProperty::SCHEDULE},
+        // ONEDNN dedicated attributes
+        {"Bias", ExtraAttrProperty::ONEDNN},
+        {"data_format", ExtraAttrProperty::ONEDNN},
+        {"force_fp32_output", ExtraAttrProperty::ONEDNN},
+        {"fuse_activation", ExtraAttrProperty::ONEDNN},
+        {"fuse_activation_type", ExtraAttrProperty::ONEDNN},
+        {"fuse_activation_alpha", ExtraAttrProperty::ONEDNN},
+        {"fuse_activation_beta", ExtraAttrProperty::ONEDNN},
+        {"fuse_activation_scale", ExtraAttrProperty::ONEDNN},
+        {"fuse_alpha", ExtraAttrProperty::ONEDNN},
+        {"fuse_beta", ExtraAttrProperty::ONEDNN},
+        {"fuse_relu", ExtraAttrProperty::ONEDNN},
+        {"fuse_residual_connection", ExtraAttrProperty::ONEDNN},
+        {"fuse_with_relu", ExtraAttrProperty::ONEDNN},
+        {"fused_reshape_Out", ExtraAttrProperty::ONEDNN},
+        {"fused_transpose_Out", ExtraAttrProperty::ONEDNN},
+        {"fused_reshape_X", ExtraAttrProperty::ONEDNN},
+        {"fused_reshape_Y", ExtraAttrProperty::ONEDNN},
+        {"fused_transpose_X", ExtraAttrProperty::ONEDNN},
+        {"fused_transpose_Y", ExtraAttrProperty::ONEDNN},
+        {"mkldnn_data_type", ExtraAttrProperty::ONEDNN},
+        {"ResidualData", ExtraAttrProperty::ONEDNN},
+        {"scale_x", ExtraAttrProperty::ONEDNN},
+        {"scale_y", ExtraAttrProperty::ONEDNN},
+        {"scale_out", ExtraAttrProperty::ONEDNN},
+        {"Scale_in", ExtraAttrProperty::ONEDNN},
+        {"Scale_in_eltwise", ExtraAttrProperty::ONEDNN},
+        {"Scale_x", ExtraAttrProperty::ONEDNN},
+        {"Scale_y", ExtraAttrProperty::ONEDNN},
+        {"Scale_out", ExtraAttrProperty::ONEDNN},
+        {"Scale_weights", ExtraAttrProperty::ONEDNN},
+        {"x_data_format", ExtraAttrProperty::ONEDNN},
+        {"y_data_format", ExtraAttrProperty::ONEDNN},
+        // ONEDNN pass dedicated attributes
+        {"Activation_scale", ExtraAttrProperty::ONEDNN},
+        {"Bias_scales", ExtraAttrProperty::ONEDNN},
+        {"Output_shift_scale", ExtraAttrProperty::ONEDNN},
+        {"Sum_scale", ExtraAttrProperty::ONEDNN},
+        // GPUDNN dedicated attributes
+        {"exhaustive_search", ExtraAttrProperty::GPUDNN},
+        {"fuse_relu_before_depthwise_conv", ExtraAttrProperty::GPUDNN},
+        {"use_addto", ExtraAttrProperty::GPUDNN},
+        {"workspace_size_MB", ExtraAttrProperty::GPUDNN},
+        // Mixed-use attributes
+        {"is_test",
+         ExtraAttrPropertySet(ExtraAttrProperty::ONEDNN) |
+             ExtraAttrPropertySet(ExtraAttrProperty::GPUDNN)},
+};
+
+inline ExtraAttrPropertySet GetExtraAttrPropertys(
+    const std::string& attr_name) {
+  auto iter = extra_attr_properties.find(attr_name);
+  if (iter != extra_attr_properties.end()) {
+    return iter->second;
+  }
+  return ExtraAttrPropertySet();
+}
+
 template <typename T>
 struct ExtraAttrChecker {
   ExtraAttrChecker(const std::string& attr_name, T default_value)
@@ -71,6 +197,15 @@ class ExtraInfoUtils {
     return empty_extra_attrs_checker_;
   }

+  const std::vector<std::string>& GetExtraInputNamesMap(
+      const std::string& op_type) const {
+    auto iter = g_extra_input_names_map_.find(op_type);
+    if (iter != g_extra_input_names_map_.end()) {
+      return iter->second;
+    }
+    return empty_extra_input_names_;
+  }
+
  private:
   ExtraInfoUtils();
@@ -83,6 +218,12 @@ class ExtraInfoUtils {
       g_extra_attrs_checker_;
   std::vector<std::function<void(framework::AttributeMap*, bool)>>
       empty_extra_attrs_checker_{};
+
+  // TODO(chenweihang): move these extra inputs into op_compat.yaml
+  std::unordered_map<std::string, std::vector<std::string>>
+      g_extra_input_names_map_ = {{"conv2d", {"Bias", "ResidualData"}},
+                                  {"conv2d_grad", {"Bias"}}};
+  std::vector<std::string> empty_extra_input_names_;
 };

 }  // namespace operators
......
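To make the property table concrete, a small usage sketch (illustrative, not part of the patch):

    using paddle::operators::ExtraAttrProperty;
    using paddle::operators::GetExtraAttrPropertys;

    // "workspace_size_MB" is tagged GPUDNN above, so it is routed to the
    // GPU device context and never to OneDNNContext.
    auto props = GetExtraAttrPropertys("workspace_size_MB");
    assert(props.Support(ExtraAttrProperty::GPUDNN));
    assert(!props.Support(ExtraAttrProperty::ONEDNN));

    // "is_test" carries both properties, combined via operator|.
    auto mixed = GetExtraAttrPropertys("is_test");
    assert(mixed.Support(ExtraAttrProperty::ONEDNN) &&
           mixed.Support(ExtraAttrProperty::GPUDNN));

    // Unknown names yield an empty set, so such attrs are routed nowhere.
    assert(GetExtraAttrPropertys("not_an_extra_attr").IsEmpty());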
@@ -89,7 +89,9 @@ class MLUContext {
   DISABLE_COPY_AND_ASSIGN(MLUContext);
 };

-class MLUDeviceContext : public DeviceContext {
+class MLUDeviceContext
+    : public DeviceContext,
+      public phi::TypeInfoTraits<DeviceContext, MLUDeviceContext> {
  public:
   explicit MLUDeviceContext(MLUPlace place);
   virtual ~MLUDeviceContext();
@@ -148,6 +150,8 @@ class MLUDeviceContext : public DeviceContext {
     return thread_ctx_.at(this);
   }

+  static const char* name() { return "MLUDeviceContext"; }
+
  private:
   int compute_capability_;
   int driver_version_;
......
@@ -19,7 +19,6 @@ limitations under the License. */
 #include <set>

 #include "glog/logging.h"
-#include "paddle/fluid/framework/expect.h"
 #include "paddle/fluid/framework/generator.h"
 #include "paddle/fluid/memory/allocation/allocator_facade.h"
 #include "paddle/fluid/platform/device/device_wrapper.h"
@@ -28,6 +27,7 @@ limitations under the License. */
 #include "paddle/fluid/platform/profiler/event_tracing.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/core/allocator.h"
+#include "paddle/phi/core/expect.h"

 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
 #include "paddle/fluid/memory/allocation/cuda_device_context_allocator.h"
......
@@ -144,7 +144,9 @@ struct DefaultDeviceContextType<platform::CPUPlace> {
 // Graphcore IPU
 #ifdef PADDLE_WITH_IPU
-class IPUDeviceContext : public DeviceContext {
+class IPUDeviceContext
+    : public DeviceContext,
+      public phi::TypeInfoTraits<DeviceContext, IPUDeviceContext> {
  public:
   IPUDeviceContext() = delete;
   explicit IPUDeviceContext(IPUPlace place);
@@ -154,6 +156,8 @@ class IPUDeviceContext : public DeviceContext {
   /*! \brief  Wait for all operations completion in the stream. */
   void Wait() const override;

+  static const char* name() { return "IPUDeviceContext"; }
+
  private:
   IPUPlace place_;
 };
@@ -188,7 +192,9 @@ struct DefaultDeviceContextType<platform::XPUPlace> {
 #endif

 #ifdef PADDLE_WITH_ASCEND_CL
-class NPUDeviceContext : public DeviceContext {
+class NPUDeviceContext
+    : public DeviceContext,
+      public phi::TypeInfoTraits<DeviceContext, NPUDeviceContext> {
  public:
   explicit NPUDeviceContext(NPUPlace place);
   virtual ~NPUDeviceContext();
@@ -224,6 +230,8 @@ class NPUDeviceContext : public DeviceContext {
   // void WaitStreamCallback() const { return stream_->WaitCallback(); }

+  static const char* name() { return "NPUDeviceContext"; }
+
  private:
   NPUPlace place_;
   aclrtContext context_;
@@ -248,7 +256,9 @@ struct DefaultDeviceContextType<platform::NPUPlace> {
 };

 // Currently, NPUPinnedDeviceContext is only used to data copying.
-class NPUPinnedDeviceContext : public DeviceContext {
+class NPUPinnedDeviceContext
+    : public DeviceContext,
+      public phi::TypeInfoTraits<DeviceContext, NPUPinnedDeviceContext> {
  public:
   NPUPinnedDeviceContext();
   explicit NPUPinnedDeviceContext(NPUPinnedPlace place);
@@ -257,6 +267,8 @@ class NPUPinnedDeviceContext : public DeviceContext {
   Eigen::DefaultDevice* eigen_device() const;

+  static const char* name() { return "NPUPinnedDeviceContext"; }
+
  private:
   NPUPinnedPlace place_;
   std::unique_ptr<Eigen::DefaultDevice> eigen_device_;
@@ -276,7 +288,9 @@ struct DefaultDeviceContextType<platform::CUDAPlace> {
 };

 // Currently, CUDAPinnedDeviceContext is only used to data copying.
-class CUDAPinnedDeviceContext : public DeviceContext {
+class CUDAPinnedDeviceContext
+    : public DeviceContext,
+      public phi::TypeInfoTraits<DeviceContext, CUDAPinnedDeviceContext> {
  public:
   CUDAPinnedDeviceContext();
   explicit CUDAPinnedDeviceContext(CUDAPinnedPlace place);
@@ -285,6 +299,8 @@ class CUDAPinnedDeviceContext : public DeviceContext {
   Eigen::DefaultDevice* eigen_device() const;

+  static const char* name() { return "CUDAPinnedDeviceContext"; }
+
  private:
   CUDAPinnedPlace place_;
   std::unique_ptr<Eigen::DefaultDevice> eigen_device_;
......
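The TypeInfoTraits mixin added to each context above is what enables the classof-based dispatch used in operator.cc (e.g. phi::OneDNNContext::classof(dev_ctx)). A sketch of the pattern, assuming classof compares the type info registered via the static name():

    void DispatchOnContext(phi::DeviceContext* dev_ctx) {
      // classof() makes the downcast safe without dynamic_cast/RTTI.
      if (phi::CPUContext::classof(dev_ctx)) {
        auto* cpu_ctx = static_cast<phi::CPUContext*>(dev_ctx);
        (void)cpu_ctx;  // CPU-specific handling here
      } else if (phi::CustomContext::classof(dev_ctx)) {
        auto* custom_ctx = static_cast<phi::CustomContext*>(dev_ctx);
        (void)custom_ctx;  // plugin-device handling here
      }
    }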
@@ -122,77 +122,80 @@ using namespace ::phi::enforce;  // NOLINT
 #endif

 /*
- * Summary: This PADDLE_GET(_**) series macros are used to call paddle::get
- *   safely. paddle::get is not a completely safe api, although it will not
- *   go wrong in most cases, but in extreme cases, it may fail and directly
- *   throw a paddle::bad_variant_access const exception, without any stack
- *   information.
- *   This kind of problems is difficult to debug, so add these macros to
- *   enrich paddle::get error information. At the same time, we restrict
- *   the direct use of paddle::get by CI rule.
+ * Summary: This macro is used to get Variable or internal type
+ *   data (such as LoDTensor or SelectedRows) of the Input and
+ *   Output in op, generally used when call scope.FindVar(Input/
+ *   Output("Name")) or ctx.Input<LoDTensor>().
+ *   Firstly this macro check whether the obtained pointer is null,
+ *   and then return data if it is not null.
+ *
+ * Note: This macro is only suitable for specific scenarios and
+ *   does not intended to be widely used. If it cannot meet the
+ *   requirements, please use other PADDLE_ENFORCE** check macro.
  *
  * Parameters:
- *     __TYPE: the target variable type
- *     __VALUE: the target variable to get
+ *     __PTR: pointer
+ *     __ROLE: (string), Input or Output
+ *     __NAME: (string), Input or Output name
+ *     __OP_TYPE: (string), the op type
+ *
+ * Return: The data pointed to by the pointer.
  *
  * Examples:
- *   - unsafe writing: int x = paddle::get<int>(y);
- *   - safe writing: int x = PADDLE_GET(int, y);
- *
- * Note: GCC 4.8 cannot select right overloaded function here, so need
- *   to define different functions and macros here, after we upgreade
- *   CI gcc version, we can only define one PADDLE_GET macro.
+ *    GET_DATA_SAFELY(ctx.Input<LoDTensor>("X"), "Input", "X", "Mul");
  */
-namespace details {
-
-using namespace phi::enforce::details;  // NOLINT
-
-#define DEFINE_SAFE_PADDLE_GET(                                       \
-    __InputType, __OutputType, __OutputTypePtr, __FuncName)           \
-  template <typename OutputType, typename InputType>                  \
-  auto __FuncName(                                                    \
-      __InputType input, const char* expression, const char* file,    \
-      int line)                                                       \
-      ->typename std::conditional<std::is_pointer<InputType>::value,  \
-                                  __OutputTypePtr,                    \
-                                  __OutputType>::type {               \
-    try {                                                             \
-      return paddle::get<OutputType>(input);                          \
-    } catch (paddle::bad_variant_access const&) {                     \
-      HANDLE_THE_ERROR                                                \
-      throw ::phi::enforce::EnforceNotMet(                            \
-          phi::errors::InvalidArgument(                               \
-              "paddle::get failed, cannot get value "                 \
-              "(%s) by type %s, its type is %s.",                     \
-              expression,                                             \
-              phi::enforce::demangle(typeid(OutputType).name()),      \
-              phi::enforce::demangle(input.type().name())),           \
-          file,                                                       \
-          line);                                                      \
-      END_HANDLE_THE_ERROR                                            \
-    }                                                                 \
-  }
-
-DEFINE_SAFE_PADDLE_GET(InputType&, OutputType&, OutputType*, SafeBoostGet);
-DEFINE_SAFE_PADDLE_GET(const InputType&,
-                       const OutputType&,
-                       const OutputType*,
-                       SafeBoostGetConst);
-DEFINE_SAFE_PADDLE_GET(InputType&&,
-                       OutputType,
-                       OutputType*,
-                       SafeBoostGetMutable);
-
-}  // namespace details
-
-#define PADDLE_GET(__TYPE, __VALUE)                       \
-  paddle::platform::details::SafeBoostGet<__TYPE>(        \
-      __VALUE, #__VALUE, __FILE__, __LINE__)
-#define PADDLE_GET_CONST(__TYPE, __VALUE)                 \
-  paddle::platform::details::SafeBoostGetConst<__TYPE>(   \
-      __VALUE, #__VALUE, __FILE__, __LINE__)
-#define PADDLE_GET_MUTABLE(__TYPE, __VALUE)               \
-  paddle::platform::details::SafeBoostGetMutable<__TYPE>( \
-      __VALUE, #__VALUE, __FILE__, __LINE__)
+#define GET_DATA_SAFELY(__PTR, __ROLE, __NAME, __OP_TYPE)                   \
+  (([&]() -> std::add_lvalue_reference<decltype(*(__PTR))>::type {          \
+    auto* __ptr = (__PTR);                                                  \
+    if (UNLIKELY(nullptr == __ptr)) {                                       \
+      auto __summary__ = phi::errors::NotFound(                             \
+          "Unable to get %s data of %s %s in operator %s. "                 \
+          "Possible reasons are:\n"                                         \
+          "  1. The %s is not the %s of operator %s;\n"                     \
+          "  2. The %s has no corresponding variable passed in;\n"          \
+          "  3. The %s corresponding variable is not initialized.",         \
+          phi::demangle(                                                    \
+              typeid(std::add_lvalue_reference<decltype(*__ptr)>::type)     \
+                  .name()),                                                 \
+          __ROLE,                                                           \
+          __NAME,                                                           \
+          __OP_TYPE,                                                        \
+          __NAME,                                                           \
+          __ROLE,                                                           \
+          __OP_TYPE,                                                        \
+          __NAME,                                                           \
+          __NAME);                                                          \
+      auto __message__ = ::paddle::string::Sprintf(                         \
+          "%s\n  [Hint: pointer " #__PTR " should not be null.]",           \
+          __summary__.error_message());                                     \
+      __THROW_ERROR_INTERNAL__(                                             \
+          phi::ErrorSummary(__summary__.code(), __message__));              \
+    }                                                                       \
+    return *__ptr;                                                          \
+  })())
+
+/*
+ * Summary: This macro is used to check whether op has specified
+ * Input or Output Variables. Because op's Input and Output
+ * checking are written similarly, so abstract this macro.
+ *
+ * Parameters:
+ *     __EXPR: (bool), the bool expression
+ *     __ROLE: (string), Input or Output
+ *     __NAME: (string), Input or Output name
+ *     __OP_TYPE: (string), the op type
+ *
+ * Examples:
+ *    OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "Mul");
+ */
+#define OP_INOUT_CHECK(__EXPR, __ROLE, __NAME, __OP_TYPE)                    \
+  do {                                                                       \
+    PADDLE_ENFORCE_EQ(                                                       \
+        __EXPR,                                                              \
+        true,                                                                \
+        phi::errors::NotFound(                                               \
+            "No %s(%s) found for %s operator.", __ROLE, __NAME, __OP_TYPE)); \
+  } while (0)

 /** OTHER EXCEPTION AND ENFORCE **/
......
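The PADDLE_GET macros removed from this header are the ones SetDnnAttrIntoDeviceContext relies on earlier in the patch; the test change below suggests they now live under phi::enforce. A two-line reminder of what they guard against, for illustration only:

    paddle::framework::Attribute attr = 3;  // variant currently holds an int
    int v = PADDLE_GET_CONST(int, attr);    // ok: v == 3
    // PADDLE_GET_CONST(float, attr) would throw an enriched EnforceNotMet
    // instead of a bare paddle::bad_variant_access.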
@@ -528,10 +528,9 @@ struct CannotToStringType {
 };

 TEST(enforce, cannot_to_string_type) {
-  static_assert(
-      !paddle::platform::details::CanToString<CannotToStringType>::kValue,
-      "CannotToStringType must not be converted to string");
-  static_assert(paddle::platform::details::CanToString<int>::kValue,
+  static_assert(!phi::enforce::details::CanToString<CannotToStringType>::kValue,
+                "CannotToStringType must not be converted to string");
+  static_assert(phi::enforce::details::CanToString<int>::kValue,
                 "int can be converted to string");
   CannotToStringType obj1(3), obj2(4), obj3(3);
......
@@ -312,8 +312,8 @@
     func : conj

 - backward_op : conv2d_grad
-  forward : conv2d (Tensor input, Tensor filter, int[] strides, int[] paddings, str paddding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search) -> Tensor(out)
-  args : (Tensor input, Tensor filter, Tensor out_grad, int[] strides, int[] paddings, str paddding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search)
+  forward : conv2d (Tensor input, Tensor filter, int[] strides, int[] paddings, str padding_algorithm, int[] dilations, int groups, str data_format) -> Tensor(out)
+  args : (Tensor input, Tensor filter, Tensor out_grad, int[] strides, int[] paddings, str padding_algorithm, int[] dilations, int groups, str data_format)
   output : Tensor(input_grad), Tensor(filter_grad)
   infer_meta :
     func : GeneralBinaryGradInferMeta
@@ -324,8 +324,8 @@
   backward : conv2d_grad_grad

 - backward_op : conv2d_grad_grad
-  forward : conv2d_grad (Tensor input, Tensor filter, Tensor grad_out, int[] strides, int[] paddings, str paddding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search) -> Tensor(grad_input), Tensor(grad_filter)
-  args : (Tensor input, Tensor filter, Tensor grad_out, Tensor grad_input_grad, Tensor grad_filter_grad, int[] strides, int[] paddings, str paddding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search)
+  forward : conv2d_grad (Tensor input, Tensor filter, Tensor grad_out, int[] strides, int[] paddings, str padding_algorithm, int[] dilations, int groups, str data_format) -> Tensor(grad_input), Tensor(grad_filter)
+  args : (Tensor input, Tensor filter, Tensor grad_out, Tensor grad_input_grad, Tensor grad_filter_grad, int[] strides, int[] paddings, str padding_algorithm, int[] dilations, int groups, str data_format)
   output : Tensor(input_grad), Tensor(filter_grad), Tensor(grad_out_grad)
   infer_meta :
     func : GeneralTernaryGradInferMeta
@@ -357,8 +357,8 @@
   backward : conv2d_transpose_double_grad

 - backward_op : conv3d_grad
-  forward : conv3d (Tensor input, Tensor filter, int[] strides, int[] paddings, str paddding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search) -> Tensor(out)
-  args : (Tensor input, Tensor filter, Tensor out_grad, int[] strides, int[] paddings, str paddding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search)
+  forward : conv3d (Tensor input, Tensor filter, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search) -> Tensor(out)
+  args : (Tensor input, Tensor filter, Tensor out_grad, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search)
   output : Tensor(input_grad), Tensor(filter_grad)
   infer_meta :
     func : GeneralBinaryGradInferMeta
@@ -369,8 +369,8 @@
   backward : conv3d_grad_grad

 - backward_op : conv3d_grad_grad
-  forward : conv3d_grad (Tensor input, Tensor filter, Tensor grad_out, int[] strides, int[] paddings, str paddding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search) -> Tensor(grad_input), Tensor(grad_filter)
-  args : (Tensor input, Tensor filter, Tensor grad_out, Tensor grad_input_grad, Tensor grad_filter_grad, int[] strides, int[] paddings, str paddding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search)
+  forward : conv3d_grad (Tensor input, Tensor filter, Tensor grad_out, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search) -> Tensor(grad_input), Tensor(grad_filter)
+  args : (Tensor input, Tensor filter, Tensor grad_out, Tensor grad_input_grad, Tensor grad_filter_grad, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search)
   output : Tensor(input_grad), Tensor(filter_grad), Tensor(grad_out_grad)
   infer_meta :
     func : GeneralTernaryGradInferMeta
@@ -439,21 +439,21 @@
   optional : mask

 - backward_op : depthwise_conv2d_grad
-  forward : depthwise_conv2d (Tensor input, Tensor filter, int[] strides, int[] paddings, str paddding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search, bool fuse_relu, bool use_gpudnn) -> Tensor(out)
-  args : (Tensor input, Tensor filter, Tensor out_grad, int[] strides, int[] paddings, str paddding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search, bool fuse_relu, bool use_gpudnn)
+  forward : depthwise_conv2d (Tensor input, Tensor filter, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search, bool fuse_relu, bool use_gpudnn) -> Tensor(out)
+  args : (Tensor input, Tensor filter, Tensor out_grad, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search, bool fuse_relu, bool use_gpudnn)
   output : Tensor(input_grad), Tensor(filter_grad)
   infer_meta :
     func : GeneralBinaryGradInferMeta
     param : [input, filter]
   kernel :
     func : depthwise_conv2d_grad
-    param : [input, filter, out_grad, strides, paddings, paddding_algorithm, groups, dilations, data_format, use_addto, workspace_size_MB, exhaustive_search, fuse_relu]
+    param : [input, filter, out_grad, strides, paddings, padding_algorithm, groups, dilations, data_format, use_addto, workspace_size_MB, exhaustive_search, fuse_relu]
     use_gpudnn : use_gpudnn
   backward : depthwise_conv2d_grad_grad

 - backward_op : depthwise_conv2d_grad_grad
-  forward : depthwise_conv2d_grad (Tensor input, Tensor filter, Tensor grad_out, int[] strides, int[] paddings, str paddding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search, bool fuse_relu, bool use_gpudnn) -> Tensor(grad_input), Tensor(grad_filter)
-  args : (Tensor input, Tensor filter, Tensor grad_out, Tensor grad_input_grad, Tensor grad_filter_grad, int[] strides, int[] paddings, str paddding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search, bool fuse_relu)
+  forward : depthwise_conv2d_grad (Tensor input, Tensor filter, Tensor grad_out, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search, bool fuse_relu, bool use_gpudnn) -> Tensor(grad_input), Tensor(grad_filter)
+  args : (Tensor input, Tensor filter, Tensor grad_out, Tensor grad_input_grad, Tensor grad_filter_grad, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search, bool fuse_relu)
   output : Tensor(input_grad), Tensor(filter_grad), Tensor(grad_out_grad)
   infer_meta :
     func : GeneralTernaryGradInferMeta
......
@@ -454,7 +454,7 @@
   backward : conj_grad

 - op : conv2d
-  args : (Tensor input, Tensor filter, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search)
+  args : (Tensor input, Tensor filter, int[] strides, int[] paddings, str padding_algorithm, int[] dilations, int groups, str data_format)
   output : Tensor
   infer_meta :
     func : ConvInferMeta
@@ -474,10 +474,10 @@
   backward : conv2d_transpose_grad

 - op : conv3d
-  args : (Tensor input, Tensor filter, int[] strides, int[] paddings, str paddding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search)
+  args : (Tensor input, Tensor filter, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search)
   output : Tensor
   infer_meta :
-    func : ConvInferMeta
+    func : Conv3DInferMeta
   kernel :
     func : conv3d
     use_gpudnn : true
@@ -564,7 +564,7 @@
   args : (Tensor x, Tensor filter, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search, bool fuse_relu, bool use_gpudnn)
   output : Tensor(out)
   infer_meta :
-    func : ConvInferMeta
+    func : DepthwiseConvInferMeta
     param : [x, filter, strides, paddings, padding_algorithm, groups, dilations, data_format, use_addto, workspace_size_MB, exhaustive_search]
   kernel :
     func : depthwise_conv2d
......
...@@ -23,9 +23,8 @@ limitations under the License. */ ...@@ -23,9 +23,8 @@ limitations under the License. */
#include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/backends/custom/custom_context.h" #include "paddle/phi/backends/custom/custom_context.h"
#include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_context.h"
#ifdef PADDLE_WITH_XPU #include "paddle/phi/backends/onednn/onednn_context.h"
#include "paddle/phi/backends/xpu/xpu_context.h" #include "paddle/phi/backends/xpu/xpu_context.h"
#endif
#ifndef PADDLE_WITH_CUSTOM_KERNEL #ifndef PADDLE_WITH_CUSTOM_KERNEL
// TODO(wilber): DeviceContextPool needs to include a fluid file. // TODO(wilber): DeviceContextPool needs to include a fluid file.
......
...@@ -24,7 +24,8 @@ limitations under the License. */ ...@@ -24,7 +24,8 @@ limitations under the License. */
namespace phi { namespace phi {
class PADDLE_API CPUContext : public DeviceContext { class PADDLE_API CPUContext : public DeviceContext,
public TypeInfoTraits<DeviceContext, CPUContext> {
public: public:
CPUContext(); CPUContext();
CPUContext(CPUContext&&); CPUContext(CPUContext&&);
...@@ -34,6 +35,8 @@ class PADDLE_API CPUContext : public DeviceContext { ...@@ -34,6 +35,8 @@ class PADDLE_API CPUContext : public DeviceContext {
Eigen::DefaultDevice* eigen_device() const; Eigen::DefaultDevice* eigen_device() const;
const Place& GetPlace() const override; const Place& GetPlace() const override;
static const char* name() { return "CPUContext"; }
protected: protected:
// NOTE: External users manage resources. Used in inference scenarios. // NOTE: External users manage resources. Used in inference scenarios.
// The Set interface is for inference only, DeviceContext will mark the // The Set interface is for inference only, DeviceContext will mark the
......
...@@ -21,7 +21,8 @@ limitations under the License. */ ...@@ -21,7 +21,8 @@ limitations under the License. */
namespace phi { namespace phi {
class CustomContext : public DeviceContext { class CustomContext : public DeviceContext,
public TypeInfoTraits<DeviceContext, CustomContext> {
public: public:
explicit CustomContext(const CustomPlace&); explicit CustomContext(const CustomPlace&);
...@@ -35,6 +36,8 @@ class CustomContext : public DeviceContext { ...@@ -35,6 +36,8 @@ class CustomContext : public DeviceContext {
// Wait for all operations completion in the stream. // Wait for all operations completion in the stream.
void Wait() const override; void Wait() const override;
static const char* name() { return "CustomContext"; }
public: public:
// NOTE: DeviceContext hold resources. Used in training scenarios. // NOTE: DeviceContext hold resources. Used in training scenarios.
// The interface used by the training scene, DeviceContext will initialize // The interface used by the training scene, DeviceContext will initialize
......
...@@ -717,6 +717,23 @@ struct GPUContext::Impl { ...@@ -717,6 +717,23 @@ struct GPUContext::Impl {
} }
} }
bool HasDnnAttr(const std::string& attr_name) const {
return dnn_attrs_.count(attr_name) != 0UL;
}
const Attribute& GetDnnAttr(const std::string& attr_name) const {
auto iter = dnn_attrs_.find(attr_name);
PADDLE_ENFORCE_NE(
iter,
dnn_attrs_.end(),
phi::errors::NotFound("Attribute `%s` is not found in OneDNNContext."));
return iter->second;
}
void SetDnnAttr(const std::string& attr_name, Attribute attr) {
dnn_attrs_[attr_name] = attr;
}
// use one flag for all handles? // use one flag for all handles?
// they should be accessed consistently // they should be accessed consistently
bool owned_{false}; bool owned_{false};
...@@ -780,8 +797,15 @@ struct GPUContext::Impl { ...@@ -780,8 +797,15 @@ struct GPUContext::Impl {
Allocator* allocator_{nullptr}; // external resource. Allocator* allocator_{nullptr}; // external resource.
// An internal resource to initialize eigen_device. // An internal resource to initialize eigen_device.
std::unique_ptr<internal::EigenGpuStreamDevice> eigen_stream_{nullptr}; std::unique_ptr<internal::EigenGpuStreamDevice> eigen_stream_{nullptr};
// Holds some attributes only used by gpudnn kernel calculations.
// Because DeviceContext is a global singleton, thread safety must be
// ensured, so a thread_local variable is used.
static thread_local AttributeMap dnn_attrs_;
}; };
thread_local AttributeMap GPUContext::Impl::dnn_attrs_ = {};
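// Illustrative note (an assumption about the intended use, not part of this
// patch): because dnn_attrs_ is thread_local, every thread that prepares a
// gpudnn kernel works on its own attribute map, so concurrent injection does
// not race:
//
//   std::thread t1([&] { gpu_ctx.SetDnnAttr("use_addto", true); });
//   std::thread t2([&] { gpu_ctx.SetDnnAttr("use_addto", false); });
//   // each thread's subsequent kernel launch sees only its own value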
GPUContext::GPUContext(GPUContext&&) = default; GPUContext::GPUContext(GPUContext&&) = default;
GPUContext& GPUContext::operator=(GPUContext&&) = default; GPUContext& GPUContext::operator=(GPUContext&&) = default;
...@@ -1000,4 +1024,16 @@ void GPUContext::SetDriverVersion(int val) { impl_->driver_version_ = val; } ...@@ -1000,4 +1024,16 @@ void GPUContext::SetDriverVersion(int val) { impl_->driver_version_ = val; }
void GPUContext::SetRuntimeVersion(int val) { impl_->runtime_version_ = val; } void GPUContext::SetRuntimeVersion(int val) { impl_->runtime_version_ = val; }
bool GPUContext::HasDnnAttr(const std::string& attr_name) const {
return impl_->HasDnnAttr(attr_name);
}
const Attribute& GPUContext::GetDnnAttr(const std::string& attr_name) const {
return impl_->GetDnnAttr(attr_name);
}
void GPUContext::SetDnnAttr(const std::string& attr_name, Attribute attr) {
return impl_->SetDnnAttr(attr_name, std::move(attr));
}
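// Usage sketch (illustrative, mirroring the conv kernels later in this
// patch): a gpudnn kernel reads an extra attribute injected at runtime and
// falls back to a default when it was never set:
//
//   bool exhaustive_search =
//       dev_ctx.HasDnnAttr("exhaustive_search")
//           ? PADDLE_GET_CONST(bool, dev_ctx.GetDnnAttr("exhaustive_search"))
//           : false;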
} // namespace phi } // namespace phi
...@@ -24,6 +24,7 @@ limitations under the License. */ ...@@ -24,6 +24,7 @@ limitations under the License. */
#include "paddle/phi/backends/gpu/gpu_helper.h" #include "paddle/phi/backends/gpu/gpu_helper.h"
#include "paddle/phi/backends/gpu/gpu_info.h" #include "paddle/phi/backends/gpu/gpu_info.h"
#include "paddle/phi/common/place.h" #include "paddle/phi/common/place.h"
#include "paddle/phi/core/attribute.h"
#include "paddle/phi/core/device_context.h" #include "paddle/phi/core/device_context.h"
namespace phi { namespace phi {
...@@ -77,7 +78,8 @@ class DnnWorkspaceHandle { ...@@ -77,7 +78,8 @@ class DnnWorkspaceHandle {
std::unique_ptr<std::mutex> mtx_; std::unique_ptr<std::mutex> mtx_;
}; };
class PADDLE_API GPUContext : public DeviceContext { class PADDLE_API GPUContext : public DeviceContext,
public TypeInfoTraits<DeviceContext, GPUContext> {
public: public:
explicit GPUContext(const GPUPlace& place, bool init = true); explicit GPUContext(const GPUPlace& place, bool init = true);
...@@ -166,6 +168,13 @@ class PADDLE_API GPUContext : public DeviceContext { ...@@ -166,6 +168,13 @@ class PADDLE_API GPUContext : public DeviceContext {
void WaitStreamCallback() const; void WaitStreamCallback() const;
// Several methods for adapting Dnn-specific attributes
bool HasDnnAttr(const std::string& attr_name) const;
const Attribute& GetDnnAttr(const std::string& attr_name) const;
void SetDnnAttr(const std::string& attr_name, Attribute attr);
static const char* name() { return "GPUContext"; }
public: public:
/*! \brief Return nccl communicators. */ /*! \brief Return nccl communicators. */
ncclComm_t nccl_comm() const; ncclComm_t nccl_comm() const;
...@@ -250,10 +259,10 @@ class PADDLE_API GPUContext : public DeviceContext { ...@@ -250,10 +259,10 @@ class PADDLE_API GPUContext : public DeviceContext {
std::unique_ptr<Impl> impl_; std::unique_ptr<Impl> impl_;
}; };
// Note: In order to register the kernel of CUDNN, GPUDNNContext is required. // Note: In order to register the kernel of CUDNN, DnnContext is required.
// Currently, CUDNN kernel directly uses GPUContext. But if the kernel function // Currently, CUDNN kernel directly uses GPUContext. But if the kernel function
// has the same name, this will lead to duplicate instantiations of GPU kernel // has the same name, this will lead to duplicate instantiations of GPU kernel
// and GPUDNN kernel function, so if we use GPUDNNContext = GPUContext, we // and Dnn kernel function, so if we use DnnContext = GPUContext, we
// must use different function names for cudnn kernels // must use different function names for cudnn kernels
using GPUDNNContext = GPUContext; using GPUDNNContext = GPUContext;
......
...@@ -16,9 +16,10 @@ ...@@ -16,9 +16,10 @@
#include "paddle/phi/common/place.h" #include "paddle/phi/common/place.h"
#include "paddle/phi/core/enforce.h" #include "paddle/phi/core/enforce.h"
#include "paddle/utils/flat_hash_map.h"
#include "paddle/fluid/framework/expect.h"
#include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/device_context.h"
#include "paddle/phi/core/expect.h"
namespace phi { namespace phi {
...@@ -284,6 +285,69 @@ struct OneDNNContext::Impl { ...@@ -284,6 +285,69 @@ struct OneDNNContext::Impl {
return key_it->second; return key_it->second;
} }
bool HasDnnAttr(const std::string& attr_name) const {
return dnn_attrs_.count(attr_name) != 0UL;
}
const Attribute& GetDnnAttr(const std::string& attr_name) const {
auto iter = dnn_attrs_.find(attr_name);
PADDLE_ENFORCE_NE(
iter,
dnn_attrs_.end(),
phi::errors::NotFound("Attribute `%s` is not found in OneDNNContext."));
return iter->second;
}
void SetDnnAttr(const std::string& attr_name, Attribute attr) {
dnn_attrs_[attr_name] = attr;
}
bool HasDnnInput(const std::string& input_name) const {
return dnn_inputs_.count(input_name) != 0UL;
}
const DenseTensor* GetDnnInput(const std::string& input_name) const {
auto iter = dnn_inputs_.find(input_name);
PADDLE_ENFORCE_NE(
iter,
dnn_inputs_.end(),
phi::errors::NotFound(
"Input DenseTensor `%s` is not found in OneDNNContext."));
return iter->second;
}
void SetDnnInput(const std::string& input_name, const DenseTensor* input) {
dnn_inputs_[input_name] = input;
}
void SetInputsName(const TensorNameMap& inputs_name) {
inputs_name_ = inputs_name;
}
void SetOutputsName(const TensorNameMap& outputs_name) {
outputs_name_ = outputs_name;
}
const std::vector<std::string>& GetInputsName(
const std::string& input) const {
auto it = inputs_name_.find(input);
PADDLE_ENFORCE_NE(it,
inputs_name_.end(),
phi::errors::NotFound(
"OneDnnContext does not have the input %s.", input));
return it->second;
}
const std::vector<std::string>& GetOutputsName(
const std::string& output) const {
auto it = outputs_name_.find(output);
PADDLE_ENFORCE_NE(
it,
outputs_name_.end(),
phi::errors::NotFound("OneDnnContext does not have the output %s.",
output));
return it->second;
}
std::shared_ptr<BlobMap> p_blobmap_; std::shared_ptr<BlobMap> p_blobmap_;
// Map key is pointer of executor and value is a data(iterator in map) needed // Map key is pointer of executor and value is a data(iterator in map) needed
// to erase // to erase
...@@ -291,8 +355,35 @@ struct OneDNNContext::Impl { ...@@ -291,8 +355,35 @@ struct OneDNNContext::Impl {
std::shared_ptr<std::mutex> p_mutex_; std::shared_ptr<std::mutex> p_mutex_;
// 0 - clearing is allowed. x > 0 do not clear. // 0 - clearing is allowed. x > 0 do not clear.
unsigned int block_next_cache_clearing_ = 0; unsigned int block_next_cache_clearing_ = 0;
// Holds some attributes only used by onednn kernel calculations.
// Since the original mkldnn op kernels directly add the operations that
// require fusion to the native kernel operations, controlled by `fuse_xxx`
// attributes, some attributes that seem independent of the device also
// end up saved here for onednn.
// Properly, fusion should be implemented as separate fusion ops and
// kernels instead of being patched onto a basic operation.
// Because DeviceContext is a global singleton, thread safety must be
// ensured, so a thread_local variable is used.
static thread_local AttributeMap dnn_attrs_;
// For onednn, in addition to extra attrs, there are also extra inputs,
// but the number is small. We hope the implementation can be optimized
// to remove this member in the future.
static thread_local paddle::flat_hash_map<std::string, const DenseTensor*>
dnn_inputs_;
// Onednn needs the input and output names of the current kernel to
// generate a unique_key.
static thread_local TensorNameMap inputs_name_;
static thread_local TensorNameMap outputs_name_;
}; };
thread_local AttributeMap OneDNNContext::Impl::dnn_attrs_ = {};
thread_local paddle::flat_hash_map<std::string, const DenseTensor*>
OneDNNContext::Impl::dnn_inputs_ = {};
thread_local TensorNameMap OneDNNContext::Impl::inputs_name_ = {};
thread_local TensorNameMap OneDNNContext::Impl::outputs_name_ = {};
OneDNNContext::OneDNNContext(const Place& place) OneDNNContext::OneDNNContext(const Place& place)
: CPUContext(place), impl_(std::make_unique<Impl>()) {} : CPUContext(place), impl_(std::make_unique<Impl>()) {}
...@@ -322,5 +413,49 @@ OneDNNContext::BlobPtr_t<void> OneDNNContext::GetBlob( ...@@ -322,5 +413,49 @@ OneDNNContext::BlobPtr_t<void> OneDNNContext::GetBlob(
return impl_->GetBlob(name); return impl_->GetBlob(name);
} }
bool OneDNNContext::HasDnnAttr(const std::string& attr_name) const {
return impl_->HasDnnAttr(attr_name);
}
const Attribute& OneDNNContext::GetDnnAttr(const std::string& attr_name) const {
return impl_->GetDnnAttr(attr_name);
}
void OneDNNContext::SetDnnAttr(const std::string& attr_name, Attribute attr) {
return impl_->SetDnnAttr(attr_name, std::move(attr));
}
bool OneDNNContext::HasDnnInput(const std::string& input_name) const {
return impl_->HasDnnInput(input_name);
}
const DenseTensor* OneDNNContext::GetDnnInput(
const std::string& input_name) const {
return impl_->GetDnnInput(input_name);
}
void OneDNNContext::SetDnnInput(const std::string& input_name,
const DenseTensor* input) {
return impl_->SetDnnInput(input_name, input);
}
void OneDNNContext::SetInputsName(const TensorNameMap& inputs_name) {
impl_->SetInputsName(inputs_name);
}
void OneDNNContext::SetOutputsName(const TensorNameMap& outputs_name) {
impl_->SetOutputsName(outputs_name);
}
const std::vector<std::string>& OneDNNContext::GetInputsName(
const std::string& input) const {
return impl_->GetInputsName(input);
}
const std::vector<std::string>& OneDNNContext::GetOutputsName(
const std::string& output) const {
return impl_->GetOutputsName(output);
}
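// Illustrative flow (hedged; "ResidualData" is just an example name): before
// an onednn kernel is launched, the framework-side adapter can stash the
// extra attributes and inputs that the cleaned-up kernel signature no longer
// carries:
//
//   auto* one_dnn_ctx = static_cast<phi::OneDNNContext*>(dev_ctx);
//   one_dnn_ctx->SetDnnAttr("fuse_activation", std::string("relu"));
//   one_dnn_ctx->SetDnnInput("ResidualData", residual_tensor_ptr);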
} // namespace phi } // namespace phi
#endif #endif
...@@ -20,9 +20,12 @@ limitations under the License. */ ...@@ -20,9 +20,12 @@ limitations under the License. */
#include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/common/layout.h" #include "paddle/phi/common/layout.h"
#include "paddle/phi/common/place.h" #include "paddle/phi/common/place.h"
#include "paddle/phi/core/attribute.h"
namespace phi { namespace phi {
using TensorNameMap = std::map<std::string, std::vector<std::string>>;
class OneDNNContextThreadLocals { class OneDNNContextThreadLocals {
// default mkldnn session id // default mkldnn session id
...@@ -134,6 +137,26 @@ class OneDNNContext : public CPUContext { ...@@ -134,6 +137,26 @@ class OneDNNContext : public CPUContext {
return OneDNNContextThreadLocals::fetch(); return OneDNNContextThreadLocals::fetch();
} }
// Several methods for adapting ONEDNN-specific attributes and inputs
bool HasDnnAttr(const std::string& attr_name) const;
const Attribute& GetDnnAttr(const std::string& attr_name) const;
void SetDnnAttr(const std::string& attr_name, Attribute attr);
bool HasDnnInput(const std::string& input_name) const;
const DenseTensor* GetDnnInput(const std::string& input_name) const;
void SetDnnInput(const std::string& input_name, const DenseTensor* input);
void SetInputsName(const TensorNameMap& inputs_name);
void SetOutputsName(const TensorNameMap& outputs_name);
const std::vector<std::string>& GetInputsName(const std::string& input) const;
const std::vector<std::string>& GetOutputsName(
const std::string& output) const;
static const char* name() { return "OneDNNContext"; }
private: private:
struct Impl; struct Impl;
std::unique_ptr<Impl> impl_; std::unique_ptr<Impl> impl_;
......
...@@ -195,6 +195,41 @@ inline std::string CreateKey(const OneDNNContext& dev_ctx, ArgTypes&&... args) { ...@@ -195,6 +195,41 @@ inline std::string CreateKey(const OneDNNContext& dev_ctx, ArgTypes&&... args) {
return key; return key;
} }
inline std::vector<std::vector<int64_t>> ToOnednnPadding(
const std::vector<int64_t>& paddings) {
if (paddings.size() == 6) {
int padding_front = paddings[0];
int padding_back = paddings[1];
int padding_top = paddings[2];
int padding_bottom = paddings[3];
int padding_left = paddings[4];
int padding_right = paddings[5];
return {{padding_front, padding_top, padding_left},
{padding_back, padding_bottom, padding_right}};
} else {
int padding_top = paddings[0];
int padding_bottom = paddings[1];
int padding_left = paddings[2];
int padding_right = paddings[3];
return {{padding_top, padding_left}, {padding_bottom, padding_right}};
}
}
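// Worked example: for a conv3d with paddings {1, 1, 2, 2, 0, 0}, i.e.
// (front, back, top, bottom, left, right), ToOnednnPadding returns
// {{1, 2, 0}, {1, 2, 0}}: oneDNN's {padding_l, padding_r} ordered as
// (d, h, w).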
// The function adjusts the vector of weight dimensions for group convolutions
inline void GetGroupConvWeightsTz(std::vector<int64_t>& weights_tz, // NOLINT
const int groups) {
if (groups > 1) {
// if (is_conv3d) [o, i, d, h, w]->[g, o/g, i, d, h, w]
// else [o, i, h, w] -> [g, o/g, i, h, w]
weights_tz.push_back(0);
std::rotate(weights_tz.begin(), weights_tz.end() - 1, weights_tz.end());
weights_tz[0] = groups;
weights_tz[1] = weights_tz[1] / groups;
}
}
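// Worked example: weights_tz = {64, 16, 3, 3} with groups = 4 becomes
// {4, 16, 16, 3, 3}; the rotate moves the appended slot to the front for
// the group axis, and the output channels are split across the groups
// ([o, i, h, w] -> [g, o/g, i, h, w]).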
inline void MatchShapeToLayout(DenseTensor* tensor_in, inline void MatchShapeToLayout(DenseTensor* tensor_in,
DataLayout from, DataLayout from,
DataLayout to) { DataLayout to) {
......
...@@ -39,6 +39,67 @@ using memory = dnnl::memory; ...@@ -39,6 +39,67 @@ using memory = dnnl::memory;
using OneDNNMemoryFormat = dnnl::memory::format_tag; using OneDNNMemoryFormat = dnnl::memory::format_tag;
static void AppendActivation(const OneDNNContext& dev_ctx,
dnnl::post_ops& post_ops, // NOLINT
float activation_scale = 1.0f) {
const auto invalid_attribute =
dev_ctx.HasDnnAttr("fuse_activation")
? PADDLE_GET_CONST(std::string, dev_ctx.GetDnnAttr("fuse_activation"))
.empty()
: true;
if (invalid_attribute) return;
const auto fuse_activation =
dev_ctx.HasDnnAttr("fuse_activation")
? PADDLE_GET_CONST(std::string, dev_ctx.GetDnnAttr("fuse_activation"))
: "";
const auto fuse_alpha =
dev_ctx.HasDnnAttr("fuse_alpha")
? PADDLE_GET_CONST(float, dev_ctx.GetDnnAttr("fuse_alpha"))
: 0.0f;
const auto fuse_beta =
dev_ctx.HasDnnAttr("fuse_beta")
? PADDLE_GET_CONST(float, dev_ctx.GetDnnAttr("fuse_beta"))
: 0.0f;
if (fuse_activation == "hard_sigmoid") {
post_ops.append_eltwise(activation_scale,
dnnl::algorithm::eltwise_linear,
fuse_alpha,
fuse_beta);
post_ops.append_eltwise(
activation_scale, dnnl::algorithm::eltwise_clip, 0.0f, 1.0f);
} else {
const std::unordered_map<std::string, dnnl::algorithm> activation_map = {
{"abs", dnnl::algorithm::eltwise_abs},
{"clip", dnnl::algorithm::eltwise_clip},
{"gelu", dnnl::algorithm::eltwise_gelu_erf},
{"gelu_erf", dnnl::algorithm::eltwise_gelu_erf},
{"gelu_tanh", dnnl::algorithm::eltwise_gelu_tanh},
{"hard_swish", dnnl::algorithm::eltwise_hardswish},
{"leaky_relu", dnnl::algorithm::eltwise_relu},
{"mish", dnnl::algorithm::eltwise_mish},
{"relu", dnnl::algorithm::eltwise_relu},
{"relu6", dnnl::algorithm::eltwise_bounded_relu},
{"sigmoid", dnnl::algorithm::eltwise_logistic},
{"sqrt", dnnl::algorithm::eltwise_sqrt},
{"swish", dnnl::algorithm::eltwise_swish},
{"tanh", dnnl::algorithm::eltwise_tanh}};
const auto& activation_type = activation_map.find(fuse_activation);
PADDLE_ENFORCE_NE(
activation_type,
activation_map.end(),
phi::errors::InvalidArgument(
"Activation '%s' not found in oneDNN algorithms mapper",
fuse_activation));
post_ops.append_eltwise(
activation_scale, activation_type->second, fuse_alpha, fuse_beta);
}
}
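// Usage sketch (assumed call site, not part of this patch): a handler
// builds the post-ops once and attaches them to the primitive attributes:
//
//   dnnl::post_ops post_ops;
//   AppendActivation(dev_ctx, post_ops);
//   dnnl::primitive_attr conv_attr;
//   conv_attr.set_post_ops(post_ops);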
template <typename T, template <typename T,
typename TForward, typename TForward,
typename TBackward = onednn_dummy_primitive, typename TBackward = onednn_dummy_primitive,
...@@ -1085,5 +1146,6 @@ class ClipOneDNNHandler ...@@ -1085,5 +1146,6 @@ class ClipOneDNNHandler
to_void_cast<T>(input_data)); to_void_cast<T>(input_data));
} }
}; };
} // namespace funcs } // namespace funcs
} // namespace phi } // namespace phi
...@@ -14,6 +14,8 @@ limitations under the License. */ ...@@ -14,6 +14,8 @@ limitations under the License. */
#pragma once #pragma once
#ifdef PADDLE_WITH_XPU
#include <memory> #include <memory>
#include "paddle/phi/backends/xpu/forwards.h" #include "paddle/phi/backends/xpu/forwards.h"
...@@ -26,7 +28,8 @@ namespace xpu = baidu::xpu::api; ...@@ -26,7 +28,8 @@ namespace xpu = baidu::xpu::api;
namespace phi { namespace phi {
class XPUContext : public DeviceContext { class XPUContext : public DeviceContext,
public TypeInfoTraits<DeviceContext, XPUContext> {
public: public:
XPUContext(); XPUContext();
...@@ -65,6 +68,8 @@ class XPUContext : public DeviceContext { ...@@ -65,6 +68,8 @@ class XPUContext : public DeviceContext {
XPUStream stream() const; XPUStream stream() const;
static const char* name() { return "XPUContext"; }
private: private:
struct Impl; struct Impl;
std::unique_ptr<Impl> impl_; std::unique_ptr<Impl> impl_;
...@@ -79,3 +84,5 @@ using KPSContext = XPUContext; ...@@ -79,3 +84,5 @@ using KPSContext = XPUContext;
#endif #endif
} // namespace phi } // namespace phi
#endif
...@@ -48,6 +48,6 @@ using Attribute = paddle::variant<bool, ...@@ -48,6 +48,6 @@ using Attribute = paddle::variant<bool,
DataLayout, DataLayout,
Place>; Place>;
using RuntimeAttrs = paddle::flat_hash_map<std::string, Attribute>; using AttributeMap = paddle::flat_hash_map<std::string, Attribute>;
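// Illustrative use of the renamed map (the attribute keys here are examples
// drawn from the conv ops above):
//
//   phi::AttributeMap extra_attrs;
//   extra_attrs["use_addto"] = false;
//   extra_attrs["workspace_size_MB"] = 512;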
} // namespace phi } // namespace phi
...@@ -21,6 +21,7 @@ limitations under the License. */ ...@@ -21,6 +21,7 @@ limitations under the License. */
#include "paddle/phi/common/place.h" #include "paddle/phi/common/place.h"
#include "paddle/phi/core/allocator.h" #include "paddle/phi/core/allocator.h"
#include "paddle/phi/core/generator.h" #include "paddle/phi/core/generator.h"
#include "paddle/phi/core/utils/type_registry.h"
namespace phi { namespace phi {
class TensorBase; class TensorBase;
...@@ -188,9 +189,21 @@ class PADDLE_API DeviceContext { ...@@ -188,9 +189,21 @@ class PADDLE_API DeviceContext {
*/ */
Generator* GetHostGenerator() const; Generator* GetHostGenerator() const;
/**
* @brief Return the type information of the derived class to support
* safe downcasting in a non-RTTI environment.
*
* @return The type information of the derived class.
*/
TypeInfo<DeviceContext> type_info() const { return type_info_; }
private: private:
struct Impl; struct Impl;
std::unique_ptr<Impl> impl_; std::unique_ptr<Impl> impl_;
template <typename T, typename U>
friend class TypeInfoTraits;
TypeInfo<DeviceContext> type_info_{TypeInfo<DeviceContext>::kUnknownType};
}; };
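// Illustrative sketch (assuming TypeInfoTraits supplies the conventional
// classof() helper that compares obj->type_info() against the derived
// type's static info): a DeviceContext can now be downcast safely without
// RTTI:
//
//   void Run(const phi::DeviceContext& dev_ctx) {
//     if (phi::GPUContext::classof(&dev_ctx)) {
//       auto& gpu_ctx = static_cast<const phi::GPUContext&>(dev_ctx);
//       // ... use gpu_ctx.GetDnnAttr(...) etc.
//     }
//   }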
} // namespace phi } // namespace phi
...@@ -43,6 +43,7 @@ limitations under the License. */ ...@@ -43,6 +43,7 @@ limitations under the License. */
#include "paddle/phi/core/errors.h" #include "paddle/phi/core/errors.h"
#include "paddle/utils/string/printf.h" #include "paddle/utils/string/printf.h"
#include "paddle/utils/string/to_string.h" #include "paddle/utils/string/to_string.h"
#include "paddle/utils/variant.h"
DECLARE_int32(call_stack_level); DECLARE_int32(call_stack_level);
...@@ -409,80 +410,75 @@ struct EnforceNotMet : public std::exception { ...@@ -409,80 +410,75 @@ struct EnforceNotMet : public std::exception {
/** EXTENDED TOOL FUNCTIONS WITH CHECKING **/ /** EXTENDED TOOL FUNCTIONS WITH CHECKING **/
/* /*
* Summary: This macro is used to get Variable or internal type * Summary: This PADDLE_GET(_**) series macros are used to call paddle::get
* data (such as LoDTensor or SelectedRows) of the Input and * safely. paddle::get is not a completely safe api, although it will not
* Output in op, generally used when call scope.FindVar(Input/ * go wrong in most cases, but in extreme cases, it may fail and directly
* Output("Name")) or ctx.Input<LoDTensor>(). * throw a paddle::bad_variant_access const exception, without any stack
* Firstly this macro checks whether the obtained pointer is null, * information.
* and then returns the data if it is not null. * This kind of problem is difficult to debug, so add these macros to
* * enrich paddle::get error information. At the same time, we restrict
* Note: This macro is only suitable for specific scenarios and * the direct use of paddle::get by CI rule.
* is not intended to be widely used. If it cannot meet the
* requirements, please use other PADDLE_ENFORCE** check macro.
* *
* Parameters: * Parameters:
*     __PTR: pointer *     __TYPE: the target variable type
* __ROLE: (string), Input or Output * __VALUE: the target variable to get
* __NAME: (string), Input or Output name
* __OP_TYPE: (string), the op type
*
* Return: The data pointed to by the pointer.
* *
* Examples: * Examples:
* GET_DATA_SAFELY(ctx.Input<LoDTensor>("X"), "Input", "X", "Mul"); * - unsafe writing: int x = paddle::get<int>(y);
*/ * - safe writing: int x = PADDLE_GET(int, y);
#define GET_DATA_SAFELY(__PTR, __ROLE, __NAME, __OP_TYPE) \
(([&]() -> std::add_lvalue_reference<decltype(*(__PTR))>::type { \
auto* __ptr = (__PTR); \
if (UNLIKELY(nullptr == __ptr)) { \
auto __summary__ = phi::errors::NotFound( \
"Unable to get %s data of %s %s in operator %s. " \
"Possible reasons are:\n" \
" 1. The %s is not the %s of operator %s;\n" \
" 2. The %s has no corresponding variable passed in;\n" \
" 3. The %s corresponding variable is not initialized.", \
phi::demangle( \
typeid(std::add_lvalue_reference<decltype(*__ptr)>::type) \
.name()), \
__ROLE, \
__NAME, \
__OP_TYPE, \
__NAME, \
__ROLE, \
__OP_TYPE, \
__NAME, \
__NAME); \
auto __message__ = ::paddle::string::Sprintf( \
"%s\n [Hint: pointer " #__PTR " should not be null.]", \
__summary__.error_message()); \
__THROW_ERROR_INTERNAL__( \
phi::ErrorSummary(__summary__.code(), __message__)); \
} \
return *__ptr; \
})())
/*
* Summary: This macro is used to check whether op has specified
* Input or Output Variables. Because op's Input and Output
* checking are written similarly, so abstract this macro.
*
* Parameters:
*     __EXPR: (bool), the bool expression
* __ROLE: (string), Input or Output
* __NAME: (string), Input or Output name
* __OP_TYPE: (string), the op type
* *
* Examples: * Note: GCC 4.8 cannot select the right overloaded function here, so we
* OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "Mul"); * need to define different functions and macros here; after we upgrade
* the CI gcc version, we can define only one PADDLE_GET macro.
*/ */
#define OP_INOUT_CHECK(__EXPR, __ROLE, __NAME, __OP_TYPE) \ namespace details {
do { \
PADDLE_ENFORCE_EQ( \ #define DEFINE_SAFE_PADDLE_GET( \
__EXPR, \ __InputType, __OutputType, __OutputTypePtr, __FuncName) \
true, \ template <typename OutputType, typename InputType> \
phi::errors::NotFound( \ auto __FuncName( \
"No %s(%s) found for %s operator.", __ROLE, __NAME, __OP_TYPE)); \ __InputType input, const char* expression, const char* file, int line) \
} while (0) ->typename std::conditional<std::is_pointer<InputType>::value, \
__OutputTypePtr, \
__OutputType>::type { \
try { \
return paddle::get<OutputType>(input); \
} catch (paddle::bad_variant_access const&) { \
HANDLE_THE_ERROR \
throw ::phi::enforce::EnforceNotMet( \
phi::errors::InvalidArgument( \
"paddle::get failed, cannot get value " \
"(%s) by type %s, its type is %s.", \
expression, \
phi::enforce::demangle(typeid(OutputType).name()), \
phi::enforce::demangle(input.type().name())), \
file, \
line); \
END_HANDLE_THE_ERROR \
} \
}
DEFINE_SAFE_PADDLE_GET(InputType&, OutputType&, OutputType*, SafeBoostGet);
DEFINE_SAFE_PADDLE_GET(const InputType&,
const OutputType&,
const OutputType*,
SafeBoostGetConst);
DEFINE_SAFE_PADDLE_GET(InputType&&,
OutputType,
OutputType*,
SafeBoostGetMutable);
} // namespace details
#define PADDLE_GET(__TYPE, __VALUE) \
phi::enforce::details::SafeBoostGet<__TYPE>( \
__VALUE, #__VALUE, __FILE__, __LINE__)
#define PADDLE_GET_CONST(__TYPE, __VALUE) \
phi::enforce::details::SafeBoostGetConst<__TYPE>( \
__VALUE, #__VALUE, __FILE__, __LINE__)
#define PADDLE_GET_MUTABLE(__TYPE, __VALUE) \
phi::enforce::details::SafeBoostGetMutable<__TYPE>( \
__VALUE, #__VALUE, __FILE__, __LINE__)
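// Worked example (illustrative), using the phi::Attribute variant defined
// elsewhere in this patch:
//
//   phi::Attribute attr = std::string("NCHW");
//   auto s = PADDLE_GET_CONST(std::string, attr);  // ok
//   int i = PADDLE_GET_CONST(int, attr);           // throws EnforceNotMet,
//                                                  // naming both types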
} // namespace enforce } // namespace enforce
using namespace enforce; // NOLINT using namespace enforce; // NOLINT
......
...@@ -138,8 +138,6 @@ class KernelContext { ...@@ -138,8 +138,6 @@ class KernelContext {
template <typename AttrType> template <typename AttrType>
const AttrType& AttrAt(size_t idx) const; const AttrType& AttrAt(size_t idx) const;
const RuntimeAttrs& GetRuntimeAttrs() const { return runtime_attrs_; }
size_t InputsSize() const { return inputs_.size(); } size_t InputsSize() const { return inputs_.size(); }
size_t OutputsSize() const { return outputs_.size(); } size_t OutputsSize() const { return outputs_.size(); }
size_t AttrsSize() const { return attrs_.size(); } size_t AttrsSize() const { return attrs_.size(); }
...@@ -161,8 +159,6 @@ class KernelContext { ...@@ -161,8 +159,6 @@ class KernelContext {
paddle::small_vector<std::pair<int, int>, kInputSmallVectorSize> input_range_; paddle::small_vector<std::pair<int, int>, kInputSmallVectorSize> input_range_;
paddle::small_vector<std::pair<int, int>, kOutputSmallVectorSize> paddle::small_vector<std::pair<int, int>, kOutputSmallVectorSize>
output_range_; output_range_;
RuntimeAttrs runtime_attrs_;
}; };
} // namespace phi } // namespace phi
...@@ -233,8 +233,6 @@ struct KernelArgsParseFunctor<Return_ (*)(Args_...)> { ...@@ -233,8 +233,6 @@ struct KernelArgsParseFunctor<Return_ (*)(Args_...)> {
args_def->AppendAttribute(AttributeType::DATA_LAYOUT); args_def->AppendAttribute(AttributeType::DATA_LAYOUT);
} else if (arg_type == std::type_index(typeid(Place))) { } else if (arg_type == std::type_index(typeid(Place))) {
args_def->AppendAttribute(AttributeType::PLACE); args_def->AppendAttribute(AttributeType::PLACE);
} else if (arg_type == std::type_index(typeid(RuntimeAttrs))) {
// do nothing
} else { } else {
PADDLE_THROW(phi::errors::Unavailable( PADDLE_THROW(phi::errors::Unavailable(
"Unsupported kernel argument type `%s`.", arg_type.name())); "Unsupported kernel argument type `%s`.", arg_type.name()));
......
...@@ -14,13 +14,7 @@ ...@@ -14,13 +14,7 @@
#pragma once #pragma once
#include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/backends/all_context.h"
#include "paddle/phi/backends/custom/custom_context.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/backends/onednn/onednn_context.h"
#ifdef PADDLE_WITH_XPU
#include "paddle/phi/backends/xpu/xpu_context.h"
#endif
#include "paddle/phi/common/int_array.h" #include "paddle/phi/common/int_array.h"
#include "paddle/phi/common/scalar.h" #include "paddle/phi/common/scalar.h"
#include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/dense_tensor.h"
...@@ -330,21 +324,6 @@ struct KernelImpl<Return (*)(DevCtx, Args...), kernel_fn> { ...@@ -330,21 +324,6 @@ struct KernelImpl<Return (*)(DevCtx, Args...), kernel_fn> {
PD_SPECIALIZE_KernelCallHelper_FOR_OUTPUT(TensorArray); PD_SPECIALIZE_KernelCallHelper_FOR_OUTPUT(TensorArray);
template <typename... Tail>
struct KernelCallHelper<const RuntimeAttrs&, Tail...> {
template <int dev_ctx_idx,
int in_idx,
int attr_idx,
int out_idx,
typename... PreviousArgs>
static void Compute(KernelContext* ctx, PreviousArgs&... pargs) {
const auto& runtime_attrs = ctx->GetRuntimeAttrs();
KernelCallHelper<Tail...>::
template Compute<dev_ctx_idx, in_idx, attr_idx, out_idx>(
ctx, pargs..., runtime_attrs);
}
};
/* End case */ /* End case */
template <typename T> template <typename T>
struct KernelCallHelper<TypeTag<T>> { struct KernelCallHelper<TypeTag<T>> {
......
...@@ -409,12 +409,9 @@ void ConvInferMeta(const MetaTensor& input, ...@@ -409,12 +409,9 @@ void ConvInferMeta(const MetaTensor& input,
const std::vector<int>& strides, const std::vector<int>& strides,
const std::vector<int>& paddings_t, const std::vector<int>& paddings_t,
const std::string& padding_algorithm, const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations_t, const std::vector<int>& dilations_t,
int groups,
const std::string& data_format, const std::string& data_format,
bool use_addto,
int workspace_size_MB,
bool exhaustive_search,
MetaTensor* out, MetaTensor* out,
MetaConfig config) { MetaConfig config) {
std::vector<int> paddings = paddings_t; std::vector<int> paddings = paddings_t;
...@@ -559,27 +556,27 @@ void ConvInferMeta(const MetaTensor& input, ...@@ -559,27 +556,27 @@ void ConvInferMeta(const MetaTensor& input,
out->set_dtype(input.dtype()); out->set_dtype(input.dtype());
} }
void ConvInferInferMeta(const MetaTensor& input, void Conv3DInferMeta(const MetaTensor& input,
const MetaTensor& filter, const MetaTensor& filter,
const std::vector<int>& strides, const std::vector<int>& strides,
const std::vector<int>& paddings, const std::vector<int>& paddings,
const std::string& paddding_algorithm, const std::string& padding_algorithm,
int groups, int groups,
const std::vector<int>& dilations, const std::vector<int>& dilations,
const std::string& data_format, const std::string& data_format,
MetaTensor* out, bool use_addto,
MetaConfig config) { int workspace_size_MB,
bool exhaustive_search,
MetaTensor* out,
MetaConfig config) {
ConvInferMeta(input, ConvInferMeta(input,
filter, filter,
strides, strides,
paddings, paddings,
paddding_algorithm, padding_algorithm,
groups,
dilations, dilations,
groups,
data_format, data_format,
/*use_addto=*/false,
/*workspace_size_MB=*/512, // useless in infermeta
/*exhaustive_search=*/false,
out, out,
config); config);
} }
...@@ -922,6 +919,31 @@ void CrossEntropyWithSoftmaxInferMeta(const MetaTensor& logits, ...@@ -922,6 +919,31 @@ void CrossEntropyWithSoftmaxInferMeta(const MetaTensor& logits,
loss->share_lod(logits); loss->share_lod(logits);
} }
void DepthwiseConvInferMeta(const MetaTensor& input,
const MetaTensor& filter,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
bool use_addto,
int workspace_size_MB,
bool exhaustive_search,
MetaTensor* out,
MetaConfig config) {
ConvInferMeta(input,
filter,
strides,
paddings,
padding_algorithm,
dilations,
groups,
data_format,
out,
config);
}
void DistInferMeta(const MetaTensor& x, void DistInferMeta(const MetaTensor& x,
const MetaTensor& y, const MetaTensor& y,
float p, float p,
...@@ -2876,4 +2898,3 @@ void Unpool3dInferMeta(const MetaTensor& x, ...@@ -2876,4 +2898,3 @@ void Unpool3dInferMeta(const MetaTensor& x,
} // namespace phi } // namespace phi
PD_REGISTER_INFER_META_FN(add_raw, phi::ElementwiseRawInferMeta); PD_REGISTER_INFER_META_FN(add_raw, phi::ElementwiseRawInferMeta);
PD_REGISTER_INFER_META_FN(conv2d_infer, phi::ConvInferInferMeta);
...@@ -80,26 +80,26 @@ void ConvInferMeta(const MetaTensor& input, ...@@ -80,26 +80,26 @@ void ConvInferMeta(const MetaTensor& input,
const MetaTensor& filter, const MetaTensor& filter,
const std::vector<int>& strides, const std::vector<int>& strides,
const std::vector<int>& paddings, const std::vector<int>& paddings,
const std::string& paddding_algorithm, const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations, const std::vector<int>& dilations,
int groups,
const std::string& data_format, const std::string& data_format,
bool use_addto,
int workspace_size_MB,
bool exhaustive_search,
MetaTensor* out, MetaTensor* out,
MetaConfig config = MetaConfig()); MetaConfig config = MetaConfig());
void ConvInferInferMeta(const MetaTensor& input, void Conv3DInferMeta(const MetaTensor& input,
const MetaTensor& filter, const MetaTensor& filter,
const std::vector<int>& strides, const std::vector<int>& strides,
const std::vector<int>& paddings, const std::vector<int>& paddings,
const std::string& paddding_algorithm, const std::string& padding_algorithm,
int groups, int groups,
const std::vector<int>& dilations, const std::vector<int>& dilations,
const std::string& data_format, const std::string& data_format,
MetaTensor* out, bool use_addto,
MetaConfig config = MetaConfig()); int workspace_size_MB,
bool exhaustive_search,
MetaTensor* out,
MetaConfig config = MetaConfig());
void ConvTransposeInferMeta(const MetaTensor& x, void ConvTransposeInferMeta(const MetaTensor& x,
const MetaTensor& filter, const MetaTensor& filter,
...@@ -143,6 +143,20 @@ void CrossEntropyWithSoftmaxInferMeta(const MetaTensor& logits, ...@@ -143,6 +143,20 @@ void CrossEntropyWithSoftmaxInferMeta(const MetaTensor& logits,
MetaTensor* loss, MetaTensor* loss,
MetaConfig config = MetaConfig()); MetaConfig config = MetaConfig());
void DepthwiseConvInferMeta(const MetaTensor& input,
const MetaTensor& filter,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
bool use_addto,
int workspace_size_MB,
bool exhaustive_search,
MetaTensor* out,
MetaConfig config = MetaConfig());
void DistInferMeta(const MetaTensor& x, void DistInferMeta(const MetaTensor& x,
const MetaTensor& y, const MetaTensor& y,
float p, float p,
......
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/core/dense_tensor.h"
namespace phi {
template <typename T, typename Context>
void ConvGradGradKernel(const Context& dev_ctx,
const DenseTensor& input,
const DenseTensor& filter,
const DenseTensor& out_grad,
const paddle::optional<DenseTensor>& input_grad_grad,
const paddle::optional<DenseTensor>& filter_grad_grad,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::string& paddding_algorithm,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
bool use_addto,
int workspace_size_MB,
bool exhaustive_search,
DenseTensor* input_grad,
DenseTensor* filter_grad,
DenseTensor* out_grad_grad);
template <typename T, typename Context>
void Conv3DGradGradKernel(const Context& dev_ctx,
const DenseTensor& input,
const DenseTensor& filter,
const DenseTensor& out_grad,
const paddle::optional<DenseTensor>& input_grad_grad,
const paddle::optional<DenseTensor>& filter_grad_grad,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::string& paddding_algorithm,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
bool use_addto,
int workspace_size_MB,
bool exhaustive_search,
DenseTensor* input_grad,
DenseTensor* filter_grad,
DenseTensor* out_grad_grad);
} // namespace phi
...@@ -25,13 +25,10 @@ void ConvGradKernel(const Context& dev_ctx, ...@@ -25,13 +25,10 @@ void ConvGradKernel(const Context& dev_ctx,
const DenseTensor& out_grad, const DenseTensor& out_grad,
const std::vector<int>& strides, const std::vector<int>& strides,
const std::vector<int>& paddings, const std::vector<int>& paddings,
const std::string& paddding_algorithm, const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations, const std::vector<int>& dilations,
int groups,
const std::string& data_format, const std::string& data_format,
bool use_addto,
int workspace_size_MB,
bool exhaustive_search,
DenseTensor* input_grad, DenseTensor* input_grad,
DenseTensor* filter_grad); DenseTensor* filter_grad);
...@@ -42,7 +39,7 @@ void Conv3DGradKernel(const Context& dev_ctx, ...@@ -42,7 +39,7 @@ void Conv3DGradKernel(const Context& dev_ctx,
const DenseTensor& out_grad, const DenseTensor& out_grad,
const std::vector<int>& strides, const std::vector<int>& strides,
const std::vector<int>& paddings, const std::vector<int>& paddings,
const std::string& paddding_algorithm, const std::string& padding_algorithm,
int groups, int groups,
const std::vector<int>& dilations, const std::vector<int>& dilations,
const std::string& data_format, const std::string& data_format,
...@@ -59,7 +56,7 @@ void DepthwiseConvGradKernel(const Context& dev_ctx, ...@@ -59,7 +56,7 @@ void DepthwiseConvGradKernel(const Context& dev_ctx,
const DenseTensor& out_grad, const DenseTensor& out_grad,
const std::vector<int>& strides, const std::vector<int>& strides,
const std::vector<int>& paddings, const std::vector<int>& paddings,
const std::string& paddding_algorithm, const std::string& padding_algorithm,
int groups, int groups,
const std::vector<int>& dilations, const std::vector<int>& dilations,
const std::string& data_format, const std::string& data_format,
...@@ -70,4 +67,41 @@ void DepthwiseConvGradKernel(const Context& dev_ctx, ...@@ -70,4 +67,41 @@ void DepthwiseConvGradKernel(const Context& dev_ctx,
DenseTensor* input_grad, DenseTensor* input_grad,
DenseTensor* filter_grad); DenseTensor* filter_grad);
template <typename T, typename Context>
void ConvGradGradKernel(const Context& dev_ctx,
const DenseTensor& input,
const DenseTensor& filter,
const DenseTensor& out_grad,
const paddle::optional<DenseTensor>& input_grad_grad,
const paddle::optional<DenseTensor>& filter_grad_grad,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::string& padding_algorithm,
const std::vector<int>& dilations,
int groups,
const std::string& data_format,
DenseTensor* input_grad,
DenseTensor* filter_grad,
DenseTensor* out_grad_grad);
template <typename T, typename Context>
void Conv3DGradGradKernel(const Context& dev_ctx,
const DenseTensor& input,
const DenseTensor& filter,
const DenseTensor& out_grad,
const paddle::optional<DenseTensor>& input_grad_grad,
const paddle::optional<DenseTensor>& filter_grad_grad,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
bool use_addto,
int workspace_size_MB,
bool exhaustive_search,
DenseTensor* input_grad,
DenseTensor* filter_grad,
DenseTensor* out_grad_grad);
} // namespace phi } // namespace phi
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/conv_kernel.h"
#include "paddle/fluid/platform/cudnn_workspace_helper.h"
#include "paddle/phi/core/kernel_registry.h"
namespace phi {
template <typename T, typename Context>
void ConvInferKernel(const Context& dev_ctx,
const DenseTensor& input,
const DenseTensor& filter,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::string& paddding_algorithm,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
DenseTensor* out) {
ConvKernel<T, Context>(dev_ctx,
input,
filter,
strides,
paddings,
paddding_algorithm,
groups,
dilations,
data_format,
/*use_addto=*/false,
/*workspace_size_MB=*/
paddle::platform::GetDefaultConvWorkspaceSizeLimitMB(),
/*exhaustive_search=*/false,
out);
}
} // namespace phi
PD_REGISTER_KERNEL(
conv2d_infer, CPU, ALL_LAYOUT, phi::ConvInferKernel, float, double) {}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PD_REGISTER_KERNEL(
conv2d_infer, GPU, ALL_LAYOUT, phi::ConvInferKernel, float, double) {}
#endif
...@@ -25,12 +25,9 @@ void ConvKernel(const Context& dev_ctx, ...@@ -25,12 +25,9 @@ void ConvKernel(const Context& dev_ctx,
const std::vector<int>& strides, const std::vector<int>& strides,
const std::vector<int>& paddings, const std::vector<int>& paddings,
const std::string& padding_algorithm, const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations, const std::vector<int>& dilations,
int groups,
const std::string& data_format, const std::string& data_format,
bool use_addto,
int workspace_size_MB,
bool exhaustive_search,
DenseTensor* out); DenseTensor* out);
template <typename T, typename Context> template <typename T, typename Context>
...@@ -54,7 +51,7 @@ void DepthwiseConvKernel(const Context& dev_ctx, ...@@ -54,7 +51,7 @@ void DepthwiseConvKernel(const Context& dev_ctx,
const DenseTensor& filter, const DenseTensor& filter,
const std::vector<int>& strides, const std::vector<int>& strides,
const std::vector<int>& paddings, const std::vector<int>& paddings,
const std::string& paddding_algorithm, const std::string& padding_algorithm,
int groups, int groups,
const std::vector<int>& dilations, const std::vector<int>& dilations,
const std::string& data_format, const std::string& data_format,
...@@ -64,16 +61,4 @@ void DepthwiseConvKernel(const Context& dev_ctx, ...@@ -64,16 +61,4 @@ void DepthwiseConvKernel(const Context& dev_ctx,
bool fuse_relu, bool fuse_relu,
DenseTensor* out); DenseTensor* out);
template <typename T, typename Context>
void ConvInferKernel(const Context& dev_ctx,
const DenseTensor& input,
const DenseTensor& filter,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::string& paddding_algorithm,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
DenseTensor* out);
} // namespace phi } // namespace phi
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/conv_grad_grad_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/conv_grad_grad_kernel_impl.h"
namespace phi {
template <typename T, typename Context>
void Conv3DGradGradKernel(const Context& ctx,
const DenseTensor& input,
const DenseTensor& filter,
const DenseTensor& out_grad,
const paddle::optional<DenseTensor>& input_grad_grad,
const paddle::optional<DenseTensor>& filter_grad_grad,
const std::vector<int>& strides,
const std::vector<int>& paddings_t,
const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations_t,
const std::string& data_format,
bool use_addto,
int workspace_size_MB,
bool exhaustive_search_t,
DenseTensor* input_grad,
DenseTensor* filter_grad,
DenseTensor* out_grad_grad) {
ConvGradGradKernel<T>(ctx,
input,
filter,
out_grad,
input_grad_grad,
filter_grad_grad,
strides,
paddings_t,
padding_algorithm,
groups,
dilations_t,
data_format,
use_addto,
workspace_size_MB,
exhaustive_search_t,
input_grad,
filter_grad,
out_grad_grad);
}
} // namespace phi
PD_REGISTER_KERNEL(
conv2d_grad_grad, CPU, ALL_LAYOUT, phi::ConvGradGradKernel, float, double) {
}
PD_REGISTER_KERNEL(conv3d_grad_grad,
CPU,
ALL_LAYOUT,
phi::Conv3DGradGradKernel,
float,
double) {}
...@@ -27,7 +27,7 @@ void DepthwiseConvGradKernel(const Context& dev_ctx, ...@@ -27,7 +27,7 @@ void DepthwiseConvGradKernel(const Context& dev_ctx,
const DenseTensor& out_grad, const DenseTensor& out_grad,
const std::vector<int>& strides, const std::vector<int>& strides,
const std::vector<int>& paddings, const std::vector<int>& paddings,
const std::string& paddding_algorithm, const std::string& padding_algorithm,
int groups, int groups,
const std::vector<int>& dilations, const std::vector<int>& dilations,
const std::string& data_format, const std::string& data_format,
...@@ -43,13 +43,10 @@ void DepthwiseConvGradKernel(const Context& dev_ctx, ...@@ -43,13 +43,10 @@ void DepthwiseConvGradKernel(const Context& dev_ctx,
out_grad, out_grad,
strides, strides,
paddings, paddings,
paddding_algorithm, padding_algorithm,
groups,
dilations, dilations,
groups,
data_format, data_format,
use_addto,
workspace_size_MB,
exhaustive_search,
input_grad, input_grad,
filter_grad); filter_grad);
} }
...@@ -61,7 +58,7 @@ void Conv3DGradKernel(const Context& dev_ctx, ...@@ -61,7 +58,7 @@ void Conv3DGradKernel(const Context& dev_ctx,
const DenseTensor& out_grad, const DenseTensor& out_grad,
const std::vector<int>& strides, const std::vector<int>& strides,
const std::vector<int>& paddings, const std::vector<int>& paddings,
const std::string& paddding_algorithm, const std::string& padding_algorithm,
int groups, int groups,
const std::vector<int>& dilations, const std::vector<int>& dilations,
const std::string& data_format, const std::string& data_format,
...@@ -76,17 +73,50 @@ void Conv3DGradKernel(const Context& dev_ctx, ...@@ -76,17 +73,50 @@ void Conv3DGradKernel(const Context& dev_ctx,
out_grad, out_grad,
strides, strides,
paddings, paddings,
paddding_algorithm, padding_algorithm,
groups,
dilations, dilations,
groups,
data_format, data_format,
use_addto,
workspace_size_MB,
exhaustive_search,
input_grad, input_grad,
filter_grad); filter_grad);
} }
template <typename T, typename Context>
void Conv3DGradGradKernel(const Context& ctx,
const DenseTensor& input,
const DenseTensor& filter,
const DenseTensor& out_grad,
const paddle::optional<DenseTensor>& input_grad_grad,
const paddle::optional<DenseTensor>& filter_grad_grad,
const std::vector<int>& strides,
const std::vector<int>& paddings_t,
const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations_t,
const std::string& data_format,
bool use_addto,
int workspace_size_MB,
bool exhaustive_search_t,
DenseTensor* input_grad,
DenseTensor* filter_grad,
DenseTensor* out_grad_grad) {
ConvGradGradKernel<T>(ctx,
input,
filter,
out_grad,
input_grad_grad,
filter_grad_grad,
strides,
paddings_t,
padding_algorithm,
dilations_t,
groups,
data_format,
input_grad,
filter_grad,
out_grad_grad);
}
} // namespace phi } // namespace phi
PD_REGISTER_KERNEL( PD_REGISTER_KERNEL(
...@@ -101,3 +131,14 @@ PD_REGISTER_KERNEL(depthwise_conv2d_grad, ...@@ -101,3 +131,14 @@ PD_REGISTER_KERNEL(depthwise_conv2d_grad,
PD_REGISTER_KERNEL( PD_REGISTER_KERNEL(
conv3d_grad, CPU, ALL_LAYOUT, phi::Conv3DGradKernel, float, double) {} conv3d_grad, CPU, ALL_LAYOUT, phi::Conv3DGradKernel, float, double) {}
PD_REGISTER_KERNEL(
conv2d_grad_grad, CPU, ALL_LAYOUT, phi::ConvGradGradKernel, float, double) {
}
PD_REGISTER_KERNEL(conv3d_grad_grad,
CPU,
ALL_LAYOUT,
phi::Conv3DGradGradKernel,
float,
double) {}
...@@ -19,6 +19,30 @@ ...@@ -19,6 +19,30 @@
#include "paddle/phi/kernels/impl/conv_kernel_impl.h" #include "paddle/phi/kernels/impl/conv_kernel_impl.h"
namespace phi { namespace phi {
template <typename T, typename Context>
void ConvKernel(const Context& dev_ctx,
const DenseTensor& input,
const DenseTensor& filter,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::string& padding_algorithm,
const std::vector<int>& dilations,
int groups,
const std::string& data_format,
DenseTensor* out) {
ConvKernelImpl<T>(dev_ctx,
input,
filter,
strides,
paddings,
padding_algorithm,
groups,
dilations,
data_format,
out);
}
template <typename T, typename Context> template <typename T, typename Context>
void DepthwiseConvKernel(const Context& dev_ctx, void DepthwiseConvKernel(const Context& dev_ctx,
const DenseTensor& input, const DenseTensor& input,
...@@ -34,19 +58,16 @@ void DepthwiseConvKernel(const Context& dev_ctx, ...@@ -34,19 +58,16 @@ void DepthwiseConvKernel(const Context& dev_ctx,
bool exhaustive_search, bool exhaustive_search,
bool fuse_relu, bool fuse_relu,
DenseTensor* out) { DenseTensor* out) {
ConvKernel<T>(dev_ctx, ConvKernelImpl<T>(dev_ctx,
input, input,
filter, filter,
strides, strides,
paddings, paddings,
padding_algorithm, padding_algorithm,
groups, groups,
dilations, dilations,
data_format, data_format,
use_addto, out);
workspace_size_MB,
exhaustive_search,
out);
} }
template <typename T, typename Context> template <typename T, typename Context>
...@@ -63,19 +84,16 @@ void Conv3DKernel(const Context& dev_ctx, ...@@ -63,19 +84,16 @@ void Conv3DKernel(const Context& dev_ctx,
int workspace_size_MB, int workspace_size_MB,
bool exhaustive_search, bool exhaustive_search,
DenseTensor* out) { DenseTensor* out) {
ConvKernel<T>(dev_ctx, ConvKernelImpl<T>(dev_ctx,
input, input,
filter, filter,
strides, strides,
paddings, paddings,
padding_algorithm, padding_algorithm,
groups, groups,
dilations, dilations,
data_format, data_format,
use_addto, out);
workspace_size_MB,
exhaustive_search,
out);
} }
} // namespace phi } // namespace phi
......
...@@ -12,6 +12,10 @@ ...@@ -12,6 +12,10 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#ifndef _USE_MATH_DEFINES
#define _USE_MATH_DEFINES // use M_2_SQRTPI on Windows
#endif
#include "paddle/phi/kernels/erfinv_grad_kernel.h" #include "paddle/phi/kernels/erfinv_grad_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/backends/cpu/cpu_context.h"
......
...@@ -12,10 +12,28 @@ ...@@ -12,10 +12,28 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#ifndef _USE_MATH_DEFINES
#define _USE_MATH_DEFINES // use M_SQRT1_2 on Windows
#endif
#include "paddle/phi/kernels/erfinv_kernel.h" #include "paddle/phi/kernels/erfinv_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/erfinv_kernel_impl.h" #include "paddle/phi/kernels/funcs/eigen/common.h"
namespace phi {
template <typename T, typename Context>
void ErfinvKernel(const Context& ctx, const DenseTensor& x, DenseTensor* out) {
ctx.template Alloc<T>(out);
auto eigen_in = EigenVector<T>::Flatten(x);
auto eigen_out = EigenVector<T>::Flatten(*out);
auto& place = *ctx.eigen_device();
constexpr T half = static_cast<T>(0.5);
constexpr T half_sqrt = static_cast<T>(M_SQRT1_2);
eigen_out.device(place) = (eigen_in * half + half).ndtri() * half_sqrt;
}
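// Math note: erf(y) = 2 * Phi(y * sqrt(2)) - 1 for the standard normal CDF
// Phi, so erfinv(x) = ndtri((x + 1) / 2) / sqrt(2); the expression above is
// exactly this, with M_SQRT1_2 == 1 / sqrt(2).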
} // namespace phi
PD_REGISTER_KERNEL(erfinv, CPU, ALL_LAYOUT, phi::ErfinvKernel, float, double) {} PD_REGISTER_KERNEL(erfinv, CPU, ALL_LAYOUT, phi::ErfinvKernel, float, double) {}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/conv_grad_grad_kernel.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/conv_grad_grad_kernel_impl.h"
PD_REGISTER_KERNEL(
conv2d_grad_grad, GPU, ALL_LAYOUT, phi::ConvGradGradKernel, float, double) {
}
...@@ -27,7 +27,7 @@ void Conv3DGradKernel(const Context& dev_ctx, ...@@ -27,7 +27,7 @@ void Conv3DGradKernel(const Context& dev_ctx,
const DenseTensor& out_grad, const DenseTensor& out_grad,
const std::vector<int>& strides, const std::vector<int>& strides,
const std::vector<int>& paddings, const std::vector<int>& paddings,
const std::string& paddding_algorithm, const std::string& padding_algorithm,
int groups, int groups,
const std::vector<int>& dilations, const std::vector<int>& dilations,
const std::string& data_format, const std::string& data_format,
...@@ -42,13 +42,10 @@ void Conv3DGradKernel(const Context& dev_ctx, ...@@ -42,13 +42,10 @@ void Conv3DGradKernel(const Context& dev_ctx,
out_grad, out_grad,
strides, strides,
paddings, paddings,
paddding_algorithm, padding_algorithm,
groups,
dilations, dilations,
groups,
data_format, data_format,
use_addto,
workspace_size_MB,
exhaustive_search,
input_grad, input_grad,
filter_grad); filter_grad);
} }
...@@ -60,3 +57,7 @@ PD_REGISTER_KERNEL( ...@@ -60,3 +57,7 @@ PD_REGISTER_KERNEL(
PD_REGISTER_KERNEL( PD_REGISTER_KERNEL(
conv3d_grad, GPU, ALL_LAYOUT, phi::Conv3DGradKernel, float, double) {} conv3d_grad, GPU, ALL_LAYOUT, phi::Conv3DGradKernel, float, double) {}
PD_REGISTER_KERNEL(
conv2d_grad_grad, GPU, ALL_LAYOUT, phi::ConvGradGradKernel, float, double) {
}
...@@ -20,6 +20,29 @@ ...@@ -20,6 +20,29 @@
namespace phi { namespace phi {
template <typename T, typename Context>
void ConvKernel(const Context& dev_ctx,
const DenseTensor& input,
const DenseTensor& filter,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::string& padding_algorithm,
const std::vector<int>& dilations,
int groups,
const std::string& data_format,
DenseTensor* out) {
ConvKernelImpl<T>(dev_ctx,
input,
filter,
strides,
paddings,
padding_algorithm,
groups,
dilations,
data_format,
out);
}
template <typename T, typename Context> template <typename T, typename Context>
void Conv3DKernel(const Context& dev_ctx, void Conv3DKernel(const Context& dev_ctx,
const DenseTensor& input, const DenseTensor& input,
...@@ -34,19 +57,16 @@ void Conv3DKernel(const Context& dev_ctx, ...@@ -34,19 +57,16 @@ void Conv3DKernel(const Context& dev_ctx,
int workspace_size_MB, int workspace_size_MB,
bool exhaustive_search, bool exhaustive_search,
DenseTensor* out) { DenseTensor* out) {
ConvKernel<T>(dev_ctx, ConvKernelImpl<T>(dev_ctx,
input, input,
filter, filter,
strides, strides,
paddings, paddings,
padding_algorithm, padding_algorithm,
groups, groups,
dilations, dilations,
data_format, data_format,
use_addto, out);
workspace_size_MB,
exhaustive_search,
out);
} }
} // namespace phi } // namespace phi
......
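The hunk above is the core of the change: use_addto, workspace_size_MB and exhaustive_search disappear from the portable ConvKernel signature, and Conv3DKernel forwards straight to ConvKernelImpl without them. Device-specific kernels that still need such knobs now read them from the device context. Below is a minimal, self-contained sketch of that side channel; HasDnnAttr and GetDnnAttr mirror the accessor names used later in this diff, while SetDnnAttr, the Attribute variant and the class itself are simplified stand-ins rather than Paddle's real types:

#include <cassert>
#include <string>
#include <unordered_map>
#include <variant>

// Simplified stand-in for the per-launch extra-attribute store that the
// framework attaches to a device context before invoking a PHI kernel.
using Attribute = std::variant<bool, int, float, std::string>;

class DeviceContextAttrs {
 public:
  // Framework side: stash a runtime extra attribute before the launch.
  void SetDnnAttr(const std::string& name, Attribute attr) {
    dnn_attrs_[name] = std::move(attr);
  }
  // Kernel side: probe for an attribute the portable signature no longer
  // carries.
  bool HasDnnAttr(const std::string& name) const {
    return dnn_attrs_.count(name) != 0;
  }
  const Attribute& GetDnnAttr(const std::string& name) const {
    return dnn_attrs_.at(name);
  }

 private:
  std::unordered_map<std::string, Attribute> dnn_attrs_;
};

int main() {
  DeviceContextAttrs ctx;
  ctx.SetDnnAttr("exhaustive_search", true);
  // A device-specific kernel falls back to a default when the attribute
  // was not injected; portable kernels never look at all.
  bool exhaustive = ctx.HasDnnAttr("exhaustive_search")
                        ? std::get<bool>(ctx.GetDnnAttr("exhaustive_search"))
                        : false;
  assert(exhaustive);
  return 0;
}

Keeping the context as the carrier leaves the cross-device signature stable while letting the cuDNN and oneDNN backends opt in attribute by attribute.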
...@@ -12,6 +12,10 @@ ...@@ -12,6 +12,10 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#ifndef _USE_MATH_DEFINES
#define _USE_MATH_DEFINES // use M_2_SQRTPI on Windows
#endif
#include "paddle/phi/kernels/erfinv_grad_kernel.h" #include "paddle/phi/kernels/erfinv_grad_kernel.h"
#include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_context.h"
......
...@@ -42,18 +42,23 @@ void ConvCudnnKernel(const Context& ctx, ...@@ -42,18 +42,23 @@ void ConvCudnnKernel(const Context& ctx,
const std::vector<int>& strides, const std::vector<int>& strides,
const std::vector<int>& paddings_t, const std::vector<int>& paddings_t,
const std::string& padding_algorithm, const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations_t, const std::vector<int>& dilations_t,
int groups,
const std::string& data_format, const std::string& data_format,
bool use_addto,
int workspace_size_MB,
bool exhaustive_search_t,
DenseTensor* output) { DenseTensor* output) {
ctx.template Alloc<T>(output); ctx.template Alloc<T>(output);
std::vector<int> paddings = paddings_t; std::vector<int> paddings = paddings_t;
std::vector<int> dilations = dilations_t; std::vector<int> dilations = dilations_t;
bool exhaustive_search = FLAGS_cudnn_exhaustive_search || exhaustive_search_t; bool has_exhaustive_search = ctx.HasDnnAttr("exhaustive_search");
VLOG(4) << "GPUContext contains `exhaustive_search`: "
<< has_exhaustive_search;
bool exhaustive_search_attr =
has_exhaustive_search
? PADDLE_GET_CONST(bool, ctx.GetDnnAttr("exhaustive_search"))
: false;
bool exhaustive_search =
FLAGS_cudnn_exhaustive_search || exhaustive_search_attr;
bool deterministic = FLAGS_cudnn_deterministic; bool deterministic = FLAGS_cudnn_deterministic;
PADDLE_ENFORCE_EQ(exhaustive_search && deterministic, PADDLE_ENFORCE_EQ(exhaustive_search && deterministic,
false, false,
...@@ -402,12 +407,9 @@ void Conv3DCudnnKernel(const Context& dev_ctx, ...@@ -402,12 +407,9 @@ void Conv3DCudnnKernel(const Context& dev_ctx,
strides, strides,
paddings, paddings,
padding_algorithm, padding_algorithm,
groups,
dilations, dilations,
groups,
data_format, data_format,
use_addto,
workspace_size_MB,
exhaustive_search,
out); out);
} }
...@@ -432,12 +434,9 @@ void DepthwiseConvCudnnKernel(const Context& dev_ctx, ...@@ -432,12 +434,9 @@ void DepthwiseConvCudnnKernel(const Context& dev_ctx,
strides, strides,
paddings, paddings,
padding_algorithm, padding_algorithm,
groups,
dilations, dilations,
groups,
data_format, data_format,
use_addto,
workspace_size_MB,
exhaustive_search,
out); out);
} }
......
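Each cuDNN attribute read in the file above repeats the same probe-then-get dance: HasDnnAttr, then PADDLE_GET_CONST on GetDnnAttr, with a fallback default. A small helper would capture the pattern; this is a hypothetical refactoring sketch, not part of the patch, with std::get standing in for PADDLE_GET_CONST:

#include <string>
#include <variant>

// Read a device-specific extra attribute from the context, falling back to
// default_value when the framework did not inject one for this launch.
template <typename T, typename Context>
T GetDnnAttrOr(const Context& ctx, const std::string& name, T default_value) {
  return ctx.HasDnnAttr(name) ? std::get<T>(ctx.GetDnnAttr(name))
                              : default_value;
}

// Usage, matching the lookup in ConvCudnnKernel:
//   bool exhaustive_search_attr =
//       GetDnnAttrOr<bool>(ctx, "exhaustive_search", false);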
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/operators/math/im2col.h"
#include "paddle/fluid/operators/math/vol2col.h"
#include "paddle/phi/kernels/conv_kernel.h"
#include "paddle/phi/kernels/cpu/conv_util.h"
#include "paddle/phi/kernels/funcs/batch_norm_utils.h"
#include "paddle/phi/kernels/funcs/blas/blas.h"
#include "paddle/phi/kernels/funcs/math_function.h"
namespace phi {
template <typename T, typename Context>
void ConvGradGradKernel(const Context& dev_ctx,
const DenseTensor& input,
const DenseTensor& filter,
const DenseTensor& out_grad,
const paddle::optional<DenseTensor>& input_grad_grad,
const paddle::optional<DenseTensor>& filter_grad_grad,
const std::vector<int>& strides_t,
const std::vector<int>& paddings_t,
const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations_t,
const std::string& data_format,
bool use_addto,
int workspace_size_MB,
bool exhaustive_search,
DenseTensor* input_grad,
DenseTensor* filter_grad,
DenseTensor* out_grad_grad) {
const DenseTensor* X = &input;
const DenseTensor* dY = &out_grad;
const DenseTensor* ddX = input_grad_grad.get_ptr();
const DenseTensor* ddW_in = filter_grad_grad.get_ptr();
DenseTensor* ddY = out_grad_grad;
DenseTensor* dW = filter_grad;
DenseTensor* dX = input_grad;
DenseTensor W = filter;
if (!ddY && !dW && !dX) return;
const std::vector<int> strides = strides_t;
std::vector<int> paddings = paddings_t;
std::vector<int> dilations = dilations_t;
const bool channel_last = (data_format == "NHWC" || data_format == "NDHWC");
// transform Tensor
DenseTensor transformed_X(X->type());
DenseTensor transformed_dY(dY->type());
DenseTensor transformed_ddX(X->type());
if (channel_last) {
ResizeToChannelFirst<Context, T>(dev_ctx, X, &transformed_X);
TransToChannelFirst<Context, T>(dev_ctx, X, &transformed_X);
ResizeToChannelFirst<Context, T>(dev_ctx, dY, &transformed_dY);
TransToChannelFirst<Context, T>(dev_ctx, dY, &transformed_dY);
if (ddX) {
ResizeToChannelFirst<Context, T>(dev_ctx, ddX, &transformed_ddX);
TransToChannelFirst<Context, T>(dev_ctx, ddX, &transformed_ddX);
}
} else {
transformed_X = *X;
transformed_dY = *dY;
if (ddX) {
transformed_ddX = *ddX;
}
}
// update padding and dilation
auto in_dims = transformed_X.dims();
auto filter_dims = W.dims();
DDim in_data_dims = slice_ddim(in_dims, 2, in_dims.size());
DDim filter_data_dims = slice_ddim(filter_dims, 2, filter_dims.size());
std::vector<int> ksize = vectorize<int>(filter_data_dims);
UpdatePaddingAndDilation(
&paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize);
const int batch_size = static_cast<int>(transformed_X.dims()[0]);
std::vector<int64_t> filter_shape_vec(vectorize(W.dims()));
std::vector<int64_t> output_shape_vec(vectorize(transformed_dY.dims()));
size_t data_dim = filter_shape_vec.size() - 2;
std::vector<int64_t> col_shape_vec(1 + 2 * data_dim);
// col_shape [in_channel/group, kh, kw, oh, ow]
col_shape_vec[0] = transformed_X.dims()[1] / groups;
for (size_t j = 0; j < data_dim; ++j) {
col_shape_vec[j + 1] = filter_shape_vec[j + 2];
col_shape_vec[j + data_dim + 1] = output_shape_vec[j + 2];
}
DDim col_shape(make_ddim(col_shape_vec));
// col_matrix_shape [in_channel/group * kh * kw, oh * ow]
DDim col_matrix_shape = flatten_to_2d(col_shape, data_dim + 1);
// input_shape [Cin, H, W]
DDim input_shape =
slice_ddim(transformed_X.dims(), 1, transformed_X.dims().size());
// filter_matrix_shape [Cout, Cin * kh * kw]
DDim filter_matrix_shape = {W.dims()[0], W.numel() / W.dims()[0]};
W.Resize(filter_matrix_shape);
DDim output_matrix_shape = {
transformed_dY.dims()[1],
transformed_dY.numel() /
(transformed_dY.dims()[0] * transformed_dY.dims()[1])};
int in_step = static_cast<int>(transformed_X.dims()[1]) / groups;
int out_step = static_cast<int>(transformed_dY.dims()[1]) / groups;
bool is_expand = IsExpand(filter_shape_vec, strides, paddings, dilations);
DenseTensor col;
DenseTensor col_matrix;
if (is_expand) {
col.Resize(col_shape);
dev_ctx.template Alloc<T>(&col);
col_matrix.ShareDataWith(col);
col_matrix.Resize(col_matrix_shape);
}
phi::funcs::SetConstant<Context, T> set_zero;
auto blas = phi::funcs::GetBlas<Context, T>(dev_ctx);
// dx convolution double grad: gemm + col2im(col2vol)
// dx = ddw * dy ==> dx(N, Cin, H, W), ddw(Cout, Cin, kh, kw), dy(N, Cout,
// oH, oW)
if (dX && ddW_in) {
Tensor ddW;
ddW.ShareDataWith(*ddW_in).Resize(filter_matrix_shape);
dev_ctx.template Alloc<T>(dX);
DenseTensor transformed_dX(dX->type());
if (channel_last) {
ResizeToChannelFirst<Context, T>(dev_ctx, dX, &transformed_dX);
} else {
transformed_dX = *dX;
}
// if is_expand is false, the operation of set_zero is unnecessary
// because math::matmul will reset dx
if (is_expand) {
set_zero(dev_ctx, &transformed_dX, static_cast<T>(0));
}
paddle::operators::math::Col2VolFunctor<Context, T> col2vol;
paddle::operators::math::
Col2ImFunctor<paddle::operators::math::ColFormat::kCFO, Context, T>
col2im;
for (int i = 0; i < batch_size; i++) {
DenseTensor dy_batch =
transformed_dY.Slice(i, i + 1).Resize(output_matrix_shape);
DenseTensor dx_batch = transformed_dX.Slice(i, i + 1).Resize(input_shape);
for (int g = 0; g < groups; g++) {
// gemm
DenseTensor dy_slice = dy_batch.Slice(g * out_step, (g + 1) * out_step);
DenseTensor ddw_slice = ddW.Slice(g * out_step, (g + 1) * out_step);
DenseTensor dx_slice = dx_batch.Slice(g * in_step, (g + 1) * in_step);
if (!is_expand) {
col_matrix.ShareDataWith(dx_slice);
col_matrix.Resize(col_matrix_shape);
}
blas.MatMul(
ddw_slice, true, dy_slice, false, T(1.0), &col_matrix, T(0.0));
if (is_expand && data_dim == 2U) {
col2im(dev_ctx,
col,
dilations,
strides,
std::vector<int>{
paddings[0], paddings[2], paddings[1], paddings[3]},
&dx_slice);
} else if (is_expand && data_dim == 3U) {
col2vol(dev_ctx, col, dilations, strides, paddings, &dx_slice);
}
}
}
if (channel_last) {
TransToChannelLast<Context, T>(dev_ctx, &transformed_dX, dX);
}
}
// dw = ddx * dy ==> dw(Cout, Cin, kh, kw), ddx(N, Cin, H, W), dy(N, Cout,
// oH, oW)
// dw convolution double grad: im2col(vol2col) + gemm
if (dW && ddX) {
dev_ctx.template Alloc<T>(dW);
set_zero(dev_ctx, dW, static_cast<T>(0));
DenseTensor dW_arr = *dW;
dW_arr.Resize(filter_matrix_shape);
paddle::operators::math::
Im2ColFunctor<paddle::operators::math::ColFormat::kCFO, Context, T>
im2col;
paddle::operators::math::Vol2ColFunctor<Context, T> vol2col;
for (int i = 0; i < batch_size; ++i) {
DenseTensor dy_batch =
transformed_dY.Slice(i, i + 1).Resize(output_matrix_shape);
Tensor ddx_batch = transformed_ddX.Slice(i, i + 1).Resize(input_shape);
for (int g = 0; g < groups; ++g) {
// im2col
DenseTensor dy_slice = dy_batch.Slice(g * out_step, (g + 1) * out_step);
DenseTensor ddx_slice = ddx_batch.Slice(g * in_step, (g + 1) * in_step);
if (!is_expand) {
col.ShareDataWith(ddx_slice);
col_matrix.ShareDataWith(col);
col_matrix.Resize(col_matrix_shape);
} else if (data_dim == 2U) {
im2col(dev_ctx,
ddx_slice,
dilations,
strides,
std::vector<int>{
paddings[0], paddings[2], paddings[1], paddings[3]},
&col);
} else if (data_dim == 3U) {
vol2col(dev_ctx, ddx_slice, dilations, strides, paddings, &col);
}
DenseTensor dw_slice = dW_arr.Slice(g * out_step, (g + 1) * out_step);
blas.MatMul(
dy_slice, false, col_matrix, true, T(1.0), &dw_slice, T(1.0));
}
}
}
// ddy = w * ddx + x * ddw ==> ddy(N, Cout, oH, oW), x/ddx(N, Cin, H, W),
// w/ddw(Cout, Cin, kh, kw)
// ddy convolution double grad: im2col(vol2col) + gemm
if (ddY) {
dev_ctx.template Alloc<T>(ddY);
DenseTensor transformed_ddY(ddY->type());
if (channel_last) {
ResizeToChannelFirst<Context, T>(dev_ctx, ddY, &transformed_ddY);
} else {
transformed_ddY = *ddY;
}
set_zero(dev_ctx, &transformed_ddY, static_cast<T>(0));
paddle::operators::math::
Im2ColFunctor<paddle::operators::math::ColFormat::kCFO, Context, T>
im2col;
paddle::operators::math::Vol2ColFunctor<Context, T> vol2col;
for (int i = 0; i < batch_size; ++i) {
DenseTensor ddy_batch =
transformed_ddY.Slice(i, i + 1).Resize(output_matrix_shape);
for (int g = 0; g < groups; ++g) {
// gemm
DenseTensor ddy_slice =
ddy_batch.Slice(g * out_step, (g + 1) * out_step);
if (ddX) {
DenseTensor ddx_batch =
transformed_ddX.Slice(i, i + 1).Resize(input_shape);
DenseTensor ddx_slice =
ddx_batch.Slice(g * in_step, (g + 1) * in_step);
if (!is_expand) {
col.ShareDataWith(ddx_slice);
col_matrix.ShareDataWith(col);
col_matrix.Resize(col_matrix_shape);
} else if (data_dim == 2U) {
im2col(dev_ctx,
ddx_slice,
dilations,
strides,
std::vector<int>{
paddings[0], paddings[2], paddings[1], paddings[3]},
&col);
} else if (data_dim == 3U) {
vol2col(dev_ctx, ddx_slice, dilations, strides, paddings, &col);
}
DenseTensor w_slice = W.Slice(g * out_step, (g + 1) * out_step);
blas.MatMul(
w_slice, false, col_matrix, false, T(1.0), &ddy_slice, T(0.0));
}
if (ddW_in) {
DenseTensor x_batch =
transformed_X.Slice(i, i + 1).Resize(input_shape);
DenseTensor x_slice = x_batch.Slice(g * in_step, (g + 1) * in_step);
DenseTensor ddW;
ddW.ShareDataWith(*ddW_in).Resize(filter_matrix_shape);
if (!is_expand) {
col.ShareDataWith(x_slice);
col_matrix.ShareDataWith(col);
col_matrix.Resize(col_matrix_shape);
} else if (data_dim == 2U) {
im2col(dev_ctx,
x_slice,
dilations,
strides,
std::vector<int>{
paddings[0], paddings[2], paddings[1], paddings[3]},
&col);
} else if (data_dim == 3U) {
vol2col(dev_ctx, x_slice, dilations, strides, paddings, &col);
}
// gemm
DenseTensor ddw_slice = ddW.Slice(g * out_step, (g + 1) * out_step);
blas.MatMul(
ddw_slice, false, col_matrix, false, T(1.0), &ddy_slice, T(1.0));
}
}
}
if (channel_last) {
TransToChannelLast<Context, T>(dev_ctx, &transformed_ddY, ddY);
}
}
}
} // namespace phi
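The three branches of ConvGradGradKernel above implement, per batch item i and per group, the standard convolution double-grad identities; this is a reference summary of its inline comments, writing * for the forward convolution, *^T for its transpose (the col2im direction), with GEMM beta values as in the code:

\begin{aligned}
dX_i  &= ddW *^{\top} dY_i             && \text{GEMM + col2im/col2vol} \\
dW    &= \textstyle\sum_i ddX_i * dY_i && \text{im2col/vol2col + GEMM, } \beta = 1 \\
ddY_i &= W * ddX_i + ddW * X_i         && \text{im2col/vol2col + GEMM}
\end{aligned}

The dX branch runs only when ddW is given, the dW branch only when ddX is given, and the ddY branch accumulates whichever of its two terms has the corresponding input present.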
...@@ -16,7 +16,6 @@ ...@@ -16,7 +16,6 @@
#include "paddle/fluid/operators/math/im2col.h" #include "paddle/fluid/operators/math/im2col.h"
#include "paddle/fluid/operators/math/vol2col.h" #include "paddle/fluid/operators/math/vol2col.h"
#include "paddle/phi/kernels/conv_grad_kernel.h"
#include "paddle/phi/kernels/cpu/conv_util.h" #include "paddle/phi/kernels/cpu/conv_util.h"
#include "paddle/phi/kernels/funcs/batch_norm_utils.h" #include "paddle/phi/kernels/funcs/batch_norm_utils.h"
#include "paddle/phi/kernels/funcs/blas/blas.h" #include "paddle/phi/kernels/funcs/blas/blas.h"
...@@ -32,12 +31,9 @@ void ConvGradKernel(const Context& dev_ctx, ...@@ -32,12 +31,9 @@ void ConvGradKernel(const Context& dev_ctx,
const std::vector<int>& strides, const std::vector<int>& strides,
const std::vector<int>& paddings_t, const std::vector<int>& paddings_t,
const std::string& padding_algorithm, const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations_t, const std::vector<int>& dilations_t,
int groups,
const std::string& data_format, const std::string& data_format,
bool use_addto,
int workspace_size_MB,
bool exhaustive_search,
DenseTensor* input_grad, DenseTensor* input_grad,
DenseTensor* filter_grad) { DenseTensor* filter_grad) {
// The filter and filter_grad will be reshaped in the calculations, // The filter and filter_grad will be reshaped in the calculations,
...@@ -254,4 +250,304 @@ void ConvGradKernel(const Context& dev_ctx, ...@@ -254,4 +250,304 @@ void ConvGradKernel(const Context& dev_ctx,
} }
} }
template <typename T, typename Context>
void ConvGradGradKernel(const Context& dev_ctx,
const DenseTensor& input,
const DenseTensor& filter,
const DenseTensor& out_grad,
const paddle::optional<DenseTensor>& input_grad_grad,
const paddle::optional<DenseTensor>& filter_grad_grad,
const std::vector<int>& strides_t,
const std::vector<int>& paddings_t,
const std::string& padding_algorithm,
const std::vector<int>& dilations_t,
int groups,
const std::string& data_format,
DenseTensor* input_grad,
DenseTensor* filter_grad,
DenseTensor* out_grad_grad) {
const DenseTensor* X = &input;
const DenseTensor* dY = &out_grad;
const DenseTensor* ddX = input_grad_grad.get_ptr();
const DenseTensor* ddW_in = filter_grad_grad.get_ptr();
DenseTensor* ddY = out_grad_grad;
DenseTensor* dW = filter_grad;
DenseTensor* dX = input_grad;
DenseTensor W = filter;
if (!ddY && !dW && !dX) return;
const std::vector<int> strides = strides_t;
std::vector<int> paddings = paddings_t;
std::vector<int> dilations = dilations_t;
const bool channel_last = (data_format == "NHWC" || data_format == "NDHWC");
// transform Tensor
DenseTensor transformed_X(X->type());
DenseTensor transformed_dY(dY->type());
DenseTensor transformed_ddX(X->type());
if (channel_last) {
ResizeToChannelFirst<Context, T>(dev_ctx, X, &transformed_X);
TransToChannelFirst<Context, T>(dev_ctx, X, &transformed_X);
ResizeToChannelFirst<Context, T>(dev_ctx, dY, &transformed_dY);
TransToChannelFirst<Context, T>(dev_ctx, dY, &transformed_dY);
if (ddX) {
ResizeToChannelFirst<Context, T>(dev_ctx, ddX, &transformed_ddX);
TransToChannelFirst<Context, T>(dev_ctx, ddX, &transformed_ddX);
}
} else {
transformed_X = *X;
transformed_dY = *dY;
if (ddX) {
transformed_ddX = *ddX;
}
}
// update padding and dilation
auto in_dims = transformed_X.dims();
auto filter_dims = W.dims();
DDim in_data_dims = slice_ddim(in_dims, 2, in_dims.size());
DDim filter_data_dims = slice_ddim(filter_dims, 2, filter_dims.size());
std::vector<int> ksize = vectorize<int>(filter_data_dims);
UpdatePaddingAndDilation(
&paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize);
const int batch_size = static_cast<int>(transformed_X.dims()[0]);
std::vector<int64_t> filter_shape_vec(vectorize(W.dims()));
std::vector<int64_t> output_shape_vec(vectorize(transformed_dY.dims()));
size_t data_dim = filter_shape_vec.size() - 2;
std::vector<int64_t> col_shape_vec(1 + 2 * data_dim);
// col_shape [in_channel/group, kh, kw, oh, ow]
col_shape_vec[0] = transformed_X.dims()[1] / groups;
for (size_t j = 0; j < data_dim; ++j) {
col_shape_vec[j + 1] = filter_shape_vec[j + 2];
col_shape_vec[j + data_dim + 1] = output_shape_vec[j + 2];
}
DDim col_shape(make_ddim(col_shape_vec));
// col_matrix_shape [in_channel/group * kh * kw, oh * ow]
DDim col_matrix_shape = flatten_to_2d(col_shape, data_dim + 1);
// input_shape [Cin, H, W]
DDim input_shape =
slice_ddim(transformed_X.dims(), 1, transformed_X.dims().size());
// filter_matrix_shape [Cout, Cin * kh * kw]
DDim filter_matrix_shape = {W.dims()[0], W.numel() / W.dims()[0]};
W.Resize(filter_matrix_shape);
DDim output_matrix_shape = {
transformed_dY.dims()[1],
transformed_dY.numel() /
(transformed_dY.dims()[0] * transformed_dY.dims()[1])};
int in_step = static_cast<int>(transformed_X.dims()[1]) / groups;
int out_step = static_cast<int>(transformed_dY.dims()[1]) / groups;
bool is_expand = IsExpand(filter_shape_vec, strides, paddings, dilations);
DenseTensor col;
DenseTensor col_matrix;
if (is_expand) {
col.Resize(col_shape);
dev_ctx.template Alloc<T>(&col);
col_matrix.ShareDataWith(col);
col_matrix.Resize(col_matrix_shape);
}
phi::funcs::SetConstant<Context, T> set_zero;
auto blas = phi::funcs::GetBlas<Context, T>(dev_ctx);
// dx convolution double grad: gemm + col2im(col2vol)
// dx = ddw * dy ==> dx(N, Cin, H, W), ddw(Cout, Cin, kh, kw), dy(N, Cout,
// oH, oW)
if (dX && ddW_in) {
Tensor ddW;
ddW.ShareDataWith(*ddW_in).Resize(filter_matrix_shape);
dev_ctx.template Alloc<T>(dX);
DenseTensor transformed_dX(dX->type());
if (channel_last) {
ResizeToChannelFirst<Context, T>(dev_ctx, dX, &transformed_dX);
} else {
transformed_dX = *dX;
}
// if is_expand is false, the operation of set_zero is unnecessary
// because math::matmul will reset dx
if (is_expand) {
set_zero(dev_ctx, &transformed_dX, static_cast<T>(0));
}
paddle::operators::math::Col2VolFunctor<Context, T> col2vol;
paddle::operators::math::
Col2ImFunctor<paddle::operators::math::ColFormat::kCFO, Context, T>
col2im;
for (int i = 0; i < batch_size; i++) {
DenseTensor dy_batch =
transformed_dY.Slice(i, i + 1).Resize(output_matrix_shape);
DenseTensor dx_batch = transformed_dX.Slice(i, i + 1).Resize(input_shape);
for (int g = 0; g < groups; g++) {
// gemm
DenseTensor dy_slice = dy_batch.Slice(g * out_step, (g + 1) * out_step);
DenseTensor ddw_slice = ddW.Slice(g * out_step, (g + 1) * out_step);
DenseTensor dx_slice = dx_batch.Slice(g * in_step, (g + 1) * in_step);
if (!is_expand) {
col_matrix.ShareDataWith(dx_slice);
col_matrix.Resize(col_matrix_shape);
}
blas.MatMul(
ddw_slice, true, dy_slice, false, T(1.0), &col_matrix, T(0.0));
if (is_expand && data_dim == 2U) {
col2im(dev_ctx,
col,
dilations,
strides,
std::vector<int>{
paddings[0], paddings[2], paddings[1], paddings[3]},
&dx_slice);
} else if (is_expand && data_dim == 3U) {
col2vol(dev_ctx, col, dilations, strides, paddings, &dx_slice);
}
}
}
if (channel_last) {
TransToChannelLast<Context, T>(dev_ctx, &transformed_dX, dX);
}
}
// dw = ddx * dy ==> dw(Cout, Cin, kh, kw), ddx(N, Cin, H, W), dy(N, Cout,
// oH, oW)
// dw convolution double grad: im2col(vol2col) + gemm
if (dW && ddX) {
dev_ctx.template Alloc<T>(dW);
set_zero(dev_ctx, dW, static_cast<T>(0));
DenseTensor dW_arr = *dW;
dW_arr.Resize(filter_matrix_shape);
paddle::operators::math::
Im2ColFunctor<paddle::operators::math::ColFormat::kCFO, Context, T>
im2col;
paddle::operators::math::Vol2ColFunctor<Context, T> vol2col;
for (int i = 0; i < batch_size; ++i) {
DenseTensor dy_batch =
transformed_dY.Slice(i, i + 1).Resize(output_matrix_shape);
Tensor ddx_batch = transformed_ddX.Slice(i, i + 1).Resize(input_shape);
for (int g = 0; g < groups; ++g) {
// im2col
DenseTensor dy_slice = dy_batch.Slice(g * out_step, (g + 1) * out_step);
DenseTensor ddx_slice = ddx_batch.Slice(g * in_step, (g + 1) * in_step);
if (!is_expand) {
col.ShareDataWith(ddx_slice);
col_matrix.ShareDataWith(col);
col_matrix.Resize(col_matrix_shape);
} else if (data_dim == 2U) {
im2col(dev_ctx,
ddx_slice,
dilations,
strides,
std::vector<int>{
paddings[0], paddings[2], paddings[1], paddings[3]},
&col);
} else if (data_dim == 3U) {
vol2col(dev_ctx, ddx_slice, dilations, strides, paddings, &col);
}
DenseTensor dw_slice = dW_arr.Slice(g * out_step, (g + 1) * out_step);
blas.MatMul(
dy_slice, false, col_matrix, true, T(1.0), &dw_slice, T(1.0));
}
}
}
// ddy = w * ddx + x * ddw ==> ddy(N, Cout, oH, oW), x/ddx(N, Cin, H, W),
// w/ddw(Cout, Cin, kh, kw)
// ddy convolution double grad: im2col(vol2col) + gemm
if (ddY) {
dev_ctx.template Alloc<T>(ddY);
DenseTensor transformed_ddY(ddY->type());
if (channel_last) {
ResizeToChannelFirst<Context, T>(dev_ctx, ddY, &transformed_ddY);
} else {
transformed_ddY = *ddY;
}
set_zero(dev_ctx, &transformed_ddY, static_cast<T>(0));
paddle::operators::math::
Im2ColFunctor<paddle::operators::math::ColFormat::kCFO, Context, T>
im2col;
paddle::operators::math::Vol2ColFunctor<Context, T> vol2col;
for (int i = 0; i < batch_size; ++i) {
DenseTensor ddy_batch =
transformed_ddY.Slice(i, i + 1).Resize(output_matrix_shape);
for (int g = 0; g < groups; ++g) {
// gemm
DenseTensor ddy_slice =
ddy_batch.Slice(g * out_step, (g + 1) * out_step);
if (ddX) {
DenseTensor ddx_batch =
transformed_ddX.Slice(i, i + 1).Resize(input_shape);
DenseTensor ddx_slice =
ddx_batch.Slice(g * in_step, (g + 1) * in_step);
if (!is_expand) {
col.ShareDataWith(ddx_slice);
col_matrix.ShareDataWith(col);
col_matrix.Resize(col_matrix_shape);
} else if (data_dim == 2U) {
im2col(dev_ctx,
ddx_slice,
dilations,
strides,
std::vector<int>{
paddings[0], paddings[2], paddings[1], paddings[3]},
&col);
} else if (data_dim == 3U) {
vol2col(dev_ctx, ddx_slice, dilations, strides, paddings, &col);
}
DenseTensor w_slice = W.Slice(g * out_step, (g + 1) * out_step);
blas.MatMul(
w_slice, false, col_matrix, false, T(1.0), &ddy_slice, T(0.0));
}
if (ddW_in) {
DenseTensor x_batch =
transformed_X.Slice(i, i + 1).Resize(input_shape);
DenseTensor x_slice = x_batch.Slice(g * in_step, (g + 1) * in_step);
DenseTensor ddW;
ddW.ShareDataWith(*ddW_in).Resize(filter_matrix_shape);
if (!is_expand) {
col.ShareDataWith(x_slice);
col_matrix.ShareDataWith(col);
col_matrix.Resize(col_matrix_shape);
} else if (data_dim == 2U) {
im2col(dev_ctx,
x_slice,
dilations,
strides,
std::vector<int>{
paddings[0], paddings[2], paddings[1], paddings[3]},
&col);
} else if (data_dim == 3U) {
vol2col(dev_ctx, x_slice, dilations, strides, paddings, &col);
}
// gemm
DenseTensor ddw_slice = ddW.Slice(g * out_step, (g + 1) * out_step);
blas.MatMul(
ddw_slice, false, col_matrix, false, T(1.0), &ddy_slice, T(1.0));
}
}
}
if (channel_last) {
TransToChannelLast<Context, T>(dev_ctx, &transformed_ddY, ddY);
}
}
}
} // namespace phi } // namespace phi
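Both copies of the double-grad kernel share the same im2col bookkeeping. As a worked instance of the shapes it sets up (values chosen purely for illustration): with C_in/g = 4, a 3x3 filter and an 8x8 output map,

col_shape        = [C_in/g, k_h, k_w, o_h, o_w]      = [4, 3, 3, 8, 8]
col_matrix_shape = [(C_in/g) * k_h * k_w, o_h * o_w] = [36, 64]

so each per-group GEMM in the dW branch multiplies dy_slice of shape [C_out/g, 64] by the transposed col_matrix of shape [64, 36], producing dw_slice of shape [C_out/g, 36], which is exactly one group's slice of filter_matrix_shape = [C_out, (C_in/g) * k_h * k_w].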
...@@ -13,9 +13,6 @@ ...@@ -13,9 +13,6 @@
// limitations under the License. // limitations under the License.
#pragma once #pragma once
#ifndef _USE_MATH_DEFINES
#define _USE_MATH_DEFINES // use M_2_SQRTPI on Windows
#endif
#include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/kernels/funcs/eigen/common.h" #include "paddle/phi/kernels/funcs/eigen/common.h"
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
...@@ -289,12 +289,9 @@ class Conv2D(layers.Layer): ...@@ -289,12 +289,9 @@ class Conv2D(layers.Layer):
self._stride, self._stride,
self._padding, self._padding,
"EXPLICIT", "EXPLICIT",
self._groups if self._groups else 1,
self._dilation, self._dilation,
self._groups if self._groups else 1,
"NCHW", "NCHW",
False,
-1,
False,
) )
if self.bias is not None: if self.bias is not None:
pre_act = F.elementwise_add(pre_bias, self.bias, axis=1) pre_act = F.elementwise_add(pre_bias, self.bias, axis=1)
......
This diff is collapsed.