Unverified commit c923e6c9, authored by Chen Weihang, committed by GitHub

Adapting device-specific Extra Attributes for the PHI kernel (#46342)

* add extra attr property set

* add type_info for all context

* add onednn context to all context

* fix context compile error

* simplify conv kernel args

* pass runtime attr into dev_ctx

* fix macro error

* clear conv_grad_kernel extra args

* merge conv_grad_grad into conv_grad

* clear conv2d_grad_grad extra attrs

* clear yaml and eager extra attr

* fix conv1d error

* change to thread local

* fix npu compile failed

* try to fix windows compile failed

* add conv2d onednn phi kernel

* fix ci bugs (#36)

* fix compile bugs (#38)

* fix extra input transform bug (#39)

* support dynamic created attr (#40)

* reset extra info gen code

* rm conv_grad_grad kernel

* reimpl pass attr adapting

* add int attr support

* remove vector inputnames creating

* fix map at error

* Update paddle/phi/kernels/onednn/conv_grad_kernel.cc
Co-authored-by: Sławomir Siwek <slawomir.siwek@intel.com>

* remove useless extra attrs

* replace mkldnn_engine by onednn_engine
Co-authored-by: YuanRisheng <yuanrisheng@baidu.com>
Co-authored-by: Sławomir Siwek <slawomir.siwek@intel.com>
Parent f82d7e3c
......@@ -24,10 +24,7 @@ paddle::experimental::Tensor conv2d_ad_func(
const paddle::experimental::Tensor& filter,
std::vector<int> strides,
std::vector<int> paddings,
std::string paddding_algorithm,
int groups,
std::string padding_algorithm,
std::vector<int> dilations,
std::string data_format,
bool use_addto,
int workspace_size_MB,
bool exhaustive_search);
int groups,
std::string data_format);
......@@ -29,13 +29,10 @@ paddle::experimental::Tensor conv2d_ad_func(
const paddle::experimental::Tensor& filter,
std::vector<int> strides,
std::vector<int> paddings,
std::string paddding_algorithm,
int groups,
std::string padding_algorithm,
std::vector<int> dilations,
std::string data_format,
bool use_addto,
int workspace_size_MB,
bool exhaustive_search) {
int groups,
std::string data_format) {
// Dygraph Record Event
paddle::platform::RecordEvent dygraph_entrance_record_event(
"conv2d dygraph", paddle::platform::TracerEventType::Operator, 1);
......@@ -64,13 +61,10 @@ paddle::experimental::Tensor conv2d_ad_func(
new_filter,
strides,
paddings,
paddding_algorithm,
groups,
padding_algorithm,
dilations,
data_format,
use_addto,
workspace_size_MB,
exhaustive_search);
groups,
data_format);
}
}
......@@ -92,13 +86,10 @@ paddle::experimental::Tensor conv2d_ad_func(
filter,
strides,
paddings,
paddding_algorithm,
groups,
padding_algorithm,
dilations,
data_format,
use_addto,
workspace_size_MB,
exhaustive_search);
groups,
data_format);
transformer->SetOutTensorLayout(&out);
if (need_tune) {
egr::Controller::Instance().EnableLayoutAutoTune();
......@@ -119,13 +110,10 @@ paddle::experimental::Tensor conv2d_ad_func(
filter,
strides,
paddings,
paddding_algorithm,
groups,
padding_algorithm,
dilations,
data_format,
use_addto,
workspace_size_MB,
exhaustive_search);
groups,
data_format);
// Check NaN and Inf if needed
if (FLAGS_check_nan_inf) {
egr::CheckTensorHasNanOrInf("conv2d", api_result);
......@@ -157,13 +145,10 @@ paddle::experimental::Tensor conv2d_ad_func(
// SetAttributes if needed
grad_node->SetAttributestrides(strides);
grad_node->SetAttributepaddings(paddings);
grad_node->SetAttributepaddding_algorithm(paddding_algorithm);
grad_node->SetAttributepadding_algorithm(padding_algorithm);
grad_node->SetAttributegroups(groups);
grad_node->SetAttributedilations(dilations);
grad_node->SetAttributedata_format(data_format);
grad_node->SetAttributeuse_addto(use_addto);
grad_node->SetAttributeworkspace_size_MB(workspace_size_MB);
grad_node->SetAttributeexhaustive_search(exhaustive_search);
// Set TensorWrappers for Forward Inputs if needed
grad_node->SetTensorWrapperinput(input);
grad_node->SetTensorWrapperfilter(filter);
......
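The hunks above shrink the eager conv2d signature to the six structural attributes; the GPUDNN/oneDNN extras (use_addto, workspace_size_MB, exhaustive_search, fuse_*) are instead carried by the device context, as the later operator.cc and context changes show. A minimal, self-contained sketch of that split, using invented stand-ins (FakeDeviceContext, conv2d_like_kernel) rather than Paddle's real types:

```cpp
// Minimal sketch (not Paddle code): core attributes travel through the API,
// backend-specific extras travel through a per-context attribute store.
#include <iostream>
#include <map>
#include <string>
#include <variant>
#include <vector>

using Attr = std::variant<bool, int, float, std::string>;

struct FakeDeviceContext {                // hypothetical stand-in for phi::DeviceContext
  std::map<std::string, Attr> dnn_attrs;  // extras like use_addto, workspace_size_MB
  template <typename T>
  T GetOr(const std::string& k, T def) const {
    auto it = dnn_attrs.find(k);
    return it == dnn_attrs.end() ? def : std::get<T>(it->second);
  }
};

// Only the core conv attributes appear in the "kernel" signature.
void conv2d_like_kernel(const FakeDeviceContext& ctx,
                        const std::vector<int>& strides,
                        const std::vector<int>& paddings,
                        const std::string& padding_algorithm,
                        const std::vector<int>& dilations,
                        int groups,
                        const std::string& data_format) {
  // Extras are read from the context with safe defaults.
  bool exhaustive_search = ctx.GetOr<bool>("exhaustive_search", false);
  int workspace_size_MB = ctx.GetOr<int>("workspace_size_MB", 512);
  std::cout << "groups=" << groups << " data_format=" << data_format
            << " exhaustive_search=" << exhaustive_search
            << " workspace_size_MB=" << workspace_size_MB << "\n";
}

int main() {
  FakeDeviceContext ctx;
  ctx.dnn_attrs["exhaustive_search"] = true;  // set by the framework, not by the API caller
  conv2d_like_kernel(ctx, {1, 1}, {0, 0}, "EXPLICIT", {1, 1}, 1, "NCHW");
}
```

The point of the split is that callers of the new API never see the backend extras; only the framework-side adapter decides whether to place them on the context.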
......@@ -46,13 +46,10 @@ Conv2dGradNodeFinal::operator()(
auto& grad_out = hooked_grads[0][0];
auto& strides = this->strides_;
auto& paddings = this->paddings_;
auto& paddding_algorithm = this->paddding_algorithm_;
auto& padding_algorithm = this->padding_algorithm_;
auto& groups = this->groups_;
auto& dilations = this->dilations_;
auto& data_format = this->data_format_;
auto& use_addto = this->use_addto_;
auto& workspace_size_MB = this->workspace_size_MB_;
auto& exhaustive_search = this->exhaustive_search_;
// Prepare Grad function call
const auto& out_metas = OutputMeta();
......@@ -87,13 +84,10 @@ Conv2dGradNodeFinal::operator()(
grad_out,
strides,
paddings,
paddding_algorithm,
groups,
padding_algorithm,
dilations,
groups,
data_format,
use_addto,
workspace_size_MB,
exhaustive_search,
api_output_0,
api_output_1);
// Check NaN and Inf if needed
......@@ -134,13 +128,10 @@ Conv2dGradNodeFinal::operator()(
// SetAttributes if needed
grad_node->SetAttributestrides(strides);
grad_node->SetAttributepaddings(paddings);
grad_node->SetAttributepaddding_algorithm(paddding_algorithm);
grad_node->SetAttributepadding_algorithm(padding_algorithm);
grad_node->SetAttributegroups(groups);
grad_node->SetAttributedilations(dilations);
grad_node->SetAttributedata_format(data_format);
grad_node->SetAttributeuse_addto(use_addto);
grad_node->SetAttributeworkspace_size_MB(workspace_size_MB);
grad_node->SetAttributeexhaustive_search(exhaustive_search);
// Set TensorWrappers for Forward Inputs if needed
grad_node->SetTensorWrapperinput(input);
grad_node->SetTensorWrapperfilter(filter);
......@@ -215,13 +206,10 @@ Conv2dDoubleGradNodeFinal::operator()(
auto& strides = this->strides_;
auto& paddings = this->paddings_;
auto& paddding_algorithm = this->paddding_algorithm_;
auto& padding_algorithm = this->padding_algorithm_;
auto& groups = this->groups_;
auto& dilations = this->dilations_;
auto& data_format = this->data_format_;
auto& use_addto = this->use_addto_;
auto& workspace_size_MB = this->workspace_size_MB_;
auto& exhaustive_search = this->exhaustive_search_;
// Prepare Grad function call
const auto& out_metas = OutputMeta();
......@@ -261,13 +249,10 @@ Conv2dDoubleGradNodeFinal::operator()(
grad_filter_grad_optional,
strides,
paddings,
paddding_algorithm,
groups,
padding_algorithm,
dilations,
groups,
data_format,
use_addto,
workspace_size_MB,
exhaustive_search,
api_output_0,
api_output_1,
api_output_2);
......
......@@ -63,8 +63,8 @@ class Conv2dGradNodeFinal : public egr::GradNodeBase {
void SetAttributepaddings(const std::vector<int>& paddings) {
paddings_ = paddings;
}
void SetAttributepaddding_algorithm(const std::string& paddding_algorithm) {
paddding_algorithm_ = paddding_algorithm;
void SetAttributepadding_algorithm(const std::string& padding_algorithm) {
padding_algorithm_ = padding_algorithm;
}
void SetAttributegroups(const int& groups) { groups_ = groups; }
void SetAttributedilations(const std::vector<int>& dilations) {
......@@ -73,13 +73,6 @@ class Conv2dGradNodeFinal : public egr::GradNodeBase {
void SetAttributedata_format(const std::string& data_format) {
data_format_ = data_format;
}
void SetAttributeuse_addto(const bool& use_addto) { use_addto_ = use_addto; }
void SetAttributeworkspace_size_MB(const int& workspace_size_MB) {
workspace_size_MB_ = workspace_size_MB;
}
void SetAttributeexhaustive_search(const bool& exhaustive_search) {
exhaustive_search_ = exhaustive_search;
}
private:
// TensorWrappers
......@@ -89,13 +82,10 @@ class Conv2dGradNodeFinal : public egr::GradNodeBase {
// Attributes
std::vector<int> strides_;
std::vector<int> paddings_;
std::string paddding_algorithm_;
std::string padding_algorithm_;
int groups_;
std::vector<int> dilations_;
std::string data_format_;
bool use_addto_;
int workspace_size_MB_;
bool exhaustive_search_;
};
class Conv2dDoubleGradNodeFinal : public egr::GradNodeBase {
......@@ -146,8 +136,8 @@ class Conv2dDoubleGradNodeFinal : public egr::GradNodeBase {
void SetAttributepaddings(const std::vector<int>& paddings) {
paddings_ = paddings;
}
void SetAttributepaddding_algorithm(const std::string& paddding_algorithm) {
paddding_algorithm_ = paddding_algorithm;
void SetAttributepadding_algorithm(const std::string& padding_algorithm) {
padding_algorithm_ = padding_algorithm;
}
void SetAttributegroups(const int& groups) { groups_ = groups; }
void SetAttributedilations(const std::vector<int>& dilations) {
......@@ -156,13 +146,6 @@ class Conv2dDoubleGradNodeFinal : public egr::GradNodeBase {
void SetAttributedata_format(const std::string& data_format) {
data_format_ = data_format;
}
void SetAttributeuse_addto(const bool& use_addto) { use_addto_ = use_addto; }
void SetAttributeworkspace_size_MB(const int& workspace_size_MB) {
workspace_size_MB_ = workspace_size_MB;
}
void SetAttributeexhaustive_search(const bool& exhaustive_search) {
exhaustive_search_ = exhaustive_search;
}
private:
// TensorWrappers
......@@ -173,13 +156,10 @@ class Conv2dDoubleGradNodeFinal : public egr::GradNodeBase {
// Attributes
std::vector<int> strides_;
std::vector<int> paddings_;
std::string paddding_algorithm_;
std::string padding_algorithm_;
int groups_;
std::vector<int> dilations_;
std::string data_format_;
bool use_addto_;
int workspace_size_MB_;
bool exhaustive_search_;
};
class AddNGradNodeFinal : public egr::GradNodeBase {
......
......@@ -32,8 +32,8 @@
#include <valarray>
#include <vector>
#include "paddle/fluid/framework/expect.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/phi/core/expect.h"
namespace paddle {
namespace framework {
......
......@@ -30,7 +30,7 @@
#include <utility>
#include <vector>
#include "paddle/fluid/framework/expect.h"
#include "paddle/phi/core/expect.h"
namespace paddle {
namespace framework {
......
......@@ -28,6 +28,7 @@ limitations under the License. */
#include "paddle/fluid/framework/unused_var_check.h"
#include "paddle/fluid/framework/var_type.h"
#include "paddle/fluid/operators/isfinite_op.h"
#include "paddle/fluid/operators/ops_extra_info.h"
#include "paddle/fluid/platform/device/device_wrapper.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/profiler.h"
......@@ -2269,7 +2270,8 @@ Scope* OperatorWithKernel::PrepareData(
}
std::unique_ptr<OpKernelType> new_expected_kernel_key = nullptr;
if (run_phi_kernel_ && in_def->backend != phi::Backend::ALL_BACKEND) {
if (run_phi_kernel_ && in_def != nullptr &&
in_def->backend != phi::Backend::ALL_BACKEND) {
auto tensor_backend = phi::TransToPhiBackend(tensor_in->place());
if ((in_def->backend != tensor_backend &&
(in_def->backend != phi::Backend::GPUDNN ||
......@@ -2388,7 +2390,6 @@ Scope* OperatorWithKernel::PrepareData(
input_names.size(),
input_defs.size()));
for (size_t i = 0; i < input_defs.size(); ++i) {
const auto& input_defs = phi_kernel_->args_def().input_defs();
auto& in_def = input_defs.at(i);
std::string input_name = input_names[i];
auto iter = ctx->inputs.find(input_name);
......@@ -2400,6 +2401,22 @@ Scope* OperatorWithKernel::PrepareData(
no_buffer_ins && no_buffer_ins->count(input_name) > 0;
prepare_input_data(input_name, &ins_vector, &in_def, should_skip_input);
}
#ifdef PADDLE_WITH_MKLDNN
// Inputs marked as Extra are only used by MKLDNN kernels, so prepare them here
auto& extra_input_names =
paddle::operators::ExtraInfoUtils::Instance().GetExtraInputNamesMap(
Type());
for (const auto& input_name : extra_input_names) {
auto iter = ctx->inputs.find(input_name);
if (iter == ctx->inputs.end()) {
continue;
}
bool should_skip_input =
no_buffer_ins && no_buffer_ins->count(input_name) > 0;
std::vector<Variable*>& input_vars = iter->second;
prepare_input_data(input_name, &input_vars, nullptr, should_skip_input);
}
#endif
} else {
for (auto& var_name_item : Inputs()) {
bool should_skip_input =
......@@ -2699,6 +2716,65 @@ phi::KernelSignature OperatorWithKernel::GetExpectedPhiKernelArgs(
return (*arg_map_fn_)(arg_mapping_ctx);
}
static void SetDnnAttrIntoDeviceContext(
phi::DeviceContext* dev_ctx,
const Attribute& attr,
const std::string& attr_name,
const operators::ExtraAttrPropertySet& attr_propertys) {
#ifdef PADDLE_WITH_MKLDNN
if (phi::OneDNNContext::classof(dev_ctx) &&
attr_propertys.Support(operators::ExtraAttrProperty::ONEDNN)) {
VLOG(4) << "Runtime attr `" << attr_name << "` is passed to OneDNNContext.";
phi::OneDNNContext* one_dnn_ctx = static_cast<phi::OneDNNContext*>(dev_ctx);
switch (AttrTypeID(attr)) {
case proto::AttrType::FLOAT:
one_dnn_ctx->SetDnnAttr(attr_name, PADDLE_GET_CONST(float, attr));
break;
case proto::AttrType::INT:
one_dnn_ctx->SetDnnAttr(attr_name, PADDLE_GET_CONST(int, attr));
break;
case proto::AttrType::STRING:
one_dnn_ctx->SetDnnAttr(attr_name, PADDLE_GET_CONST(std::string, attr));
break;
case proto::AttrType::INTS:
one_dnn_ctx->SetDnnAttr(attr_name,
PADDLE_GET_CONST(std::vector<int>, attr));
break;
case proto::AttrType::FLOATS:
one_dnn_ctx->SetDnnAttr(attr_name,
PADDLE_GET_CONST(std::vector<float>, attr));
break;
case proto::AttrType::BOOLEAN:
one_dnn_ctx->SetDnnAttr(attr_name, PADDLE_GET_CONST(bool, attr));
break;
default:
PADDLE_THROW(platform::errors::Unimplemented(
"Unsupported Attribute value type `%s` for phi.",
platform::demangle(attr.type().name())));
}
}
#endif
#ifdef PADDLE_WITH_CUDA
if (phi::GPUContext::classof(dev_ctx) &&
attr_propertys.Support(operators::ExtraAttrProperty::GPUDNN)) {
VLOG(4) << "Runtime attr `" << attr_name << "` is passed to GPUDNNContext.";
phi::GPUContext* gpu_dnn_ctx = static_cast<phi::GPUContext*>(dev_ctx);
switch (AttrTypeID(attr)) {
case proto::AttrType::INT:
gpu_dnn_ctx->SetDnnAttr(attr_name, PADDLE_GET_CONST(int, attr));
break;
case proto::AttrType::BOOLEAN:
gpu_dnn_ctx->SetDnnAttr(attr_name, PADDLE_GET_CONST(bool, attr));
break;
default:
PADDLE_THROW(platform::errors::Unimplemented(
"Unsupported Attribute value type `%s` for phi.",
platform::demangle(attr.type().name())));
}
}
#endif
}
void OperatorWithKernel::BuildPhiKernelContext(
const RuntimeContext& ctx,
platform::DeviceContext* dev_ctx,
......@@ -2713,6 +2789,15 @@ void OperatorWithKernel::BuildPhiKernelContext(
auto attr_defs = phi_kernel_->args_def().attribute_defs();
auto output_defs = phi_kernel_->args_def().output_defs();
#if defined(PADDLE_WITH_MKLDNN)
if (phi::OneDNNContext::classof(dev_ctx)) {
// OneDNN holds this op's variable names and initializes them here.
phi::OneDNNContext* one_dnn_ctx = static_cast<phi::OneDNNContext*>(dev_ctx);
one_dnn_ctx->SetInputsName(Inputs());
one_dnn_ctx->SetOutputsName(Outputs());
}
#endif
PADDLE_ENFORCE_EQ(input_names.size(),
input_defs.size(),
platform::errors::InvalidArgument(
......@@ -2992,6 +3077,7 @@ void OperatorWithKernel::BuildPhiKernelContext(
} break;
default: {
if (attr_iter == Attrs().end()) {
// TODO(chenweihang): remove this backup searching later
attr_iter = RuntimeAttrs().find(attr_names[i]);
PADDLE_ENFORCE_NE(attr_iter,
RuntimeAttrs().end(),
......@@ -3075,6 +3161,63 @@ void OperatorWithKernel::BuildPhiKernelContext(
}
}
VLOG(4) << "Done attributes";
// For compatibility with ops that carry extra attrs for a specific backend
#if defined(PADDLE_WITH_MKLDNN) || defined(PADDLE_WITH_CUDA)
auto& runtime_attrs = RuntimeAttrs();
for (const auto& attr_iter : runtime_attrs) {
auto& attr_name = attr_iter.first;
auto& attr = attr_iter.second;
auto attr_propertys = paddle::operators::GetExtraAttrPropertys(attr_name);
SetDnnAttrIntoDeviceContext(dev_ctx, attr, attr_name, attr_propertys);
}
// TODO(chenweihang): Since passes still `SetAttr` on the OpDesc, we tried to
// add those attrs to the RuntimeAttrs, but the OpDesc loses the RuntimeAttrs
// information when the Graph is converted back to the Program, so an extra
// record configuration would have to be introduced, which increases the cost
// of development and understanding. Therefore, for the time being, we still
// read the attributes set by these passes from Attrs. In the future, the
// positioning of RuntimeAttrs needs to be clarified and the related
// functions extended.
auto& attrs = Attrs();
for (const auto& attr_iter : attrs) {
auto& attr_name = attr_iter.first;
auto& attr = attr_iter.second;
auto attr_propertys = paddle::operators::GetExtraAttrPropertys(attr_name);
SetDnnAttrIntoDeviceContext(dev_ctx, attr, attr_name, attr_propertys);
}
VLOG(4) << "Done runtime attributes";
#endif
// For compatibility with ops that carry extra inputs for the oneDNN backend
#ifdef PADDLE_WITH_MKLDNN
if (phi::OneDNNContext::classof(dev_ctx)) {
phi::OneDNNContext* one_dnn_ctx = static_cast<phi::OneDNNContext*>(dev_ctx);
auto& extra_input_names =
paddle::operators::ExtraInfoUtils::Instance().GetExtraInputNamesMap(
Type());
for (const auto& input_name : extra_input_names) {
auto it = ctx.inputs.find(input_name);
if (it == ctx.inputs.end() || it->second.size() == 0) {
one_dnn_ctx->SetDnnInput(input_name, nullptr);
} else {
auto ins_vector = it->second;
PADDLE_ENFORCE_EQ(
ins_vector.size(),
1UL,
phi::errors::InvalidArgument(
"OneDNN's extra input only allows one input tensor."));
auto* var = ins_vector[0];
PADDLE_ENFORCE_EQ(var->IsType<phi::DenseTensor>(),
true,
phi::errors::InvalidArgument(
"OneDNN's extra input only can be DenseTensor."));
one_dnn_ctx->SetDnnInput(input_name, &(var->Get<phi::DenseTensor>()));
}
}
}
VLOG(4) << "Done runtime extra inputs";
#endif
}
} // namespace framework
......
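SetDnnAttrIntoDeviceContext above routes each attribute to the backend context that declares support for it (OneDNNContext for ONEDNN-tagged attributes, GPUContext for GPUDNN-tagged ones), unpacking the Attribute by its stored type. A standalone sketch of the same gating idea under simplified, hypothetical types (Ctx, PropertySet, Prop); the real code additionally switches on proto::AttrType and uses PADDLE_GET_CONST:

```cpp
// Minimal sketch (not Paddle code) of property-gated attribute forwarding.
#include <cstdint>
#include <iostream>
#include <map>
#include <string>
#include <variant>

using Attr = std::variant<bool, int, float, std::string>;

enum class Prop : uint8_t { ONEDNN = 1, GPUDNN = 2 };

struct PropertySet {
  uint8_t bits;
  bool Support(Prop p) const { return (bits & static_cast<uint8_t>(p)) != 0; }
};

struct Ctx {
  std::string kind;  // "onednn" or "gpudnn" in this sketch
  std::map<std::string, Attr> dnn_attrs;
};

void SetDnnAttrIntoCtx(Ctx* ctx, const std::string& name, const Attr& attr,
                       PropertySet props) {
  const bool matches = (ctx->kind == "onednn" && props.Support(Prop::ONEDNN)) ||
                       (ctx->kind == "gpudnn" && props.Support(Prop::GPUDNN));
  if (!matches) return;  // attribute is not meant for this backend
  ctx->dnn_attrs[name] = attr;
  std::cout << "forwarded `" << name << "` to " << ctx->kind << " context\n";
}

int main() {
  Ctx gpu{"gpudnn", {}};
  PropertySet gpudnn_only{static_cast<uint8_t>(Prop::GPUDNN)};
  PropertySet onednn_only{static_cast<uint8_t>(Prop::ONEDNN)};

  SetDnnAttrIntoCtx(&gpu, "exhaustive_search", Attr(true), gpudnn_only);  // kept
  SetDnnAttrIntoCtx(&gpu, "fuse_activation", Attr(std::string("relu")),
                    onednn_only);  // dropped: not a GPUDNN attribute
}
```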
......@@ -13,9 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/expect.h"
#include "paddle/fluid/operators/fused/fusion_gru_op.h"
#include "paddle/fluid/operators/fused/mkldnn/fusion_rnn_mkldnn.h"
#include "paddle/phi/core/expect.h"
namespace paddle {
namespace operators {
......
......@@ -13,9 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/expect.h"
#include "paddle/fluid/operators/fused/fusion_lstm_op.h"
#include "paddle/fluid/operators/fused/mkldnn/fusion_rnn_mkldnn.h"
#include "paddle/phi/core/expect.h"
namespace paddle {
namespace operators {
......
......@@ -14,11 +14,11 @@
#include <tuple>
#include "paddle/fluid/framework/expect.h"
#include "paddle/fluid/operators/conv_op.h"
#include "paddle/fluid/platform/cpu_info.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/fluid/platform/mkldnn_reuse.h"
#include "paddle/phi/core/expect.h"
#include "paddle/phi/core/visit_type.h"
......@@ -1184,20 +1184,6 @@ class ConvMKLDNNGradOpKernel : public framework::OpKernel<T> {
namespace ops = paddle::operators;
REGISTER_OP_KERNEL(conv2d,
MKLDNN,
::paddle::platform::CPUPlace,
ops::ConvMKLDNNOpKernel<float>,
ops::ConvMKLDNNOpKernel<paddle::platform::bfloat16>,
ops::ConvMKLDNNOpKernel<uint8_t>,
ops::ConvMKLDNNOpKernel<int8_t>);
REGISTER_OP_KERNEL(conv2d_grad,
MKLDNN,
::paddle::platform::CPUPlace,
ops::ConvMKLDNNGradOpKernel<float>,
ops::ConvMKLDNNGradOpKernel<paddle::platform::bfloat16>);
REGISTER_OP_KERNEL(depthwise_conv2d,
MKLDNN,
::paddle::platform::CPUPlace,
......
......@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/expect.h"
#include "paddle/fluid/platform/mkldnn_reuse.h"
#include "paddle/phi/core/expect.h"
namespace paddle {
namespace operators {
......
......@@ -36,7 +36,7 @@ PD_DECLARE_KERNEL(relu, OneDNN, ONEDNN);
USE_OP_ITSELF(softmax);
USE_OP_DEVICE_KERNEL(softmax, MKLDNN);
USE_OP_ITSELF(conv2d);
USE_OP_DEVICE_KERNEL(conv2d, MKLDNN);
PD_DECLARE_KERNEL(conv2d, OneDNN, ONEDNN);
namespace paddle {
namespace operators {
......
......@@ -14,11 +14,137 @@
#pragma once
#include <string>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/framework/attribute.h"
namespace paddle {
namespace operators {
// This file is to be compatible with the bad design and
// implementation of fluid in the past
// Many operators in fluid have extra attributes, which are generally added
// to implement some specific kernel selection and to meet the specialization
// needs of a specific operation library like mkldnn or cudnn
enum class ExtraAttrProperty : uint8_t {
// The attributes that are no longer used in any scenario
DEPRECATED = 0,
// The attributes used for framework execution scheduling,
// such as `use_mkldnn`, `use_cudnn`, no need to save
SCHEDULE,
// The attributes for ONEDNN only, can be saved in OneDNNContext
ONEDNN,
// The attributes for GPUDNN only, can be saved in GPUContext
GPUDNN,
// Add necessary properties as needed
};
class ExtraAttrPropertySet final {
public:
constexpr ExtraAttrPropertySet() : bitset_(0) {}
constexpr ExtraAttrPropertySet(ExtraAttrProperty e) // NOLINT
: bitset_(e == ExtraAttrProperty::DEPRECATED
? 0
: 1ULL << (static_cast<uint8_t>(e) - 1)) {}
inline uint64_t bitset() const { return bitset_; }
bool inline Support(ExtraAttrProperty e) const {
// DEPRECATED ExtraAttr always return false
return static_cast<bool>(bitset_ & ExtraAttrPropertySet(e).bitset());
}
bool IsEmpty() const { return bitset_ == 0; }
ExtraAttrPropertySet operator|(const ExtraAttrPropertySet& other) const {
return ExtraAttrPropertySet(bitset_ | other.bitset());
}
ExtraAttrPropertySet operator&(const ExtraAttrPropertySet& other) const {
return ExtraAttrPropertySet(bitset_ & other.bitset());
}
ExtraAttrPropertySet operator-(const ExtraAttrPropertySet& other) const {
return ExtraAttrPropertySet(bitset_ & ~other.bitset());
}
ExtraAttrPropertySet operator^(const ExtraAttrPropertySet& other) const {
return ExtraAttrPropertySet(bitset_ ^ other.bitset());
}
bool operator==(const ExtraAttrPropertySet& other) const {
return bitset_ == other.bitset();
}
private:
constexpr ExtraAttrPropertySet(uint64_t bitset) : bitset_(bitset) {}
uint64_t bitset_;
};
const std::unordered_map<std::string, ExtraAttrPropertySet>
extra_attr_properties = {
// DEPRECATED attributes
{"use_quantizer", ExtraAttrProperty::DEPRECATED},
// SCHEDULE attributes
{"use_cudnn", ExtraAttrProperty::SCHEDULE},
{"use_mkldnn", ExtraAttrProperty::SCHEDULE},
// ONEDNN dedicated attributes
{"Bias", ExtraAttrProperty::ONEDNN},
{"data_format", ExtraAttrProperty::ONEDNN},
{"force_fp32_output", ExtraAttrProperty::ONEDNN},
{"fuse_activation", ExtraAttrProperty::ONEDNN},
{"fuse_activation_type", ExtraAttrProperty::ONEDNN},
{"fuse_activation_alpha", ExtraAttrProperty::ONEDNN},
{"fuse_activation_beta", ExtraAttrProperty::ONEDNN},
{"fuse_activation_scale", ExtraAttrProperty::ONEDNN},
{"fuse_alpha", ExtraAttrProperty::ONEDNN},
{"fuse_beta", ExtraAttrProperty::ONEDNN},
{"fuse_relu", ExtraAttrProperty::ONEDNN},
{"fuse_residual_connection", ExtraAttrProperty::ONEDNN},
{"fuse_with_relu", ExtraAttrProperty::ONEDNN},
{"fused_reshape_Out", ExtraAttrProperty::ONEDNN},
{"fused_transpose_Out", ExtraAttrProperty::ONEDNN},
{"fused_reshape_X", ExtraAttrProperty::ONEDNN},
{"fused_reshape_Y", ExtraAttrProperty::ONEDNN},
{"fused_transpose_X", ExtraAttrProperty::ONEDNN},
{"fused_transpose_Y", ExtraAttrProperty::ONEDNN},
{"mkldnn_data_type", ExtraAttrProperty::ONEDNN},
{"ResidualData", ExtraAttrProperty::ONEDNN},
{"scale_x", ExtraAttrProperty::ONEDNN},
{"scale_y", ExtraAttrProperty::ONEDNN},
{"scale_out", ExtraAttrProperty::ONEDNN},
{"Scale_in", ExtraAttrProperty::ONEDNN},
{"Scale_in_eltwise", ExtraAttrProperty::ONEDNN},
{"Scale_x", ExtraAttrProperty::ONEDNN},
{"Scale_y", ExtraAttrProperty::ONEDNN},
{"Scale_out", ExtraAttrProperty::ONEDNN},
{"Scale_weights", ExtraAttrProperty::ONEDNN},
{"x_data_format", ExtraAttrProperty::ONEDNN},
{"y_data_format", ExtraAttrProperty::ONEDNN},
// ONEDNN pass dedicated attributes
{"Activation_scale", ExtraAttrProperty::ONEDNN},
{"Bias_scales", ExtraAttrProperty::ONEDNN},
{"Output_shift_scale", ExtraAttrProperty::ONEDNN},
{"Sum_scale", ExtraAttrProperty::ONEDNN},
// GPUDNN dedicated attributes
{"exhaustive_search", ExtraAttrProperty::GPUDNN},
{"fuse_relu_before_depthwise_conv", ExtraAttrProperty::GPUDNN},
{"use_addto", ExtraAttrProperty::GPUDNN},
{"workspace_size_MB", ExtraAttrProperty::GPUDNN},
// Mixed-use attributes
{"is_test",
ExtraAttrPropertySet(ExtraAttrProperty::ONEDNN) |
ExtraAttrPropertySet(ExtraAttrProperty::GPUDNN)},
};
inline ExtraAttrPropertySet GetExtraAttrPropertys(
const std::string& attr_name) {
auto iter = extra_attr_properties.find(attr_name);
if (iter != extra_attr_properties.end()) {
return iter->second;
}
return ExtraAttrPropertySet();
}
template <typename T>
struct ExtraAttrChecker {
ExtraAttrChecker(const std::string& attr_name, T default_value)
......@@ -71,6 +197,15 @@ class ExtraInfoUtils {
return empty_extra_attrs_checker_;
}
const std::vector<std::string>& GetExtraInputNamesMap(
const std::string& op_type) const {
auto iter = g_extra_input_names_map_.find(op_type);
if (iter != g_extra_input_names_map_.end()) {
return iter->second;
}
return empty_extra_input_names_;
}
private:
ExtraInfoUtils();
......@@ -83,6 +218,12 @@ class ExtraInfoUtils {
g_extra_attrs_checker_;
std::vector<std::function<void(framework::AttributeMap*, bool)>>
empty_extra_attrs_checker_{};
// TODO(chenweihang): move these extra inputs into op_compat.yaml
std::unordered_map<std::string, std::vector<std::string>>
g_extra_input_names_map_ = {{"conv2d", {"Bias", "ResidualData"}},
{"conv2d_grad", {"Bias"}}};
std::vector<std::string> empty_extra_input_names_;
};
} // namespace operators
......
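ExtraAttrPropertySet above packs each property into one bit, with DEPRECATED deliberately collapsing to the empty set so deprecated attributes never report support for any backend; unknown attribute names fall back to a default-constructed (empty) set in GetExtraAttrPropertys. A standalone sketch that mirrors the class closely enough to demonstrate those three behaviors:

```cpp
// Minimal sketch (not Paddle code) mirroring ExtraAttrPropertySet semantics.
#include <cassert>
#include <cstdint>

enum class ExtraAttrProperty : uint8_t { DEPRECATED = 0, SCHEDULE, ONEDNN, GPUDNN };

class ExtraAttrPropertySet final {
 public:
  constexpr ExtraAttrPropertySet() : bitset_(0) {}
  constexpr ExtraAttrPropertySet(ExtraAttrProperty e)  // NOLINT
      : bitset_(e == ExtraAttrProperty::DEPRECATED
                    ? 0
                    : 1ULL << (static_cast<uint8_t>(e) - 1)) {}
  uint64_t bitset() const { return bitset_; }
  bool Support(ExtraAttrProperty e) const {
    // A DEPRECATED query always yields false because its bitset is 0.
    return static_cast<bool>(bitset_ & ExtraAttrPropertySet(e).bitset());
  }
  bool IsEmpty() const { return bitset_ == 0; }
  ExtraAttrPropertySet operator|(const ExtraAttrPropertySet& o) const {
    return ExtraAttrPropertySet(bitset_ | o.bitset());
  }

 private:
  constexpr ExtraAttrPropertySet(uint64_t b) : bitset_(b) {}
  uint64_t bitset_;
};

int main() {
  // `is_test` style: supported by both oneDNN and GPUDNN.
  ExtraAttrPropertySet is_test =
      ExtraAttrPropertySet(ExtraAttrProperty::ONEDNN) |
      ExtraAttrPropertySet(ExtraAttrProperty::GPUDNN);
  assert(is_test.Support(ExtraAttrProperty::ONEDNN));
  assert(is_test.Support(ExtraAttrProperty::GPUDNN));
  assert(!is_test.Support(ExtraAttrProperty::SCHEDULE));

  // `use_quantizer` style: DEPRECATED collapses to the empty set.
  ExtraAttrPropertySet deprecated(ExtraAttrProperty::DEPRECATED);
  assert(deprecated.IsEmpty());
  assert(!deprecated.Support(ExtraAttrProperty::ONEDNN));

  // Unknown attribute names get a default-constructed (empty) set.
  assert(ExtraAttrPropertySet().IsEmpty());
}
```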
......@@ -89,7 +89,9 @@ class MLUContext {
DISABLE_COPY_AND_ASSIGN(MLUContext);
};
class MLUDeviceContext : public DeviceContext {
class MLUDeviceContext
: public DeviceContext,
public phi::TypeInfoTraits<DeviceContext, MLUDeviceContext> {
public:
explicit MLUDeviceContext(MLUPlace place);
virtual ~MLUDeviceContext();
......@@ -148,6 +150,8 @@ class MLUDeviceContext : public DeviceContext {
return thread_ctx_.at(this);
}
static const char* name() { return "MLUDeviceContext"; }
private:
int compute_capability_;
int driver_version_;
......
......@@ -19,7 +19,6 @@ limitations under the License. */
#include <set>
#include "glog/logging.h"
#include "paddle/fluid/framework/expect.h"
#include "paddle/fluid/framework/generator.h"
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/fluid/platform/device/device_wrapper.h"
......@@ -28,6 +27,7 @@ limitations under the License. */
#include "paddle/fluid/platform/profiler/event_tracing.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/allocator.h"
#include "paddle/phi/core/expect.h"
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#include "paddle/fluid/memory/allocation/cuda_device_context_allocator.h"
......
......@@ -144,7 +144,9 @@ struct DefaultDeviceContextType<platform::CPUPlace> {
// Graphcore IPU
#ifdef PADDLE_WITH_IPU
class IPUDeviceContext : public DeviceContext {
class IPUDeviceContext
: public DeviceContext,
public phi::TypeInfoTraits<DeviceContext, IPUDeviceContext> {
public:
IPUDeviceContext() = delete;
explicit IPUDeviceContext(IPUPlace place);
......@@ -154,6 +156,8 @@ class IPUDeviceContext : public DeviceContext {
/*! \brief Wait for all operations completion in the stream. */
void Wait() const override;
static const char* name() { return "IPUDeviceContext"; }
private:
IPUPlace place_;
};
......@@ -188,7 +192,9 @@ struct DefaultDeviceContextType<platform::XPUPlace> {
#endif
#ifdef PADDLE_WITH_ASCEND_CL
class NPUDeviceContext : public DeviceContext {
class NPUDeviceContext
: public DeviceContext,
public phi::TypeInfoTraits<DeviceContext, NPUDeviceContext> {
public:
explicit NPUDeviceContext(NPUPlace place);
virtual ~NPUDeviceContext();
......@@ -224,6 +230,8 @@ class NPUDeviceContext : public DeviceContext {
// void WaitStreamCallback() const { return stream_->WaitCallback(); }
static const char* name() { return "NPUDeviceContext"; }
private:
NPUPlace place_;
aclrtContext context_;
......@@ -248,7 +256,9 @@ struct DefaultDeviceContextType<platform::NPUPlace> {
};
// Currently, NPUPinnedDeviceContext is only used to data copying.
class NPUPinnedDeviceContext : public DeviceContext {
class NPUPinnedDeviceContext
: public DeviceContext,
public phi::TypeInfoTraits<DeviceContext, NPUPinnedDeviceContext> {
public:
NPUPinnedDeviceContext();
explicit NPUPinnedDeviceContext(NPUPinnedPlace place);
......@@ -257,6 +267,8 @@ class NPUPinnedDeviceContext : public DeviceContext {
Eigen::DefaultDevice* eigen_device() const;
static const char* name() { return "NPUPinnedDeviceContext"; }
private:
NPUPinnedPlace place_;
std::unique_ptr<Eigen::DefaultDevice> eigen_device_;
......@@ -276,7 +288,9 @@ struct DefaultDeviceContextType<platform::CUDAPlace> {
};
// Currently, CUDAPinnedDeviceContext is only used to data copying.
class CUDAPinnedDeviceContext : public DeviceContext {
class CUDAPinnedDeviceContext
: public DeviceContext,
public phi::TypeInfoTraits<DeviceContext, CUDAPinnedDeviceContext> {
public:
CUDAPinnedDeviceContext();
explicit CUDAPinnedDeviceContext(CUDAPinnedPlace place);
......@@ -285,6 +299,8 @@ class CUDAPinnedDeviceContext : public DeviceContext {
Eigen::DefaultDevice* eigen_device() const;
static const char* name() { return "CUDAPinnedDeviceContext"; }
private:
CUDAPinnedPlace place_;
std::unique_ptr<Eigen::DefaultDevice> eigen_device_;
......
......@@ -122,77 +122,80 @@ using namespace ::phi::enforce; // NOLINT
#endif
/*
* Summary: This PADDLE_GET(_**) series macros are used to call paddle::get
* safely. paddle::get is not a completely safe api, although it will not
* go wrong in most cases, but in extreme cases, it may fail and directly
* throw a paddle::bad_variant_access const exception, without any stack
*information.
* This kind of problems is difficult to debug, so add these macros to
* enrich paddle::get error information. At the same time, we restrict
* the direct use of paddle::get by CI rule.
* Summary: This macro is used to get Variable or internal type
* data (such as LoDTensor or SelectedRows) of the Input and
* Output in op, generally used when call scope.FindVar(Input/
* Output("Name")) or ctx.Input<LoDTensor>().
* Firstly this macro check whether the obtained pointer is null,
* and then return data if it is not null.
*
* Note: This macro is only suitable for specific scenarios and
* does not intended to be widely used. If it cannot meet the
* requirements, please use other PADDLE_ENFORCE** check macro.
*
* Parameters:
*     __TYPE: the target variable type
* __VALUE: the target variable to get
*     __PTR: pointer
* __ROLE: (string), Input or Output
* __NAME: (string), Input or Output name
* __OP_TYPE: (string), the op type
*
* Examples:
* - unsafe writing: int x = paddle::get<int>(y);
* - safe writing: int x = PADDLE_GET(int, y);
* Return: The data pointed to by the pointer.
*
* Note: GCC 4.8 cannot select right overloaded function here, so need
* to define different functions and macros here, after we upgreade
* CI gcc version, we can only define one PADDLE_GET macro.
* Examples:
* GET_DATA_SAFELY(ctx.Input<LoDTensor>("X"), "Input", "X", "Mul");
*/
namespace details {
using namespace phi::enforce::details; // NOLINT
#define DEFINE_SAFE_PADDLE_GET( \
__InputType, __OutputType, __OutputTypePtr, __FuncName) \
template <typename OutputType, typename InputType> \
auto __FuncName( \
__InputType input, const char* expression, const char* file, int line) \
->typename std::conditional<std::is_pointer<InputType>::value, \
__OutputTypePtr, \
__OutputType>::type { \
try { \
return paddle::get<OutputType>(input); \
} catch (paddle::bad_variant_access const&) { \
HANDLE_THE_ERROR \
throw ::phi::enforce::EnforceNotMet( \
phi::errors::InvalidArgument( \
"paddle::get failed, cannot get value " \
"(%s) by type %s, its type is %s.", \
expression, \
phi::enforce::demangle(typeid(OutputType).name()), \
phi::enforce::demangle(input.type().name())), \
file, \
line); \
END_HANDLE_THE_ERROR \
} \
}
DEFINE_SAFE_PADDLE_GET(InputType&, OutputType&, OutputType*, SafeBoostGet);
DEFINE_SAFE_PADDLE_GET(const InputType&,
const OutputType&,
const OutputType*,
SafeBoostGetConst);
DEFINE_SAFE_PADDLE_GET(InputType&&,
OutputType,
OutputType*,
SafeBoostGetMutable);
} // namespace details
#define GET_DATA_SAFELY(__PTR, __ROLE, __NAME, __OP_TYPE) \
(([&]() -> std::add_lvalue_reference<decltype(*(__PTR))>::type { \
auto* __ptr = (__PTR); \
if (UNLIKELY(nullptr == __ptr)) { \
auto __summary__ = phi::errors::NotFound( \
"Unable to get %s data of %s %s in operator %s. " \
"Possible reasons are:\n" \
" 1. The %s is not the %s of operator %s;\n" \
" 2. The %s has no corresponding variable passed in;\n" \
" 3. The %s corresponding variable is not initialized.", \
phi::demangle( \
typeid(std::add_lvalue_reference<decltype(*__ptr)>::type) \
.name()), \
__ROLE, \
__NAME, \
__OP_TYPE, \
__NAME, \
__ROLE, \
__OP_TYPE, \
__NAME, \
__NAME); \
auto __message__ = ::paddle::string::Sprintf( \
"%s\n [Hint: pointer " #__PTR " should not be null.]", \
__summary__.error_message()); \
__THROW_ERROR_INTERNAL__( \
phi::ErrorSummary(__summary__.code(), __message__)); \
} \
return *__ptr; \
})())
#define PADDLE_GET(__TYPE, __VALUE) \
paddle::platform::details::SafeBoostGet<__TYPE>( \
__VALUE, #__VALUE, __FILE__, __LINE__)
#define PADDLE_GET_CONST(__TYPE, __VALUE) \
paddle::platform::details::SafeBoostGetConst<__TYPE>( \
__VALUE, #__VALUE, __FILE__, __LINE__)
#define PADDLE_GET_MUTABLE(__TYPE, __VALUE) \
paddle::platform::details::SafeBoostGetMutable<__TYPE>( \
__VALUE, #__VALUE, __FILE__, __LINE__)
/*
* Summary: This macro is used to check whether op has specified
* Input or Output Variables. Because op's Input and Output
* checking are written similarly, so abstract this macro.
*
* Parameters:
*     __EXPR: (bool), the bool expression
* __ROLE: (string), Input or Output
* __NAME: (string), Input or Output name
* __OP_TYPE: (string), the op type
*
* Examples:
* OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "Mul");
*/
#define OP_INOUT_CHECK(__EXPR, __ROLE, __NAME, __OP_TYPE) \
do { \
PADDLE_ENFORCE_EQ( \
__EXPR, \
true, \
phi::errors::NotFound( \
"No %s(%s) found for %s operator.", __ROLE, __NAME, __OP_TYPE)); \
} while (0)
/** OTHER EXCEPTION AND ENFORCE **/
......
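The PADDLE_GET(_CONST/_MUTABLE) helpers shown above (which this patch appears to relocate toward phi::enforce, judging by the test change that follows) wrap variant access so that a type mismatch produces a descriptive error carrying the expression and location instead of a bare bad_variant_access; the rest of the patch uses them to unpack Attribute variants in SetDnnAttrIntoDeviceContext and AppendActivation. A standalone sketch of that pattern with std::variant; SKETCH_GET_CONST and AttrVariant are illustrative names, not Paddle's:

```cpp
// Minimal sketch (not Paddle code) of a "safe get" over a variant attribute,
// reporting the expression, expected type, and location on failure.
#include <iostream>
#include <stdexcept>
#include <string>
#include <variant>
#include <vector>

using AttrVariant = std::variant<bool, int, float, std::string, std::vector<int>>;

template <typename T>
const T& SafeGetConst(const AttrVariant& v, const char* expr, const char* file,
                      int line) {
  if (auto* p = std::get_if<T>(&v)) return *p;
  throw std::runtime_error(std::string("cannot get value (") + expr +
                           ") with the requested type at " + file + ":" +
                           std::to_string(line));
}

#define SKETCH_GET_CONST(TYPE, VALUE) \
  SafeGetConst<TYPE>(VALUE, #VALUE, __FILE__, __LINE__)

int main() {
  AttrVariant workspace = 512;                            // holds int
  std::cout << SKETCH_GET_CONST(int, workspace) << "\n";  // ok: 512
  try {
    SKETCH_GET_CONST(std::string, workspace);             // wrong type: throws
  } catch (const std::exception& e) {
    std::cout << e.what() << "\n";
  }
}
```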
......@@ -528,10 +528,9 @@ struct CannotToStringType {
};
TEST(enforce, cannot_to_string_type) {
static_assert(
!paddle::platform::details::CanToString<CannotToStringType>::kValue,
"CannotToStringType must not be converted to string");
static_assert(paddle::platform::details::CanToString<int>::kValue,
static_assert(!phi::enforce::details::CanToString<CannotToStringType>::kValue,
"CannotToStringType must not be converted to string");
static_assert(phi::enforce::details::CanToString<int>::kValue,
"int can be converted to string");
CannotToStringType obj1(3), obj2(4), obj3(3);
......
......@@ -312,8 +312,8 @@
func : conj
- backward_op : conv2d_grad
forward : conv2d (Tensor input, Tensor filter, int[] strides, int[] paddings, str paddding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search) -> Tensor(out)
args : (Tensor input, Tensor filter, Tensor out_grad, int[] strides, int[] paddings, str paddding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search)
forward : conv2d (Tensor input, Tensor filter, int[] strides, int[] paddings, str padding_algorithm, int[] dilations, int groups, str data_format) -> Tensor(out)
args : (Tensor input, Tensor filter, Tensor out_grad, int[] strides, int[] paddings, str padding_algorithm, int[] dilations, int groups, str data_format)
output : Tensor(input_grad), Tensor(filter_grad)
infer_meta :
func : GeneralBinaryGradInferMeta
......@@ -324,8 +324,8 @@
backward : conv2d_grad_grad
- backward_op : conv2d_grad_grad
forward : conv2d_grad (Tensor input, Tensor filter, Tensor grad_out, int[] strides, int[] paddings, str paddding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search) -> Tensor(grad_input), Tensor(grad_filter)
args : (Tensor input, Tensor filter, Tensor grad_out, Tensor grad_input_grad, Tensor grad_filter_grad, int[] strides, int[] paddings, str paddding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search)
forward : conv2d_grad (Tensor input, Tensor filter, Tensor grad_out, int[] strides, int[] paddings, str padding_algorithm, int[] dilations, int groups, str data_format) -> Tensor(grad_input), Tensor(grad_filter)
args : (Tensor input, Tensor filter, Tensor grad_out, Tensor grad_input_grad, Tensor grad_filter_grad, int[] strides, int[] paddings, str padding_algorithm, int[] dilations, int groups, str data_format)
output : Tensor(input_grad), Tensor(filter_grad), Tensor(grad_out_grad)
infer_meta :
func : GeneralTernaryGradInferMeta
......@@ -357,8 +357,8 @@
backward : conv2d_transpose_double_grad
- backward_op : conv3d_grad
forward : conv3d (Tensor input, Tensor filter, int[] strides, int[] paddings, str paddding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search) -> Tensor(out)
args : (Tensor input, Tensor filter, Tensor out_grad, int[] strides, int[] paddings, str paddding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search)
forward : conv3d (Tensor input, Tensor filter, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search) -> Tensor(out)
args : (Tensor input, Tensor filter, Tensor out_grad, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search)
output : Tensor(input_grad), Tensor(filter_grad)
infer_meta :
func : GeneralBinaryGradInferMeta
......@@ -369,8 +369,8 @@
backward : conv3d_grad_grad
- backward_op : conv3d_grad_grad
forward : conv3d_grad (Tensor input, Tensor filter, Tensor grad_out, int[] strides, int[] paddings, str paddding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search) -> Tensor(grad_input), Tensor(grad_filter)
args : (Tensor input, Tensor filter, Tensor grad_out, Tensor grad_input_grad, Tensor grad_filter_grad, int[] strides, int[] paddings, str paddding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search)
forward : conv3d_grad (Tensor input, Tensor filter, Tensor grad_out, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search) -> Tensor(grad_input), Tensor(grad_filter)
args : (Tensor input, Tensor filter, Tensor grad_out, Tensor grad_input_grad, Tensor grad_filter_grad, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search)
output : Tensor(input_grad), Tensor(filter_grad), Tensor(grad_out_grad)
infer_meta :
func : GeneralTernaryGradInferMeta
......@@ -439,21 +439,21 @@
optional : mask
- backward_op : depthwise_conv2d_grad
forward : depthwise_conv2d (Tensor input, Tensor filter, int[] strides, int[] paddings, str paddding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search, bool fuse_relu, bool use_gpudnn) -> Tensor(out)
args : (Tensor input, Tensor filter, Tensor out_grad, int[] strides, int[] paddings, str paddding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search, bool fuse_relu, bool use_gpudnn)
forward : depthwise_conv2d (Tensor input, Tensor filter, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search, bool fuse_relu, bool use_gpudnn) -> Tensor(out)
args : (Tensor input, Tensor filter, Tensor out_grad, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search, bool fuse_relu, bool use_gpudnn)
output : Tensor(input_grad), Tensor(filter_grad)
infer_meta :
func : GeneralBinaryGradInferMeta
param : [input, filter]
kernel :
func : depthwise_conv2d_grad
param : [input, filter, out_grad, strides, paddings, paddding_algorithm, groups, dilations, data_format, use_addto, workspace_size_MB, exhaustive_search, fuse_relu]
param : [input, filter, out_grad, strides, paddings, padding_algorithm, groups, dilations, data_format, use_addto, workspace_size_MB, exhaustive_search, fuse_relu]
use_gpudnn : use_gpudnn
backward : depthwise_conv2d_grad_grad
- backward_op : depthwise_conv2d_grad_grad
forward : depthwise_conv2d_grad (Tensor input, Tensor filter, Tensor grad_out, int[] strides, int[] paddings, str paddding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search, bool fuse_relu, bool use_gpudnn) -> Tensor(grad_input), Tensor(grad_filter)
args : (Tensor input, Tensor filter, Tensor grad_out, Tensor grad_input_grad, Tensor grad_filter_grad, int[] strides, int[] paddings, str paddding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search, bool fuse_relu)
forward : depthwise_conv2d_grad (Tensor input, Tensor filter, Tensor grad_out, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search, bool fuse_relu, bool use_gpudnn) -> Tensor(grad_input), Tensor(grad_filter)
args : (Tensor input, Tensor filter, Tensor grad_out, Tensor grad_input_grad, Tensor grad_filter_grad, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search, bool fuse_relu)
output : Tensor(input_grad), Tensor(filter_grad), Tensor(grad_out_grad)
infer_meta :
func : GeneralTernaryGradInferMeta
......
......@@ -454,7 +454,7 @@
backward : conj_grad
- op : conv2d
args : (Tensor input, Tensor filter, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search)
args : (Tensor input, Tensor filter, int[] strides, int[] paddings, str padding_algorithm, int[] dilations, int groups, str data_format)
output : Tensor
infer_meta :
func : ConvInferMeta
......@@ -474,10 +474,10 @@
backward : conv2d_transpose_grad
- op : conv3d
args : (Tensor input, Tensor filter, int[] strides, int[] paddings, str paddding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search)
args : (Tensor input, Tensor filter, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search)
output : Tensor
infer_meta :
func : ConvInferMeta
func : Conv3DInferMeta
kernel :
func : conv3d
use_gpudnn : true
......@@ -564,7 +564,7 @@
args : (Tensor x, Tensor filter, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search, bool fuse_relu, bool use_gpudnn)
output : Tensor(out)
infer_meta :
func : ConvInferMeta
func : DepthwiseConvInferMeta
param : [x, filter, strides, paddings, padding_algorithm, groups, dilations, data_format, use_addto, workspace_size_MB, exhaustive_search]
kernel :
func : depthwise_conv2d
......
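After the yaml cleanup, conv2d carries only the six structural attributes, with dilations ordered before groups. As a rough sketch inferred from the args list above (not from the generated code), the corresponding PHI kernel interface would look approximately like this, with DenseTensor and the context stubbed so the snippet stands alone:

```cpp
// Rough sketch of the cleaned-up conv2d kernel interface implied by the yaml
// args above; DenseTensor/CPUContext are stubs, not phi's real types.
#include <string>
#include <vector>

namespace sketch {
struct DenseTensor {};
struct CPUContext {};

template <typename T, typename Context>
void ConvKernel(const Context& dev_ctx,
                const DenseTensor& input,
                const DenseTensor& filter,
                const std::vector<int>& strides,
                const std::vector<int>& paddings,
                const std::string& padding_algorithm,
                const std::vector<int>& dilations,
                int groups,
                const std::string& data_format,
                DenseTensor* out) {
  // Device-specific extras (use_addto, workspace_size_MB, exhaustive_search,
  // fuse_* ...) are no longer parameters; kernels query them from dev_ctx.
  (void)dev_ctx; (void)input; (void)filter; (void)strides; (void)paddings;
  (void)padding_algorithm; (void)dilations; (void)groups; (void)data_format;
  (void)out;
}
}  // namespace sketch

int main() {
  sketch::DenseTensor x, w, y;
  sketch::CPUContext ctx;
  sketch::ConvKernel<float>(ctx, x, w, {1, 1}, {0, 0}, "EXPLICIT", {1, 1}, 1,
                            "NCHW", &y);
}
```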
......@@ -23,9 +23,8 @@ limitations under the License. */
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/backends/custom/custom_context.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#ifdef PADDLE_WITH_XPU
#include "paddle/phi/backends/onednn/onednn_context.h"
#include "paddle/phi/backends/xpu/xpu_context.h"
#endif
#ifndef PADDLE_WITH_CUSTOM_KERNEL
// TODO(wilber): DeviceContextPool nees include fluid file.
......
......@@ -24,7 +24,8 @@ limitations under the License. */
namespace phi {
class PADDLE_API CPUContext : public DeviceContext {
class PADDLE_API CPUContext : public DeviceContext,
public TypeInfoTraits<DeviceContext, CPUContext> {
public:
CPUContext();
CPUContext(CPUContext&&);
......@@ -34,6 +35,8 @@ class PADDLE_API CPUContext : public DeviceContext {
Eigen::DefaultDevice* eigen_device() const;
const Place& GetPlace() const override;
static const char* name() { return "CPUContext"; }
protected:
// NOTE: External users manage resources. Used in inference scenarios.
// The Set interface is for inference only, DeviceContext will mark the
......
......@@ -21,7 +21,8 @@ limitations under the License. */
namespace phi {
class CustomContext : public DeviceContext {
class CustomContext : public DeviceContext,
public TypeInfoTraits<DeviceContext, CustomContext> {
public:
explicit CustomContext(const CustomPlace&);
......@@ -35,6 +36,8 @@ class CustomContext : public DeviceContext {
// Wait for all operations completion in the stream.
void Wait() const override;
static const char* name() { return "CustomContext"; }
public:
// NOTE: DeviceContext hold resources. Used in training scenarios.
// The interface used by the training scene, DeviceContext will initialize
......
......@@ -717,6 +717,23 @@ struct GPUContext::Impl {
}
}
bool HasDnnAttr(const std::string& attr_name) const {
return dnn_attrs_.count(attr_name) != 0UL;
}
const Attribute& GetDnnAttr(const std::string& attr_name) const {
auto iter = dnn_attrs_.find(attr_name);
PADDLE_ENFORCE_NE(
iter,
dnn_attrs_.end(),
phi::errors::NotFound("Attribute `%s` is not found in OneDNNContext."));
return iter->second;
}
void SetDnnAttr(const std::string& attr_name, Attribute attr) {
dnn_attrs_[attr_name] = attr;
}
// use one flag for all handles?
// they should be accessed consistently
bool owned_{false};
......@@ -780,8 +797,15 @@ struct GPUContext::Impl {
Allocator* allocator_{nullptr}; // external resource.
// A internal resouce to initinalize eigen_device.
std::unique_ptr<internal::EigenGpuStreamDevice> eigen_stream_{nullptr};
// Holds attributes that are only used by gpudnn kernel computation.
// Because the DeviceContext is effectively a global singleton, thread safety
// must be guaranteed, so a thread_local variable is used.
static thread_local AttributeMap dnn_attrs_;
};
thread_local AttributeMap GPUContext::Impl::dnn_attrs_ = {};
GPUContext::GPUContext(GPUContext&&) = default;
GPUContext& GPUContext::operator=(GPUContext&&) = default;
......@@ -1000,4 +1024,16 @@ void GPUContext::SetDriverVersion(int val) { impl_->driver_version_ = val; }
void GPUContext::SetRuntimeVersion(int val) { impl_->runtime_version_ = val; }
bool GPUContext::HasDnnAttr(const std::string& attr_name) const {
return impl_->HasDnnAttr(attr_name);
}
const Attribute& GPUContext::GetDnnAttr(const std::string& attr_name) const {
return impl_->GetDnnAttr(attr_name);
}
void GPUContext::SetDnnAttr(const std::string& attr_name, Attribute attr) {
return impl_->SetDnnAttr(attr_name, std::move(attr));
}
} // namespace phi
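The GPUContext::Impl change above keeps dnn_attrs_ in a static thread_local AttributeMap: the context object is shared process-wide, but the attributes are only meaningful for the kernel the current thread is about to launch, so each thread needs its own copy. A small self-contained sketch of that storage choice, with a simplified SharedCtx standing in for the real GPUContext:

```cpp
// Minimal sketch (not Paddle code): a shared context whose per-launch "dnn
// attributes" live in thread_local storage, so concurrent threads do not
// overwrite each other's values.
#include <iostream>
#include <map>
#include <mutex>
#include <string>
#include <thread>

struct SharedCtx {
  void SetDnnAttr(const std::string& k, int v) { attrs()[k] = v; }
  int GetDnnAttr(const std::string& k) const { return attrs().at(k); }

 private:
  static std::map<std::string, int>& attrs() {
    static thread_local std::map<std::string, int> dnn_attrs;  // one per thread
    return dnn_attrs;
  }
};

int main() {
  SharedCtx ctx;  // imagine this is the process-wide device context
  std::mutex io_mutex;
  auto worker = [&](int workspace_mb) {
    ctx.SetDnnAttr("workspace_size_MB", workspace_mb);
    std::lock_guard<std::mutex> guard(io_mutex);
    std::cout << "this thread sees workspace_size_MB="
              << ctx.GetDnnAttr("workspace_size_MB") << "\n";
  };
  std::thread t1(worker, 512), t2(worker, 4096);
  t1.join();
  t2.join();
}
```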
......@@ -24,6 +24,7 @@ limitations under the License. */
#include "paddle/phi/backends/gpu/gpu_helper.h"
#include "paddle/phi/backends/gpu/gpu_info.h"
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/attribute.h"
#include "paddle/phi/core/device_context.h"
namespace phi {
......@@ -77,7 +78,8 @@ class DnnWorkspaceHandle {
std::unique_ptr<std::mutex> mtx_;
};
class PADDLE_API GPUContext : public DeviceContext {
class PADDLE_API GPUContext : public DeviceContext,
public TypeInfoTraits<DeviceContext, GPUContext> {
public:
explicit GPUContext(const GPUPlace& place, bool init = true);
......@@ -166,6 +168,13 @@ class PADDLE_API GPUContext : public DeviceContext {
void WaitStreamCallback() const;
// Several methods for adapting Dnn-specific attributes
bool HasDnnAttr(const std::string& attr_name) const;
const Attribute& GetDnnAttr(const std::string& attr_name) const;
void SetDnnAttr(const std::string& attr_name, Attribute attr);
static const char* name() { return "GPUContext"; }
public:
/*! \brief Return nccl communicators. */
ncclComm_t nccl_comm() const;
......@@ -250,10 +259,10 @@ class PADDLE_API GPUContext : public DeviceContext {
std::unique_ptr<Impl> impl_;
};
// Note: In order to register the kernel of CUDNN, GPUDNNContext is required.
// Note: In order to register the kernel of CUDNN, DnnContext is required.
// Currently, CUDNN kernel directly uses GPUContext. But if the kernel function
// has the same name, this will lead to duplicate instantiations of GPU kernel
// and GPUDNN kernel function, so if we using GPUDNNContext = GPUContext, we
// and Dnn kernel function, so if we using DnnContext = GPUContext, we
// must use different function name for cudnn kernel
using GPUDNNContext = GPUContext;
......
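With HasDnnAttr/GetDnnAttr exposed on GPUContext, a GPUDNN kernel can recover the former extra attributes when the fluid operator path has set them and otherwise fall back to defaults. A hedged sketch of that query pattern; the attribute names match the GPUDNN entries in ops_extra_info.h, but FakeGpuCtx, conv_gpudnn_like, and the default values are illustrative only:

```cpp
// Minimal sketch (not Paddle code) of how a GPUDNN kernel could read the
// former extra attributes from the device context, with defaults when the
// attribute was never set (e.g. when called through the new PHI API).
#include <iostream>
#include <map>
#include <string>
#include <variant>

using Attribute = std::variant<bool, int, float, std::string>;

struct FakeGpuCtx {  // stand-in for phi::GPUContext
  std::map<std::string, Attribute> attrs;
  bool HasDnnAttr(const std::string& k) const { return attrs.count(k) != 0; }
  const Attribute& GetDnnAttr(const std::string& k) const { return attrs.at(k); }
};

void conv_gpudnn_like(const FakeGpuCtx& dev_ctx) {
  // Same shape as the AppendActivation pattern: query, then unpack or default.
  const bool exhaustive_search =
      dev_ctx.HasDnnAttr("exhaustive_search")
          ? std::get<bool>(dev_ctx.GetDnnAttr("exhaustive_search"))
          : false;
  const int workspace_size_MB =
      dev_ctx.HasDnnAttr("workspace_size_MB")
          ? std::get<int>(dev_ctx.GetDnnAttr("workspace_size_MB"))
          : 512;
  const bool use_addto = dev_ctx.HasDnnAttr("use_addto")
                             ? std::get<bool>(dev_ctx.GetDnnAttr("use_addto"))
                             : false;
  std::cout << exhaustive_search << " " << workspace_size_MB << " "
            << use_addto << "\n";
}

int main() {
  FakeGpuCtx ctx;
  ctx.attrs["workspace_size_MB"] = 1024;  // set by the fluid op path
  conv_gpudnn_like(ctx);                  // prints: 0 1024 0
}
```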
......@@ -16,9 +16,10 @@
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/enforce.h"
#include "paddle/utils/flat_hash_map.h"
#include "paddle/fluid/framework/expect.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/phi/core/expect.h"
namespace phi {
......@@ -284,6 +285,69 @@ struct OneDNNContext::Impl {
return key_it->second;
}
bool HasDnnAttr(const std::string& attr_name) const {
return dnn_attrs_.count(attr_name) != 0UL;
}
const Attribute& GetDnnAttr(const std::string& attr_name) const {
auto iter = dnn_attrs_.find(attr_name);
PADDLE_ENFORCE_NE(
iter,
dnn_attrs_.end(),
phi::errors::NotFound("Attribute `%s` is not found in OneDNNContext."));
return iter->second;
}
void SetDnnAttr(const std::string& attr_name, Attribute attr) {
dnn_attrs_[attr_name] = attr;
}
bool HasDnnInput(const std::string& input_name) const {
return dnn_inputs_.count(input_name) != 0UL;
}
const DenseTensor* GetDnnInput(const std::string& input_name) const {
auto iter = dnn_inputs_.find(input_name);
PADDLE_ENFORCE_NE(
iter,
dnn_inputs_.end(),
phi::errors::NotFound(
"Input DenseTensor `%s` is not found in OneDNNContext.", input_name));
return iter->second;
}
void SetDnnInput(const std::string& input_name, const DenseTensor* input) {
dnn_inputs_[input_name] = input;
}
void SetInputsName(const TensorNameMap& inputs_name) {
inputs_name_ = inputs_name;
}
void SetOutputsName(const TensorNameMap& outputs_name) {
outputs_name_ = outputs_name;
}
const std::vector<std::string>& GetInputsName(
const std::string& input) const {
auto it = inputs_name_.find(input);
PADDLE_ENFORCE_NE(it,
inputs_name_.end(),
phi::errors::NotFound(
"OneDnnContext does not have the input %s.", input));
return it->second;
}
const std::vector<std::string>& GetOutputsName(
const std::string& output) const {
auto it = outputs_name_.find(output);
PADDLE_ENFORCE_NE(
it,
outputs_name_.end(),
phi::errors::NotFound("OneDnnContext does not have the output %s.",
output));
return it->second;
}
std::shared_ptr<BlobMap> p_blobmap_;
// Map key is pointer of executor and value is a data(iterator in map) needed
// to erase
......@@ -291,8 +355,35 @@ struct OneDNNContext::Impl {
std::shared_ptr<std::mutex> p_mutex_;
// 0 - clearing is allowed. x > 0 do not clear.
unsigned int block_next_cache_clearing_ = 0;
// Holds attributes that are only used by oneDNN kernel computation.
// The original mkldnn op kernels add the operations that require fusion
// directly to the native kernel and control them with `fuse_xxx` attributes,
// so for oneDNN some attributes that look device-independent are also saved
// here. Ideally, fusion would be implemented as separate fusion ops and
// kernels instead of being patched onto a basic operation.
// Because the DeviceContext is effectively a global singleton, thread safety
// must be guaranteed, so thread_local variables are used.
static thread_local AttributeMap dnn_attrs_;
// For oneDNN, in addition to extra attrs there are also extra inputs,
// but only a few. Hopefully the implementation can be optimized to
// remove this member in the future.
static thread_local paddle::flat_hash_map<std::string, const DenseTensor*>
dnn_inputs_;
// oneDNN needs the input and output names of the current kernel to
// generate the unique_key.
static thread_local TensorNameMap inputs_name_;
static thread_local TensorNameMap outputs_name_;
};
thread_local AttributeMap OneDNNContext::Impl::dnn_attrs_ = {};
thread_local paddle::flat_hash_map<std::string, const DenseTensor*>
OneDNNContext::Impl::dnn_inputs_ = {};
thread_local TensorNameMap OneDNNContext::Impl::inputs_name_ = {};
thread_local TensorNameMap OneDNNContext::Impl::outputs_name_ = {};
OneDNNContext::OneDNNContext(const Place& place)
: CPUContext(place), impl_(std::make_unique<Impl>()) {}
......@@ -322,5 +413,49 @@ OneDNNContext::BlobPtr_t<void> OneDNNContext::GetBlob(
return impl_->GetBlob(name);
}
bool OneDNNContext::HasDnnAttr(const std::string& attr_name) const {
return impl_->HasDnnAttr(attr_name);
}
const Attribute& OneDNNContext::GetDnnAttr(const std::string& attr_name) const {
return impl_->GetDnnAttr(attr_name);
}
void OneDNNContext::SetDnnAttr(const std::string& attr_name, Attribute attr) {
return impl_->SetDnnAttr(attr_name, std::move(attr));
}
bool OneDNNContext::HasDnnInput(const std::string& input_name) const {
return impl_->HasDnnInput(input_name);
}
const DenseTensor* OneDNNContext::GetDnnInput(
const std::string& input_name) const {
return impl_->GetDnnInput(input_name);
}
void OneDNNContext::SetDnnInput(const std::string& input_name,
const DenseTensor* input) {
return impl_->SetDnnInput(input_name, input);
}
void OneDNNContext::SetInputsName(const TensorNameMap& inputs_name) {
impl_->SetInputsName(inputs_name);
}
void OneDNNContext::SetOutputsName(const TensorNameMap& outputs_name) {
impl_->SetOutputsName(outputs_name);
}
const std::vector<std::string>& OneDNNContext::GetInputsName(
const std::string& input) const {
return impl_->GetInputsName(input);
}
const std::vector<std::string>& OneDNNContext::GetOutputsName(
const std::string& output) const {
return impl_->GetOutputsName(output);
}
} // namespace phi
#endif
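OneDNNContext additionally carries extra inputs (e.g. Bias, ResidualData) and the operator's tensor names, which the oneDNN kernels need for fusion operands and primitive-cache keys. A hedged sketch of how a PHI oneDNN kernel might consume them, with FakeOneDNNCtx and conv_onednn_like as stand-ins for the real types:

```cpp
// Minimal sketch (not Paddle code): extra inputs and tensor names consumed
// from a oneDNN-style context. DenseTensor and the context are stubs.
#include <iostream>
#include <map>
#include <string>
#include <vector>

struct DenseTensor {};
using TensorNameMap = std::map<std::string, std::vector<std::string>>;

struct FakeOneDNNCtx {  // stand-in for phi::OneDNNContext
  std::map<std::string, const DenseTensor*> dnn_inputs;
  TensorNameMap inputs_name;

  bool HasDnnInput(const std::string& k) const { return dnn_inputs.count(k) != 0; }
  const DenseTensor* GetDnnInput(const std::string& k) const {
    return dnn_inputs.at(k);
  }
  const std::vector<std::string>& GetInputsName(const std::string& k) const {
    return inputs_name.at(k);
  }
};

void conv_onednn_like(const FakeOneDNNCtx& dev_ctx) {
  // Optional fused operand: absent means "no residual fusion requested".
  const DenseTensor* residual =
      dev_ctx.HasDnnInput("ResidualData") ? dev_ctx.GetDnnInput("ResidualData")
                                          : nullptr;
  std::cout << "fuse_residual=" << (residual != nullptr) << "\n";
  // Variable names feed the oneDNN primitive cache key.
  std::cout << "input var: " << dev_ctx.GetInputsName("Input")[0] << "\n";
}

int main() {
  DenseTensor residual;
  FakeOneDNNCtx ctx;
  ctx.dnn_inputs["ResidualData"] = &residual;
  ctx.inputs_name["Input"] = {"conv2d_0.tmp_in"};  // hypothetical variable name
  conv_onednn_like(ctx);
}
```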
......@@ -20,9 +20,12 @@ limitations under the License. */
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/common/layout.h"
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/attribute.h"
namespace phi {
using TensorNameMap = std::map<std::string, std::vector<std::string>>;
class OneDNNContextThreadLocals {
// default mkldnn session id
......@@ -134,6 +137,26 @@ class OneDNNContext : public CPUContext {
return OneDNNContextThreadLocals::fetch();
}
// Several methods for adapting ONEDNN-specific attributes and inputs
bool HasDnnAttr(const std::string& attr_name) const;
const Attribute& GetDnnAttr(const std::string& attr_name) const;
void SetDnnAttr(const std::string& attr_name, Attribute attr);
bool HasDnnInput(const std::string& input_name) const;
const DenseTensor* GetDnnInput(const std::string& input_name) const;
void SetDnnInput(const std::string& input_name, const DenseTensor* input);
void SetInputsName(const TensorNameMap& inputs_name);
void SetOutputsName(const TensorNameMap& outputs_name);
const std::vector<std::string>& GetInputsName(const std::string& input) const;
const std::vector<std::string>& GetOutputsName(
const std::string& output) const;
static const char* name() { return "OneDNNContext"; }
private:
struct Impl;
std::unique_ptr<Impl> impl_;
......
......@@ -195,6 +195,41 @@ inline std::string CreateKey(const OneDNNContext& dev_ctx, ArgTypes&&... args) {
return key;
}
inline std::vector<std::vector<int64_t>> ToOnednnPadding(
const std::vector<int64_t>& paddings) {
if (paddings.size() == 6) {
int padding_front = paddings[0];
int padding_back = paddings[1];
int padding_top = paddings[2];
int padding_bottom = paddings[3];
int padding_left = paddings[4];
int padding_right = paddings[5];
return {{padding_front, padding_top, padding_left},
{padding_back, padding_bottom, padding_right}};
} else {
int padding_top = paddings[0];
int padding_bottom = paddings[1];
int padding_left = paddings[2];
int padding_right = paddings[3];
return {{padding_top, padding_left}, {padding_bottom, padding_right}};
}
}
// The function adjusts the vector of weight dimensions for group convolutions
inline void GetGroupConvWeightsTz(std::vector<int64_t>& weights_tz, // NOLINT
const int groups) {
if (groups > 1) {
// if (is_conv3d) [o, i, d, h, w]->[g, o/g, i, d, h, w]
// else [o, i, h, w] -> [g, o/g, i, h, w]
weights_tz.push_back(0);
std::rotate(weights_tz.begin(), weights_tz.end() - 1, weights_tz.end());
weights_tz[0] = groups;
weights_tz[1] = weights_tz[1] / groups;
}
}
inline void MatchShapeToLayout(DenseTensor* tensor_in,
DataLayout from,
DataLayout to) {
......
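ToOnednnPadding and GetGroupConvWeightsTz above are pure shape manipulations, so a concrete run pins down their behavior: a 4-element paddings vector splits into {top, left} / {bottom, right} begin/end pairs, and a grouped weight shape [o, i, h, w] becomes [g, o/g, i, h, w]. A standalone copy of the logic for illustration (the real versions live in the oneDNN helper header):

```cpp
// Standalone copy (for illustration) of the padding / grouped-weights helpers.
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

inline std::vector<std::vector<int64_t>> ToOnednnPadding(
    const std::vector<int64_t>& paddings) {
  if (paddings.size() == 6) {  // 3-D conv: front/back, top/bottom, left/right
    return {{paddings[0], paddings[2], paddings[4]},
            {paddings[1], paddings[3], paddings[5]}};
  }
  // 2-D conv: top/bottom, left/right
  return {{paddings[0], paddings[2]}, {paddings[1], paddings[3]}};
}

inline void GetGroupConvWeightsTz(std::vector<int64_t>& weights_tz,  // NOLINT
                                  const int groups) {
  if (groups > 1) {
    // [o, i, h, w] -> [g, o/g, i, h, w]
    weights_tz.push_back(0);
    std::rotate(weights_tz.begin(), weights_tz.end() - 1, weights_tz.end());
    weights_tz[0] = groups;
    weights_tz[1] = weights_tz[1] / groups;
  }
}

int main() {
  // paddings = {top, bottom, left, right}
  auto pads = ToOnednnPadding({1, 2, 3, 4});
  std::cout << "begin pads: " << pads[0][0] << "," << pads[0][1] << "\n";  // 1,3
  std::cout << "end pads:   " << pads[1][0] << "," << pads[1][1] << "\n";  // 2,4

  std::vector<int64_t> weights = {32, 8, 3, 3};  // [o, i, h, w]
  GetGroupConvWeightsTz(weights, 4);
  for (auto d : weights) std::cout << d << " ";  // 4 8 8 3 3
  std::cout << "\n";
}
```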
......@@ -39,6 +39,67 @@ using memory = dnnl::memory;
using OneDNNMemoryFormat = dnnl::memory::format_tag;
static void AppendActivation(const OneDNNContext& dev_ctx,
dnnl::post_ops& post_ops, // NOLINT
float activation_scale = 1.0f) {
const auto invalid_attribute =
dev_ctx.HasDnnAttr("fuse_activation")
? PADDLE_GET_CONST(std::string, dev_ctx.GetDnnAttr("fuse_activation"))
.empty()
: true;
if (invalid_attribute) return;
const auto fuse_activation =
dev_ctx.HasDnnAttr("fuse_activation")
? PADDLE_GET_CONST(std::string, dev_ctx.GetDnnAttr("fuse_activation"))
: "";
const auto fuse_alpha =
dev_ctx.HasDnnAttr("fuse_alpha")
? PADDLE_GET_CONST(float, dev_ctx.GetDnnAttr("fuse_alpha"))
: 0.0f;
const auto fuse_beta =
dev_ctx.HasDnnAttr("fuse_beta")
? PADDLE_GET_CONST(float, dev_ctx.GetDnnAttr("fuse_beta"))
: 0.0f;
if (fuse_activation == "hard_sigmoid") {
post_ops.append_eltwise(activation_scale,
dnnl::algorithm::eltwise_linear,
fuse_alpha,
fuse_beta);
post_ops.append_eltwise(
activation_scale, dnnl::algorithm::eltwise_clip, 0.0f, 1.0f);
} else {
const std::unordered_map<std::string, dnnl::algorithm> activation_map = {
{"abs", dnnl::algorithm::eltwise_abs},
{"clip", dnnl::algorithm::eltwise_clip},
{"gelu", dnnl::algorithm::eltwise_gelu_erf},
{"gelu_erf", dnnl::algorithm::eltwise_gelu_erf},
{"gelu_tanh", dnnl::algorithm::eltwise_gelu_tanh},
{"hard_swish", dnnl::algorithm::eltwise_hardswish},
{"leaky_relu", dnnl::algorithm::eltwise_relu},
{"mish", dnnl::algorithm::eltwise_mish},
{"relu", dnnl::algorithm::eltwise_relu},
{"relu6", dnnl::algorithm::eltwise_bounded_relu},
{"sigmoid", dnnl::algorithm::eltwise_logistic},
{"sqrt", dnnl::algorithm::eltwise_sqrt},
{"swish", dnnl::algorithm::eltwise_swish},
{"tanh", dnnl::algorithm::eltwise_tanh}};
const auto& activation_type = activation_map.find(fuse_activation);
PADDLE_ENFORCE_NE(
activation_type,
activation_map.end(),
phi::errors::InvalidArgument(
"Activation '%s' not found in oneDNN algorithms mapper",
fuse_activation));
post_ops.append_eltwise(
activation_scale, activation_type->second, fuse_alpha, fuse_beta);
}
}
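A typical caller builds the post-ops of a fused primitive and lets AppendActivation translate the `fuse_activation` extra attribute into the matching eltwise post-op. A sketch; the surrounding handler code and dev_ctx are assumed:
dnnl::post_ops post_ops;
// Appends e.g. eltwise_relu when dev_ctx carries fuse_activation == "relu";
// a missing or empty attribute leaves post_ops untouched.
AppendActivation(dev_ctx, post_ops);
dnnl::primitive_attr attrs;
attrs.set_post_ops(post_ops);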
template <typename T,
typename TForward,
typename TBackward = onednn_dummy_primitive,
......@@ -1085,5 +1146,6 @@ class ClipOneDNNHandler
to_void_cast<T>(input_data));
}
};
} // namespace funcs
} // namespace phi
......@@ -14,6 +14,8 @@ limitations under the License. */
#pragma once
#ifdef PADDLE_WITH_XPU
#include <memory>
#include "paddle/phi/backends/xpu/forwards.h"
......@@ -26,7 +28,8 @@ namespace xpu = baidu::xpu::api;
namespace phi {
class XPUContext : public DeviceContext {
class XPUContext : public DeviceContext,
public TypeInfoTraits<DeviceContext, XPUContext> {
public:
XPUContext();
......@@ -65,6 +68,8 @@ class XPUContext : public DeviceContext {
XPUStream stream() const;
static const char* name() { return "XPUContext"; }
private:
struct Impl;
std::unique_ptr<Impl> impl_;
......@@ -79,3 +84,5 @@ using KPSContext = XPUContext;
#endif
} // namespace phi
#endif
......@@ -48,6 +48,6 @@ using Attribute = paddle::variant<bool,
DataLayout,
Place>;
using RuntimeAttrs = paddle::flat_hash_map<std::string, Attribute>;
using AttributeMap = paddle::flat_hash_map<std::string, Attribute>;
} // namespace phi
......@@ -21,6 +21,7 @@ limitations under the License. */
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/allocator.h"
#include "paddle/phi/core/generator.h"
#include "paddle/phi/core/utils/type_registry.h"
namespace phi {
class TensorBase;
......@@ -188,9 +189,21 @@ class PADDLE_API DeviceContext {
*/
Generator* GetHostGenerator() const;
/**
* @brief Return the type information of the derived class to support
* safe downcasting in a non-RTTI environment.
*
* @return The type information of the derived class.
*/
TypeInfo<DeviceContext> type_info() const { return type_info_; }
private:
struct Impl;
std::unique_ptr<Impl> impl_;
template <typename T, typename U>
friend class TypeInfoTraits;
TypeInfo<DeviceContext> type_info_{TypeInfo<DeviceContext>::kUnknownType};
};
} // namespace phi
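The stored TypeInfo allows a safe downcast without RTTI. A sketch of the intended pattern, assuming TypeInfoTraits in paddle/phi/core/utils/type_registry.h provides the usual classof helper (not shown in this diff):
void MaybeQueryOneDNNAttrs(const phi::DeviceContext& dev_ctx) {
  // classof compares dev_ctx.type_info() against OneDNNContext's type id.
  if (phi::OneDNNContext::classof(&dev_ctx)) {
    const auto& onednn_ctx = static_cast<const phi::OneDNNContext&>(dev_ctx);
    // ... onednn_ctx.HasDnnAttr(...) / GetDnnAttr(...) ...
  }
}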
......@@ -43,6 +43,7 @@ limitations under the License. */
#include "paddle/phi/core/errors.h"
#include "paddle/utils/string/printf.h"
#include "paddle/utils/string/to_string.h"
#include "paddle/utils/variant.h"
DECLARE_int32(call_stack_level);
......@@ -409,80 +410,75 @@ struct EnforceNotMet : public std::exception {
/** EXTENDED TOOL FUNCTIONS WITH CHECKING **/
/*
* Summary: This macro is used to get Variable or internal type
* data (such as LoDTensor or SelectedRows) of the Input and
* Output in op, generally used when call scope.FindVar(Input/
* Output("Name")) or ctx.Input<LoDTensor>().
* Firstly this macro check whether the obtained pointer is null,
* and then return data if it is not null.
*
* Note: This macro is only suitable for specific scenarios and
* is not intended to be widely used. If it cannot meet the
* requirements, please use other PADDLE_ENFORCE** check macros.
* Summary: The PADDLE_GET(_**) series of macros is used to call paddle::get
* safely. paddle::get is not a completely safe API: although it will not
* go wrong in most cases, in extreme cases it may fail and directly
* throw a paddle::bad_variant_access exception, without any stack
* information.
* This kind of problem is difficult to debug, so these macros are added to
* enrich paddle::get error information. At the same time, direct use of
* paddle::get is restricted by CI rule.
*
* Parameters:
*     __PTR: pointer
* __ROLE: (string), Input or Output
* __NAME: (string), Input or Output name
* __OP_TYPE: (string), the op type
*
* Return: The data pointed to by the pointer.
*     __TYPE: the target variable type
* __VALUE: the target variable to get
*
* Examples:
* GET_DATA_SAFELY(ctx.Input<LoDTensor>("X"), "Input", "X", "Mul");
*/
#define GET_DATA_SAFELY(__PTR, __ROLE, __NAME, __OP_TYPE) \
(([&]() -> std::add_lvalue_reference<decltype(*(__PTR))>::type { \
auto* __ptr = (__PTR); \
if (UNLIKELY(nullptr == __ptr)) { \
auto __summary__ = phi::errors::NotFound( \
"Unable to get %s data of %s %s in operator %s. " \
"Possible reasons are:\n" \
" 1. The %s is not the %s of operator %s;\n" \
" 2. The %s has no corresponding variable passed in;\n" \
" 3. The %s corresponding variable is not initialized.", \
phi::demangle( \
typeid(std::add_lvalue_reference<decltype(*__ptr)>::type) \
.name()), \
__ROLE, \
__NAME, \
__OP_TYPE, \
__NAME, \
__ROLE, \
__OP_TYPE, \
__NAME, \
__NAME); \
auto __message__ = ::paddle::string::Sprintf( \
"%s\n [Hint: pointer " #__PTR " should not be null.]", \
__summary__.error_message()); \
__THROW_ERROR_INTERNAL__( \
phi::ErrorSummary(__summary__.code(), __message__)); \
} \
return *__ptr; \
})())
/*
* Summary: This macro is used to check whether op has specified
* Input or Output Variables. Because op's Input and Output
* checking are written similarly, so abstract this macro.
*
* Parameters:
*     __EXPR: (bool), the bool expression
* __ROLE: (string), Input or Output
* __NAME: (string), Input or Output name
* __OP_TYPE: (string), the op type
* - unsafe writing: int x = paddle::get<int>(y);
* - safe writing: int x = PADDLE_GET(int, y);
*
* Examples:
* OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "Mul");
* Note: GCC 4.8 cannot select the right overloaded function here, so we need
* to define different functions and macros here; after we upgrade the
* CI gcc version, we can define a single PADDLE_GET macro.
*/
#define OP_INOUT_CHECK(__EXPR, __ROLE, __NAME, __OP_TYPE) \
do { \
PADDLE_ENFORCE_EQ( \
__EXPR, \
true, \
phi::errors::NotFound( \
"No %s(%s) found for %s operator.", __ROLE, __NAME, __OP_TYPE)); \
} while (0)
namespace details {
#define DEFINE_SAFE_PADDLE_GET( \
__InputType, __OutputType, __OutputTypePtr, __FuncName) \
template <typename OutputType, typename InputType> \
auto __FuncName( \
__InputType input, const char* expression, const char* file, int line) \
->typename std::conditional<std::is_pointer<InputType>::value, \
__OutputTypePtr, \
__OutputType>::type { \
try { \
return paddle::get<OutputType>(input); \
} catch (paddle::bad_variant_access const&) { \
HANDLE_THE_ERROR \
throw ::phi::enforce::EnforceNotMet( \
phi::errors::InvalidArgument( \
"paddle::get failed, cannot get value " \
"(%s) by type %s, its type is %s.", \
expression, \
phi::enforce::demangle(typeid(OutputType).name()), \
phi::enforce::demangle(input.type().name())), \
file, \
line); \
END_HANDLE_THE_ERROR \
} \
}
DEFINE_SAFE_PADDLE_GET(InputType&, OutputType&, OutputType*, SafeBoostGet);
DEFINE_SAFE_PADDLE_GET(const InputType&,
const OutputType&,
const OutputType*,
SafeBoostGetConst);
DEFINE_SAFE_PADDLE_GET(InputType&&,
OutputType,
OutputType*,
SafeBoostGetMutable);
} // namespace details
#define PADDLE_GET(__TYPE, __VALUE) \
phi::enforce::details::SafeBoostGet<__TYPE>( \
__VALUE, #__VALUE, __FILE__, __LINE__)
#define PADDLE_GET_CONST(__TYPE, __VALUE) \
phi::enforce::details::SafeBoostGetConst<__TYPE>( \
__VALUE, #__VALUE, __FILE__, __LINE__)
#define PADDLE_GET_MUTABLE(__TYPE, __VALUE) \
phi::enforce::details::SafeBoostGetMutable<__TYPE>( \
__VALUE, #__VALUE, __FILE__, __LINE__)
} // namespace enforce
using namespace enforce; // NOLINT
......
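In this PR the macros above are what kernels use to unpack phi::Attribute values fetched from a device context. A short sketch; the attribute name follows the oneDNN usage shown earlier, and dev_ctx is assumed:
// Throws phi::enforce::EnforceNotMet with expression/file/line information
// instead of a bare paddle::bad_variant_access on a type mismatch.
const phi::Attribute& attr = dev_ctx.GetDnnAttr("fuse_alpha");
float fuse_alpha = PADDLE_GET_CONST(float, attr);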
......@@ -138,8 +138,6 @@ class KernelContext {
template <typename AttrType>
const AttrType& AttrAt(size_t idx) const;
const RuntimeAttrs& GetRuntimeAttrs() const { return runtime_attrs_; }
size_t InputsSize() const { return inputs_.size(); }
size_t OutputsSize() const { return outputs_.size(); }
size_t AttrsSize() const { return attrs_.size(); }
......@@ -161,8 +159,6 @@ class KernelContext {
paddle::small_vector<std::pair<int, int>, kInputSmallVectorSize> input_range_;
paddle::small_vector<std::pair<int, int>, kOutputSmallVectorSize>
output_range_;
RuntimeAttrs runtime_attrs_;
};
} // namespace phi
......@@ -233,8 +233,6 @@ struct KernelArgsParseFunctor<Return_ (*)(Args_...)> {
args_def->AppendAttribute(AttributeType::DATA_LAYOUT);
} else if (arg_type == std::type_index(typeid(Place))) {
args_def->AppendAttribute(AttributeType::PLACE);
} else if (arg_type == std::type_index(typeid(RuntimeAttrs))) {
// do nothing
} else {
PADDLE_THROW(phi::errors::Unavailable(
"Unsupported kernel argument type `%s`.", arg_type.name()));
......
......@@ -14,13 +14,7 @@
#pragma once
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/backends/custom/custom_context.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/backends/onednn/onednn_context.h"
#ifdef PADDLE_WITH_XPU
#include "paddle/phi/backends/xpu/xpu_context.h"
#endif
#include "paddle/phi/backends/all_context.h"
#include "paddle/phi/common/int_array.h"
#include "paddle/phi/common/scalar.h"
#include "paddle/phi/core/dense_tensor.h"
......@@ -330,21 +324,6 @@ struct KernelImpl<Return (*)(DevCtx, Args...), kernel_fn> {
PD_SPECIALIZE_KernelCallHelper_FOR_OUTPUT(TensorArray);
template <typename... Tail>
struct KernelCallHelper<const RuntimeAttrs&, Tail...> {
template <int dev_ctx_idx,
int in_idx,
int attr_idx,
int out_idx,
typename... PreviousArgs>
static void Compute(KernelContext* ctx, PreviousArgs&... pargs) {
const auto& runtime_attrs = ctx->GetRuntimeAttrs();
KernelCallHelper<Tail...>::
template Compute<dev_ctx_idx, in_idx, attr_idx, out_idx>(
ctx, pargs..., runtime_attrs);
}
};
/* End case */
template <typename T>
struct KernelCallHelper<TypeTag<T>> {
......
......@@ -409,12 +409,9 @@ void ConvInferMeta(const MetaTensor& input,
const std::vector<int>& strides,
const std::vector<int>& paddings_t,
const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations_t,
int groups,
const std::string& data_format,
bool use_addto,
int workspace_size_MB,
bool exhaustive_search,
MetaTensor* out,
MetaConfig config) {
std::vector<int> paddings = paddings_t;
......@@ -559,27 +556,27 @@ void ConvInferMeta(const MetaTensor& input,
out->set_dtype(input.dtype());
}
void ConvInferInferMeta(const MetaTensor& input,
const MetaTensor& filter,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::string& paddding_algorithm,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
MetaTensor* out,
MetaConfig config) {
void Conv3DInferMeta(const MetaTensor& input,
const MetaTensor& filter,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
bool use_addto,
int workspace_size_MB,
bool exhaustive_search,
MetaTensor* out,
MetaConfig config) {
ConvInferMeta(input,
filter,
strides,
paddings,
paddding_algorithm,
groups,
padding_algorithm,
dilations,
groups,
data_format,
/*use_addto=*/false,
/*workspace_size_MB=*/512, // useless in infermeta
/*exhaustive_search=*/false,
out,
config);
}
......@@ -922,6 +919,31 @@ void CrossEntropyWithSoftmaxInferMeta(const MetaTensor& logits,
loss->share_lod(logits);
}
void DepthwiseConvInferMeta(const MetaTensor& input,
const MetaTensor& filter,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
bool use_addto,
int workspace_size_MB,
bool exhaustive_search,
MetaTensor* out,
MetaConfig config) {
ConvInferMeta(input,
filter,
strides,
paddings,
padding_algorithm,
dilations,
groups,
data_format,
out,
config);
}
void DistInferMeta(const MetaTensor& x,
const MetaTensor& y,
float p,
......@@ -2876,4 +2898,3 @@ void Unpool3dInferMeta(const MetaTensor& x,
} // namespace phi
PD_REGISTER_INFER_META_FN(add_raw, phi::ElementwiseRawInferMeta);
PD_REGISTER_INFER_META_FN(conv2d_infer, phi::ConvInferInferMeta);
......@@ -80,26 +80,26 @@ void ConvInferMeta(const MetaTensor& input,
const MetaTensor& filter,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::string& paddding_algorithm,
int groups,
const std::string& padding_algorithm,
const std::vector<int>& dilations,
int groups,
const std::string& data_format,
bool use_addto,
int workspace_size_MB,
bool exhaustive_search,
MetaTensor* out,
MetaConfig config = MetaConfig());
void ConvInferInferMeta(const MetaTensor& input,
const MetaTensor& filter,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::string& paddding_algorithm,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
MetaTensor* out,
MetaConfig config = MetaConfig());
void Conv3DInferMeta(const MetaTensor& input,
const MetaTensor& filter,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
bool use_addto,
int workspace_size_MB,
bool exhaustive_search,
MetaTensor* out,
MetaConfig config = MetaConfig());
void ConvTransposeInferMeta(const MetaTensor& x,
const MetaTensor& filter,
......@@ -143,6 +143,20 @@ void CrossEntropyWithSoftmaxInferMeta(const MetaTensor& logits,
MetaTensor* loss,
MetaConfig config = MetaConfig());
void DepthwiseConvInferMeta(const MetaTensor& input,
const MetaTensor& filter,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
bool use_addto,
int workspace_size_MB,
bool exhaustive_search,
MetaTensor* out,
MetaConfig config = MetaConfig());
void DistInferMeta(const MetaTensor& x,
const MetaTensor& y,
float p,
......
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/core/dense_tensor.h"
namespace phi {
template <typename T, typename Context>
void ConvGradGradKernel(const Context& dev_ctx,
const DenseTensor& input,
const DenseTensor& filter,
const DenseTensor& out_grad,
const paddle::optional<DenseTensor>& input_grad_grad,
const paddle::optional<DenseTensor>& filter_grad_grad,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::string& paddding_algorithm,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
bool use_addto,
int workspace_size_MB,
bool exhaustive_search,
DenseTensor* input_grad,
DenseTensor* filter_grad,
DenseTensor* out_grad_grad);
template <typename T, typename Context>
void Conv3DGradGradKernel(const Context& dev_ctx,
const DenseTensor& input,
const DenseTensor& filter,
const DenseTensor& out_grad,
const paddle::optional<DenseTensor>& input_grad_grad,
const paddle::optional<DenseTensor>& filter_grad_grad,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::string& paddding_algorithm,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
bool use_addto,
int workspace_size_MB,
bool exhaustive_search,
DenseTensor* input_grad,
DenseTensor* filter_grad,
DenseTensor* out_grad_grad);
} // namespace phi
......@@ -25,13 +25,10 @@ void ConvGradKernel(const Context& dev_ctx,
const DenseTensor& out_grad,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::string& paddding_algorithm,
int groups,
const std::string& padding_algorithm,
const std::vector<int>& dilations,
int groups,
const std::string& data_format,
bool use_addto,
int workspace_size_MB,
bool exhaustive_search,
DenseTensor* input_grad,
DenseTensor* filter_grad);
......@@ -42,7 +39,7 @@ void Conv3DGradKernel(const Context& dev_ctx,
const DenseTensor& out_grad,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::string& paddding_algorithm,
const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
......@@ -59,7 +56,7 @@ void DepthwiseConvGradKernel(const Context& dev_ctx,
const DenseTensor& out_grad,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::string& paddding_algorithm,
const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
......@@ -70,4 +67,41 @@ void DepthwiseConvGradKernel(const Context& dev_ctx,
DenseTensor* input_grad,
DenseTensor* filter_grad);
template <typename T, typename Context>
void ConvGradGradKernel(const Context& dev_ctx,
const DenseTensor& input,
const DenseTensor& filter,
const DenseTensor& out_grad,
const paddle::optional<DenseTensor>& input_grad_grad,
const paddle::optional<DenseTensor>& filter_grad_grad,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::string& padding_algorithm,
const std::vector<int>& dilations,
int groups,
const std::string& data_format,
DenseTensor* input_grad,
DenseTensor* filter_grad,
DenseTensor* out_grad_grad);
template <typename T, typename Context>
void Conv3DGradGradKernel(const Context& dev_ctx,
const DenseTensor& input,
const DenseTensor& filter,
const DenseTensor& out_grad,
const paddle::optional<DenseTensor>& input_grad_grad,
const paddle::optional<DenseTensor>& filter_grad_grad,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
bool use_addto,
int workspace_size_MB,
bool exhaustive_search,
DenseTensor* input_grad,
DenseTensor* filter_grad,
DenseTensor* out_grad_grad);
} // namespace phi
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/conv_kernel.h"
#include "paddle/fluid/platform/cudnn_workspace_helper.h"
#include "paddle/phi/core/kernel_registry.h"
namespace phi {
template <typename T, typename Context>
void ConvInferKernel(const Context& dev_ctx,
const DenseTensor& input,
const DenseTensor& filter,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::string& paddding_algorithm,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
DenseTensor* out) {
ConvKernel<T, Context>(dev_ctx,
input,
filter,
strides,
paddings,
paddding_algorithm,
groups,
dilations,
data_format,
/*use_addto=*/false,
/*workspace_size_MB=*/
paddle::platform::GetDefaultConvWorkspaceSizeLimitMB(),
/*exhaustive_search=*/false,
out);
}
} // namespace phi
PD_REGISTER_KERNEL(
conv2d_infer, CPU, ALL_LAYOUT, phi::ConvInferKernel, float, double) {}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PD_REGISTER_KERNEL(
conv2d_infer, GPU, ALL_LAYOUT, phi::ConvInferKernel, float, double) {}
#endif
......@@ -25,12 +25,9 @@ void ConvKernel(const Context& dev_ctx,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations,
int groups,
const std::string& data_format,
bool use_addto,
int workspace_size_MB,
bool exhaustive_search,
DenseTensor* out);
template <typename T, typename Context>
......@@ -54,7 +51,7 @@ void DepthwiseConvKernel(const Context& dev_ctx,
const DenseTensor& filter,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::string& paddding_algorithm,
const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
......@@ -64,16 +61,4 @@ void DepthwiseConvKernel(const Context& dev_ctx,
bool fuse_relu,
DenseTensor* out);
template <typename T, typename Context>
void ConvInferKernel(const Context& dev_ctx,
const DenseTensor& input,
const DenseTensor& filter,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::string& paddding_algorithm,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
DenseTensor* out);
} // namespace phi
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/conv_grad_grad_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/conv_grad_grad_kernel_impl.h"
namespace phi {
template <typename T, typename Context>
void Conv3DGradGradKernel(const Context& ctx,
const DenseTensor& input,
const DenseTensor& filter,
const DenseTensor& out_grad,
const paddle::optional<DenseTensor>& input_grad_grad,
const paddle::optional<DenseTensor>& filter_grad_grad,
const std::vector<int>& strides,
const std::vector<int>& paddings_t,
const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations_t,
const std::string& data_format,
bool use_addto,
int workspace_size_MB,
bool exhaustive_search_t,
DenseTensor* input_grad,
DenseTensor* filter_grad,
DenseTensor* out_grad_grad) {
ConvGradGradKernel<T>(ctx,
input,
filter,
out_grad,
input_grad_grad,
filter_grad_grad,
strides,
paddings_t,
padding_algorithm,
groups,
dilations_t,
data_format,
use_addto,
workspace_size_MB,
exhaustive_search_t,
input_grad,
filter_grad,
out_grad_grad);
}
} // namespace phi
PD_REGISTER_KERNEL(
conv2d_grad_grad, CPU, ALL_LAYOUT, phi::ConvGradGradKernel, float, double) {
}
PD_REGISTER_KERNEL(conv3d_grad_grad,
CPU,
ALL_LAYOUT,
phi::Conv3DGradGradKernel,
float,
double) {}
......@@ -27,7 +27,7 @@ void DepthwiseConvGradKernel(const Context& dev_ctx,
const DenseTensor& out_grad,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::string& paddding_algorithm,
const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
......@@ -43,13 +43,10 @@ void DepthwiseConvGradKernel(const Context& dev_ctx,
out_grad,
strides,
paddings,
paddding_algorithm,
groups,
padding_algorithm,
dilations,
groups,
data_format,
use_addto,
workspace_size_MB,
exhaustive_search,
input_grad,
filter_grad);
}
......@@ -61,7 +58,7 @@ void Conv3DGradKernel(const Context& dev_ctx,
const DenseTensor& out_grad,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::string& paddding_algorithm,
const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
......@@ -76,17 +73,50 @@ void Conv3DGradKernel(const Context& dev_ctx,
out_grad,
strides,
paddings,
paddding_algorithm,
groups,
padding_algorithm,
dilations,
groups,
data_format,
use_addto,
workspace_size_MB,
exhaustive_search,
input_grad,
filter_grad);
}
template <typename T, typename Context>
void Conv3DGradGradKernel(const Context& ctx,
const DenseTensor& input,
const DenseTensor& filter,
const DenseTensor& out_grad,
const paddle::optional<DenseTensor>& input_grad_grad,
const paddle::optional<DenseTensor>& filter_grad_grad,
const std::vector<int>& strides,
const std::vector<int>& paddings_t,
const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations_t,
const std::string& data_format,
bool use_addto,
int workspace_size_MB,
bool exhaustive_search_t,
DenseTensor* input_grad,
DenseTensor* filter_grad,
DenseTensor* out_grad_grad) {
ConvGradGradKernel<T>(ctx,
input,
filter,
out_grad,
input_grad_grad,
filter_grad_grad,
strides,
paddings_t,
padding_algorithm,
dilations_t,
groups,
data_format,
input_grad,
filter_grad,
out_grad_grad);
}
} // namespace phi
PD_REGISTER_KERNEL(
......@@ -101,3 +131,14 @@ PD_REGISTER_KERNEL(depthwise_conv2d_grad,
PD_REGISTER_KERNEL(
conv3d_grad, CPU, ALL_LAYOUT, phi::Conv3DGradKernel, float, double) {}
PD_REGISTER_KERNEL(
conv2d_grad_grad, CPU, ALL_LAYOUT, phi::ConvGradGradKernel, float, double) {
}
PD_REGISTER_KERNEL(conv3d_grad_grad,
CPU,
ALL_LAYOUT,
phi::Conv3DGradGradKernel,
float,
double) {}
......@@ -19,6 +19,30 @@
#include "paddle/phi/kernels/impl/conv_kernel_impl.h"
namespace phi {
template <typename T, typename Context>
void ConvKernel(const Context& dev_ctx,
const DenseTensor& input,
const DenseTensor& filter,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::string& padding_algorithm,
const std::vector<int>& dilations,
int groups,
const std::string& data_format,
DenseTensor* out) {
ConvKernelImpl<T>(dev_ctx,
input,
filter,
strides,
paddings,
padding_algorithm,
groups,
dilations,
data_format,
out);
}
template <typename T, typename Context>
void DepthwiseConvKernel(const Context& dev_ctx,
const DenseTensor& input,
......@@ -34,19 +58,16 @@ void DepthwiseConvKernel(const Context& dev_ctx,
bool exhaustive_search,
bool fuse_relu,
DenseTensor* out) {
ConvKernel<T>(dev_ctx,
input,
filter,
strides,
paddings,
padding_algorithm,
groups,
dilations,
data_format,
use_addto,
workspace_size_MB,
exhaustive_search,
out);
ConvKernelImpl<T>(dev_ctx,
input,
filter,
strides,
paddings,
padding_algorithm,
groups,
dilations,
data_format,
out);
}
template <typename T, typename Context>
......@@ -63,19 +84,16 @@ void Conv3DKernel(const Context& dev_ctx,
int workspace_size_MB,
bool exhaustive_search,
DenseTensor* out) {
ConvKernel<T>(dev_ctx,
input,
filter,
strides,
paddings,
padding_algorithm,
groups,
dilations,
data_format,
use_addto,
workspace_size_MB,
exhaustive_search,
out);
ConvKernelImpl<T>(dev_ctx,
input,
filter,
strides,
paddings,
padding_algorithm,
groups,
dilations,
data_format,
out);
}
} // namespace phi
......
......@@ -12,6 +12,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef _USE_MATH_DEFINES
#define _USE_MATH_DEFINES // use M_2_SQRTPI on Windows
#endif
#include "paddle/phi/kernels/erfinv_grad_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
......
......@@ -12,10 +12,28 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef _USE_MATH_DEFINES
#define _USE_MATH_DEFINES // use M_2_SQRTPI on Windows
#endif
#include "paddle/phi/kernels/erfinv_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/erfinv_kernel_impl.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
namespace phi {
template <typename T, typename Context>
void ErfinvKernel(const Context& ctx, const DenseTensor& x, DenseTensor* out) {
ctx.template Alloc<T>(out);
auto eigen_in = EigenVector<T>::Flatten(x);
auto eigen_out = EigenVector<T>::Flatten(*out);
auto& place = *ctx.eigen_device();
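  // erfinv(x) = ndtri((x + 1) / 2) / sqrt(2): shift x from [-1, 1] to [0, 1],
  // apply Eigen's inverse standard-normal CDF, then rescale by 1/sqrt(2).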
constexpr T half = static_cast<T>(0.5);
constexpr T half_sqrt = static_cast<T>(M_SQRT1_2);
eigen_out.device(place) = (eigen_in * half + half).ndtri() * half_sqrt;
}
} // namespace phi
PD_REGISTER_KERNEL(erfinv, CPU, ALL_LAYOUT, phi::ErfinvKernel, float, double) {}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/conv_grad_grad_kernel.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/conv_grad_grad_kernel_impl.h"
PD_REGISTER_KERNEL(
conv2d_grad_grad, GPU, ALL_LAYOUT, phi::ConvGradGradKernel, float, double) {
}
......@@ -27,7 +27,7 @@ void Conv3DGradKernel(const Context& dev_ctx,
const DenseTensor& out_grad,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::string& paddding_algorithm,
const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
......@@ -42,13 +42,10 @@ void Conv3DGradKernel(const Context& dev_ctx,
out_grad,
strides,
paddings,
paddding_algorithm,
groups,
padding_algorithm,
dilations,
groups,
data_format,
use_addto,
workspace_size_MB,
exhaustive_search,
input_grad,
filter_grad);
}
......@@ -60,3 +57,7 @@ PD_REGISTER_KERNEL(
PD_REGISTER_KERNEL(
conv3d_grad, GPU, ALL_LAYOUT, phi::Conv3DGradKernel, float, double) {}
PD_REGISTER_KERNEL(
conv2d_grad_grad, GPU, ALL_LAYOUT, phi::ConvGradGradKernel, float, double) {
}
......@@ -20,6 +20,29 @@
namespace phi {
template <typename T, typename Context>
void ConvKernel(const Context& dev_ctx,
const DenseTensor& input,
const DenseTensor& filter,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::string& padding_algorithm,
const std::vector<int>& dilations,
int groups,
const std::string& data_format,
DenseTensor* out) {
ConvKernelImpl<T>(dev_ctx,
input,
filter,
strides,
paddings,
padding_algorithm,
groups,
dilations,
data_format,
out);
}
template <typename T, typename Context>
void Conv3DKernel(const Context& dev_ctx,
const DenseTensor& input,
......@@ -34,19 +57,16 @@ void Conv3DKernel(const Context& dev_ctx,
int workspace_size_MB,
bool exhaustive_search,
DenseTensor* out) {
ConvKernel<T>(dev_ctx,
input,
filter,
strides,
paddings,
padding_algorithm,
groups,
dilations,
data_format,
use_addto,
workspace_size_MB,
exhaustive_search,
out);
ConvKernelImpl<T>(dev_ctx,
input,
filter,
strides,
paddings,
padding_algorithm,
groups,
dilations,
data_format,
out);
}
} // namespace phi
......
......@@ -12,6 +12,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef _USE_MATH_DEFINES
#define _USE_MATH_DEFINES // use M_2_SQRTPI on Windows
#endif
#include "paddle/phi/kernels/erfinv_grad_kernel.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
......
......@@ -42,18 +42,23 @@ void ConvCudnnKernel(const Context& ctx,
const std::vector<int>& strides,
const std::vector<int>& paddings_t,
const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations_t,
int groups,
const std::string& data_format,
bool use_addto,
int workspace_size_MB,
bool exhaustive_search_t,
DenseTensor* output) {
ctx.template Alloc<T>(output);
std::vector<int> paddings = paddings_t;
std::vector<int> dilations = dilations_t;
bool exhaustive_search = FLAGS_cudnn_exhaustive_search || exhaustive_search_t;
bool has_exhaustive_search = ctx.HasDnnAttr("exhaustive_search");
VLOG(4) << "GPUContext contains `exhaustive_search`: "
<< has_exhaustive_search;
bool exhaustive_search_attr =
has_exhaustive_search
? PADDLE_GET_CONST(bool, ctx.GetDnnAttr("exhaustive_search"))
: false;
bool exhaustive_search =
FLAGS_cudnn_exhaustive_search || exhaustive_search_attr;
bool deterministic = FLAGS_cudnn_deterministic;
PADDLE_ENFORCE_EQ(exhaustive_search && deterministic,
false,
......@@ -402,12 +407,9 @@ void Conv3DCudnnKernel(const Context& dev_ctx,
strides,
paddings,
padding_algorithm,
groups,
dilations,
groups,
data_format,
use_addto,
workspace_size_MB,
exhaustive_search,
out);
}
......@@ -432,12 +434,9 @@ void DepthwiseConvCudnnKernel(const Context& dev_ctx,
strides,
paddings,
padding_algorithm,
groups,
dilations,
groups,
data_format,
use_addto,
workspace_size_MB,
exhaustive_search,
out);
}
......
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/operators/math/im2col.h"
#include "paddle/fluid/operators/math/vol2col.h"
#include "paddle/phi/kernels/conv_kernel.h"
#include "paddle/phi/kernels/cpu/conv_util.h"
#include "paddle/phi/kernels/funcs/batch_norm_utils.h"
#include "paddle/phi/kernels/funcs/blas/blas.h"
#include "paddle/phi/kernels/funcs/math_function.h"
namespace phi {
template <typename T, typename Context>
void ConvGradGradKernel(const Context& dev_ctx,
const DenseTensor& input,
const DenseTensor& filter,
const DenseTensor& out_grad,
const paddle::optional<DenseTensor>& input_grad_grad,
const paddle::optional<DenseTensor>& filter_grad_grad,
const std::vector<int>& strides_t,
const std::vector<int>& paddings_t,
const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations_t,
const std::string& data_format,
bool use_addto,
int workspace_size_MB,
bool exhaustive_search,
DenseTensor* input_grad,
DenseTensor* filter_grad,
DenseTensor* out_grad_grad) {
const DenseTensor* X = &input;
const DenseTensor* dY = &out_grad;
const DenseTensor* ddX = input_grad_grad.get_ptr();
const DenseTensor* ddW_in = filter_grad_grad.get_ptr();
DenseTensor* ddY = out_grad_grad;
DenseTensor* dW = filter_grad;
DenseTensor* dX = input_grad;
DenseTensor W = filter;
if (!ddY && !dW && !dX) return;
const std::vector<int> strides = strides_t;
std::vector<int> paddings = paddings_t;
std::vector<int> dilations = dilations_t;
const bool channel_last = (data_format == "NHWC" || data_format == "NDHWC");
// transform Tensor
DenseTensor transformed_X(X->type());
DenseTensor transformed_dY(dY->type());
DenseTensor transformed_ddX(X->type());
if (channel_last) {
ResizeToChannelFirst<Context, T>(dev_ctx, X, &transformed_X);
TransToChannelFirst<Context, T>(dev_ctx, X, &transformed_X);
ResizeToChannelFirst<Context, T>(dev_ctx, dY, &transformed_dY);
TransToChannelFirst<Context, T>(dev_ctx, dY, &transformed_dY);
if (ddX) {
ResizeToChannelFirst<Context, T>(dev_ctx, ddX, &transformed_ddX);
TransToChannelFirst<Context, T>(dev_ctx, ddX, &transformed_ddX);
}
} else {
transformed_X = *X;
transformed_dY = *dY;
if (ddX) {
transformed_ddX = *ddX;
}
}
// update padding and dilation
auto in_dims = transformed_X.dims();
auto filter_dims = W.dims();
DDim in_data_dims = slice_ddim(in_dims, 2, in_dims.size());
DDim filter_data_dims = slice_ddim(filter_dims, 2, filter_dims.size());
std::vector<int> ksize = vectorize<int>(filter_data_dims);
UpdatePaddingAndDilation(
&paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize);
const int batch_size = static_cast<int>(transformed_X.dims()[0]);
std::vector<int64_t> filter_shape_vec(vectorize(W.dims()));
std::vector<int64_t> output_shape_vec(vectorize(transformed_dY.dims()));
size_t data_dim = filter_shape_vec.size() - 2;
std::vector<int64_t> col_shape_vec(1 + 2 * data_dim);
// col_shape [in_channel/group, kh, kw, oh, ow]
col_shape_vec[0] = transformed_X.dims()[1] / groups;
for (size_t j = 0; j < data_dim; ++j) {
col_shape_vec[j + 1] = filter_shape_vec[j + 2];
col_shape_vec[j + data_dim + 1] = output_shape_vec[j + 2];
}
DDim col_shape(make_ddim(col_shape_vec));
// col_matrix_shape [in_channel/group * kh * kw, oh * ow]
DDim col_matrix_shape = flatten_to_2d(col_shape, data_dim + 1);
// input_shape [Cin, H, W]
DDim input_shape =
slice_ddim(transformed_X.dims(), 1, transformed_X.dims().size());
// filter_matrix_shape [Cout, Cin * kh * kw]
DDim filter_matrix_shape = {W.dims()[0], W.numel() / W.dims()[0]};
W.Resize(filter_matrix_shape);
DDim output_matrix_shape = {
transformed_dY.dims()[1],
transformed_dY.numel() /
(transformed_dY.dims()[0] * transformed_dY.dims()[1])};
int in_step = static_cast<int>(transformed_X.dims()[1]) / groups;
int out_step = static_cast<int>(transformed_dY.dims()[1]) / groups;
bool is_expand = IsExpand(filter_shape_vec, strides, paddings, dilations);
DenseTensor col;
DenseTensor col_matrix;
if (is_expand) {
col.Resize(col_shape);
dev_ctx.template Alloc<T>(&col);
col_matrix.ShareDataWith(col);
col_matrix.Resize(col_matrix_shape);
}
phi::funcs::SetConstant<Context, T> set_zero;
auto blas = phi::funcs::GetBlas<Context, T>(dev_ctx);
// dx convolution double grad: gemm + col2im(col2vol)
// dx = ddw * dy ==> dx(N, Cin, H, W), ddw(Cout, Cin, kh, kw), dy(N, Cout,
// oH, oW)
if (dX && ddW_in) {
Tensor ddW;
ddW.ShareDataWith(*ddW_in).Resize(filter_matrix_shape);
dev_ctx.template Alloc<T>(dX);
DenseTensor transformed_dX(dX->type());
if (channel_last) {
ResizeToChannelFirst<Context, T>(dev_ctx, dX, &transformed_dX);
} else {
transformed_dX = *dX;
}
// if is_expand is false, the operation of set_zero is unnecessary
// because math::matmul will reset dx
if (is_expand) {
set_zero(dev_ctx, &transformed_dX, static_cast<T>(0));
}
paddle::operators::math::Col2VolFunctor<Context, T> col2vol;
paddle::operators::math::
Col2ImFunctor<paddle::operators::math::ColFormat::kCFO, Context, T>
col2im;
for (int i = 0; i < batch_size; i++) {
DenseTensor dy_batch =
transformed_dY.Slice(i, i + 1).Resize(output_matrix_shape);
DenseTensor dx_batch = transformed_dX.Slice(i, i + 1).Resize(input_shape);
for (int g = 0; g < groups; g++) {
// gemm
DenseTensor dy_slice = dy_batch.Slice(g * out_step, (g + 1) * out_step);
DenseTensor ddw_slice = ddW.Slice(g * out_step, (g + 1) * out_step);
DenseTensor dx_slice = dx_batch.Slice(g * in_step, (g + 1) * in_step);
if (!is_expand) {
col_matrix.ShareDataWith(dx_slice);
col_matrix.Resize(col_matrix_shape);
}
blas.MatMul(
ddw_slice, true, dy_slice, false, T(1.0), &col_matrix, T(0.0));
if (is_expand && data_dim == 2U) {
col2im(dev_ctx,
col,
dilations,
strides,
std::vector<int>{
paddings[0], paddings[2], paddings[1], paddings[3]},
&dx_slice);
} else if (is_expand && data_dim == 3U) {
col2vol(dev_ctx, col, dilations, strides, paddings, &dx_slice);
}
}
}
if (channel_last) {
TransToChannelLast<Context, T>(dev_ctx, &transformed_dX, dX);
}
}
// dw = ddx * dy ==> dw(Cout, Cin, kh, kw), ddx(N, Cin, H, W), dy(N, Cout,
// oH, oW)
// dw convolution double grad: im2col(vol2col) + gemm
if (dW && ddX) {
dev_ctx.template Alloc<T>(dW);
set_zero(dev_ctx, dW, static_cast<T>(0));
DenseTensor dW_arr = *dW;
dW_arr.Resize(filter_matrix_shape);
paddle::operators::math::
Im2ColFunctor<paddle::operators::math::ColFormat::kCFO, Context, T>
im2col;
paddle::operators::math::Vol2ColFunctor<Context, T> vol2col;
for (int i = 0; i < batch_size; ++i) {
DenseTensor dy_batch =
transformed_dY.Slice(i, i + 1).Resize(output_matrix_shape);
Tensor ddx_batch = transformed_ddX.Slice(i, i + 1).Resize(input_shape);
for (int g = 0; g < groups; ++g) {
// im2col
DenseTensor dy_slice = dy_batch.Slice(g * out_step, (g + 1) * out_step);
DenseTensor ddx_slice = ddx_batch.Slice(g * in_step, (g + 1) * in_step);
if (!is_expand) {
col.ShareDataWith(ddx_slice);
col_matrix.ShareDataWith(col);
col_matrix.Resize(col_matrix_shape);
} else if (data_dim == 2U) {
im2col(dev_ctx,
ddx_slice,
dilations,
strides,
std::vector<int>{
paddings[0], paddings[2], paddings[1], paddings[3]},
&col);
} else if (data_dim == 3U) {
vol2col(dev_ctx, ddx_slice, dilations, strides, paddings, &col);
}
DenseTensor dw_slice = dW_arr.Slice(g * out_step, (g + 1) * out_step);
blas.MatMul(
dy_slice, false, col_matrix, true, T(1.0), &dw_slice, T(1.0));
}
}
}
// ddy = w * ddx + x * ddw ==> ddy(N, Cout, oH, oW), x/ddx(N, Cin, H, W),
// w/ddw(Cout, Cin, kh, kw)
// ddy convolution double grad: im2col(vol2col) + gemm
if (ddY) {
dev_ctx.template Alloc<T>(ddY);
DenseTensor transformed_ddY(ddY->type());
if (channel_last) {
ResizeToChannelFirst<Context, T>(dev_ctx, ddY, &transformed_ddY);
} else {
transformed_ddY = *ddY;
}
set_zero(dev_ctx, &transformed_ddY, static_cast<T>(0));
paddle::operators::math::
Im2ColFunctor<paddle::operators::math::ColFormat::kCFO, Context, T>
im2col;
paddle::operators::math::Vol2ColFunctor<Context, T> vol2col;
for (int i = 0; i < batch_size; ++i) {
DenseTensor ddy_batch =
transformed_ddY.Slice(i, i + 1).Resize(output_matrix_shape);
for (int g = 0; g < groups; ++g) {
// gemm
DenseTensor ddy_slice =
ddy_batch.Slice(g * out_step, (g + 1) * out_step);
if (ddX) {
DenseTensor ddx_batch =
transformed_ddX.Slice(i, i + 1).Resize(input_shape);
DenseTensor ddx_slice =
ddx_batch.Slice(g * in_step, (g + 1) * in_step);
if (!is_expand) {
col.ShareDataWith(ddx_slice);
col_matrix.ShareDataWith(col);
col_matrix.Resize(col_matrix_shape);
} else if (data_dim == 2U) {
im2col(dev_ctx,
ddx_slice,
dilations,
strides,
std::vector<int>{
paddings[0], paddings[2], paddings[1], paddings[3]},
&col);
} else if (data_dim == 3U) {
vol2col(dev_ctx, ddx_slice, dilations, strides, paddings, &col);
}
DenseTensor w_slice = W.Slice(g * out_step, (g + 1) * out_step);
blas.MatMul(
w_slice, false, col_matrix, false, T(1.0), &ddy_slice, T(0.0));
}
if (ddW_in) {
DenseTensor x_batch =
transformed_X.Slice(i, i + 1).Resize(input_shape);
DenseTensor x_slice = x_batch.Slice(g * in_step, (g + 1) * in_step);
DenseTensor ddW;
ddW.ShareDataWith(*ddW_in).Resize(filter_matrix_shape);
if (!is_expand) {
col.ShareDataWith(x_slice);
col_matrix.ShareDataWith(col);
col_matrix.Resize(col_matrix_shape);
} else if (data_dim == 2U) {
im2col(dev_ctx,
x_slice,
dilations,
strides,
std::vector<int>{
paddings[0], paddings[2], paddings[1], paddings[3]},
&col);
} else if (data_dim == 3U) {
vol2col(dev_ctx, x_slice, dilations, strides, paddings, &col);
}
// gemm
DenseTensor ddw_slice = ddW.Slice(g * out_step, (g + 1) * out_step);
blas.MatMul(
ddw_slice, false, col_matrix, false, T(1.0), &ddy_slice, T(1.0));
}
}
}
if (channel_last) {
TransToChannelLast<Context, T>(dev_ctx, &transformed_ddY, ddY);
}
}
}
} // namespace phi
......@@ -16,7 +16,6 @@
#include "paddle/fluid/operators/math/im2col.h"
#include "paddle/fluid/operators/math/vol2col.h"
#include "paddle/phi/kernels/conv_grad_kernel.h"
#include "paddle/phi/kernels/cpu/conv_util.h"
#include "paddle/phi/kernels/funcs/batch_norm_utils.h"
#include "paddle/phi/kernels/funcs/blas/blas.h"
......@@ -32,12 +31,9 @@ void ConvGradKernel(const Context& dev_ctx,
const std::vector<int>& strides,
const std::vector<int>& paddings_t,
const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations_t,
int groups,
const std::string& data_format,
bool use_addto,
int workspace_size_MB,
bool exhaustive_search,
DenseTensor* input_grad,
DenseTensor* filter_grad) {
// The filter and filter_grad will be reshaped in the calculations,
......@@ -254,4 +250,304 @@ void ConvGradKernel(const Context& dev_ctx,
}
}
template <typename T, typename Context>
void ConvGradGradKernel(const Context& dev_ctx,
const DenseTensor& input,
const DenseTensor& filter,
const DenseTensor& out_grad,
const paddle::optional<DenseTensor>& input_grad_grad,
const paddle::optional<DenseTensor>& filter_grad_grad,
const std::vector<int>& strides_t,
const std::vector<int>& paddings_t,
const std::string& padding_algorithm,
const std::vector<int>& dilations_t,
int groups,
const std::string& data_format,
DenseTensor* input_grad,
DenseTensor* filter_grad,
DenseTensor* out_grad_grad) {
const DenseTensor* X = &input;
const DenseTensor* dY = &out_grad;
const DenseTensor* ddX = input_grad_grad.get_ptr();
const DenseTensor* ddW_in = filter_grad_grad.get_ptr();
DenseTensor* ddY = out_grad_grad;
DenseTensor* dW = filter_grad;
DenseTensor* dX = input_grad;
DenseTensor W = filter;
if (!ddY && !dW && !dX) return;
const std::vector<int> strides = strides_t;
std::vector<int> paddings = paddings_t;
std::vector<int> dilations = dilations_t;
const bool channel_last = (data_format == "NHWC" || data_format == "NDHWC");
// transform Tensor
DenseTensor transformed_X(X->type());
DenseTensor transformed_dY(dY->type());
DenseTensor transformed_ddX(X->type());
if (channel_last) {
ResizeToChannelFirst<Context, T>(dev_ctx, X, &transformed_X);
TransToChannelFirst<Context, T>(dev_ctx, X, &transformed_X);
ResizeToChannelFirst<Context, T>(dev_ctx, dY, &transformed_dY);
TransToChannelFirst<Context, T>(dev_ctx, dY, &transformed_dY);
if (ddX) {
ResizeToChannelFirst<Context, T>(dev_ctx, ddX, &transformed_ddX);
TransToChannelFirst<Context, T>(dev_ctx, ddX, &transformed_ddX);
}
} else {
transformed_X = *X;
transformed_dY = *dY;
if (ddX) {
transformed_ddX = *ddX;
}
}
// update padding and dilation
auto in_dims = transformed_X.dims();
auto filter_dims = W.dims();
DDim in_data_dims = slice_ddim(in_dims, 2, in_dims.size());
DDim filter_data_dims = slice_ddim(filter_dims, 2, filter_dims.size());
std::vector<int> ksize = vectorize<int>(filter_data_dims);
UpdatePaddingAndDilation(
&paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize);
const int batch_size = static_cast<int>(transformed_X.dims()[0]);
std::vector<int64_t> filter_shape_vec(vectorize(W.dims()));
std::vector<int64_t> output_shape_vec(vectorize(transformed_dY.dims()));
size_t data_dim = filter_shape_vec.size() - 2;
std::vector<int64_t> col_shape_vec(1 + 2 * data_dim);
// col_shape [in_channel/group, kh, kw, oh, ow]
col_shape_vec[0] = transformed_X.dims()[1] / groups;
for (size_t j = 0; j < data_dim; ++j) {
col_shape_vec[j + 1] = filter_shape_vec[j + 2];
col_shape_vec[j + data_dim + 1] = output_shape_vec[j + 2];
}
DDim col_shape(make_ddim(col_shape_vec));
// col_matrix_shape [in_channel/group * kh * kw, oh * ow]
DDim col_matrix_shape = flatten_to_2d(col_shape, data_dim + 1);
// input_shape [Cin, H, W]
DDim input_shape =
slice_ddim(transformed_X.dims(), 1, transformed_X.dims().size());
// filter_matrix_shape [Cout, Cin * kh * kw]
DDim filter_matrix_shape = {W.dims()[0], W.numel() / W.dims()[0]};
W.Resize(filter_matrix_shape);
DDim output_matrix_shape = {
transformed_dY.dims()[1],
transformed_dY.numel() /
(transformed_dY.dims()[0] * transformed_dY.dims()[1])};
int in_step = static_cast<int>(transformed_X.dims()[1]) / groups;
int out_step = static_cast<int>(transformed_dY.dims()[1]) / groups;
bool is_expand = IsExpand(filter_shape_vec, strides, paddings, dilations);
DenseTensor col;
DenseTensor col_matrix;
if (is_expand) {
col.Resize(col_shape);
dev_ctx.template Alloc<T>(&col);
col_matrix.ShareDataWith(col);
col_matrix.Resize(col_matrix_shape);
}
phi::funcs::SetConstant<Context, T> set_zero;
auto blas = phi::funcs::GetBlas<Context, T>(dev_ctx);
// dx convolution double grad: gemm + col2im(col2vol)
// dx = ddw * dy ==> dx(N, Cin, H, W), ddw(Cout, Cin, kh, kw), dy(N, Cout,
// oH, oW)
if (dX && ddW_in) {
Tensor ddW;
ddW.ShareDataWith(*ddW_in).Resize(filter_matrix_shape);
dev_ctx.template Alloc<T>(dX);
DenseTensor transformed_dX(dX->type());
if (channel_last) {
ResizeToChannelFirst<Context, T>(dev_ctx, dX, &transformed_dX);
} else {
transformed_dX = *dX;
}
// if is_expand is false, the operation of set_zero is unnecessary
// because math::matmul will reset dx
if (is_expand) {
set_zero(dev_ctx, &transformed_dX, static_cast<T>(0));
}
paddle::operators::math::Col2VolFunctor<Context, T> col2vol;
paddle::operators::math::
Col2ImFunctor<paddle::operators::math::ColFormat::kCFO, Context, T>
col2im;
for (int i = 0; i < batch_size; i++) {
DenseTensor dy_batch =
transformed_dY.Slice(i, i + 1).Resize(output_matrix_shape);
DenseTensor dx_batch = transformed_dX.Slice(i, i + 1).Resize(input_shape);
for (int g = 0; g < groups; g++) {
// gemm
DenseTensor dy_slice = dy_batch.Slice(g * out_step, (g + 1) * out_step);
DenseTensor ddw_slice = ddW.Slice(g * out_step, (g + 1) * out_step);
DenseTensor dx_slice = dx_batch.Slice(g * in_step, (g + 1) * in_step);
if (!is_expand) {
col_matrix.ShareDataWith(dx_slice);
col_matrix.Resize(col_matrix_shape);
}
blas.MatMul(
ddw_slice, true, dy_slice, false, T(1.0), &col_matrix, T(0.0));
if (is_expand && data_dim == 2U) {
col2im(dev_ctx,
col,
dilations,
strides,
std::vector<int>{
paddings[0], paddings[2], paddings[1], paddings[3]},
&dx_slice);
} else if (is_expand && data_dim == 3U) {
col2vol(dev_ctx, col, dilations, strides, paddings, &dx_slice);
}
}
}
if (channel_last) {
TransToChannelLast<Context, T>(dev_ctx, &transformed_dX, dX);
}
}
// dw = ddx * dy ==> dw(Cout, Cin, kh, kw), ddx(N, Cin, H, W), dy(N, Cout,
// oH, oW)
// dw convolution double grad: im2col(vol2col) + gemm
if (dW && ddX) {
dev_ctx.template Alloc<T>(dW);
set_zero(dev_ctx, dW, static_cast<T>(0));
DenseTensor dW_arr = *dW;
dW_arr.Resize(filter_matrix_shape);
paddle::operators::math::
Im2ColFunctor<paddle::operators::math::ColFormat::kCFO, Context, T>
im2col;
paddle::operators::math::Vol2ColFunctor<Context, T> vol2col;
for (int i = 0; i < batch_size; ++i) {
DenseTensor dy_batch =
transformed_dY.Slice(i, i + 1).Resize(output_matrix_shape);
Tensor ddx_batch = transformed_ddX.Slice(i, i + 1).Resize(input_shape);
for (int g = 0; g < groups; ++g) {
// im2col
DenseTensor dy_slice = dy_batch.Slice(g * out_step, (g + 1) * out_step);
DenseTensor ddx_slice = ddx_batch.Slice(g * in_step, (g + 1) * in_step);
if (!is_expand) {
col.ShareDataWith(ddx_slice);
col_matrix.ShareDataWith(col);
col_matrix.Resize(col_matrix_shape);
} else if (data_dim == 2U) {
im2col(dev_ctx,
ddx_slice,
dilations,
strides,
std::vector<int>{
paddings[0], paddings[2], paddings[1], paddings[3]},
&col);
} else if (data_dim == 3U) {
vol2col(dev_ctx, ddx_slice, dilations, strides, paddings, &col);
}
DenseTensor dw_slice = dW_arr.Slice(g * out_step, (g + 1) * out_step);
blas.MatMul(
dy_slice, false, col_matrix, true, T(1.0), &dw_slice, T(1.0));
}
}
}
// ddy = w * ddx + x * ddw ==> ddy(N, Cout, oH, oW), x/ddx(N, Cin, H, W),
// w/ddw(Cout, Cin, kh, kw)
// ddy convolution double grad: im2col(vol2col) + gemm
if (ddY) {
dev_ctx.template Alloc<T>(ddY);
DenseTensor transformed_ddY(ddY->type());
if (channel_last) {
ResizeToChannelFirst<Context, T>(dev_ctx, ddY, &transformed_ddY);
} else {
transformed_ddY = *ddY;
}
set_zero(dev_ctx, &transformed_ddY, static_cast<T>(0));
paddle::operators::math::
Im2ColFunctor<paddle::operators::math::ColFormat::kCFO, Context, T>
im2col;
paddle::operators::math::Vol2ColFunctor<Context, T> vol2col;
for (int i = 0; i < batch_size; ++i) {
DenseTensor ddy_batch =
transformed_ddY.Slice(i, i + 1).Resize(output_matrix_shape);
for (int g = 0; g < groups; ++g) {
// gemm
DenseTensor ddy_slice =
ddy_batch.Slice(g * out_step, (g + 1) * out_step);
if (ddX) {
DenseTensor ddx_batch =
transformed_ddX.Slice(i, i + 1).Resize(input_shape);
DenseTensor ddx_slice =
ddx_batch.Slice(g * in_step, (g + 1) * in_step);
if (!is_expand) {
col.ShareDataWith(ddx_slice);
col_matrix.ShareDataWith(col);
col_matrix.Resize(col_matrix_shape);
} else if (data_dim == 2U) {
im2col(dev_ctx,
ddx_slice,
dilations,
strides,
std::vector<int>{
paddings[0], paddings[2], paddings[1], paddings[3]},
&col);
} else if (data_dim == 3U) {
vol2col(dev_ctx, ddx_slice, dilations, strides, paddings, &col);
}
DenseTensor w_slice = W.Slice(g * out_step, (g + 1) * out_step);
blas.MatMul(
w_slice, false, col_matrix, false, T(1.0), &ddy_slice, T(0.0));
}
if (ddW_in) {
DenseTensor x_batch =
transformed_X.Slice(i, i + 1).Resize(input_shape);
DenseTensor x_slice = x_batch.Slice(g * in_step, (g + 1) * in_step);
DenseTensor ddW;
ddW.ShareDataWith(*ddW_in).Resize(filter_matrix_shape);
if (!is_expand) {
col.ShareDataWith(x_slice);
col_matrix.ShareDataWith(col);
col_matrix.Resize(col_matrix_shape);
} else if (data_dim == 2U) {
im2col(dev_ctx,
x_slice,
dilations,
strides,
std::vector<int>{
paddings[0], paddings[2], paddings[1], paddings[3]},
&col);
} else if (data_dim == 3U) {
vol2col(dev_ctx, x_slice, dilations, strides, paddings, &col);
}
// gemm
DenseTensor ddw_slice = ddW.Slice(g * out_step, (g + 1) * out_step);
blas.MatMul(
ddw_slice, false, col_matrix, false, T(1.0), &ddy_slice, T(1.0));
}
}
}
if (channel_last) {
TransToChannelLast<Context, T>(dev_ctx, &transformed_ddY, ddY);
}
}
}
} // namespace phi
......@@ -13,9 +13,6 @@
// limitations under the License.
#pragma once
#ifndef _USE_MATH_DEFINES
#define _USE_MATH_DEFINES // use M_2_SQRTPI on Windows
#endif
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
......
This diff has been collapsed.
This diff has been collapsed.
This diff has been collapsed.
This diff has been collapsed.
This diff has been collapsed.
......@@ -289,12 +289,9 @@ class Conv2D(layers.Layer):
self._stride,
self._padding,
"EXPLICIT",
self._groups if self._groups else 1,
self._dilation,
self._groups if self._groups else 1,
"NCHW",
False,
-1,
False,
)
if self.bias is not None:
pre_act = F.elementwise_add(pre_bias, self.bias, axis=1)
......
This diff has been collapsed.