未验证 提交 f3eccb3f 编写于 作者: H huangjiyi 提交者: GitHub

Support static graph code generation for conv2d, conv3d, depthwise_conv2d (#54201)

* update

* update cmake

* update

* update

* update

* update

* Revert "update cmake"

This reverts commit 1e1dc1b2bc9967b725201272607f939260070fd4.

* update

* update

* update

* update
上级 81c13b86
...@@ -456,7 +456,6 @@ if(WITH_MKLDNN) ...@@ -456,7 +456,6 @@ if(WITH_MKLDNN)
set(TEST_CONV_BN_PASS_DEPS set(TEST_CONV_BN_PASS_DEPS
conv_bn_fuse_pass conv_bn_fuse_pass
graph_to_program_pass graph_to_program_pass
conv_op
conv_transpose_op conv_transpose_op
batch_norm_op batch_norm_op
generated_op generated_op
......
此差异已折叠。
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <algorithm>
#include <string>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/layout_utils.h"
#include "paddle/phi/kernels/funcs/blas/blas.h"
#include "paddle/phi/kernels/funcs/im2col.h"
#include "paddle/phi/kernels/funcs/vol2col.h"
namespace paddle {
namespace operators {
// Base convolution operator definitions for other conv
// like operators to reuse the implementation.
inline int ConvOutputSize(
int input_size, int filter_size, int dilation, int padding, int stride) {
const int dkernel = dilation * (filter_size - 1) + 1;
int output_size = (input_size + 2 * padding - dkernel) / stride + 1;
PADDLE_ENFORCE_GT(
output_size,
0,
platform::errors::InvalidArgument(
"The output's size is expected to be greater than 0. "
"But received: output's size is %d. The output's size is computed by "
"((input_size + 2 * padding - (dilation * (filter_size - 1) + 1)) / "
"stride + 1), where input_size is %d, padding is %d, "
"filter_size is %d, dilation is %d, stride is %d.",
output_size,
input_size,
padding,
filter_size,
dilation,
stride));
return output_size;
}
inline int ConvOutputSize(int input_size,
int filter_size,
int dilation,
int padding_1,
int padding_2,
int stride) {
const int dkernel = dilation * (filter_size - 1) + 1;
int output_size = (input_size + padding_1 + padding_2 - dkernel) / stride + 1;
PADDLE_ENFORCE_GT(
output_size,
0,
platform::errors::InvalidArgument(
"The output's size is expected to be greater than 0. "
"But received: output's size is %d. The output's size is computed by "
"((input_size + padding_1 + padding_2 - (dilation * (filter_size - "
"1) + 1)) / stride + 1), where input_size is %d, padding is "
"(%d, %d), filter_size is %d, dilation is %d, stride is %d.",
output_size,
input_size,
padding_1,
padding_2,
filter_size,
dilation,
stride));
return output_size;
}
template <typename T = int>
inline void UpdatePaddingAndDilation(std::vector<T>* paddings,
std::vector<T>* dilation,
const std::string padding_algorithm,
const framework::DDim data_dims,
const std::vector<T>& strides,
const std::vector<T>& ksize) {
// set padding size == data_dims.size() * 2
auto data_shape = phi::vectorize<T>(data_dims);
if (static_cast<int>(paddings->size()) == data_dims.size()) {
for (int i = 0; i < data_dims.size(); ++i) {
T copy_pad = *(paddings->begin() + 2 * i);
paddings->insert(paddings->begin() + 2 * i + 1, copy_pad);
}
} else {
PADDLE_ENFORCE_EQ(
data_dims.size() * 2,
paddings->size(),
platform::errors::InvalidArgument(
"Attribute padding's size should be the same or twice as the "
"input's dimension. "
"But received: padding's size is %d, padding is [%s]; input's "
"dimension is %d, input's shape is [%s].",
paddings->size(),
phi::make_ddim(*paddings),
data_dims.size(),
data_dims));
}
// when padding_algorithm is "VALID" or "SAME"
if (padding_algorithm == "SAME") {
for (int i = 0; i < data_dims.size(); ++i) {
T out_size = (data_dims[i] + strides[i] - 1) / strides[i];
T pad_sum =
std::max((out_size - 1) * strides[i] + ksize[i] - data_shape[i],
static_cast<T>(0));
T pad_0 = pad_sum / 2;
T pad_1 = pad_sum - pad_0;
*(paddings->begin() + i * 2) = pad_0;
*(paddings->begin() + i * 2 + 1) = pad_1;
// dilation
*(dilation->begin() + i) = 1;
}
} else if (padding_algorithm == "VALID") {
for (auto it = paddings->begin(); it != paddings->end(); it++) {
*it = 0;
}
}
}
inline bool IsExpand(const std::vector<int64_t>& filter_dim,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::vector<int>& dilations) {
bool filter_1 = true, strides_1 = true, padding_0 = true, dilation_1 = true;
for (size_t j = 0; j < strides.size(); ++j) {
filter_1 = filter_1 && (static_cast<int>(filter_dim[j + 2]) == 1);
strides_1 = strides_1 && (strides[j] == 1);
padding_0 = padding_0 && (paddings[j] == 0);
dilation_1 = dilation_1 && (dilations[j] == 1);
}
if (paddings.size() != strides.size()) {
for (size_t j = 0; j < paddings.size(); ++j) {
padding_0 = padding_0 && (paddings[j] == 0);
}
}
return !(filter_1 && strides_1 && padding_0 && dilation_1);
}
// Define Op classes in .h file so that other conv
// operator implementations can reuse the code.
class Conv2DOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() final;
protected:
virtual void Apply() {}
};
class Conv3DOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() final;
protected:
virtual void Apply() {}
};
class ConvOpInferVarType : public framework::PassInDtypeAndVarTypeToOutput {
protected:
std::unordered_map<std::string, std::string>& GetInputOutputWithSameType()
const override {
static std::unordered_map<std::string, std::string> m{
{"Input", /*->*/ "Output"}};
return m;
}
};
class ConvOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
std::vector<int64_t> output_shape = ComputeOutputShape(ctx);
OP_INOUT_CHECK(ctx->HasOutput("Output"), "Output", "Output", "Conv");
ctx->SetOutputDim("Output", phi::make_ddim(output_shape));
ctx->ShareLoD("Input", "Output");
}
protected:
std::vector<int64_t> ComputeOutputShape(
framework::InferShapeContext* ctx) const;
phi::KernelKey GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override;
phi::KernelKey GetKernelTypeForVar(
const std::string& var_name,
const phi::DenseTensor& tensor,
const phi::KernelKey& expected_kernel_type) const override;
};
class ConvOpGrad : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override;
protected:
phi::KernelKey GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override;
phi::KernelKey GetKernelTypeForVar(
const std::string& var_name,
const phi::DenseTensor& tensor,
const phi::KernelKey& expected_kernel_type) const override;
};
class ConvOpDoubleGrad : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override;
protected:
phi::KernelKey GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override;
};
} // namespace operators
} // namespace paddle
...@@ -15,13 +15,42 @@ limitations under the License. */ ...@@ -15,13 +15,42 @@ limitations under the License. */
#include <string> #include <string>
#include <vector> #include <vector>
#include "paddle/fluid/operators/conv_op.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/generator/get_expected_kernel_func.h"
#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" #include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
#include "paddle/phi/core/ddim.h" #include "paddle/phi/kernels/cpu/conv_util.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
inline int ConvOutputSize(int input_size,
int filter_size,
int dilation,
int padding_1,
int padding_2,
int stride) {
const int dkernel = dilation * (filter_size - 1) + 1;
int output_size = (input_size + padding_1 + padding_2 - dkernel) / stride + 1;
PADDLE_ENFORCE_GT(
output_size,
0,
platform::errors::InvalidArgument(
"The output's size is expected to be greater than 0. "
"But received: output's size is %d. The output's size is computed by "
"((input_size + padding_1 + padding_2 - (dilation * (filter_size - "
"1) + 1)) / stride + 1), where input_size is %d, padding is "
"(%d, %d), filter_size is %d, dilation is %d, stride is %d.",
output_size,
input_size,
padding_1,
padding_2,
filter_size,
dilation,
stride));
return output_size;
}
// This fused conv follows the equation: // This fused conv follows the equation:
// y = act ( alpha1 * conv(x) + alpha2 * z + bias ). // y = act ( alpha1 * conv(x) + alpha2 * z + bias ).
// here, y is Output, // here, y is Output,
...@@ -30,9 +59,36 @@ namespace operators { ...@@ -30,9 +59,36 @@ namespace operators {
// bias is Bias // bias is Bias
// When `split_channels` is set, y will be split into multiple outputs, // When `split_channels` is set, y will be split into multiple outputs,
// each output has split_channels[i] number of channels. // each output has split_channels[i] number of channels.
class Conv2DFusionOpMaker : public Conv2DOpMaker { class Conv2DFusionOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("Input", "(Tensor), input 0 of conv2d op.");
AddInput("Filter", "(Tensor), input 1 of conv2d op.");
AddOutput("Output", "(Tensor), output 0 of conv2d op.");
AddAttr<std::vector<int>>("strides",
"(std::vector<int>), attribute 0 for conv2d op.")
.SetDefault({1, 1});
AddAttr<std::vector<int>>("paddings",
"(std::vector<int>), attribute 1 for conv2d op.")
.SetDefault({0, 0});
AddAttr<std::string>("padding_algorithm",
"(std::string), attribute 2 for conv2d op.")
.SetDefault("EXPLICIT");
AddAttr<std::vector<int>>("dilations",
"(std::vector<int>), attribute 3 for conv2d op.")
.SetDefault({1, 1});
AddAttr<int>("groups", "(int), attribute 4 for conv2d op.").SetDefault(1);
AddAttr<std::string>("data_format",
"(std::string), attribute 5 for conv2d op.")
.SetDefault("NCHW");
AddComment(R"DOC(
TODO: Documentation of conv2d op.
)DOC");
Apply();
}
protected: protected:
void Apply() override { void Apply() {
AddInput("Bias", AddInput("Bias",
"(Tensor) Bias to be added to each output of filter application." "(Tensor) Bias to be added to each output of filter application."
"The format of output tensor is X (one-dimensional) of size equal" "The format of output tensor is X (one-dimensional) of size equal"
...@@ -73,9 +129,9 @@ class Conv2DFusionOpMaker : public Conv2DOpMaker { ...@@ -73,9 +129,9 @@ class Conv2DFusionOpMaker : public Conv2DOpMaker {
} }
}; };
class Conv2DFusionOp : public operators::ConvOp { class Conv2DFusionOp : public framework::OperatorWithKernel {
public: public:
using operators::ConvOp::ConvOp; using framework::OperatorWithKernel::OperatorWithKernel;
protected: protected:
void InferShape(framework::InferShapeContext* ctx) const override { void InferShape(framework::InferShapeContext* ctx) const override {
...@@ -275,7 +331,7 @@ class Conv2DFusionOp : public operators::ConvOp { ...@@ -275,7 +331,7 @@ class Conv2DFusionOp : public operators::ConvOp {
} }
std::vector<int> ksize = phi::vectorize<int>(filter_data_dims); std::vector<int> ksize = phi::vectorize<int>(filter_data_dims);
UpdatePaddingAndDilation( phi::UpdatePaddingAndDilation(
&paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize); &paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize);
std::vector<int64_t> output_shape({in_dims[0]}); std::vector<int64_t> output_shape({in_dims[0]});
...@@ -301,6 +357,11 @@ class Conv2DFusionOp : public operators::ConvOp { ...@@ -301,6 +357,11 @@ class Conv2DFusionOp : public operators::ConvOp {
return output_shape; return output_shape;
} }
phi::KernelKey GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
return GetConvExpectedKernelType(ctx, this);
}
}; };
// TODO(qingqing): add gradient operator for conv2d_fusion // TODO(qingqing): add gradient operator for conv2d_fusion
...@@ -313,7 +374,6 @@ REGISTER_OPERATOR( ...@@ -313,7 +374,6 @@ REGISTER_OPERATOR(
conv2d_fusion, conv2d_fusion,
ops::Conv2DFusionOp, ops::Conv2DFusionOp,
ops::Conv2DFusionOpMaker, ops::Conv2DFusionOpMaker,
ops::ConvOpInferVarType,
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>, paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>); paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
...@@ -323,6 +383,5 @@ REGISTER_OPERATOR( ...@@ -323,6 +383,5 @@ REGISTER_OPERATOR(
conv2d_fusion_cutlass, conv2d_fusion_cutlass,
ops::Conv2DFusionOp, ops::Conv2DFusionOp,
ops::Conv2DFusionOpMaker, ops::Conv2DFusionOpMaker,
ops::ConvOpInferVarType,
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>, paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>); paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
...@@ -15,14 +15,44 @@ limitations under the License. */ ...@@ -15,14 +15,44 @@ limitations under the License. */
#include <string> #include <string>
#include <vector> #include <vector>
#include "paddle/fluid/operators/conv_op.h" #include "paddle/fluid/framework/infershape_utils.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/generator/get_expected_kernel_func.h"
#include "paddle/phi/infermeta/multiary.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
class FusedConvOpMaker : public Conv2DOpMaker { class FusedConvOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("Input", "(Tensor), input 0 of conv2d op.");
AddInput("Filter", "(Tensor), input 1 of conv2d op.");
AddOutput("Output", "(Tensor), output 0 of conv2d op.");
AddAttr<std::vector<int>>("strides",
"(std::vector<int>), attribute 0 for conv2d op.")
.SetDefault({1, 1});
AddAttr<std::vector<int>>("paddings",
"(std::vector<int>), attribute 1 for conv2d op.")
.SetDefault({0, 0});
AddAttr<std::string>("padding_algorithm",
"(std::string), attribute 2 for conv2d op.")
.SetDefault("EXPLICIT");
AddAttr<std::vector<int>>("dilations",
"(std::vector<int>), attribute 3 for conv2d op.")
.SetDefault({1, 1});
AddAttr<int>("groups", "(int), attribute 4 for conv2d op.").SetDefault(1);
AddAttr<std::string>("data_format",
"(std::string), attribute 5 for conv2d op.")
.SetDefault("NCHW");
AddComment(R"DOC(
TODO: Documentation of conv2d op.
)DOC");
Apply();
}
protected: protected:
void Apply() override { void Apply() {
AddInput("Bias", AddInput("Bias",
"(Tensor) Bias to be added to each output of filter application." "(Tensor) Bias to be added to each output of filter application."
"The format of output tensor is X (one-dimensional) of size equal" "The format of output tensor is X (one-dimensional) of size equal"
...@@ -84,25 +114,43 @@ $$ ...@@ -84,25 +114,43 @@ $$
} }
}; };
class FusedConvOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
phi::KernelKey GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
return GetConvExpectedKernelType(ctx, this);
}
};
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators; namespace ops = paddle::operators;
DECLARE_INFER_SHAPE_FUNCTOR(fused_conv2d,
FusedConv2DInferShapeFunctor,
PD_INFER_META(phi::FusedConvInferMeta));
DECLARE_INFER_SHAPE_FUNCTOR(fused_conv3d,
FusedConv3DInferShapeFunctor,
PD_INFER_META(phi::FusedConvInferMeta));
// fused_conv2d is only used for onednn inference. // fused_conv2d is only used for onednn inference.
REGISTER_OPERATOR( REGISTER_OPERATOR(
fused_conv2d, fused_conv2d,
ops::ConvOp, ops::FusedConvOp,
ops::FusedConvOpMaker, ops::FusedConvOpMaker,
ops::ConvOpInferVarType,
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>, paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>); paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>,
FusedConv2DInferShapeFunctor);
// fused_conv3d is only used for onednn inference. // fused_conv3d is only used for onednn inference.
REGISTER_OPERATOR( REGISTER_OPERATOR(
fused_conv3d, fused_conv3d,
ops::ConvOp, ops::FusedConvOp,
ops::FusedConvOpMaker, ops::FusedConvOpMaker,
ops::ConvOpInferVarType,
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>, paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>); paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>,
FusedConv3DInferShapeFunctor);
...@@ -14,7 +14,6 @@ ...@@ -14,7 +14,6 @@
#ifdef PADDLE_WITH_XPU #ifdef PADDLE_WITH_XPU
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/conv_op.h"
#include "paddle/fluid/platform/device/device_wrapper.h" #include "paddle/fluid/platform/device/device_wrapper.h"
#include "paddle/fluid/platform/device/xpu/xpu_header.h" #include "paddle/fluid/platform/device/xpu/xpu_header.h"
......
...@@ -326,5 +326,31 @@ phi::KernelKey GetLayerNormExpectedKernelType( ...@@ -326,5 +326,31 @@ phi::KernelKey GetLayerNormExpectedKernelType(
return phi::KernelKey(input_data_type, ctx.GetPlace()); return phi::KernelKey(input_data_type, ctx.GetPlace());
} }
phi::KernelKey GetConvExpectedKernelType(
const framework::ExecutionContext& ctx,
const framework::OperatorWithKernel* op_ptr) {
auto input_data_type = op_ptr->IndicateVarDataType(ctx, "Input");
// todo enable data layout when it's ready
// (https://github.com/PaddlePaddle/Paddle/pull/20042)
if (input_data_type != framework::proto::VarType::INT8 &&
input_data_type != framework::proto::VarType::UINT8 &&
input_data_type != framework::proto::VarType::BF16) {
auto filter_data_type = framework::TransToProtoVarType(
ctx.Input<phi::DenseTensor>("Filter")->dtype());
PADDLE_ENFORCE_EQ(
input_data_type,
filter_data_type,
platform::errors::InvalidArgument(
"input and filter data type should be consistent, "
"but received input data type is %s and filter type "
"is %s",
paddle::framework::DataTypeToString(input_data_type),
paddle::framework::DataTypeToString(filter_data_type)));
}
return phi::KernelKey(input_data_type, ctx.GetPlace());
}
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
...@@ -80,5 +80,9 @@ phi::KernelKey GetLayerNormExpectedKernelType( ...@@ -80,5 +80,9 @@ phi::KernelKey GetLayerNormExpectedKernelType(
const framework::ExecutionContext& ctx, const framework::ExecutionContext& ctx,
const framework::OperatorWithKernel* op_ptr); const framework::OperatorWithKernel* op_ptr);
phi::KernelKey GetConvExpectedKernelType(
const framework::ExecutionContext& ctx,
const framework::OperatorWithKernel* op_ptr);
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
...@@ -324,6 +324,54 @@ ...@@ -324,6 +324,54 @@
output : Tensor(x_grad) output : Tensor(x_grad)
invoke : conj(out_grad) invoke : conj(out_grad)
- backward_op : conv2d_grad
forward : conv2d (Tensor input, Tensor filter, int[] strides={1, 1}, int[] paddings={0, 0}, str padding_algorithm="EXPLICIT", int[] dilations={1, 1}, int groups=1, str data_format="NCHW") -> Tensor(out)
args : (Tensor input, Tensor filter, Tensor out_grad, int[] strides, int[] paddings, str padding_algorithm, int[] dilations, int groups, str data_format)
output : Tensor(input_grad), Tensor(filter_grad)
infer_meta :
func : GeneralBinaryGradInferMeta
param : [input, filter]
kernel :
func : conv2d_grad
data_type : input
backward : conv2d_grad_grad
- backward_op : conv2d_grad_grad
forward : conv2d_grad (Tensor input, Tensor filter, Tensor grad_out, int[] strides, int[] paddings, str padding_algorithm, int[] dilations, int groups, str data_format) -> Tensor(grad_input), Tensor(grad_filter)
args : (Tensor input, Tensor filter, Tensor grad_out, Tensor grad_input_grad, Tensor grad_filter_grad, int[] strides, int[] paddings, str padding_algorithm, int[] dilations, int groups, str data_format)
output : Tensor(input_grad), Tensor(filter_grad), Tensor(grad_out_grad)
infer_meta :
func : GeneralTernaryGradInferMeta
param: [input, filter, grad_out]
kernel :
func : conv2d_double_grad
data_type : input
optional : grad_input_grad, grad_filter_grad
- backward_op : conv3d_double_grad
forward : conv3d_grad (Tensor input, Tensor filter, Tensor grad_out, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format) -> Tensor(grad_input), Tensor(grad_filter)
args : (Tensor input, Tensor filter, Tensor grad_out, Tensor grad_input_grad, Tensor grad_filter_grad, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format)
output : Tensor(input_grad), Tensor(filter_grad), Tensor(grad_out_grad)
infer_meta :
func : GeneralTernaryGradInferMeta
param: [input, filter, grad_out]
kernel :
func : conv3d_double_grad
data_type : input
optional : grad_input_grad, grad_filter_grad
- backward_op : conv3d_grad
forward : conv3d (Tensor input, Tensor filter, int[] strides={1, 1, 1}, int[] paddings={0, 0, 0}, str padding_algorithm="EXPLICIT", int groups=1, int[] dilations={1, 1, 1}, str data_format="NCDHW") -> Tensor(out)
args : (Tensor input, Tensor filter, Tensor out_grad, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format)
output : Tensor(input_grad), Tensor(filter_grad)
infer_meta :
func : GeneralBinaryGradInferMeta
param : [input, filter]
kernel :
func : conv3d_grad
data_type : input
backward : conv3d_double_grad
- backward_op : cos_double_grad - backward_op : cos_double_grad
forward : cos_grad (Tensor x, Tensor grad_out) -> Tensor(grad_x) forward : cos_grad (Tensor x, Tensor grad_out) -> Tensor(grad_x)
args : (Tensor x, Tensor grad_out, Tensor grad_x_grad) args : (Tensor x, Tensor grad_out, Tensor grad_x_grad)
...@@ -415,6 +463,30 @@ ...@@ -415,6 +463,30 @@
kernel : kernel :
func : cumprod_grad func : cumprod_grad
- backward_op : depthwise_conv2d_double_grad
forward : depthwise_conv2d_grad (Tensor input, Tensor filter, Tensor grad_out, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format) -> Tensor(grad_input), Tensor(grad_filter)
args : (Tensor input, Tensor filter, Tensor grad_out, Tensor grad_input_grad, Tensor grad_filter_grad, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format)
output : Tensor(input_grad), Tensor(filter_grad), Tensor(grad_out_grad)
infer_meta :
func : GeneralTernaryGradInferMeta
param: [input, filter, grad_out]
kernel :
func : depthwise_conv2d_double_grad
data_type : input
optional : grad_input_grad, grad_filter_grad
- backward_op : depthwise_conv2d_grad
forward : depthwise_conv2d (Tensor input, Tensor filter, int[] strides={1, 1}, int[] paddings={0, 0}, str padding_algorithm="EXPLICIT", int groups=1, int[] dilations={1, 1}, str data_format="NCHW") -> Tensor(out)
args : (Tensor input, Tensor filter, Tensor out_grad, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format)
output : Tensor(input_grad), Tensor(filter_grad)
infer_meta :
func : GeneralBinaryGradInferMeta
param : [input, filter]
kernel :
func : depthwise_conv2d_grad
data_type : input
backward : depthwise_conv2d_double_grad
- backward_op : det_grad - backward_op : det_grad
forward : det (Tensor x) -> Tensor(out) forward : det (Tensor x) -> Tensor(out)
args : (Tensor x, Tensor out, Tensor out_grad) args : (Tensor x, Tensor out, Tensor out_grad)
......
...@@ -137,28 +137,6 @@ ...@@ -137,28 +137,6 @@
no_need_buffer : x no_need_buffer : x
backward : concat_double_grad backward : concat_double_grad
- backward_op : conv2d_grad
forward : conv2d (Tensor input, Tensor filter, int[] strides, int[] paddings, str padding_algorithm, int[] dilations, int groups, str data_format) -> Tensor(out)
args : (Tensor input, Tensor filter, Tensor out_grad, int[] strides, int[] paddings, str padding_algorithm, int[] dilations, int groups, str data_format)
output : Tensor(input_grad), Tensor(filter_grad)
infer_meta :
func : GeneralBinaryGradInferMeta
param : [input, filter]
kernel :
func : conv2d_grad
backward : conv2d_grad_grad
- backward_op : conv2d_grad_grad
forward : conv2d_grad (Tensor input, Tensor filter, Tensor grad_out, int[] strides, int[] paddings, str padding_algorithm, int[] dilations, int groups, str data_format) -> Tensor(grad_input), Tensor(grad_filter)
args : (Tensor input, Tensor filter, Tensor grad_out, Tensor grad_input_grad, Tensor grad_filter_grad, int[] strides, int[] paddings, str padding_algorithm, int[] dilations, int groups, str data_format)
output : Tensor(input_grad), Tensor(filter_grad), Tensor(grad_out_grad)
infer_meta :
func : GeneralTernaryGradInferMeta
param: [input, filter, grad_out]
kernel :
func : conv2d_double_grad
optional : grad_input_grad, grad_filter_grad
- backward_op : conv2d_transpose_double_grad - backward_op : conv2d_transpose_double_grad
forward : conv2d_transpose_grad(Tensor x, Tensor filter, Tensor grad_out, int[] strides, int[] paddings, int[] output_padding, IntArray output_size, str padding_algorithm, int groups, int[] dilations, str data_format) -> Tensor(grad_x), Tensor(grad_filter) forward : conv2d_transpose_grad(Tensor x, Tensor filter, Tensor grad_out, int[] strides, int[] paddings, int[] output_padding, IntArray output_size, str padding_algorithm, int groups, int[] dilations, str data_format) -> Tensor(grad_x), Tensor(grad_filter)
args : (Tensor x, Tensor filter, Tensor grad_out, Tensor grad_x_grad, Tensor grad_filter_grad, int[] strides, int[] paddings, int[] output_padding, IntArray output_size, str padding_algorithm, int groups, int[] dilations, str data_format) args : (Tensor x, Tensor filter, Tensor grad_out, Tensor grad_x_grad, Tensor grad_filter_grad, int[] strides, int[] paddings, int[] output_padding, IntArray output_size, str padding_algorithm, int groups, int[] dilations, str data_format)
...@@ -178,28 +156,6 @@ ...@@ -178,28 +156,6 @@
func : conv2d_transpose_grad func : conv2d_transpose_grad
backward : conv2d_transpose_double_grad backward : conv2d_transpose_double_grad
- backward_op : conv3d_double_grad
forward : conv3d_grad (Tensor input, Tensor filter, Tensor grad_out, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format) -> Tensor(grad_input), Tensor(grad_filter)
args : (Tensor input, Tensor filter, Tensor grad_out, Tensor grad_input_grad, Tensor grad_filter_grad, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format)
output : Tensor(input_grad), Tensor(filter_grad), Tensor(grad_out_grad)
infer_meta :
func : GeneralTernaryGradInferMeta
param: [input, filter, grad_out]
kernel :
func : conv3d_double_grad
optional : grad_input_grad, grad_filter_grad
- backward_op : conv3d_grad
forward : conv3d (Tensor input, Tensor filter, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format) -> Tensor(out)
args : (Tensor input, Tensor filter, Tensor out_grad, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format)
output : Tensor(input_grad), Tensor(filter_grad)
infer_meta :
func : GeneralBinaryGradInferMeta
param : [input, filter]
kernel :
func : conv3d_grad
backward : conv3d_double_grad
- backward_op : conv3d_transpose_grad - backward_op : conv3d_transpose_grad
forward : conv3d_transpose(Tensor x, Tensor filter, int[] strides, int[] paddings, int[] output_padding, int[] output_size, str padding_algorithm, int groups, int[] dilations, str data_format) -> Tensor(out) forward : conv3d_transpose(Tensor x, Tensor filter, int[] strides, int[] paddings, int[] output_padding, int[] output_size, str padding_algorithm, int groups, int[] dilations, str data_format) -> Tensor(out)
args : (Tensor x, Tensor filter, Tensor out_grad, int[] strides, int[] paddings, int[] output_padding, int[] output_size, str padding_algorithm, int groups, int[] dilations, str data_format) args : (Tensor x, Tensor filter, Tensor out_grad, int[] strides, int[] paddings, int[] output_padding, int[] output_size, str padding_algorithm, int groups, int[] dilations, str data_format)
...@@ -232,29 +188,6 @@ ...@@ -232,29 +188,6 @@
data_type : x data_type : x
optional : mask optional : mask
- backward_op : depthwise_conv2d_double_grad
forward : depthwise_conv2d_grad (Tensor input, Tensor filter, Tensor grad_out, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format) -> Tensor(grad_input), Tensor(grad_filter)
args : (Tensor input, Tensor filter, Tensor grad_out, Tensor grad_input_grad, Tensor grad_filter_grad, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format)
output : Tensor(input_grad), Tensor(filter_grad), Tensor(grad_out_grad)
infer_meta :
func : GeneralTernaryGradInferMeta
param: [input, filter, grad_out]
kernel :
func : depthwise_conv2d_double_grad
optional : grad_input_grad, grad_filter_grad
- backward_op : depthwise_conv2d_grad
forward : depthwise_conv2d (Tensor input, Tensor filter, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format) -> Tensor(out)
args : (Tensor input, Tensor filter, Tensor out_grad, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format)
output : Tensor(input_grad), Tensor(filter_grad)
infer_meta :
func : GeneralBinaryGradInferMeta
param : [input, filter]
kernel :
func : depthwise_conv2d_grad
param : [input, filter, out_grad, strides, paddings, padding_algorithm, groups, dilations, data_format]
backward : depthwise_conv2d_double_grad
- backward_op : depthwise_conv2d_transpose_grad - backward_op : depthwise_conv2d_transpose_grad
forward : depthwise_conv2d_transpose(Tensor x, Tensor filter, int[] strides, int[] paddings, int[] output_padding, IntArray output_size, str padding_algorithm, int groups, int[] dilations, str data_format) -> Tensor(out) forward : depthwise_conv2d_transpose(Tensor x, Tensor filter, int[] strides, int[] paddings, int[] output_padding, IntArray output_size, str padding_algorithm, int groups, int[] dilations, str data_format) -> Tensor(out)
args : (Tensor x, Tensor filter, Tensor out_grad, int[] strides, int[] paddings, int[] output_padding, IntArray output_size, str padding_algorithm, int groups, int[] dilations, str data_format) args : (Tensor x, Tensor filter, Tensor out_grad, int[] strides, int[] paddings, int[] output_padding, IntArray output_size, str padding_algorithm, int groups, int[] dilations, str data_format)
......
...@@ -172,15 +172,6 @@ ...@@ -172,15 +172,6 @@
func : concat func : concat
backward : concat_grad backward : concat_grad
- op : conv2d
args : (Tensor input, Tensor filter, int[] strides, int[] paddings, str padding_algorithm, int[] dilations, int groups, str data_format)
output : Tensor
infer_meta :
func : ConvInferMeta
kernel :
func : conv2d
backward : conv2d_grad
- op : conv2d_transpose - op : conv2d_transpose
args : (Tensor x, Tensor filter, int[] strides, int[] paddings, int[] output_padding, IntArray output_size, str padding_algorithm, int groups, int[] dilations, str data_format) args : (Tensor x, Tensor filter, int[] strides, int[] paddings, int[] output_padding, IntArray output_size, str padding_algorithm, int groups, int[] dilations, str data_format)
output : Tensor(out) output : Tensor(out)
...@@ -190,15 +181,6 @@ ...@@ -190,15 +181,6 @@
func : conv2d_transpose func : conv2d_transpose
backward : conv2d_transpose_grad backward : conv2d_transpose_grad
- op : conv3d
args : (Tensor input, Tensor filter, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format)
output : Tensor
infer_meta :
func : Conv3DInferMeta
kernel :
func : conv3d
backward : conv3d_grad
- op : conv3d_transpose - op : conv3d_transpose
args : (Tensor x, Tensor filter, int[] strides, int[] paddings, int[] output_padding, int[] output_size, str padding_algorithm, int groups, int[] dilations, str data_format) args : (Tensor x, Tensor filter, int[] strides, int[] paddings, int[] output_padding, int[] output_size, str padding_algorithm, int groups, int[] dilations, str data_format)
output : Tensor(out) output : Tensor(out)
...@@ -244,17 +226,6 @@ ...@@ -244,17 +226,6 @@
optional : mask optional : mask
backward : deformable_conv_grad backward : deformable_conv_grad
- op : depthwise_conv2d
args : (Tensor x, Tensor filter, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format)
output : Tensor(out)
infer_meta :
func : DepthwiseConvInferMeta
param : [x, filter, strides, paddings, padding_algorithm, groups, dilations, data_format]
kernel :
func : depthwise_conv2d
param : [x, filter, strides, paddings, padding_algorithm, groups, dilations, data_format]
backward : depthwise_conv2d_grad
- op : depthwise_conv2d_transpose - op : depthwise_conv2d_transpose
args : (Tensor x, Tensor filter, int[] strides, int[] paddings, int[] output_padding, IntArray output_size, str padding_algorithm, int groups, int[] dilations, str data_format) args : (Tensor x, Tensor filter, int[] strides, int[] paddings, int[] output_padding, IntArray output_size, str padding_algorithm, int groups, int[] dilations, str data_format)
output : Tensor(out) output : Tensor(out)
......
...@@ -479,11 +479,17 @@ ...@@ -479,11 +479,17 @@
out : Out out : Out
- op : conv2d - op : conv2d
backward : conv2d_grad backward : conv2d_grad, conv2d_grad_grad
inputs :
{input : Input, filter : Filter}
outputs :
out : Output
extra : extra :
attrs : [bool is_test = false, bool use_cudnn = true, bool use_mkldnn = false, bool use_addto = false, attrs : [bool is_test = false, bool use_cudnn = true, bool use_mkldnn = false, bool use_addto = false,
bool force_fp32_output = false, bool force_fp32_output = false,
int workspace_size_MB = phi::backends::gpu::GetDefaultConvWorkspaceSizeLimitMB(), bool exhaustive_search = false] int workspace_size_MB = phi::backends::gpu::GetDefaultConvWorkspaceSizeLimitMB(), bool exhaustive_search = false]
get_expected_kernel_type :
conv2d : GetConvExpectedKernelType
- op : conv2d_fusion - op : conv2d_fusion
extra : extra :
...@@ -503,12 +509,18 @@ ...@@ -503,12 +509,18 @@
int workspace_size_MB = phi::backends::gpu::GetDefaultConvWorkspaceSizeLimitMB()] int workspace_size_MB = phi::backends::gpu::GetDefaultConvWorkspaceSizeLimitMB()]
- op : conv3d - op : conv3d
backward : conv3d_grad backward : conv3d_grad, conv3d_double_grad (conv3d_grad_grad)
inputs :
{input : Input, filter : Filter}
outputs :
out : Output
extra : extra :
attrs : [bool is_test = false, bool use_cudnn = true, bool use_mkldnn = false, str mkldnn_data_type = "float32", bool fuse_relu = false, attrs : [bool is_test = false, bool use_cudnn = true, bool use_mkldnn = false, str mkldnn_data_type = "float32", bool fuse_relu = false,
str fuse_activation = "", float fuse_alpha = 0.0f, float fuse_beta = 0.0f, str fuse_activation = "", float fuse_alpha = 0.0f, float fuse_beta = 0.0f,
bool use_addto = false, bool fuse_residual_connection = false, bool force_fp32_output = false, bool use_addto = false, bool fuse_residual_connection = false, bool force_fp32_output = false,
int workspace_size_MB = phi::backends::gpu::GetDefaultConvWorkspaceSizeLimitMB(), bool exhaustive_search = false] int workspace_size_MB = phi::backends::gpu::GetDefaultConvWorkspaceSizeLimitMB(), bool exhaustive_search = false]
get_expected_kernel_type :
conv3d : GetConvExpectedKernelType
- op : conv3d_transpose - op : conv3d_transpose
backward : conv3d_transpose_grad backward : conv3d_transpose_grad
...@@ -603,14 +615,20 @@ ...@@ -603,14 +615,20 @@
out : Output out : Output
- op : depthwise_conv2d - op : depthwise_conv2d
backward : depthwise_conv2d_grad backward : depthwise_conv2d_grad, depthwise_conv2d_double_grad (depthwise_conv2d_grad_grad)
inputs :
{input : Input, filter : Filter}
outputs :
out : Output
extra : extra :
attrs : [bool is_test = false, bool fuse_relu_before_depthwise_conv = false, bool use_mkldnn = false, attrs : [bool is_test = false, bool use_cudnn = false, bool fuse_relu_before_depthwise_conv = false, bool use_mkldnn = false,
bool use_quantizer = false, str mkldnn_data_type = "float32", bool fuse_relu = false, bool use_quantizer = false, str mkldnn_data_type = "float32", bool fuse_relu = false,
str fuse_activation = "", float fuse_alpha = 0.0f, float fuse_beta = 0.0f, bool use_addto = false, str fuse_activation = "", float fuse_alpha = 0.0f, float fuse_beta = 0.0f, bool use_addto = false,
bool fuse_residual_connection = false, float Scale_in = 1.0f, float Scale_out = 1.0f, bool fuse_residual_connection = false, float Scale_in = 1.0f, float Scale_out = 1.0f,
float Scale_in_eltwise = 1.0f, 'float[] Scale_weights = {1.0f}', bool force_fp32_output = false, float Scale_in_eltwise = 1.0f, 'float[] Scale_weights = {1.0f}', bool force_fp32_output = false,
int workspace_size_MB = phi::backends::gpu::GetDefaultConvWorkspaceSizeLimitMB(), bool exhaustive_search = false] int workspace_size_MB = phi::backends::gpu::GetDefaultConvWorkspaceSizeLimitMB(), bool exhaustive_search = false]
get_expected_kernel_type :
depthwise_conv2d : GetConvExpectedKernelType
- op : depthwise_conv2d_transpose - op : depthwise_conv2d_transpose
backward : depthwise_conv2d_transpose_grad backward : depthwise_conv2d_transpose_grad
......
...@@ -90,6 +90,33 @@ ...@@ -90,6 +90,33 @@
of each place to be compatible with before. of each place to be compatible with before.
default : -1 default : -1
- op : conv2d
version :
- checkpoint : Upgrade conv2d, add a new attribute [use_addto].
action :
- add_attr : use_addto
comment : In order to support new feature (inplace addto strategy) for
gradient accumulation.
default : "false"
- op : conv3d
version :
- checkpoint : Upgrade conv3d, add a new attribute [use_addto].
action :
- add_attr : use_addto
comment : In order to support new feature (inplace addto strategy) for
gradient accumulation.
default : "false"
- op : depthwise_conv2d
version :
- checkpoint : Upgrade depthwise_conv2d, add a new attribute [use_addto].
action :
- add_attr : use_addto
comment : In order to support new feature (inplace addto strategy) for
gradient accumulation.
default : "false"
- op : embedding - op : embedding
version : version :
- checkpoint : Upgrade flip, add new attr [axis] and delete attr [dims] - checkpoint : Upgrade flip, add new attr [axis] and delete attr [dims]
......
...@@ -454,6 +454,24 @@ ...@@ -454,6 +454,24 @@
func : conj func : conj
backward : conj_grad backward : conj_grad
- op : conv2d
args : (Tensor input, Tensor filter, int[] strides={1, 1}, int[] paddings={0, 0}, str padding_algorithm="EXPLICIT", int[] dilations={1, 1}, int groups=1, str data_format="NCHW")
output : Tensor
infer_meta :
func : ConvInferMeta
kernel :
func : conv2d
backward : conv2d_grad
- op : conv3d
args : (Tensor input, Tensor filter, int[] strides={1, 1, 1}, int[] paddings={0, 0, 0}, str padding_algorithm="EXPLICIT", int groups=1, int[] dilations={1, 1, 1}, str data_format="NCDHW")
output : Tensor
infer_meta :
func : Conv3DInferMeta
kernel :
func : conv3d
backward : conv3d_grad
- op : cos - op : cos
args : (Tensor x) args : (Tensor x)
output : Tensor output : Tensor
...@@ -513,6 +531,15 @@ ...@@ -513,6 +531,15 @@
func : cumprod func : cumprod
backward : cumprod_grad backward : cumprod_grad
- op : depthwise_conv2d
args : (Tensor input, Tensor filter, int[] strides={1, 1}, int[] paddings={0, 0}, str padding_algorithm="EXPLICIT", int groups=1, int[] dilations={1, 1}, str data_format="NCHW")
output : Tensor(out)
infer_meta :
func : DepthwiseConvInferMeta
kernel :
func : depthwise_conv2d
backward : depthwise_conv2d_grad
- op : det - op : det
args : (Tensor x) args : (Tensor x)
output : Tensor output : Tensor
......
...@@ -24,6 +24,7 @@ limitations under the License. */ ...@@ -24,6 +24,7 @@ limitations under the License. */
#include "paddle/phi/core/infermeta_utils.h" #include "paddle/phi/core/infermeta_utils.h"
#include "paddle/phi/core/meta_tensor.h" #include "paddle/phi/core/meta_tensor.h"
#include "paddle/phi/core/utils/data_type.h" #include "paddle/phi/core/utils/data_type.h"
#include "paddle/phi/infermeta/binary.h"
#include "paddle/phi/kernels/funcs/common_shape.h" #include "paddle/phi/kernels/funcs/common_shape.h"
#include "paddle/phi/kernels/funcs/concat_funcs.h" #include "paddle/phi/kernels/funcs/concat_funcs.h"
...@@ -3320,6 +3321,34 @@ void FusedAdamInferMeta( ...@@ -3320,6 +3321,34 @@ void FusedAdamInferMeta(
} }
} }
void FusedConvInferMeta(const MetaTensor& input,
const MetaTensor& filter,
const MetaTensor& bias,
const MetaTensor& residual_param,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::string& padding_algorithm,
const std::vector<int>& dilations,
int groups,
const std::string& data_format,
const std::string& mkldnn_data_type,
const std::string& fuse_activation,
bool fuse_residual_conn,
bool force_fp32_output,
MetaTensor* out,
MetaConfig config) {
ConvInferMeta(input,
filter,
strides,
paddings,
padding_algorithm,
dilations,
groups,
data_format,
out,
config);
}
void MoeInferMeta(const MetaTensor& x, void MoeInferMeta(const MetaTensor& x,
const MetaTensor& gate, const MetaTensor& gate,
const MetaTensor& bmm0, const MetaTensor& bmm0,
......
...@@ -619,6 +619,23 @@ void FusedAdamInferMeta( ...@@ -619,6 +619,23 @@ void FusedAdamInferMeta(
std::vector<MetaTensor*> beta2_pows_out, std::vector<MetaTensor*> beta2_pows_out,
std::vector<MetaTensor*> master_params_out); std::vector<MetaTensor*> master_params_out);
void FusedConvInferMeta(const MetaTensor& input,
const MetaTensor& filter,
const MetaTensor& bias,
const MetaTensor& residual_param,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::string& padding_algorithm,
const std::vector<int>& dilations,
int groups,
const std::string& data_format,
const std::string& mkldnn_data_type,
const std::string& fuse_activation,
bool fuse_residual_conn,
bool force_fp32_output,
MetaTensor* out,
MetaConfig config);
void MoeInferMeta(const MetaTensor& x, void MoeInferMeta(const MetaTensor& x,
const MetaTensor& gate, const MetaTensor& gate,
const MetaTensor& bmm0, const MetaTensor& bmm0,
......
...@@ -133,6 +133,29 @@ void FusedConv3DKernel(const Context& dev_ctx, ...@@ -133,6 +133,29 @@ void FusedConv3DKernel(const Context& dev_ctx,
out); out);
} }
KernelKey ConvGetKernelTypeForVar(const GetKernelTypeForVarContext* ctx) {
const std::string& var_name = ctx->GetVarName();
const DenseTensor& tensor = ctx->GetTensor();
const KernelKey& expected_kernel_type = ctx->GetKernelKey();
const AttributeMap& attrs = ctx->GetAttrs();
// Only input require reshaping, weights and
// bias are having shape in NCHW order
if ((var_name == "Input") &&
(expected_kernel_type.layout() == phi::DataLayout::ONEDNN) &&
(tensor.layout() != phi::DataLayout::ONEDNN)) {
auto it = attrs.find("data_format");
const std::string data_format = PADDLE_GET_CONST(std::string, it->second);
auto dl = phi::StringToDataLayout(data_format);
// Some models may have intentionally set "AnyLayout" for conv
// op. Treat this as NCHW (default data_format value)
if (dl != phi::DataLayout::kAnyLayout) {
return phi::KernelKey(tensor.place(), dl, expected_kernel_type.dtype());
}
}
return phi::KernelKey(
tensor.place(), tensor.layout(), expected_kernel_type.dtype());
}
} // namespace fusion } // namespace fusion
} // namespace phi } // namespace phi
...@@ -143,7 +166,11 @@ PD_REGISTER_KERNEL(fused_conv2d, ...@@ -143,7 +166,11 @@ PD_REGISTER_KERNEL(fused_conv2d,
float, float,
phi::dtype::bfloat16, phi::dtype::bfloat16,
uint8_t, uint8_t,
int8_t) {} int8_t) {
kernel->get_kerneltype_forvar_fn_ = phi::fusion::ConvGetKernelTypeForVar;
}
PD_REGISTER_KERNEL( PD_REGISTER_KERNEL(
fused_conv3d, OneDNN, ONEDNN, phi::fusion::FusedConv3DKernel, float) {} fused_conv3d, OneDNN, ONEDNN, phi::fusion::FusedConv3DKernel, float) {
kernel->get_kerneltype_forvar_fn_ = phi::fusion::ConvGetKernelTypeForVar;
}
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
// limitations under the License. // limitations under the License.
#include "paddle/phi/kernels/conv_grad_kernel.h" #include "paddle/phi/kernels/conv_grad_kernel.h"
#include "paddle/phi/core/compat/get_kerneltype_forvar_utils.h"
#include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/visit_type.h" #include "paddle/phi/core/visit_type.h"
#include "paddle/phi/kernels/funcs/data_layout_transform.h" #include "paddle/phi/kernels/funcs/data_layout_transform.h"
...@@ -235,6 +236,29 @@ void Conv3DGradKernel(const Context& dev_ctx, ...@@ -235,6 +236,29 @@ void Conv3DGradKernel(const Context& dev_ctx,
filter_grad); filter_grad);
} }
KernelKey ConvGradGetKernelTypeForVar(const GetKernelTypeForVarContext* ctx) {
const std::string& var_name = ctx->GetVarName();
const DenseTensor& tensor = ctx->GetTensor();
const KernelKey& expected_kernel_type = ctx->GetKernelKey();
const AttributeMap& attrs = ctx->GetAttrs();
// Only input require reshaping, weights and
// bias are having shape in NCHW order
if (((var_name == "Input") || (var_name == "Output@GRAD")) &&
(expected_kernel_type.layout() == phi::DataLayout::ONEDNN) &&
(tensor.layout() != phi::DataLayout::ONEDNN)) {
auto it = attrs.find("data_format");
const std::string data_format = PADDLE_GET_CONST(std::string, it->second);
auto dl = phi::StringToDataLayout(data_format);
// Some models may have intentionally set "AnyLayout" for pool
// op. Treat this as NCHW (default data_format value)
if (dl != phi::DataLayout::kAnyLayout) {
return phi::KernelKey(tensor.place(), dl, expected_kernel_type.dtype());
}
}
return phi::KernelKey(
tensor.place(), tensor.layout(), expected_kernel_type.dtype());
}
} // namespace phi } // namespace phi
PD_REGISTER_KERNEL(conv2d_grad, PD_REGISTER_KERNEL(conv2d_grad,
...@@ -242,13 +266,19 @@ PD_REGISTER_KERNEL(conv2d_grad, ...@@ -242,13 +266,19 @@ PD_REGISTER_KERNEL(conv2d_grad,
ONEDNN, ONEDNN,
phi::ConvGradKernel, phi::ConvGradKernel,
float, float,
phi::dtype::bfloat16) {} phi::dtype::bfloat16) {
kernel->get_kerneltype_forvar_fn_ = phi::ConvGradGetKernelTypeForVar;
}
PD_REGISTER_KERNEL(depthwise_conv2d_grad, PD_REGISTER_KERNEL(depthwise_conv2d_grad,
OneDNN, OneDNN,
ONEDNN, ONEDNN,
phi::DepthwiseConvGradKernel, phi::DepthwiseConvGradKernel,
float, float,
phi::dtype::bfloat16) {} phi::dtype::bfloat16) {
kernel->get_kerneltype_forvar_fn_ = phi::ConvGradGetKernelTypeForVar;
}
PD_REGISTER_KERNEL(conv3d_grad, OneDNN, ONEDNN, phi::Conv3DGradKernel, float) {} PD_REGISTER_KERNEL(conv3d_grad, OneDNN, ONEDNN, phi::Conv3DGradKernel, float) {
kernel->get_kerneltype_forvar_fn_ = phi::ConvGradGetKernelTypeForVar;
}
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include "paddle/phi/kernels/conv_kernel.h" #include "paddle/phi/kernels/conv_kernel.h"
#include "paddle/phi/core/compat/get_kerneltype_forvar_utils.h"
#include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/visit_type.h" #include "paddle/phi/core/visit_type.h"
#include "paddle/phi/kernels/funcs/data_layout_transform.h" #include "paddle/phi/kernels/funcs/data_layout_transform.h"
...@@ -111,6 +112,29 @@ void Conv3DKernel(const Context& dev_ctx, ...@@ -111,6 +112,29 @@ void Conv3DKernel(const Context& dev_ctx,
out); out);
} }
KernelKey ConvGetKernelTypeForVar(const GetKernelTypeForVarContext* ctx) {
const std::string& var_name = ctx->GetVarName();
const DenseTensor& tensor = ctx->GetTensor();
const KernelKey& expected_kernel_type = ctx->GetKernelKey();
const AttributeMap& attrs = ctx->GetAttrs();
// Only input require reshaping, weights and
// bias are having shape in NCHW order
if ((var_name == "Input") &&
(expected_kernel_type.layout() == phi::DataLayout::ONEDNN) &&
(tensor.layout() != phi::DataLayout::ONEDNN)) {
auto it = attrs.find("data_format");
const std::string data_format = PADDLE_GET_CONST(std::string, it->second);
auto dl = phi::StringToDataLayout(data_format);
// Some models may have intentionally set "AnyLayout" for conv
// op. Treat this as NCHW (default data_format value)
if (dl != phi::DataLayout::kAnyLayout) {
return phi::KernelKey(tensor.place(), dl, expected_kernel_type.dtype());
}
}
return phi::KernelKey(
tensor.place(), tensor.layout(), expected_kernel_type.dtype());
}
} // namespace phi } // namespace phi
PD_REGISTER_KERNEL(conv2d, PD_REGISTER_KERNEL(conv2d,
...@@ -120,7 +144,9 @@ PD_REGISTER_KERNEL(conv2d, ...@@ -120,7 +144,9 @@ PD_REGISTER_KERNEL(conv2d,
float, float,
phi::dtype::bfloat16, phi::dtype::bfloat16,
uint8_t, uint8_t,
int8_t) {} int8_t) {
kernel->get_kerneltype_forvar_fn_ = phi::ConvGetKernelTypeForVar;
}
PD_REGISTER_KERNEL(depthwise_conv2d, PD_REGISTER_KERNEL(depthwise_conv2d,
OneDNN, OneDNN,
...@@ -129,6 +155,10 @@ PD_REGISTER_KERNEL(depthwise_conv2d, ...@@ -129,6 +155,10 @@ PD_REGISTER_KERNEL(depthwise_conv2d,
float, float,
phi::dtype::bfloat16, phi::dtype::bfloat16,
uint8_t, uint8_t,
int8_t) {} int8_t) {
kernel->get_kerneltype_forvar_fn_ = phi::ConvGetKernelTypeForVar;
}
PD_REGISTER_KERNEL(conv3d, OneDNN, ONEDNN, phi::Conv3DKernel, float) {} PD_REGISTER_KERNEL(conv3d, OneDNN, ONEDNN, phi::Conv3DKernel, float) {
kernel->get_kerneltype_forvar_fn_ = phi::ConvGetKernelTypeForVar;
}
...@@ -16,45 +16,6 @@ ...@@ -16,45 +16,6 @@
namespace phi { namespace phi {
KernelSignature Conv2dOpArgumentMapping(
const ArgumentMappingContext& ctx UNUSED) {
return KernelSignature("conv2d",
{"Input", "Filter"},
{"strides",
"paddings",
"padding_algorithm",
"dilations",
"groups",
"data_format"},
{"Output"});
}
KernelSignature Conv2dGradOpArgumentMapping(
const ArgumentMappingContext& ctx UNUSED) {
return KernelSignature("conv2d_grad",
{"Input", "Filter", "Output@GRAD"},
{"strides",
"paddings",
"padding_algorithm",
"dilations",
"groups",
"data_format"},
{"Input@GRAD", "Filter@GRAD"});
}
KernelSignature Conv2dDoubleGradOpArgumentMapping(
const ArgumentMappingContext& ctx UNUSED) {
return KernelSignature("conv2d_double_grad",
{"Input", "Filter", "DOutput", "DDInput", "DDFilter"},
{"strides",
"paddings",
"padding_algorithm",
"dilations",
"groups",
"data_format"},
{"DInput", "DFilter", "DDOutput"});
}
KernelSignature Conv2dFusionArgumentMapping( KernelSignature Conv2dFusionArgumentMapping(
const ArgumentMappingContext& ctx UNUSED) { const ArgumentMappingContext& ctx UNUSED) {
return KernelSignature("conv2d_fusion_cutlass", return KernelSignature("conv2d_fusion_cutlass",
...@@ -71,9 +32,5 @@ KernelSignature Conv2dFusionArgumentMapping( ...@@ -71,9 +32,5 @@ KernelSignature Conv2dFusionArgumentMapping(
} }
} // namespace phi } // namespace phi
PD_REGISTER_ARG_MAPPING_FN(conv2d, phi::Conv2dOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(conv2d_fusion_cutlass, PD_REGISTER_ARG_MAPPING_FN(conv2d_fusion_cutlass,
phi::Conv2dFusionArgumentMapping); phi::Conv2dFusionArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(conv2d_grad, phi::Conv2dGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(conv2d_grad_grad,
phi::Conv2dDoubleGradOpArgumentMapping);
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/core/compat/op_utils.h"
namespace phi {
KernelSignature Conv3dOpArgumentMapping(
const ArgumentMappingContext& ctx UNUSED) {
return KernelSignature("conv3d",
{"Input", "Filter"},
{
"strides",
"paddings",
"padding_algorithm",
"groups",
"dilations",
"data_format",
},
{"Output"});
}
KernelSignature Conv3dGradOpArgumentMapping(
const ArgumentMappingContext& ctx UNUSED) {
return KernelSignature("conv3d_grad",
{"Input", "Filter", "Output@GRAD"},
{"strides",
"paddings",
"padding_algorithm",
"groups",
"dilations",
"data_format"},
{"Input@GRAD", "Filter@GRAD"});
}
KernelSignature Conv3dDoubleGradOpArgumentMapping(
const ArgumentMappingContext& ctx UNUSED) {
return KernelSignature("conv3d_double_grad",
{"Input", "Filter", "DOutput", "DDInput", "DDFilter"},
{"strides",
"paddings",
"padding_algorithm",
"groups",
"dilations",
"data_format"},
{"DInput", "DFilter", "DDOutput"});
}
} // namespace phi
PD_REGISTER_BASE_KERNEL_NAME(conv3d_grad_grad, conv3d_double_grad);
PD_REGISTER_ARG_MAPPING_FN(conv3d, phi::Conv3dOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(conv3d_grad, phi::Conv3dGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(conv3d_grad_grad,
phi::Conv3dDoubleGradOpArgumentMapping);
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/core/compat/op_utils.h"
namespace phi {
KernelSignature DepthwiseConv2dOpArgumentMapping(
const ArgumentMappingContext& ctx UNUSED) {
return KernelSignature("depthwise_conv2d",
{"Input", "Filter"},
{"strides",
"paddings",
"padding_algorithm",
"groups",
"dilations",
"data_format"},
{"Output"});
}
KernelSignature DepthwiseConv2dGradOpArgumentMapping(
const ArgumentMappingContext& ctx UNUSED) {
return KernelSignature("depthwise_conv2d_grad",
{"Input", "Filter", "Output@GRAD"},
{"strides",
"paddings",
"padding_algorithm",
"groups",
"dilations",
"data_format"},
{"Input@GRAD", "Filter@GRAD"});
}
KernelSignature DepthwiseConv2dDoubleGradOpArgumentMapping(
const ArgumentMappingContext& ctx UNUSED) {
return KernelSignature("depthwise_conv2d_double_grad",
{"Input", "Filter", "DOutput", "DDInput", "DDFilter"},
{"strides",
"paddings",
"padding_algorithm",
"groups",
"dilations",
"data_format"},
{"DInput", "DFilter", "DDOutput"});
}
} // namespace phi
PD_REGISTER_BASE_KERNEL_NAME(depthwise_conv2d_grad_grad,
depthwise_conv2d_double_grad);
PD_REGISTER_ARG_MAPPING_FN(depthwise_conv2d,
phi::DepthwiseConv2dOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(depthwise_conv2d_grad,
phi::DepthwiseConv2dGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(depthwise_conv2d_grad_grad,
phi::DepthwiseConv2dDoubleGradOpArgumentMapping);
...@@ -43,7 +43,7 @@ if(WITH_GPU OR WITH_ROCM) ...@@ -43,7 +43,7 @@ if(WITH_GPU OR WITH_ROCM)
cc_test( cc_test(
test_cudnn_norm_conv test_cudnn_norm_conv
SRCS cudnn_norm_conv_test.cc SRCS cudnn_norm_conv_test.cc
DEPS conv_op DEPS generated_op
depthwise_conv depthwise_conv
tensor tensor
op_registry op_registry
......
...@@ -16,7 +16,6 @@ set(TEST_MKLDNN_CACHING_DEPS ...@@ -16,7 +16,6 @@ set(TEST_MKLDNN_CACHING_DEPS
elementwise_mul_op elementwise_mul_op
elementwise_add_op elementwise_add_op
activation_op activation_op
conv_op
phi phi
scope scope
device_context device_context
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册