Commit 4f6b43a0 authored by Adam, committed by Tao Luo

Minor MKL-DNN conv int8 performance fixes (#20768)

test=develop
Parent 392fa55c
@@ -338,8 +338,6 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     const auto& mkldnn_engine = dev_ctx.GetEngine();

     auto* input = ctx.Input<Tensor>("Input");
-    auto* filter = ctx.Input<Tensor>("Filter");
-    auto* bias = ctx.HasInput("Bias") ? ctx.Input<Tensor>("Bias") : nullptr;
     auto* output = ctx.Output<Tensor>("Output");

     PADDLE_ENFORCE_EQ(input->layout(), DataLayout::kMKLDNN,
@@ -347,11 +345,6 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     PADDLE_ENFORCE_NE(input->format(), MKLDNNMemoryFormat::format_undef,
                       "Wrong format set for Input tensor");
-    PADDLE_ENFORCE_EQ(filter->layout(), DataLayout::kMKLDNN,
-                      "Wrong layout set for Filter tensor");
-    PADDLE_ENFORCE_NE(filter->format(), MKLDNNMemoryFormat::format_undef,
-                      "Wrong format set for Filter tensor");
-
     PADDLE_ENFORCE_GE(
         input->dims().size(), 4,
         "Input must be with 4 or 5 dimensions, i.e. NCHW or NCDHW");
@@ -359,57 +352,14 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
         input->dims().size(), 5,
         "Input must be with 4 or 5 dimensions, i.e. NCHW or NCDHW");
-    PADDLE_ENFORCE_GE(
-        filter->dims().size(), 4,
-        "Filter must be with 4 or 5 dimensions, i.e. OIHW or OIDHW");
-    PADDLE_ENFORCE_LE(
-        filter->dims().size(), 5,
-        "Filter must be with 4 or 5 dimensions, i.e. OIHW or OIDHW");
-    if (bias) {
-      PADDLE_ENFORCE_EQ(bias->layout(), DataLayout::kMKLDNN,
-                        "Wrong layout set for Bias tensor");
-      PADDLE_ENFORCE_NE(bias->format(), MKLDNNMemoryFormat::format_undef,
-                        "Wrong format set for Bias tensor");
-      PADDLE_ENFORCE_EQ(bias->dims().size(), 1,
-                        "Bias must only have 1 dimension, i.e. X");
-    }
-
-    std::vector<int> strides = ctx.Attr<std::vector<int>>("strides");
-    std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");
-    std::vector<int> dilations = ctx.Attr<std::vector<int>>("dilations");
-    int groups = ctx.Attr<int>("groups");
-
     std::string fuse_activation = ctx.Attr<std::string>("fuse_activation");
-    float fuse_alpha = ctx.Attr<float>("fuse_alpha");
-    float fuse_beta = ctx.Attr<float>("fuse_beta");
     bool fuse_residual_conn = ctx.Attr<bool>("fuse_residual_connection");
-    bool force_fp32_output = ctx.Attr<bool>("force_fp32_output");

     bool unsigned_output =
         (fuse_activation == "relu" || fuse_activation == "relu6");
-
-    PADDLE_ENFORCE(!fuse_residual_conn || !force_fp32_output,
-                   "residual fusion does not support force output with fp32");
-
-    bool is_conv3d = strides.size() == 3U;
-    PADDLE_ENFORCE(
-        is_conv3d
-            ? dilations.size() == 3 && dilations[0] == 1 && dilations[1] == 1 &&
-                  dilations[2] == 1
-            : dilations.size() == 2 && dilations[0] == 1 && dilations[1] == 1,
-        "dilation in convolution is not implemented yet");
-
-    PADDLE_ENFORCE_NE(is_conv3d, true,
-                      "int8 does not support conv3d currently");
-
     const T* input_data = input->data<T>();

     auto src_tz = paddle::framework::vectorize<int>(input->dims());
-    auto weights_tz = paddle::framework::vectorize<int>(filter->dims());
-    int g = std::max(groups, 1);
-    GetWeightsTz(weights_tz, g, is_conv3d);
-    auto dst_tz = paddle::framework::vectorize<int>(output->dims());

     mkldnn::memory::data_type src_dt =
         paddle::framework::ToMKLDNNDataType(input->type());
@@ -448,6 +398,63 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
             dev_ctx.GetBlob(prim_key));

     if (conv_p == nullptr || !is_test) {
+      float fuse_alpha = ctx.Attr<float>("fuse_alpha");
+      float fuse_beta = ctx.Attr<float>("fuse_beta");
+      bool force_fp32_output = ctx.Attr<bool>("force_fp32_output");
+
+      auto* filter = ctx.Input<Tensor>("Filter");
+
+      PADDLE_ENFORCE_EQ(filter->layout(), DataLayout::kMKLDNN,
+                        "Wrong layout set for Filter tensor");
+      PADDLE_ENFORCE_NE(filter->format(), MKLDNNMemoryFormat::format_undef,
+                        "Wrong format set for Filter tensor");
+      PADDLE_ENFORCE_GE(
+          filter->dims().size(), 4,
+          "Filter must be with 4 or 5 dimensions, i.e. OIHW or OIDHW");
+      PADDLE_ENFORCE_LE(
+          filter->dims().size(), 5,
+          "Filter must be with 4 or 5 dimensions, i.e. OIHW or OIDHW");
+
+      PADDLE_ENFORCE_EQ(
+          !fuse_residual_conn || !force_fp32_output, true,
+          "residual fusion does not support force output with fp32");
+
+      auto* bias = ctx.HasInput("Bias") ? ctx.Input<Tensor>("Bias") : nullptr;
+      if (bias) {
+        PADDLE_ENFORCE_EQ(bias->layout(), DataLayout::kMKLDNN,
+                          "Wrong layout set for Bias tensor");
+        PADDLE_ENFORCE_NE(bias->format(), MKLDNNMemoryFormat::format_undef,
+                          "Wrong format set for Bias tensor");
+        PADDLE_ENFORCE_EQ(bias->dims().size(), 1,
+                          "Bias must only have 1 dimension, i.e. X");
+      }
+
+      std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");
+      std::vector<int> dilations = ctx.Attr<std::vector<int>>("dilations");
+      std::vector<int> strides = ctx.Attr<std::vector<int>>("strides");
+
+      bool is_conv3d = strides.size() == 3U;
+      PADDLE_ENFORCE_NE(is_conv3d, true,
+                        "int8 does not support conv3d currently");
+
+      int groups = ctx.Attr<int>("groups");
+      auto weights_tz = paddle::framework::vectorize<int>(filter->dims());
+      int g = std::max(groups, 1);
+      GetWeightsTz(weights_tz, g, is_conv3d);
+      auto dst_tz = paddle::framework::vectorize<int>(output->dims());
+
+      PADDLE_ENFORCE_EQ(
+          is_conv3d
+              ? dilations.size() == 3 && dilations[0] == 1 &&
+                    dilations[1] == 1 && dilations[2] == 1
+              : dilations.size() == 2 && dilations[0] == 1 && dilations[1] == 1,
+          true, "dilation in convolution is not implemented yet");
+
       const K* filter_data = filter->data<K>();
       auto scale_in_data = ctx.Attr<float>("Scale_in");
       auto scale_in_eltwise_data = ctx.Attr<float>("Scale_in_eltwise");
...
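Note on the kernel change above: the Filter and Bias lookups, their layout/format/dimension checks, and several attribute reads used to run on every Compute() call, but their results only matter when the convolution primitive actually has to be created. Moving them inside the "if (conv_p == nullptr || !is_test)" branch means a cached-primitive hit skips all of that work. Below is a minimal standalone sketch of the pattern, assuming illustrative stand-ins (Cache, Primitive, BuildPrimitive are hypothetical names, not Paddle's actual classes; the real kernel also rebuilds whenever is_test is false, which the sketch omits):

#include <iostream>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>

// Hypothetical stand-in for a cached, executable primitive.
struct Primitive {
  std::string key;
  void Execute() const { std::cout << "execute " << key << "\n"; }
};

// Hypothetical stand-in for the per-device blob cache (dev_ctx.GetBlob in Paddle).
class Cache {
 public:
  std::shared_ptr<Primitive> Get(const std::string& key) const {
    auto it = blobs_.find(key);
    return it == blobs_.end() ? nullptr : it->second;
  }
  void Put(const std::string& key, std::shared_ptr<Primitive> p) {
    blobs_[key] = std::move(p);
  }

 private:
  std::unordered_map<std::string, std::shared_ptr<Primitive>> blobs_;
};

// Models the expensive setup: argument validation, attribute parsing,
// weight reordering. After the commit, this cost is paid only on a miss.
std::shared_ptr<Primitive> BuildPrimitive(const std::string& key) {
  std::cout << "validating inputs and building " << key << "\n";
  return std::make_shared<Primitive>(Primitive{key});
}

void Compute(Cache* cache, const std::string& prim_key) {
  auto conv_p = cache->Get(prim_key);
  if (conv_p == nullptr) {  // cache miss: do the expensive work once
    conv_p = BuildPrimitive(prim_key);
    cache->Put(prim_key, conv_p);
  }
  conv_p->Execute();  // cache hit: straight to execution
}

int main() {
  Cache cache;
  Compute(&cache, "conv_int8_3x3");  // first call: builds, then executes
  Compute(&cache, "conv_int8_3x3");  // second call: executes only
}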
@@ -205,7 +205,7 @@ inline void AppendKey(std::string* key, const std::vector<int>& dims) {
 template <typename... ArgTypes>
 inline std::string CreateKey(ArgTypes&&... args) {
   std::string key;
-  key.reserve(256);
+  key.reserve(64);
   using expand_type = int[];
   expand_type{0, (AppendKey(&key, std::forward<ArgTypes>(args)), 0)...};
   return key;
...
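Note on the helper change: CreateKey's initial buffer reservation shrinks from 256 to 64 bytes. Cache keys are concatenations of a few short strings and small integers, so they presumably fit in 64 characters for common shapes (that typical length is an inference, not stated in the commit), and the smaller reservation trims per-call allocation on this hot path. The expand_type line is the C++11 pack-expansion idiom that evaluates AppendKey once per argument, left to right. A self-contained sketch with simplified AppendKey overloads (the real header provides more of them):

#include <iostream>
#include <string>
#include <utility>
#include <vector>

// Simplified AppendKey overloads; illustrative, not the full Paddle set.
inline void AppendKey(std::string* key, const std::string& s) { *key += s; }
inline void AppendKey(std::string* key, int v) { *key += std::to_string(v); }
inline void AppendKey(std::string* key, const std::vector<int>& dims) {
  for (int d : dims) AppendKey(key, d);
}

template <typename... ArgTypes>
inline std::string CreateKey(ArgTypes&&... args) {
  std::string key;
  key.reserve(64);  // the commit's choice: enough for typical short keys
  // Brace-initializing a temporary int array forces AppendKey to run for
  // each argument in order; the 0s are just placeholder array elements.
  using expand_type = int[];
  expand_type{0, (AppendKey(&key, std::forward<ArgTypes>(args)), 0)...};
  return key;
}

int main() {
  // Prints "conv3816": "conv", 3, then the dims 8 and 16 concatenated.
  std::cout << CreateKey(std::string("conv"), 3, std::vector<int>{8, 16})
            << "\n";
}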