未验证 提交 559e43ee 编写于 作者: J joanna.wozna.intel 提交者: GitHub

Small change in conv2d and quantize pass (#26671)

上级 33afeb31
...@@ -39,11 +39,8 @@ void CPUQuantizePlacementPass::ApplyImpl(ir::Graph* graph) const { ...@@ -39,11 +39,8 @@ void CPUQuantizePlacementPass::ApplyImpl(ir::Graph* graph) const {
if (op->GetAttrIfExists<bool>("use_quantizer")) { if (op->GetAttrIfExists<bool>("use_quantizer")) {
op->SetAttr("mkldnn_data_type", std::string("int8")); op->SetAttr("mkldnn_data_type", std::string("int8"));
} }
if (op_types_list.empty()) { if (std::find(op_types_list.begin(), op_types_list.end(), op->Type()) !=
op->SetAttr("mkldnn_data_type", std::string("int8")); op_types_list.end()) {
op->SetAttr("use_quantizer", true);
} else if (std::find(op_types_list.begin(), op_types_list.end(),
op->Type()) != op_types_list.end()) {
op->SetAttr("mkldnn_data_type", std::string("int8")); op->SetAttr("mkldnn_data_type", std::string("int8"));
op->SetAttr("use_quantizer", true); op->SetAttr("use_quantizer", true);
} }
...@@ -61,7 +58,10 @@ REGISTER_PASS(cpu_quantize_placement_pass, ...@@ -61,7 +58,10 @@ REGISTER_PASS(cpu_quantize_placement_pass,
// a set of operator type names to be quantized ("conv2d" etc.) // a set of operator type names to be quantized ("conv2d" etc.)
// the second param is the default value for this set // the second param is the default value for this set
.DefaultPassAttr("quantize_enabled_op_types", .DefaultPassAttr("quantize_enabled_op_types",
new std::unordered_set<std::string>()) new std::unordered_set<std::string>(
{"concat", "conv2d", "elementwise_add", "fc", "matmul",
"pool2d", "prior_box", "relu", "reshape2",
"transpose2"}))
// a set of operator ids that are to be excluded from quantization // a set of operator ids that are to be excluded from quantization
// the second param is the default value for this set // the second param is the default value for this set
.DefaultPassAttr("quantize_excluded_op_ids", new std::unordered_set<int>()); .DefaultPassAttr("quantize_excluded_op_ids", new std::unordered_set<int>());
...@@ -130,9 +130,9 @@ TEST(QuantizerPlacementPass, enabled_conv_excluded_one) { ...@@ -130,9 +130,9 @@ TEST(QuantizerPlacementPass, enabled_conv_excluded_one) {
MainTest({"conv2d"}, {4}, 1); MainTest({"conv2d"}, {4}, 1);
} }
TEST(QuantizerPlacementPass, excluded_none) { TEST(QuantizerPlacementPass, empty_list) {
// all operators quantized // no operator quantized
MainTest({}, {}, 6); MainTest({}, {}, 0);
} }
TEST(QuantizerPlacementPass, default_attr_value) { TEST(QuantizerPlacementPass, default_attr_value) {
......
...@@ -72,7 +72,7 @@ static mkldnn::memory::data_type GetDstType(bool is_int8, ...@@ -72,7 +72,7 @@ static mkldnn::memory::data_type GetDstType(bool is_int8,
return dst_dt; return dst_dt;
} }
template <typename T> template <typename T, typename K, typename T_out>
class ConvMKLDNNHandlerT class ConvMKLDNNHandlerT
: public platform::MKLDNNHandlerT<T, mkldnn::convolution_forward> { : public platform::MKLDNNHandlerT<T, mkldnn::convolution_forward> {
public: public:
...@@ -227,7 +227,7 @@ class ConvMKLDNNHandlerT ...@@ -227,7 +227,7 @@ class ConvMKLDNNHandlerT
platform::MKLDNNMemDesc(weights_tz, platform::MKLDNNGetDataType<T>(), platform::MKLDNNMemDesc(weights_tz, platform::MKLDNNGetDataType<T>(),
MKLDNNMemoryFormat::any); MKLDNNMemoryFormat::any);
const auto dst_md = platform::MKLDNNMemDesc( const auto dst_md = platform::MKLDNNMemDesc(
dst_tz, platform::MKLDNNGetDataType<T>(), chosen_memory_format); dst_tz, platform::MKLDNNGetDataType<T_out>(), chosen_memory_format);
const auto fwd_prop_kind = is_test ? mkldnn::prop_kind::forward_inference const auto fwd_prop_kind = is_test ? mkldnn::prop_kind::forward_inference
: mkldnn::prop_kind::forward_training; : mkldnn::prop_kind::forward_training;
...@@ -313,29 +313,29 @@ class ConvMKLDNNHandlerT ...@@ -313,29 +313,29 @@ class ConvMKLDNNHandlerT
if (is_test && weights_mem_p) { if (is_test && weights_mem_p) {
return weights_mem_p; return weights_mem_p;
} else { } else {
const T* filter_data = filter->data<T>(); const K* filter_data = filter->data<K>();
auto weights_tz = framework::vectorize(filter->dims()); auto weights_tz = framework::vectorize(filter->dims());
GetWeightsTz(weights_tz, groups); GetWeightsTz(weights_tz, groups);
auto user_src_md = platform::MKLDNNMemDesc( auto user_src_md = platform::MKLDNNMemDesc(
weights_tz, platform::MKLDNNGetDataType<T>(), weights_tz, platform::MKLDNNGetDataType<K>(),
GetWeightsFormat(filter->format(), groups, is_conv3d)); GetWeightsFormat(filter->format(), groups, is_conv3d));
return this->AcquireMemoryWithReorder( return this->AcquireMemoryWithReorder(
user_src_md, this->fwd_pd_->weights_desc(), user_src_md, this->fwd_pd_->weights_desc(),
to_void_cast<T>(filter_data), "@weights_mem_p", is_test); to_void_cast<K>(filter_data), "@weights_mem_p", is_test);
} }
} }
std::shared_ptr<mkldnn::memory> AcquireBiasMemoryWithReorder( std::shared_ptr<mkldnn::memory> AcquireBiasMemoryWithReorder(
const framework::Tensor* bias, const bool is_test) { const framework::Tensor* bias, const bool is_test) {
const T* bias_data = bias->data<T>(); const K* bias_data = bias->data<K>();
auto user_bias_md = platform::MKLDNNMemDesc( auto user_bias_md = platform::MKLDNNMemDesc(
framework::vectorize(bias->dims()), platform::MKLDNNGetDataType<T>(), framework::vectorize(bias->dims()), platform::MKLDNNGetDataType<K>(),
MKLDNNMemoryFormat::x); MKLDNNMemoryFormat::x);
return this->AcquireMemoryWithReorder( return this->AcquireMemoryWithReorder(
user_bias_md, this->fwd_pd_->bias_desc(), to_void_cast<T>(bias_data), user_bias_md, this->fwd_pd_->bias_desc(), to_void_cast<K>(bias_data),
"@bias_mem_p", is_test); "@bias_mem_p", is_test);
} }
...@@ -358,14 +358,14 @@ class ConvMKLDNNHandlerT ...@@ -358,14 +358,14 @@ class ConvMKLDNNHandlerT
if (residual_param->format() != if (residual_param->format() !=
platform::GetMKLDNNFormat(this->fwd_pd_->dst_desc())) { platform::GetMKLDNNFormat(this->fwd_pd_->dst_desc())) {
auto residual_memory_p = this->AcquireResidualMemory(residual_param); auto residual_memory_p = this->AcquireResidualMemory(residual_param);
dst_memory_p = this->AcquireDstMemory(output); dst_memory_p = this->template AcquireDstMemory<T_out>(output);
this->AcquireReorder(residual_memory_p, dst_memory_p, "@residual_dst"); this->AcquireReorder(residual_memory_p, dst_memory_p, "@residual_dst");
} else { } else {
// Changing ShareDataWith to TensorCopy results in performance drop // Changing ShareDataWith to TensorCopy results in performance drop
// on ResNet architectures // on ResNet architectures
// (https://github.com/PaddlePaddle/Paddle/issues/22964) // (https://github.com/PaddlePaddle/Paddle/issues/22964)
output->ShareDataWith(*residual_param); output->ShareDataWith(*residual_param);
dst_memory_p = this->AcquireDstMemory(output); dst_memory_p = this->template AcquireDstMemory<T_out>(output);
} }
return dst_memory_p; return dst_memory_p;
} }
...@@ -381,7 +381,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -381,7 +381,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
bool is_INT8 = bool is_INT8 =
std::is_same<T, int8_t>::value || std::is_same<T, uint8_t>::value; std::is_same<T, int8_t>::value || std::is_same<T, uint8_t>::value;
if (!is_INT8) { if (!is_INT8) {
ComputeFP32(ctx); ComputeFP32<float>(ctx);
} else { } else {
std::string fuse_activation = ctx.Attr<std::string>("fuse_activation"); std::string fuse_activation = ctx.Attr<std::string>("fuse_activation");
bool fuse_residual_conn = ctx.Attr<bool>("fuse_residual_connection"); bool fuse_residual_conn = ctx.Attr<bool>("fuse_residual_connection");
...@@ -399,6 +399,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -399,6 +399,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
} }
} }
template <typename T_out>
void ComputeFP32(const paddle::framework::ExecutionContext& ctx) const { void ComputeFP32(const paddle::framework::ExecutionContext& ctx) const {
auto& dev_ctx = auto& dev_ctx =
ctx.template device_context<paddle::platform::MKLDNNDeviceContext>(); ctx.template device_context<paddle::platform::MKLDNNDeviceContext>();
...@@ -414,7 +415,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -414,7 +415,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
ctx.HasInput("Bias") ? ctx.Input<Tensor>("Bias") : nullptr; ctx.HasInput("Bias") ? ctx.Input<Tensor>("Bias") : nullptr;
auto* output = ctx.Output<Tensor>("Output"); auto* output = ctx.Output<Tensor>("Output");
ConvMKLDNNHandlerT<T> handler( ConvMKLDNNHandlerT<T, K, T_out> handler(
ctx, dev_ctx, mkldnn_engine, ctx.GetPlace(), input, filter, bias, ctx, dev_ctx, mkldnn_engine, ctx.GetPlace(), input, filter, bias,
output, ctx.InputName("Input") + ctx.InputName("Filter")); output, ctx.InputName("Input") + ctx.InputName("Filter"));
...@@ -429,7 +430,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -429,7 +430,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
dst_memory_p = dst_memory_p =
handler.AcquireDstMemoryWithResidual(output, residual_param); handler.AcquireDstMemoryWithResidual(output, residual_param);
} else { } else {
dst_memory_p = handler.AcquireDstMemory(output); dst_memory_p = handler.template AcquireDstMemory<T_out>(output);
} }
auto conv_p = handler.AcquireForwardPrimitive(); auto conv_p = handler.AcquireForwardPrimitive();
......
...@@ -82,17 +82,21 @@ class MKLDNNHandlerT { ...@@ -82,17 +82,21 @@ class MKLDNNHandlerT {
fwd_pd_->src_desc(), to_void_cast<T>(input_data), "@src_mem_p"); fwd_pd_->src_desc(), to_void_cast<T>(input_data), "@src_mem_p");
} }
template <typename T_out = T>
std::shared_ptr<mkldnn::memory> AcquireDstMemory(framework::Tensor* output) { std::shared_ptr<mkldnn::memory> AcquireDstMemory(framework::Tensor* output) {
T* ptr = output->mutable_data<T>(place_, fwd_pd_->dst_desc().get_size()); T_out* ptr =
output->mutable_data<T_out>(place_, fwd_pd_->dst_desc().get_size());
return this->AcquireMemoryFromPrimitive(fwd_pd_->dst_desc(), ptr, return this->AcquireMemoryFromPrimitive(fwd_pd_->dst_desc(), ptr,
"@dst_mem_p"); "@dst_mem_p");
} }
template <typename T_out = T>
std::shared_ptr<mkldnn::memory> AcquireDstMemory( std::shared_ptr<mkldnn::memory> AcquireDstMemory(
const framework::Tensor* output) { const framework::Tensor* output) {
const T* output_data = output->data<T>(); const T_out* output_data = output->data<T_out>();
return this->AcquireMemoryFromPrimitive( return this->AcquireMemoryFromPrimitive(bwd_pd_->dst_desc(),
bwd_pd_->dst_desc(), to_void_cast<T>(output_data), "@bwd-dst_mem_p"); to_void_cast<T_out>(output_data),
"@bwd-dst_mem_p");
} }
std::shared_ptr<mkldnn::memory> AcquireDiffDstMemory( std::shared_ptr<mkldnn::memory> AcquireDiffDstMemory(
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册