Unverified commit 559e43ee, authored by joanna.wozna.intel, committed by GitHub

Small change in conv2d and quantize pass (#26671)

Parent: 33afeb31
@@ -39,11 +39,8 @@ void CPUQuantizePlacementPass::ApplyImpl(ir::Graph* graph) const {
     if (op->GetAttrIfExists<bool>("use_quantizer")) {
       op->SetAttr("mkldnn_data_type", std::string("int8"));
     }
-    if (op_types_list.empty()) {
-      op->SetAttr("mkldnn_data_type", std::string("int8"));
-      op->SetAttr("use_quantizer", true);
-    } else if (std::find(op_types_list.begin(), op_types_list.end(),
-                         op->Type()) != op_types_list.end()) {
+    if (std::find(op_types_list.begin(), op_types_list.end(), op->Type()) !=
+        op_types_list.end()) {
       op->SetAttr("mkldnn_data_type", std::string("int8"));
       op->SetAttr("use_quantizer", true);
     }
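
For reference, a minimal standalone sketch of the selection rule after this change (the helper name is hypothetical, this is not the pass itself): an op is marked for INT8 only when its type appears in the enabled list, so an explicitly empty list now marks nothing.

```cpp
#include <algorithm>
#include <iostream>
#include <string>
#include <unordered_set>

// Hypothetical stand-in for the simplified rule: an op gets
// mkldnn_data_type = "int8" and use_quantizer = true only when its type is
// present in the enabled list.
bool ShouldQuantize(const std::string& op_type,
                    const std::unordered_set<std::string>& enabled_op_types) {
  return std::find(enabled_op_types.begin(), enabled_op_types.end(),
                   op_type) != enabled_op_types.end();
}

int main() {
  const std::unordered_set<std::string> enabled{"conv2d", "pool2d"};
  std::cout << ShouldQuantize("conv2d", enabled) << "\n";   // 1
  std::cout << ShouldQuantize("softmax", enabled) << "\n";  // 0
  std::cout << ShouldQuantize("conv2d", {}) << "\n";        // 0: empty list disables all
}
```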
@@ -61,7 +58,10 @@ REGISTER_PASS(cpu_quantize_placement_pass,
     // a vector of operator type names to be quantized ("conv2d" etc.)
     // the second param is the default value for this vector
     .DefaultPassAttr("quantize_enabled_op_types",
-                     new std::unordered_set<std::string>())
+                     new std::unordered_set<std::string>(
+                         {"concat", "conv2d", "elementwise_add", "fc", "matmul",
+                          "pool2d", "prior_box", "relu", "reshape2",
+                          "transpose2"}))
     // a vector of operator ids that are to be excluded from quantization
     // the second param is the default value for this vector
     .DefaultPassAttr("quantize_excluded_op_ids", new std::unordered_set<int>());
@@ -130,9 +130,9 @@ TEST(QuantizerPlacementPass, enabled_conv_excluded_one) {
   MainTest({"conv2d"}, {4}, 1);
 }
 
-TEST(QuantizerPlacementPass, excluded_none) {
-  // all operators quantized
-  MainTest({}, {}, 6);
+TEST(QuantizerPlacementPass, empty_list) {
+  // no operator quantized
+  MainTest({}, {}, 0);
 }
 
 TEST(QuantizerPlacementPass, default_attr_value) {
......
@@ -72,7 +72,7 @@ static mkldnn::memory::data_type GetDstType(bool is_int8,
   return dst_dt;
 }
 
-template <typename T>
+template <typename T, typename K, typename T_out>
 class ConvMKLDNNHandlerT
     : public platform::MKLDNNHandlerT<T, mkldnn::convolution_forward> {
  public:
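
My reading of the widened parameter list, inferred from how the types are used in the hunks below rather than stated in the diff: T is the input/activation element type, K the filter and bias element type, and T_out the output element type. A toy sketch of a handler carrying the three types independently:

```cpp
#include <cstdint>
#include <iostream>
#include <typeinfo>

// Toy stand-in (not the Paddle handler) showing input, weight, and output
// element types carried as independent template parameters.
template <typename T, typename K, typename T_out>
struct ToyConvHandler {
  void Describe() const {
    std::cout << "src=" << typeid(T).name() << " weights=" << typeid(K).name()
              << " dst=" << typeid(T_out).name() << "\n";
  }
};

int main() {
  ToyConvHandler<float, float, float>().Describe();      // plain FP32 convolution
  ToyConvHandler<uint8_t, float, uint8_t>().Describe();  // int8 activations, fp32 weights
}
```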
@@ -227,7 +227,7 @@ class ConvMKLDNNHandlerT
         platform::MKLDNNMemDesc(weights_tz, platform::MKLDNNGetDataType<T>(),
                                 MKLDNNMemoryFormat::any);
     const auto dst_md = platform::MKLDNNMemDesc(
-        dst_tz, platform::MKLDNNGetDataType<T>(), chosen_memory_format);
+        dst_tz, platform::MKLDNNGetDataType<T_out>(), chosen_memory_format);
 
     const auto fwd_prop_kind = is_test ? mkldnn::prop_kind::forward_inference
                                        : mkldnn::prop_kind::forward_training;
@@ -313,29 +313,29 @@ class ConvMKLDNNHandlerT
     if (is_test && weights_mem_p) {
       return weights_mem_p;
     } else {
-      const T* filter_data = filter->data<T>();
+      const K* filter_data = filter->data<K>();
       auto weights_tz = framework::vectorize(filter->dims());
       GetWeightsTz(weights_tz, groups);
 
       auto user_src_md = platform::MKLDNNMemDesc(
-          weights_tz, platform::MKLDNNGetDataType<T>(),
+          weights_tz, platform::MKLDNNGetDataType<K>(),
           GetWeightsFormat(filter->format(), groups, is_conv3d));
 
       return this->AcquireMemoryWithReorder(
           user_src_md, this->fwd_pd_->weights_desc(),
-          to_void_cast<T>(filter_data), "@weights_mem_p", is_test);
+          to_void_cast<K>(filter_data), "@weights_mem_p", is_test);
     }
   }
 
   std::shared_ptr<mkldnn::memory> AcquireBiasMemoryWithReorder(
       const framework::Tensor* bias, const bool is_test) {
-    const T* bias_data = bias->data<T>();
+    const K* bias_data = bias->data<K>();
     auto user_bias_md = platform::MKLDNNMemDesc(
-        framework::vectorize(bias->dims()), platform::MKLDNNGetDataType<T>(),
+        framework::vectorize(bias->dims()), platform::MKLDNNGetDataType<K>(),
         MKLDNNMemoryFormat::x);
     return this->AcquireMemoryWithReorder(
-        user_bias_md, this->fwd_pd_->bias_desc(), to_void_cast<T>(bias_data),
+        user_bias_md, this->fwd_pd_->bias_desc(), to_void_cast<K>(bias_data),
         "@bias_mem_p", is_test);
   }
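
A likely reason the weights and bias are now read through K rather than T: in a quantized run the activations may be int8_t/uint8_t while the stored weights stay fp32 and are converted on the way into the primitive. The sketch below shows that idea with a naive per-tensor quantization; it is not the oneDNN weights reorder, only an illustration of why the handler dereferences filter data with a float-like K.

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>
#include <vector>

// Naive per-tensor symmetric quantization of fp32 weights to int8,
// illustrating why weights enter the handler as float even on the int8 path.
std::vector<int8_t> QuantizeWeights(const std::vector<float>& w) {
  float max_abs = 0.f;
  for (float v : w) max_abs = std::max(max_abs, std::fabs(v));
  const float scale = max_abs > 0.f ? 127.f / max_abs : 1.f;
  std::vector<int8_t> q(w.size());
  for (size_t i = 0; i < w.size(); ++i) {
    q[i] = static_cast<int8_t>(std::lround(w[i] * scale));
  }
  return q;
}

int main() {
  const std::vector<float> w{-0.5f, 0.25f, 1.0f};
  for (int8_t v : QuantizeWeights(w)) std::cout << static_cast<int>(v) << " ";  // -64 32 127
  std::cout << "\n";
}
```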
@@ -358,14 +358,14 @@ class ConvMKLDNNHandlerT
     if (residual_param->format() !=
         platform::GetMKLDNNFormat(this->fwd_pd_->dst_desc())) {
       auto residual_memory_p = this->AcquireResidualMemory(residual_param);
-      dst_memory_p = this->AcquireDstMemory(output);
+      dst_memory_p = this->template AcquireDstMemory<T_out>(output);
       this->AcquireReorder(residual_memory_p, dst_memory_p, "@residual_dst");
     } else {
       // Changing ShareDataWith to TensorCopy results in performance drop
       // on ResNet architectures
       // (https://github.com/PaddlePaddle/Paddle/issues/22964)
       output->ShareDataWith(*residual_param);
-      dst_memory_p = this->AcquireDstMemory(output);
+      dst_memory_p = this->template AcquireDstMemory<T_out>(output);
     }
     return dst_memory_p;
   }
@@ -381,7 +381,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     bool is_INT8 =
         std::is_same<T, int8_t>::value || std::is_same<T, uint8_t>::value;
     if (!is_INT8) {
-      ComputeFP32(ctx);
+      ComputeFP32<float>(ctx);
     } else {
       std::string fuse_activation = ctx.Attr<std::string>("fuse_activation");
       bool fuse_residual_conn = ctx.Attr<bool>("fuse_residual_connection");
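
For context, a self-contained sketch of the dispatch idea visible in this hunk: the kernel checks at compile time whether T is an 8-bit integer type and then calls the templated compute function with an explicit output element type. The force_fp32_output flag and the u8 choice below are assumptions made for illustration only; the actual INT8 branch is elided from this diff.

```cpp
#include <cstdint>
#include <iostream>
#include <type_traits>

// Sketch of the dispatch (not the Paddle kernel): the input element type T
// decides whether the INT8 path is taken, and the caller then names the
// output element type explicitly when invoking the templated compute.
template <typename T_out>
void Compute(const char* who) {
  std::cout << who << " -> output element size " << sizeof(T_out) << " byte(s)\n";
}

template <typename T>
void Run(bool force_fp32_output) {
  constexpr bool is_int8 =
      std::is_same<T, int8_t>::value || std::is_same<T, uint8_t>::value;
  if (!is_int8) {
    Compute<float>("fp32 path");
  } else if (force_fp32_output) {
    Compute<float>("int8 path, fp32 output");    // assumption: mirrors force_fp32_output
  } else {
    Compute<uint8_t>("int8 path, int8 output");  // assumption: u8 chosen for illustration
  }
}

int main() {
  Run<float>(false);
  Run<int8_t>(true);
  Run<uint8_t>(false);
}
```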
@@ -399,6 +399,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     }
   }
 
+  template <typename T_out>
   void ComputeFP32(const paddle::framework::ExecutionContext& ctx) const {
     auto& dev_ctx =
         ctx.template device_context<paddle::platform::MKLDNNDeviceContext>();
@@ -414,7 +415,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
         ctx.HasInput("Bias") ? ctx.Input<Tensor>("Bias") : nullptr;
     auto* output = ctx.Output<Tensor>("Output");
 
-    ConvMKLDNNHandlerT<T> handler(
+    ConvMKLDNNHandlerT<T, K, T_out> handler(
         ctx, dev_ctx, mkldnn_engine, ctx.GetPlace(), input, filter, bias,
         output, ctx.InputName("Input") + ctx.InputName("Filter"));
@@ -429,7 +430,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
       dst_memory_p =
          handler.AcquireDstMemoryWithResidual(output, residual_param);
     } else {
-      dst_memory_p = handler.AcquireDstMemory(output);
+      dst_memory_p = handler.template AcquireDstMemory<T_out>(output);
     }
 
     auto conv_p = handler.AcquireForwardPrimitive();
......
@@ -82,17 +82,21 @@ class MKLDNNHandlerT {
         fwd_pd_->src_desc(), to_void_cast<T>(input_data), "@src_mem_p");
   }
 
+  template <typename T_out = T>
   std::shared_ptr<mkldnn::memory> AcquireDstMemory(framework::Tensor* output) {
-    T* ptr = output->mutable_data<T>(place_, fwd_pd_->dst_desc().get_size());
+    T_out* ptr =
+        output->mutable_data<T_out>(place_, fwd_pd_->dst_desc().get_size());
     return this->AcquireMemoryFromPrimitive(fwd_pd_->dst_desc(), ptr,
                                             "@dst_mem_p");
   }
 
+  template <typename T_out = T>
   std::shared_ptr<mkldnn::memory> AcquireDstMemory(
       const framework::Tensor* output) {
-    const T* output_data = output->data<T>();
-    return this->AcquireMemoryFromPrimitive(
-        bwd_pd_->dst_desc(), to_void_cast<T>(output_data), "@bwd-dst_mem_p");
+    const T_out* output_data = output->data<T_out>();
+    return this->AcquireMemoryFromPrimitive(bwd_pd_->dst_desc(),
+                                            to_void_cast<T_out>(output_data),
+                                            "@bwd-dst_mem_p");
   }
 
   std::shared_ptr<mkldnn::memory> AcquireDiffDstMemory(
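
The AcquireDstMemory change is a defaulted member template, so existing callers that implicitly used T_out == T keep compiling unchanged while the conv handler can request a different output element type. Because the handler is itself a class template, the dependent calls above need the template disambiguator (this->template AcquireDstMemory<T_out>(...), handler.template AcquireDstMemory<T_out>(...)). A self-contained sketch of that pattern with toy names:

```cpp
#include <cstdint>
#include <iostream>
#include <typeinfo>

template <typename T>
struct ToyHandler {
  // Defaulted member template: callers that do not name T_out get T.
  template <typename T_out = T>
  void AcquireDstMemory() const {
    std::cout << "dst element type: " << typeid(T_out).name() << "\n";
  }
};

template <typename T, typename T_out>
void Compute(const ToyHandler<T>& handler) {
  // Inside another template the call is a dependent name, so the `template`
  // keyword is required before the explicit argument list.
  handler.template AcquireDstMemory<T_out>();
}

int main() {
  ToyHandler<uint8_t> h;
  h.AcquireDstMemory();        // T_out defaults to uint8_t
  Compute<uint8_t, float>(h);  // explicit T_out = float
}
```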
......