Commit 2099618d authored by Michał Gallus, committed by Tao Luo

[MKL-DNN] Added mkl-dnn cache clearing when creating Executor instance (#20241) (#20693)

test=release/1.6

* - Flushing mkl-dnn cache

test=develop

- Disabled clearing cache for LoadModel

- Added clearing of mkl-dnn cache when Executor is created

test=develop

- Do not clear for GPU places

test=develop

- compilation fix

test=develop

* - Moved clearing of mkl-dnn cache to the destructor of Executor

test=develop

* - Compilation fix

test=develop

- Reverted conditional clearing of mkl-dnn cache in Executor's
  destructor

test=develop

- compilation fix
Parent a77d75cd
@@ -93,6 +93,20 @@ ExecutorPrepareContext::~ExecutorPrepareContext() {
 Executor::Executor(const platform::Place& place) : place_(place) {}
 
+Executor::~Executor() {
+#ifdef PADDLE_WITH_MKLDNN
+  // Clear mkl-dnn cache, unless explicitly
+  // (as set in constructor) marked not to do so
+  // this is needed to have mkl-dnn unit tests working
+  if (platform::is_cpu_place(place_)) {
+    platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
+    platform::MKLDNNDeviceContext* dev_ctx =
+        (platform::MKLDNNDeviceContext*)pool.Get(place_);
+    dev_ctx->ResetBlobMap();
+  }
+#endif
+}
+
 void Executor::Close() {
 #ifdef PADDLE_WITH_DISTRIBUTE
   // TODO(typhoonzero): complete message will need to use real trainer_id,
......
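For context on what `ResetBlobMap()` clears: `MKLDNNDeviceContext` caches created MKL-DNN primitives and memory objects in a string-keyed blob map, and the new destructor simply empties that map for CPU places. Below is a minimal sketch of this caching pattern; the names are simplified stand-ins, and the real implementation (in paddle/fluid/platform/device_context.h/.cc) is more involved.

```cpp
#include <memory>
#include <mutex>
#include <string>
#include <unordered_map>

// Simplified stand-in for the blob cache inside platform::MKLDNNDeviceContext.
class BlobCache {
 public:
  // Cache an opaque object (e.g. a convolution primitive) under a string key.
  void SetBlob(const std::string& key, std::shared_ptr<void> blob) {
    std::lock_guard<std::mutex> lock(mutex_);
    blobs_[key] = std::move(blob);
  }

  // Return a previously cached object, or nullptr when the key is unknown.
  std::shared_ptr<void> GetBlob(const std::string& key) const {
    std::lock_guard<std::mutex> lock(mutex_);
    auto it = blobs_.find(key);
    return it == blobs_.end() ? nullptr : it->second;
  }

  // What ResetBlobMap() amounts to: drop every cached entry so the next
  // Executor starts with an empty cache.
  void ResetBlobMap() {
    std::lock_guard<std::mutex> lock(mutex_);
    blobs_.clear();
  }

 private:
  mutable std::mutex mutex_;
  std::unordered_map<std::string, std::shared_ptr<void>> blobs_;
};
```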
@@ -58,6 +58,7 @@ class Executor {
   explicit Executor(const platform::Place& place);
 
+  ~Executor();
   /*
    * Close this Executor.
    * Calling this method will send complete messages to all pserver instances.
......
@@ -136,9 +136,8 @@ class EltwiseAddMKLDNNKernel : public framework::OpKernel<T> {
       std::vector<memory::primitive_desc> srcs_pd;
       std::vector<float> scales = {1.0f, 1.0f};
 
-      const std::string key = platform::CreateKey(
-          src_x_tz, ctx.op().Output("Out") + std::to_string(x->format()) +
-                        std::to_string(y->format()));
+      const std::string key =
+          platform::CreateKey(src_x_tz, ctx.op().Output("Out"));
 
       platform::SumMKLDNNHandler handler(dev_ctx, mkldnn_engine, key);
......
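All remaining hunks shrink the argument lists passed to `platform::CreateKey`. `CreateKey` serializes each argument into one string, so every attribute included in a key (memory format, strides, epsilon, ...) multiplies the number of distinct keys, and with it the number of primitives kept alive in the blob map. A rough sketch of the variadic pattern follows; the `AppendKey` overloads here are illustrative only, and the real helper in Paddle's MKL-DNN platform utilities covers more argument types.

```cpp
#include <sstream>
#include <string>
#include <vector>

// Illustrative only: append one scalar or string-like piece to the key.
template <typename T>
void AppendKey(std::ostringstream* os, const T& value) {
  *os << value << "-";
}

// Illustrative overload for dimension vectors such as src_tz.
inline void AppendKey(std::ostringstream* os, const std::vector<int>& dims) {
  for (int d : dims) *os << d << "x";
  *os << "-";
}

// Every argument becomes part of the cache key, so dropping arguments
// (as this commit does throughout) collapses many key variants into one.
template <typename... Args>
std::string CreateKey(const Args&... args) {
  std::ostringstream os;
  (AppendKey(&os, args), ...);  // C++17 fold expression
  return os.str();
}
```

For example, the elementwise_add key above no longer encodes the input memory formats, so runs that differ only in format now map to the same cache entry.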
@@ -40,8 +40,7 @@ class BatchNormMKLDNNHandler
       : platform::MKLDNNHandlerT<T, mkldnn::batch_normalization_forward,
                                  mkldnn::batch_normalization_backward>(
             dev_ctx, dev_ctx.GetEngine(), cpu_place,
-            platform::CreateKey(dims, epsilon, flags, global_stats, fmt,
-                                uniq_name)) {
+            platform::CreateKey(dims, uniq_name)) {
     auto md = mkldnn::memory::desc(dims, platform::MKLDNNGetDataType<T>(), fmt);
 
     this->AcquireForwardPrimitiveDescriptor(
@@ -59,8 +58,7 @@ class BatchNormMKLDNNHandler
       : platform::MKLDNNHandlerT<T, mkldnn::batch_normalization_forward,
                                  mkldnn::batch_normalization_backward>(
             dev_ctx, dev_ctx.GetEngine(), cpu_place,
-            platform::CreateKey(dims, epsilon, flags, false, src_fmt,
-                                uniq_name)) {
+            platform::CreateKey(dims, uniq_name)) {
     auto diff_dst_md =
         mkldnn::memory::desc(dims, platform::MKLDNNGetDataType<T>(), diff_fmt);
     auto src_md =
......
@@ -150,9 +150,8 @@ class ConcatMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     ConcatPrimitiveFactory<T> prim_creator;
     std::string key = platform::CreateKey(
-        paddle::framework::vectorize<int>(multi_input[0]->dims()), concat_axis,
-        ctx.op().Output("Out"), dt, multi_input[0]->format(),
-        platform::ThreadIDasStr());
+        paddle::framework::vectorize<int>(multi_input[0]->dims()),
+        ctx.op().Output("Out"), dt, platform::ThreadIDasStr());
     const std::string key_prim = key + "@concat_p";
     const std::string key_concat_pd = key + "@concat_pd";
     const std::string key_srcs = key + "@concat_srcs";
......
@@ -220,8 +220,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     // Get unique name for storing MKLDNN primitives
     const std::string key = platform::CreateKey(
-        src_tz, weights_tz, fuse_activation, strides, paddings, dilations,
-        groups, ctx.op().Input("Input") + ctx.op().Input("Filter"));
+        src_tz, ctx.op().Input("Input") + ctx.op().Input("Filter"));
 
     std::vector<primitive> pipeline;
@@ -450,9 +449,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
         paddle::framework::ToMKLDNNDataType(input->type());
 
     std::string key = platform::CreateKey(
-        src_tz, weights_tz, strides, paddings, dilations, groups, src_dt,
-        input->format(), fuse_activation, fuse_residual_conn,
-        ctx.op().Input("Input") + ctx.op().Input("Filter"));
+        src_tz, src_dt, ctx.op().Input("Input") + ctx.op().Input("Filter"));
 
     std::shared_ptr<mkldnn::convolution_forward> conv_p;
     std::shared_ptr<mkldnn::memory> src_memory_p;
@@ -662,8 +659,7 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
     // as well as attributes of primitive to be created
     // This name will be used as key when saving info into device context
     const std::string key = platform::CreateKey(
-        src_tz, weights_tz, "", strides, paddings, dilations, groups,
-        ctx.op().Input("Input") + ctx.op().Input("Filter"));
+        src_tz, ctx.op().Input("Input") + ctx.op().Input("Filter"));
     const std::string key_conv_pd = key + "@conv_pd";
 
     std::vector<primitive> pipeline;
......
@@ -128,8 +128,7 @@ class ConvTransposeMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     // Get unique name for storing MKLDNN primitives
     const std::string key =
-        platform::CreateKey(src_tz, weights_tz, strides, paddings, dilations,
-                            groups, ctx.op().Output("Output"));
+        platform::CreateKey(src_tz, ctx.op().Output("Output"));
 
     std::vector<mkldnn::primitive> pipeline;
......
@@ -52,8 +52,8 @@ class DeQuantOpKernel : public framework::OpKernel<T> {
     mkldnn::memory::data_type src_dt =
         paddle::framework::ToMKLDNNDataType(input->type());
     MKLDNNMemoryFormat src_fmt = input->format();
-    std::string key = platform::CreateKey(src_dt, src_tz, reorder_scale[0],
-                                          ctx.op().Output("Output"));
+    std::string key =
+        platform::CreateKey(src_dt, src_tz, ctx.op().Output("Output"));
     const std::string key_prim = key + "@reorder_p";
     const std::string key_src_mem = key + "@src_mem";
     const std::string key_dst_mem = key + "@dst_mem";
......
@@ -228,8 +228,7 @@ std::shared_ptr<FCPrimitiveFactory<T>> GetPrimitiveFactory(
     const Tensor* input, const Tensor* weights,
     const mkldnn::engine& mkldnn_engine) {
   const std::string key = platform::CreateKey(
-      input->format(), framework::vectorize<int>(weights->dims()),
-      ctx.op().Output("Out"));
+      framework::vectorize<int>(weights->dims()), ctx.op().Output("Out"));
   auto prim_creator =
       std::static_pointer_cast<FCPrimitiveFactory<T>>(dev_ctx.GetBlob(key));
......
@@ -340,8 +340,7 @@ std::shared_ptr<MulPrimitiveFactory<XT, YT, OT>> GetPrimitiveFactory(
     const Tensor *input_x, const Tensor *input_y,
     const mkldnn::engine &mkldnn_engine, bool enable_quant) {
   const std::string key = platform::CreateKey(
-      input_x->format(), input_x->type(),
-      framework::vectorize<int>(input_x->dims()), input_y->format(),
+      input_x->type(), framework::vectorize<int>(input_x->dims()),
       input_y->type(), framework::vectorize<int>(input_y->dims()),
       ctx.op().Output("Out"));
......
@@ -45,7 +45,7 @@ class SoftmaxMKLDNNHandler
       : platform::MKLDNNHandlerT<T, mkldnn::softmax_forward,
                                  mkldnn::softmax_backward>(
             dev_ctx, dev_ctx.GetEngine(), cpu_place,
-            platform::CreateKey(dims, axis, uniq_name)) {
+            platform::CreateKey(dims, uniq_name)) {
     auto md = mkldnn::memory::desc(dims, platform::MKLDNNGetDataType<T>(), fmt);
 
     this->AcquireForwardPrimitiveDescriptor(prop_kind::forward_scoring, md,
@@ -60,7 +60,7 @@ class SoftmaxMKLDNNHandler
       : platform::MKLDNNHandlerT<T, mkldnn::softmax_forward,
                                  mkldnn::softmax_backward>(
             dev_ctx, dev_ctx.GetEngine(), cpu_place,
-            platform::CreateKey(dims, axis, uniq_name)) {
+            platform::CreateKey(dims, uniq_name)) {
     auto data_softmax_md =
         mkldnn::memory::desc(dims, platform::MKLDNNGetDataType<T>(), fmt);
     auto diff_softmax_md =
......
@@ -46,8 +46,7 @@ class TransposeMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     auto nchw_tz = paddle::framework::vectorize<int>(input->dims());
 
     const std::string key =
-        platform::CreateKey(nchw_tz, axis, ctx.op().Output("Out") +
-                                               std::to_string(input->format()));
+        platform::CreateKey(nchw_tz, ctx.op().Output("Out"));
 
     platform::TransposeMKLDNNHandler handler(nchw_tz, axis, dev_ctx,
                                              mkldnn_engine, key);
@@ -100,7 +99,7 @@ class TransposeMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
     auto nchw_tz = paddle::framework::vectorize<int>(out_grad->dims());
 
     const std::string key = platform::CreateKey(
-        nchw_tz, axis, ctx.op().Output(framework::GradVarName("X")));
+        nchw_tz, ctx.op().Output(framework::GradVarName("X")));
 
     platform::TransposeMKLDNNHandler handler(nchw_tz, reversed_axis, dev_ctx,
                                              mkldnn_engine, key);
......
@@ -420,8 +420,7 @@ class ActivationMKLDNNHandler
       : platform::MKLDNNHandlerT<T, mkldnn::eltwise_forward,
                                  mkldnn::eltwise_backward>(
             dev_ctx, dev_ctx.GetEngine(), cpu_place,
-            platform::CreateKey(dims, algorithm, fmt, alpha, beta,
-                                unique_name)) {
+            platform::CreateKey(dims, unique_name)) {
     auto md = mkldnn::memory::desc(dims, platform::MKLDNNGetDataType<T>(), fmt);
 
     this->AcquireForwardPrimitiveDescriptor(
@@ -441,8 +440,7 @@ class ActivationMKLDNNHandler
       : platform::MKLDNNHandlerT<T, mkldnn::eltwise_forward,
                                  mkldnn::eltwise_backward>(
             dev_ctx, dev_ctx.GetEngine(), cpu_place,
-            platform::CreateKey(dims, algorithm, fmt, alpha, beta,
-                                unique_name)) {
+            platform::CreateKey(dims, unique_name)) {
     auto diff_dst_md = platform::MKLDNNMemDesc(
         dims, platform::MKLDNNGetDataType<T>(), diff_fmt);
     auto src_md =
@@ -473,7 +471,7 @@ class LRNMKLDNNHandler
       : platform::MKLDNNHandlerT<T, mkldnn::lrn_forward, mkldnn::lrn_backward>(
             dev_ctx, dev_ctx.GetEngine(), cpu_place,
-            platform::CreateKey(dims, n, alpha, beta, k, fmt, unique_name)) {
+            platform::CreateKey(dims, unique_name)) {
     auto src_md =
         mkldnn::memory::desc(dims, platform::MKLDNNGetDataType<T>(), fmt);
 
     this->AcquireForwardPrimitiveDescriptor(
@@ -491,7 +489,7 @@ class LRNMKLDNNHandler
       : platform::MKLDNNHandlerT<T, mkldnn::lrn_forward, mkldnn::lrn_backward>(
             dev_ctx, dev_ctx.GetEngine(), cpu_place,
-            platform::CreateKey(dims, n, alpha, beta, k, fmt, unique_name)) {
+            platform::CreateKey(dims, unique_name)) {
     auto src_md =
         mkldnn::memory::desc(dims, platform::MKLDNNGetDataType<T>(), fmt);
     auto diff_md =
@@ -533,8 +531,7 @@ class PoolingMKLDNNHandler : public MKLDNNHandlerT<T, mkldnn::pooling_forward,
       : platform::MKLDNNHandlerT<T, mkldnn::pooling_forward,
                                  mkldnn::pooling_backward>(
             dev_ctx, dev_ctx.GetEngine(), cpu_place,
-            platform::CreateKey(src_dims, pooling_type, ksize, strides,
-                                paddings, dt, fmt, unique_name)) {
+            platform::CreateKey(src_dims, dt, unique_name)) {
     auto src_md = mkldnn::memory::desc(src_dims, dt, fmt);
     /* create memory descriptor for pooling without specified format
      * ('any') which lets a primitive (pooling in this case) choose
@@ -574,8 +571,7 @@ class PoolingMKLDNNHandler : public MKLDNNHandlerT<T, mkldnn::pooling_forward,
       : platform::MKLDNNHandlerT<T, mkldnn::pooling_forward,
                                  mkldnn::pooling_backward>(
             dev_ctx, dev_ctx.GetEngine(), cpu_place,
-            platform::CreateKey(diff_src_dims, pooling_type, ksize, strides,
-                                paddings, dt, fmt, unique_name)) {
+            platform::CreateKey(diff_src_dims, dt, unique_name)) {
     auto diff_dst_md = mkldnn::memory::desc(
         diff_dst_dims, platform::MKLDNNGetDataType<T>(), diff_dst_fmt);
     auto diff_src_md =
......
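The net effect for CPU inference and the MKL-DNN unit tests is that the primitive cache now lives and dies with the Executor. A usage sketch (assuming the three-argument `Run(program, scope, block_id)` overload; setup of the program and scope is elided):

```cpp
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/platform/place.h"

void RunOnCpu(const paddle::framework::ProgramDesc& program,
              paddle::framework::Scope* scope) {
  paddle::platform::CPUPlace place;
  paddle::framework::Executor exec(place);
  exec.Run(program, scope, /*block_id=*/0);
}  // exec is destroyed here: ~Executor() sees a CPU place and calls
   // MKLDNNDeviceContext::ResetBlobMap(), flushing all cached primitives.
```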