From a1cd27f13fdae3485a456435ceebfde36ddd38f5 Mon Sep 17 00:00:00 2001 From: Jacek Czaja Date: Thu, 17 Oct 2019 10:41:42 +0200 Subject: [PATCH] [MKL-DNN] Added mkl-dnn cache clearing when creating Executor instance (#20241) * - Flushing mkl-dnn cache test=develop - Disabled clearing cache for LoadModel - Added clearing of mkl-dnn cache when Executor is created test=develop - Do not clear for GPU places test=develop - compilation fix test=develop * - Moved clearing of mkl-dnn cache in destructor of executor test=develop * - Compilation fix test=develop - Reverted conditional clearing of mkl-dnn cache in Executors's destructor test=develop - compilation fix --- paddle/fluid/framework/executor.cc | 14 ++++++++++++++ paddle/fluid/framework/executor.h | 1 + .../mkldnn/elementwise_add_mkldnn_op.cc | 5 ++--- .../operators/mkldnn/batch_norm_mkldnn_op.cc | 6 ++---- .../fluid/operators/mkldnn/concat_mkldnn_op.cc | 5 ++--- paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc | 10 +++------- .../operators/mkldnn/conv_transpose_mkldnn_op.cc | 3 +-- .../operators/mkldnn/dequantize_mkldnn_op.cc | 4 ++-- paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc | 3 +-- paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc | 3 +-- .../fluid/operators/mkldnn/softmax_mkldnn_op.cc | 4 ++-- .../operators/mkldnn/transpose_mkldnn_op.cc | 5 ++--- paddle/fluid/platform/mkldnn_reuse.h | 16 ++++++---------- 13 files changed, 39 insertions(+), 40 deletions(-) diff --git a/paddle/fluid/framework/executor.cc b/paddle/fluid/framework/executor.cc index 44646f7ab1a..14dd821367e 100644 --- a/paddle/fluid/framework/executor.cc +++ b/paddle/fluid/framework/executor.cc @@ -93,6 +93,20 @@ ExecutorPrepareContext::~ExecutorPrepareContext() { Executor::Executor(const platform::Place& place) : place_(place) {} +Executor::~Executor() { +#ifdef PADDLE_WITH_MKLDNN + // Clear mkl-dnn cache, unless explicitly + // (as set in constructor) marked not to do so + // this is needed to have mkl-dnn unit tests working + if (platform::is_cpu_place(place_)) { + platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); + platform::MKLDNNDeviceContext* dev_ctx = + (platform::MKLDNNDeviceContext*)pool.Get(place_); + dev_ctx->ResetBlobMap(); + } +#endif +} + void Executor::Close() { #ifdef PADDLE_WITH_DISTRIBUTE // TODO(typhoonzero): complete message will need to use real trainer_id, diff --git a/paddle/fluid/framework/executor.h b/paddle/fluid/framework/executor.h index 587ac1a8a6f..6ca50b70031 100644 --- a/paddle/fluid/framework/executor.h +++ b/paddle/fluid/framework/executor.h @@ -58,6 +58,7 @@ class Executor { explicit Executor(const platform::Place& place); + ~Executor(); /* * Close this Executor. * Calling this method will send complete messages to all pserver instances. diff --git a/paddle/fluid/operators/elementwise/mkldnn/elementwise_add_mkldnn_op.cc b/paddle/fluid/operators/elementwise/mkldnn/elementwise_add_mkldnn_op.cc index 1f4a4fb0e16..a3a5a031e97 100644 --- a/paddle/fluid/operators/elementwise/mkldnn/elementwise_add_mkldnn_op.cc +++ b/paddle/fluid/operators/elementwise/mkldnn/elementwise_add_mkldnn_op.cc @@ -136,9 +136,8 @@ class EltwiseAddMKLDNNKernel : public framework::OpKernel { std::vector srcs_pd; std::vector scales = {1.0f, 1.0f}; - const std::string key = platform::CreateKey( - src_x_tz, ctx.op().Output("Out") + std::to_string(x->format()) + - std::to_string(y->format())); + const std::string key = + platform::CreateKey(src_x_tz, ctx.op().Output("Out")); platform::SumMKLDNNHandler handler(dev_ctx, mkldnn_engine, key); diff --git a/paddle/fluid/operators/mkldnn/batch_norm_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/batch_norm_mkldnn_op.cc index 6b1c870c3c1..189f512d316 100644 --- a/paddle/fluid/operators/mkldnn/batch_norm_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/batch_norm_mkldnn_op.cc @@ -40,8 +40,7 @@ class BatchNormMKLDNNHandler : platform::MKLDNNHandlerT( dev_ctx, dev_ctx.GetEngine(), cpu_place, - platform::CreateKey(dims, epsilon, flags, global_stats, fmt, - uniq_name)) { + platform::CreateKey(dims, uniq_name)) { auto md = mkldnn::memory::desc(dims, platform::MKLDNNGetDataType(), fmt); this->AcquireForwardPrimitiveDescriptor( @@ -59,8 +58,7 @@ class BatchNormMKLDNNHandler : platform::MKLDNNHandlerT( dev_ctx, dev_ctx.GetEngine(), cpu_place, - platform::CreateKey(dims, epsilon, flags, false, src_fmt, - uniq_name)) { + platform::CreateKey(dims, uniq_name)) { auto diff_dst_md = mkldnn::memory::desc(dims, platform::MKLDNNGetDataType(), diff_fmt); auto src_md = diff --git a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc index 8010b52a1db..4baf65fb742 100644 --- a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc @@ -150,9 +150,8 @@ class ConcatMKLDNNOpKernel : public paddle::framework::OpKernel { ConcatPrimitiveFactory prim_creator; std::string key = platform::CreateKey( - paddle::framework::vectorize(multi_input[0]->dims()), concat_axis, - ctx.op().Output("Out"), dt, multi_input[0]->format(), - platform::ThreadIDasStr()); + paddle::framework::vectorize(multi_input[0]->dims()), + ctx.op().Output("Out"), dt, platform::ThreadIDasStr()); const std::string key_prim = key + "@concat_p"; const std::string key_concat_pd = key + "@concat_pd"; const std::string key_srcs = key + "@concat_srcs"; diff --git a/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc index 86c7d7a5cc6..a9fc17ce89b 100644 --- a/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc @@ -220,8 +220,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel { // Get unique name for storing MKLDNN primitives const std::string key = platform::CreateKey( - src_tz, weights_tz, fuse_activation, strides, paddings, dilations, - groups, ctx.op().Input("Input") + ctx.op().Input("Filter")); + src_tz, ctx.op().Input("Input") + ctx.op().Input("Filter")); std::vector pipeline; @@ -450,9 +449,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel { paddle::framework::ToMKLDNNDataType(input->type()); std::string key = platform::CreateKey( - src_tz, weights_tz, strides, paddings, dilations, groups, src_dt, - input->format(), fuse_activation, fuse_residual_conn, - ctx.op().Input("Input") + ctx.op().Input("Filter")); + src_tz, src_dt, ctx.op().Input("Input") + ctx.op().Input("Filter")); std::shared_ptr conv_p; std::shared_ptr src_memory_p; @@ -662,8 +659,7 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel { // as well as attributes of primitive to be created // This name will be used as key when saving info into device context const std::string key = platform::CreateKey( - src_tz, weights_tz, "", strides, paddings, dilations, groups, - ctx.op().Input("Input") + ctx.op().Input("Filter")); + src_tz, ctx.op().Input("Input") + ctx.op().Input("Filter")); const std::string key_conv_pd = key + "@conv_pd"; std::vector pipeline; diff --git a/paddle/fluid/operators/mkldnn/conv_transpose_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/conv_transpose_mkldnn_op.cc index 84240d30fe1..2af20228c81 100644 --- a/paddle/fluid/operators/mkldnn/conv_transpose_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/conv_transpose_mkldnn_op.cc @@ -128,8 +128,7 @@ class ConvTransposeMKLDNNOpKernel : public paddle::framework::OpKernel { // Get unique name for storing MKLDNN primitives const std::string key = - platform::CreateKey(src_tz, weights_tz, strides, paddings, dilations, - groups, ctx.op().Output("Output")); + platform::CreateKey(src_tz, ctx.op().Output("Output")); std::vector pipeline; diff --git a/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc index b74e7127ea3..4353c621365 100644 --- a/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc @@ -52,8 +52,8 @@ class DeQuantOpKernel : public framework::OpKernel { mkldnn::memory::data_type src_dt = paddle::framework::ToMKLDNNDataType(input->type()); MKLDNNMemoryFormat src_fmt = input->format(); - std::string key = platform::CreateKey(src_dt, src_tz, reorder_scale[0], - ctx.op().Output("Output")); + std::string key = + platform::CreateKey(src_dt, src_tz, ctx.op().Output("Output")); const std::string key_prim = key + "@reorder_p"; const std::string key_src_mem = key + "@src_mem"; const std::string key_dst_mem = key + "@dst_mem"; diff --git a/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc index a910deef528..349dbffb386 100644 --- a/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc @@ -228,8 +228,7 @@ std::shared_ptr> GetPrimitiveFactory( const Tensor* input, const Tensor* weights, const mkldnn::engine& mkldnn_engine) { const std::string key = platform::CreateKey( - input->format(), framework::vectorize(weights->dims()), - ctx.op().Output("Out")); + framework::vectorize(weights->dims()), ctx.op().Output("Out")); auto prim_creator = std::static_pointer_cast>(dev_ctx.GetBlob(key)); diff --git a/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc index 4bdd93d08ec..7d41960214d 100644 --- a/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc @@ -340,8 +340,7 @@ std::shared_ptr> GetPrimitiveFactory( const Tensor *input_x, const Tensor *input_y, const mkldnn::engine &mkldnn_engine, bool enable_quant) { const std::string key = platform::CreateKey( - input_x->format(), input_x->type(), - framework::vectorize(input_x->dims()), input_y->format(), + input_x->type(), framework::vectorize(input_x->dims()), input_y->type(), framework::vectorize(input_y->dims()), ctx.op().Output("Out")); diff --git a/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc index 690f9271fb7..f6b0a9ac528 100644 --- a/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc @@ -45,7 +45,7 @@ class SoftmaxMKLDNNHandler : platform::MKLDNNHandlerT( dev_ctx, dev_ctx.GetEngine(), cpu_place, - platform::CreateKey(dims, axis, uniq_name)) { + platform::CreateKey(dims, uniq_name)) { auto md = mkldnn::memory::desc(dims, platform::MKLDNNGetDataType(), fmt); this->AcquireForwardPrimitiveDescriptor(prop_kind::forward_scoring, md, @@ -60,7 +60,7 @@ class SoftmaxMKLDNNHandler : platform::MKLDNNHandlerT( dev_ctx, dev_ctx.GetEngine(), cpu_place, - platform::CreateKey(dims, axis, uniq_name)) { + platform::CreateKey(dims, uniq_name)) { auto data_softmax_md = mkldnn::memory::desc(dims, platform::MKLDNNGetDataType(), fmt); auto diff_softmax_md = diff --git a/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc index bcf919fadcf..a091122c5c1 100644 --- a/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc @@ -46,8 +46,7 @@ class TransposeMKLDNNOpKernel : public paddle::framework::OpKernel { auto nchw_tz = paddle::framework::vectorize(input->dims()); const std::string key = - platform::CreateKey(nchw_tz, axis, ctx.op().Output("Out") + - std::to_string(input->format())); + platform::CreateKey(nchw_tz, ctx.op().Output("Out")); platform::TransposeMKLDNNHandler handler(nchw_tz, axis, dev_ctx, mkldnn_engine, key); @@ -100,7 +99,7 @@ class TransposeMKLDNNGradOpKernel : public paddle::framework::OpKernel { auto nchw_tz = paddle::framework::vectorize(out_grad->dims()); const std::string key = platform::CreateKey( - nchw_tz, axis, ctx.op().Output(framework::GradVarName("X"))); + nchw_tz, ctx.op().Output(framework::GradVarName("X"))); platform::TransposeMKLDNNHandler handler(nchw_tz, reversed_axis, dev_ctx, mkldnn_engine, key); diff --git a/paddle/fluid/platform/mkldnn_reuse.h b/paddle/fluid/platform/mkldnn_reuse.h index 50c629e53ef..aa0b3d7d335 100644 --- a/paddle/fluid/platform/mkldnn_reuse.h +++ b/paddle/fluid/platform/mkldnn_reuse.h @@ -420,8 +420,7 @@ class ActivationMKLDNNHandler : platform::MKLDNNHandlerT( dev_ctx, dev_ctx.GetEngine(), cpu_place, - platform::CreateKey(dims, algorithm, fmt, alpha, beta, - unique_name)) { + platform::CreateKey(dims, unique_name)) { auto md = mkldnn::memory::desc(dims, platform::MKLDNNGetDataType(), fmt); this->AcquireForwardPrimitiveDescriptor( @@ -441,8 +440,7 @@ class ActivationMKLDNNHandler : platform::MKLDNNHandlerT( dev_ctx, dev_ctx.GetEngine(), cpu_place, - platform::CreateKey(dims, algorithm, fmt, alpha, beta, - unique_name)) { + platform::CreateKey(dims, unique_name)) { auto diff_dst_md = platform::MKLDNNMemDesc( dims, platform::MKLDNNGetDataType(), diff_fmt); auto src_md = @@ -473,7 +471,7 @@ class LRNMKLDNNHandler : platform::MKLDNNHandlerT( dev_ctx, dev_ctx.GetEngine(), cpu_place, - platform::CreateKey(dims, n, alpha, beta, k, fmt, unique_name)) { + platform::CreateKey(dims, unique_name)) { auto src_md = mkldnn::memory::desc(dims, platform::MKLDNNGetDataType(), fmt); this->AcquireForwardPrimitiveDescriptor( @@ -491,7 +489,7 @@ class LRNMKLDNNHandler : platform::MKLDNNHandlerT( dev_ctx, dev_ctx.GetEngine(), cpu_place, - platform::CreateKey(dims, n, alpha, beta, k, fmt, unique_name)) { + platform::CreateKey(dims, unique_name)) { auto src_md = mkldnn::memory::desc(dims, platform::MKLDNNGetDataType(), fmt); auto diff_md = @@ -533,8 +531,7 @@ class PoolingMKLDNNHandler : public MKLDNNHandlerT( dev_ctx, dev_ctx.GetEngine(), cpu_place, - platform::CreateKey(src_dims, pooling_type, ksize, strides, - paddings, dt, fmt, unique_name)) { + platform::CreateKey(src_dims, dt, unique_name)) { auto src_md = mkldnn::memory::desc(src_dims, dt, fmt); /* create memory descriptor for pooling without specified format * ('any') which lets a primitive (pooling in this case) choose @@ -574,8 +571,7 @@ class PoolingMKLDNNHandler : public MKLDNNHandlerT( dev_ctx, dev_ctx.GetEngine(), cpu_place, - platform::CreateKey(diff_src_dims, pooling_type, ksize, strides, - paddings, dt, fmt, unique_name)) { + platform::CreateKey(diff_src_dims, dt, unique_name)) { auto diff_dst_md = mkldnn::memory::desc( diff_dst_dims, platform::MKLDNNGetDataType(), diff_dst_fmt); auto diff_src_md = -- GitLab