提交 a1cd27f1 编写于 作者: J Jacek Czaja 提交者: Tao Luo

[MKL-DNN] Added mkl-dnn cache clearing when creating Executor instance (#20241)

* - Flushing mkl-dnn cache

test=develop

- Disabled clearing cache for LoadModel

- Added clearing of mkl-dnn cache when Executor is created

test=develop

- Do not clear for GPU places

test=develop

- compilation fix

test=develop

* - Moved clearing of mkl-dnn cache in destructor of executor

test=develop

* - Compilation fix

test=develop

- Reverted conditional clearing of mkl-dnn cache in Executor's
  destructor

test=develop

- compilation fix
上级 10505faf
...@@ -93,6 +93,20 @@ ExecutorPrepareContext::~ExecutorPrepareContext() { ...@@ -93,6 +93,20 @@ ExecutorPrepareContext::~ExecutorPrepareContext() {
Executor::Executor(const platform::Place& place) : place_(place) {} Executor::Executor(const platform::Place& place) : place_(place) {}
// Destructor. When built with PADDLE_WITH_MKLDNN and this executor is bound
// to a CPU place, resets the blob map (mkl-dnn cache) of the device context
// registered for that place. For any other place, or without MKL-DNN
// support compiled in, it does nothing.
Executor::~Executor() {
#ifdef PADDLE_WITH_MKLDNN
  // The mkl-dnn cache is cleared unconditionally for CPU places;
  // this is needed to have mkl-dnn unit tests working.
  // NOTE(review): pool.Get() is called from a destructor -- confirm it
  // cannot throw for a place this executor was constructed with.
  if (platform::is_cpu_place(place_)) {
    platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
    // Named cast instead of a C-style cast: the downcast stays greppable and
    // cannot silently degrade into a reinterpret_cast.
    auto* dev_ctx = static_cast<platform::MKLDNNDeviceContext*>(pool.Get(place_));
    dev_ctx->ResetBlobMap();
  }
#endif
}
void Executor::Close() { void Executor::Close() {
#ifdef PADDLE_WITH_DISTRIBUTE #ifdef PADDLE_WITH_DISTRIBUTE
// TODO(typhoonzero): complete message will need to use real trainer_id, // TODO(typhoonzero): complete message will need to use real trainer_id,
......
...@@ -58,6 +58,7 @@ class Executor { ...@@ -58,6 +58,7 @@ class Executor {
explicit Executor(const platform::Place& place); explicit Executor(const platform::Place& place);
~Executor();
/* /*
* Close this Executor. * Close this Executor.
* Calling this method will send complete messages to all pserver instances. * Calling this method will send complete messages to all pserver instances.
......
...@@ -136,9 +136,8 @@ class EltwiseAddMKLDNNKernel : public framework::OpKernel<T> { ...@@ -136,9 +136,8 @@ class EltwiseAddMKLDNNKernel : public framework::OpKernel<T> {
std::vector<memory::primitive_desc> srcs_pd; std::vector<memory::primitive_desc> srcs_pd;
std::vector<float> scales = {1.0f, 1.0f}; std::vector<float> scales = {1.0f, 1.0f};
const std::string key = platform::CreateKey( const std::string key =
src_x_tz, ctx.op().Output("Out") + std::to_string(x->format()) + platform::CreateKey(src_x_tz, ctx.op().Output("Out"));
std::to_string(y->format()));
platform::SumMKLDNNHandler handler(dev_ctx, mkldnn_engine, key); platform::SumMKLDNNHandler handler(dev_ctx, mkldnn_engine, key);
......
...@@ -40,8 +40,7 @@ class BatchNormMKLDNNHandler ...@@ -40,8 +40,7 @@ class BatchNormMKLDNNHandler
: platform::MKLDNNHandlerT<T, mkldnn::batch_normalization_forward, : platform::MKLDNNHandlerT<T, mkldnn::batch_normalization_forward,
mkldnn::batch_normalization_backward>( mkldnn::batch_normalization_backward>(
dev_ctx, dev_ctx.GetEngine(), cpu_place, dev_ctx, dev_ctx.GetEngine(), cpu_place,
platform::CreateKey(dims, epsilon, flags, global_stats, fmt, platform::CreateKey(dims, uniq_name)) {
uniq_name)) {
auto md = mkldnn::memory::desc(dims, platform::MKLDNNGetDataType<T>(), fmt); auto md = mkldnn::memory::desc(dims, platform::MKLDNNGetDataType<T>(), fmt);
this->AcquireForwardPrimitiveDescriptor( this->AcquireForwardPrimitiveDescriptor(
...@@ -59,8 +58,7 @@ class BatchNormMKLDNNHandler ...@@ -59,8 +58,7 @@ class BatchNormMKLDNNHandler
: platform::MKLDNNHandlerT<T, mkldnn::batch_normalization_forward, : platform::MKLDNNHandlerT<T, mkldnn::batch_normalization_forward,
mkldnn::batch_normalization_backward>( mkldnn::batch_normalization_backward>(
dev_ctx, dev_ctx.GetEngine(), cpu_place, dev_ctx, dev_ctx.GetEngine(), cpu_place,
platform::CreateKey(dims, epsilon, flags, false, src_fmt, platform::CreateKey(dims, uniq_name)) {
uniq_name)) {
auto diff_dst_md = auto diff_dst_md =
mkldnn::memory::desc(dims, platform::MKLDNNGetDataType<T>(), diff_fmt); mkldnn::memory::desc(dims, platform::MKLDNNGetDataType<T>(), diff_fmt);
auto src_md = auto src_md =
......
...@@ -150,9 +150,8 @@ class ConcatMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -150,9 +150,8 @@ class ConcatMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
ConcatPrimitiveFactory<T> prim_creator; ConcatPrimitiveFactory<T> prim_creator;
std::string key = platform::CreateKey( std::string key = platform::CreateKey(
paddle::framework::vectorize<int>(multi_input[0]->dims()), concat_axis, paddle::framework::vectorize<int>(multi_input[0]->dims()),
ctx.op().Output("Out"), dt, multi_input[0]->format(), ctx.op().Output("Out"), dt, platform::ThreadIDasStr());
platform::ThreadIDasStr());
const std::string key_prim = key + "@concat_p"; const std::string key_prim = key + "@concat_p";
const std::string key_concat_pd = key + "@concat_pd"; const std::string key_concat_pd = key + "@concat_pd";
const std::string key_srcs = key + "@concat_srcs"; const std::string key_srcs = key + "@concat_srcs";
......
...@@ -220,8 +220,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -220,8 +220,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
// Get unique name for storing MKLDNN primitives // Get unique name for storing MKLDNN primitives
const std::string key = platform::CreateKey( const std::string key = platform::CreateKey(
src_tz, weights_tz, fuse_activation, strides, paddings, dilations, src_tz, ctx.op().Input("Input") + ctx.op().Input("Filter"));
groups, ctx.op().Input("Input") + ctx.op().Input("Filter"));
std::vector<primitive> pipeline; std::vector<primitive> pipeline;
...@@ -450,9 +449,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -450,9 +449,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
paddle::framework::ToMKLDNNDataType(input->type()); paddle::framework::ToMKLDNNDataType(input->type());
std::string key = platform::CreateKey( std::string key = platform::CreateKey(
src_tz, weights_tz, strides, paddings, dilations, groups, src_dt, src_tz, src_dt, ctx.op().Input("Input") + ctx.op().Input("Filter"));
input->format(), fuse_activation, fuse_residual_conn,
ctx.op().Input("Input") + ctx.op().Input("Filter"));
std::shared_ptr<mkldnn::convolution_forward> conv_p; std::shared_ptr<mkldnn::convolution_forward> conv_p;
std::shared_ptr<mkldnn::memory> src_memory_p; std::shared_ptr<mkldnn::memory> src_memory_p;
...@@ -662,8 +659,7 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> { ...@@ -662,8 +659,7 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
// as well as attributes of primitive to be created // as well as attributes of primitive to be created
// This name will be used as key when saving info into device context // This name will be used as key when saving info into device context
const std::string key = platform::CreateKey( const std::string key = platform::CreateKey(
src_tz, weights_tz, "", strides, paddings, dilations, groups, src_tz, ctx.op().Input("Input") + ctx.op().Input("Filter"));
ctx.op().Input("Input") + ctx.op().Input("Filter"));
const std::string key_conv_pd = key + "@conv_pd"; const std::string key_conv_pd = key + "@conv_pd";
std::vector<primitive> pipeline; std::vector<primitive> pipeline;
......
...@@ -128,8 +128,7 @@ class ConvTransposeMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -128,8 +128,7 @@ class ConvTransposeMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
// Get unique name for storing MKLDNN primitives // Get unique name for storing MKLDNN primitives
const std::string key = const std::string key =
platform::CreateKey(src_tz, weights_tz, strides, paddings, dilations, platform::CreateKey(src_tz, ctx.op().Output("Output"));
groups, ctx.op().Output("Output"));
std::vector<mkldnn::primitive> pipeline; std::vector<mkldnn::primitive> pipeline;
......
...@@ -52,8 +52,8 @@ class DeQuantOpKernel : public framework::OpKernel<T> { ...@@ -52,8 +52,8 @@ class DeQuantOpKernel : public framework::OpKernel<T> {
mkldnn::memory::data_type src_dt = mkldnn::memory::data_type src_dt =
paddle::framework::ToMKLDNNDataType(input->type()); paddle::framework::ToMKLDNNDataType(input->type());
MKLDNNMemoryFormat src_fmt = input->format(); MKLDNNMemoryFormat src_fmt = input->format();
std::string key = platform::CreateKey(src_dt, src_tz, reorder_scale[0], std::string key =
ctx.op().Output("Output")); platform::CreateKey(src_dt, src_tz, ctx.op().Output("Output"));
const std::string key_prim = key + "@reorder_p"; const std::string key_prim = key + "@reorder_p";
const std::string key_src_mem = key + "@src_mem"; const std::string key_src_mem = key + "@src_mem";
const std::string key_dst_mem = key + "@dst_mem"; const std::string key_dst_mem = key + "@dst_mem";
......
...@@ -228,8 +228,7 @@ std::shared_ptr<FCPrimitiveFactory<T>> GetPrimitiveFactory( ...@@ -228,8 +228,7 @@ std::shared_ptr<FCPrimitiveFactory<T>> GetPrimitiveFactory(
const Tensor* input, const Tensor* weights, const Tensor* input, const Tensor* weights,
const mkldnn::engine& mkldnn_engine) { const mkldnn::engine& mkldnn_engine) {
const std::string key = platform::CreateKey( const std::string key = platform::CreateKey(
input->format(), framework::vectorize<int>(weights->dims()), framework::vectorize<int>(weights->dims()), ctx.op().Output("Out"));
ctx.op().Output("Out"));
auto prim_creator = auto prim_creator =
std::static_pointer_cast<FCPrimitiveFactory<T>>(dev_ctx.GetBlob(key)); std::static_pointer_cast<FCPrimitiveFactory<T>>(dev_ctx.GetBlob(key));
......
...@@ -340,8 +340,7 @@ std::shared_ptr<MulPrimitiveFactory<XT, YT, OT>> GetPrimitiveFactory( ...@@ -340,8 +340,7 @@ std::shared_ptr<MulPrimitiveFactory<XT, YT, OT>> GetPrimitiveFactory(
const Tensor *input_x, const Tensor *input_y, const Tensor *input_x, const Tensor *input_y,
const mkldnn::engine &mkldnn_engine, bool enable_quant) { const mkldnn::engine &mkldnn_engine, bool enable_quant) {
const std::string key = platform::CreateKey( const std::string key = platform::CreateKey(
input_x->format(), input_x->type(), input_x->type(), framework::vectorize<int>(input_x->dims()),
framework::vectorize<int>(input_x->dims()), input_y->format(),
input_y->type(), framework::vectorize<int>(input_y->dims()), input_y->type(), framework::vectorize<int>(input_y->dims()),
ctx.op().Output("Out")); ctx.op().Output("Out"));
......
...@@ -45,7 +45,7 @@ class SoftmaxMKLDNNHandler ...@@ -45,7 +45,7 @@ class SoftmaxMKLDNNHandler
: platform::MKLDNNHandlerT<T, mkldnn::softmax_forward, : platform::MKLDNNHandlerT<T, mkldnn::softmax_forward,
mkldnn::softmax_backward>( mkldnn::softmax_backward>(
dev_ctx, dev_ctx.GetEngine(), cpu_place, dev_ctx, dev_ctx.GetEngine(), cpu_place,
platform::CreateKey(dims, axis, uniq_name)) { platform::CreateKey(dims, uniq_name)) {
auto md = mkldnn::memory::desc(dims, platform::MKLDNNGetDataType<T>(), fmt); auto md = mkldnn::memory::desc(dims, platform::MKLDNNGetDataType<T>(), fmt);
this->AcquireForwardPrimitiveDescriptor(prop_kind::forward_scoring, md, this->AcquireForwardPrimitiveDescriptor(prop_kind::forward_scoring, md,
...@@ -60,7 +60,7 @@ class SoftmaxMKLDNNHandler ...@@ -60,7 +60,7 @@ class SoftmaxMKLDNNHandler
: platform::MKLDNNHandlerT<T, mkldnn::softmax_forward, : platform::MKLDNNHandlerT<T, mkldnn::softmax_forward,
mkldnn::softmax_backward>( mkldnn::softmax_backward>(
dev_ctx, dev_ctx.GetEngine(), cpu_place, dev_ctx, dev_ctx.GetEngine(), cpu_place,
platform::CreateKey(dims, axis, uniq_name)) { platform::CreateKey(dims, uniq_name)) {
auto data_softmax_md = auto data_softmax_md =
mkldnn::memory::desc(dims, platform::MKLDNNGetDataType<T>(), fmt); mkldnn::memory::desc(dims, platform::MKLDNNGetDataType<T>(), fmt);
auto diff_softmax_md = auto diff_softmax_md =
......
...@@ -46,8 +46,7 @@ class TransposeMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -46,8 +46,7 @@ class TransposeMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
auto nchw_tz = paddle::framework::vectorize<int>(input->dims()); auto nchw_tz = paddle::framework::vectorize<int>(input->dims());
const std::string key = const std::string key =
platform::CreateKey(nchw_tz, axis, ctx.op().Output("Out") + platform::CreateKey(nchw_tz, ctx.op().Output("Out"));
std::to_string(input->format()));
platform::TransposeMKLDNNHandler handler(nchw_tz, axis, dev_ctx, platform::TransposeMKLDNNHandler handler(nchw_tz, axis, dev_ctx,
mkldnn_engine, key); mkldnn_engine, key);
...@@ -100,7 +99,7 @@ class TransposeMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> { ...@@ -100,7 +99,7 @@ class TransposeMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
auto nchw_tz = paddle::framework::vectorize<int>(out_grad->dims()); auto nchw_tz = paddle::framework::vectorize<int>(out_grad->dims());
const std::string key = platform::CreateKey( const std::string key = platform::CreateKey(
nchw_tz, axis, ctx.op().Output(framework::GradVarName("X"))); nchw_tz, ctx.op().Output(framework::GradVarName("X")));
platform::TransposeMKLDNNHandler handler(nchw_tz, reversed_axis, dev_ctx, platform::TransposeMKLDNNHandler handler(nchw_tz, reversed_axis, dev_ctx,
mkldnn_engine, key); mkldnn_engine, key);
......
...@@ -420,8 +420,7 @@ class ActivationMKLDNNHandler ...@@ -420,8 +420,7 @@ class ActivationMKLDNNHandler
: platform::MKLDNNHandlerT<T, mkldnn::eltwise_forward, : platform::MKLDNNHandlerT<T, mkldnn::eltwise_forward,
mkldnn::eltwise_backward>( mkldnn::eltwise_backward>(
dev_ctx, dev_ctx.GetEngine(), cpu_place, dev_ctx, dev_ctx.GetEngine(), cpu_place,
platform::CreateKey(dims, algorithm, fmt, alpha, beta, platform::CreateKey(dims, unique_name)) {
unique_name)) {
auto md = mkldnn::memory::desc(dims, platform::MKLDNNGetDataType<T>(), fmt); auto md = mkldnn::memory::desc(dims, platform::MKLDNNGetDataType<T>(), fmt);
this->AcquireForwardPrimitiveDescriptor( this->AcquireForwardPrimitiveDescriptor(
...@@ -441,8 +440,7 @@ class ActivationMKLDNNHandler ...@@ -441,8 +440,7 @@ class ActivationMKLDNNHandler
: platform::MKLDNNHandlerT<T, mkldnn::eltwise_forward, : platform::MKLDNNHandlerT<T, mkldnn::eltwise_forward,
mkldnn::eltwise_backward>( mkldnn::eltwise_backward>(
dev_ctx, dev_ctx.GetEngine(), cpu_place, dev_ctx, dev_ctx.GetEngine(), cpu_place,
platform::CreateKey(dims, algorithm, fmt, alpha, beta, platform::CreateKey(dims, unique_name)) {
unique_name)) {
auto diff_dst_md = platform::MKLDNNMemDesc( auto diff_dst_md = platform::MKLDNNMemDesc(
dims, platform::MKLDNNGetDataType<T>(), diff_fmt); dims, platform::MKLDNNGetDataType<T>(), diff_fmt);
auto src_md = auto src_md =
...@@ -473,7 +471,7 @@ class LRNMKLDNNHandler ...@@ -473,7 +471,7 @@ class LRNMKLDNNHandler
: platform::MKLDNNHandlerT<T, mkldnn::lrn_forward, mkldnn::lrn_backward>( : platform::MKLDNNHandlerT<T, mkldnn::lrn_forward, mkldnn::lrn_backward>(
dev_ctx, dev_ctx.GetEngine(), cpu_place, dev_ctx, dev_ctx.GetEngine(), cpu_place,
platform::CreateKey(dims, n, alpha, beta, k, fmt, unique_name)) { platform::CreateKey(dims, unique_name)) {
auto src_md = auto src_md =
mkldnn::memory::desc(dims, platform::MKLDNNGetDataType<T>(), fmt); mkldnn::memory::desc(dims, platform::MKLDNNGetDataType<T>(), fmt);
this->AcquireForwardPrimitiveDescriptor( this->AcquireForwardPrimitiveDescriptor(
...@@ -491,7 +489,7 @@ class LRNMKLDNNHandler ...@@ -491,7 +489,7 @@ class LRNMKLDNNHandler
: platform::MKLDNNHandlerT<T, mkldnn::lrn_forward, mkldnn::lrn_backward>( : platform::MKLDNNHandlerT<T, mkldnn::lrn_forward, mkldnn::lrn_backward>(
dev_ctx, dev_ctx.GetEngine(), cpu_place, dev_ctx, dev_ctx.GetEngine(), cpu_place,
platform::CreateKey(dims, n, alpha, beta, k, fmt, unique_name)) { platform::CreateKey(dims, unique_name)) {
auto src_md = auto src_md =
mkldnn::memory::desc(dims, platform::MKLDNNGetDataType<T>(), fmt); mkldnn::memory::desc(dims, platform::MKLDNNGetDataType<T>(), fmt);
auto diff_md = auto diff_md =
...@@ -533,8 +531,7 @@ class PoolingMKLDNNHandler : public MKLDNNHandlerT<T, mkldnn::pooling_forward, ...@@ -533,8 +531,7 @@ class PoolingMKLDNNHandler : public MKLDNNHandlerT<T, mkldnn::pooling_forward,
: platform::MKLDNNHandlerT<T, mkldnn::pooling_forward, : platform::MKLDNNHandlerT<T, mkldnn::pooling_forward,
mkldnn::pooling_backward>( mkldnn::pooling_backward>(
dev_ctx, dev_ctx.GetEngine(), cpu_place, dev_ctx, dev_ctx.GetEngine(), cpu_place,
platform::CreateKey(src_dims, pooling_type, ksize, strides, platform::CreateKey(src_dims, dt, unique_name)) {
paddings, dt, fmt, unique_name)) {
auto src_md = mkldnn::memory::desc(src_dims, dt, fmt); auto src_md = mkldnn::memory::desc(src_dims, dt, fmt);
/* create memory descriptor for pooling without specified format /* create memory descriptor for pooling without specified format
* ('any') which lets a primitive (pooling in this case) choose * ('any') which lets a primitive (pooling in this case) choose
...@@ -574,8 +571,7 @@ class PoolingMKLDNNHandler : public MKLDNNHandlerT<T, mkldnn::pooling_forward, ...@@ -574,8 +571,7 @@ class PoolingMKLDNNHandler : public MKLDNNHandlerT<T, mkldnn::pooling_forward,
: platform::MKLDNNHandlerT<T, mkldnn::pooling_forward, : platform::MKLDNNHandlerT<T, mkldnn::pooling_forward,
mkldnn::pooling_backward>( mkldnn::pooling_backward>(
dev_ctx, dev_ctx.GetEngine(), cpu_place, dev_ctx, dev_ctx.GetEngine(), cpu_place,
platform::CreateKey(diff_src_dims, pooling_type, ksize, strides, platform::CreateKey(diff_src_dims, dt, unique_name)) {
paddings, dt, fmt, unique_name)) {
auto diff_dst_md = mkldnn::memory::desc( auto diff_dst_md = mkldnn::memory::desc(
diff_dst_dims, platform::MKLDNNGetDataType<T>(), diff_dst_fmt); diff_dst_dims, platform::MKLDNNGetDataType<T>(), diff_dst_fmt);
auto diff_src_md = auto diff_src_md =
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册