From d414af940a956b51c0586b14f5b65265284bfe1a Mon Sep 17 00:00:00 2001 From: Jacek Czaja Date: Mon, 23 May 2022 09:38:36 +0200 Subject: [PATCH] [Internal reviewing] NHWC fix to am_vocoder model for oneDNN 2.6 (#42729) * - prototype of reimplemented fixes * - compilation fixes * - compilation fix * - cosmetic info * - hopefully fix * - compilation fix * - supported for nested blocking of cache clearing * - fix * - Unit test to changes * - Compilation fix to windows (hopefully) * - Moved resetting layout to ResetBlob * - fixes after review --- paddle/fluid/framework/operator.cc | 3 +- .../controlflow/conditional_block_op.cc | 10 +++ .../fluid/operators/controlflow/while_op.cc | 9 +++ paddle/fluid/operators/crop_op.cc | 2 +- .../operators/mkldnn/nhwc_op_tests.cmake | 2 +- .../operators/mkldnn/test_mkldnn_op_nhwc.cc | 65 +++++++++++++++++++ paddle/fluid/platform/device_context.cc | 23 +++++-- paddle/fluid/platform/device_context.h | 3 +- paddle/fluid/platform/mkldnn_helper.h | 2 - 9 files changed, 108 insertions(+), 11 deletions(-) diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index 18287f0c7a4..d8eab0e9a72 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -1908,7 +1908,8 @@ Scope* OperatorWithKernel::PrepareData( (var->IsType() == true) && (expected_kernel_key.data_layout_ != DataLayout::kMKLDNN) && (paddle::platform::MKLDNNDeviceContext::tls() - .get_cur_paddle_data_layout() == DataLayout::kNHWC)) { + .get_cur_paddle_data_layout() == DataLayout::kNHWC) && + (tensor_in->dims().size() >= 3)) { // Mixed execution : MKL-DNN and GPU is not supported! if (!new_scope) { new_scope = &scope.NewScope(); diff --git a/paddle/fluid/operators/controlflow/conditional_block_op.cc b/paddle/fluid/operators/controlflow/conditional_block_op.cc index 6bf419c47a5..fd06e33a6bb 100644 --- a/paddle/fluid/operators/controlflow/conditional_block_op.cc +++ b/paddle/fluid/operators/controlflow/conditional_block_op.cc @@ -17,6 +17,10 @@ limitations under the License. */ #include "paddle/fluid/operators/assign_op.h" #include "paddle/phi/kernels/funcs/math_function.h" +#ifdef PADDLE_WITH_MKLDNN +#include "paddle/fluid/platform/mkldnn_helper.h" +#endif + namespace paddle { namespace operators { @@ -65,6 +69,12 @@ class ConditionalBlockOp : public ConditionalOp { scopes->resize(1); scopes->front() = &scope.NewScope(); auto &cur_scope = *scopes->front(); +#ifdef PADDLE_WITH_MKLDNN + // (jczaja) Executor on being destroyed clears oneDNN cache and + // reset registered model data layout. This is unwanted for nested + // Executors (executors declared inside control ops) + platform::DontClearMKLDNNCache(dev_place); +#endif framework::Executor exec(dev_place); auto *block = Attr("sub_block"); VLOG(3) << "Conditional block.idx = " << block->ID() diff --git a/paddle/fluid/operators/controlflow/while_op.cc b/paddle/fluid/operators/controlflow/while_op.cc index eb44655c88f..d8daa25f31b 100644 --- a/paddle/fluid/operators/controlflow/while_op.cc +++ b/paddle/fluid/operators/controlflow/while_op.cc @@ -17,6 +17,9 @@ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/operators/controlflow/while_op_helper.h" +#ifdef PADDLE_WITH_MKLDNN +#include "paddle/fluid/platform/mkldnn_helper.h" +#endif namespace paddle { namespace framework { class InferShapeContext; @@ -66,6 +69,12 @@ class WhileOp : public framework::OperatorBase { "the Condition's shape is ", cond.dims().to_str(), ".\n")); +#ifdef PADDLE_WITH_MKLDNN + // (jczaja) Executor on being destroyed clears oneDNN cache and + // resets registered model data layout. This is unwanted for nested + // Executors (executors declared inside control ops) + platform::DontClearMKLDNNCache(dev_place); +#endif framework::Executor executor(dev_place); auto *block = Attr(kStepBlock); diff --git a/paddle/fluid/operators/crop_op.cc b/paddle/fluid/operators/crop_op.cc index f2beb4cec21..9de5bc6ea36 100644 --- a/paddle/fluid/operators/crop_op.cc +++ b/paddle/fluid/operators/crop_op.cc @@ -97,7 +97,7 @@ Crop Operator. Crop input into output, as specified by offsets and shape. There are two ways to set the offsets: -1. In runtime: Using the input 'Offsets', which is a Vairbale and can be +1. In runtime: Using the input 'Offsets', which is a Variable and can be output of other operators. This way is suitable for dynamic offsets. 2. In network configuration: Using the attribute 'offsets', which will be diff --git a/paddle/fluid/operators/mkldnn/nhwc_op_tests.cmake b/paddle/fluid/operators/mkldnn/nhwc_op_tests.cmake index 3ebfbdc50ca..8bad3e86b29 100644 --- a/paddle/fluid/operators/mkldnn/nhwc_op_tests.cmake +++ b/paddle/fluid/operators/mkldnn/nhwc_op_tests.cmake @@ -1 +1 @@ -cc_test(test_mkldnn_op_nhwc SRCS mkldnn/test_mkldnn_op_nhwc.cc DEPS op_registry pool_op shape_op activation_op pooling transpose_op scope device_context enforce executor) +cc_test(test_mkldnn_op_nhwc SRCS mkldnn/test_mkldnn_op_nhwc.cc DEPS op_registry pool_op shape_op crop_op activation_op pooling transpose_op scope device_context enforce executor) diff --git a/paddle/fluid/operators/mkldnn/test_mkldnn_op_nhwc.cc b/paddle/fluid/operators/mkldnn/test_mkldnn_op_nhwc.cc index 4ff93ee3cd6..b9866ba8c36 100644 --- a/paddle/fluid/operators/mkldnn/test_mkldnn_op_nhwc.cc +++ b/paddle/fluid/operators/mkldnn/test_mkldnn_op_nhwc.cc @@ -34,6 +34,8 @@ USE_OP_ITSELF(transpose); USE_OP_DEVICE_KERNEL(transpose, MKLDNN); USE_OP_ITSELF(shape); USE_OP_DEVICE_KERNEL(shape, MKLDNN); +USE_OP_ITSELF(crop); +USE_OP_DEVICE_KERNEL(crop, CPU); PD_DECLARE_KERNEL(pool2d, CPU, ALL_LAYOUT); PD_DECLARE_KERNEL(relu, CPU, ALL_LAYOUT); @@ -211,5 +213,68 @@ TEST(test_pool2d_shape_nhwc, cpu_place) { "Computed shape does not match expected shape")); } +TEST(test_pool2d_crop_nhwc, cpu_place) { + framework::DDim dims({1, 4, 8, 512}); // NHWC shape + framework::DDim expected_dims({1, 3, 7, 512}); // NCHW expected shape + platform::CPUPlace p; + framework::Scope scope; + + InputVars input_name = {"x", + scope.Var("x")->GetMutable()}; + InputVars second_crop_input_name = { + "v", scope.Var("v")->GetMutable()}; + // Initialize input data + std::uniform_real_distribution dist(10.0f, 20.0f); + std::mt19937 engine; + size_t numel = static_cast(phi::product(dims)); + input_name.tensor->Resize(dims); + auto data_ptr = input_name.tensor->mutable_data(p); + for (size_t i = 0; i < numel; ++i) { + data_ptr[i] = dist(engine); + } + // Second input (Y) to crop is having no buffer + // but as it is MKLDNN then its shape order should be NCHW + auto expected_dims_nchw = phi::vectorize(expected_dims); + std::rotate(expected_dims_nchw.begin() + 1, expected_dims_nchw.end() - 1, + expected_dims_nchw.end()); + second_crop_input_name.tensor->Resize(phi::make_ddim(expected_dims_nchw)); + const auto second_crop_input_md = + dnnl::memory::desc(expected_dims_nchw, dnnl::memory::data_type::f32, + dnnl::memory::format_tag::nhwc); + second_crop_input_name.tensor->set_mem_desc(second_crop_input_md); + + scope.Var("y")->GetMutable(); + auto *z = scope.Var("z")->GetMutable(); + + auto &pool = platform::DeviceContextPool::Instance(); + + // Make pool2d followed by crop. crop may have Y input as + // non buffered so the path to be executed is handling oneDNN kernel + // that is followed by CPU kernel with non-buffered Input + + auto ksize = std::vector(2, 2); + auto op_pool = framework::OpRegistry::CreateOp( + "pool2d", {{"X", {"x"}}}, {{"Out", {"y"}}}, + {{"pooling_type", {std::string("max")}}, + {"ksize", {ksize}}, + {"data_format", {std::string("NHWC")}}, + {"use_mkldnn", {true}}}); + + std::vector offsets{0, 0, 0, 0}; + auto op_crop = framework::OpRegistry::CreateOp( + "crop", {{"X", {"y"}}, {"Y", {"v"}}}, {{"Out", {"z"}}}, + {{"offsets", {offsets}}}); + + op_pool->Run(scope, p); + op_crop->Run(scope, p); + + pool.Get(p)->Wait(); + + // Verify shape of output + PADDLE_ENFORCE_EQ(z->dims(), expected_dims, + platform::errors::InvalidArgument( + "Output shape does not match expected output shape")); +} + } // namespace operators } // namespace paddle diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc index 0bf5ca7f8f5..09a29c3429c 100644 --- a/paddle/fluid/platform/device_context.cc +++ b/paddle/fluid/platform/device_context.cc @@ -750,7 +750,7 @@ dnnl::stream& MKLDNNDeviceContextThreadLocals::Body::get_stream(void) { void MKLDNNDeviceContext::ResetBlobMap(void* ptr) { VLOG(4) << tls().get_curr_exec() << " " << ptr; std::lock_guard lock(*p_mutex_); - if (!block_next_cache_clearing_) { + if (block_next_cache_clearing_ == 0) { VLOG(3) << "Clearing DNNL cache."; // If no specific executor pointer then clear // everything. For executor pointer then clear only @@ -768,9 +768,20 @@ void MKLDNNDeviceContext::ResetBlobMap(void* ptr) { s.second->erase(ptr); } } + // Reset paddle layout to NCHW + VLOG(3) << "Resetting Paddle data layout to NCHW."; + platform::MKLDNNDeviceContext::tls().set_cur_paddle_data_layout( + paddle::framework::DataLayout::kNCHW); } else { - VLOG(3) << "Prevented Clearing DNNL cache."; - block_next_cache_clearing_ = false; + --block_next_cache_clearing_; + VLOG(3) << "Prevented Clearing DNNL cache. Updated " + "block_next_cache_clearing_ : " + << block_next_cache_clearing_; + PADDLE_ENFORCE_GE(block_next_cache_clearing_, 0, + platform::errors::InvalidArgument( + "Cache clearing mark should be non-negative " + ". But received %d.", + block_next_cache_clearing_)); } } @@ -796,8 +807,10 @@ void MKLDNNDeviceContext::LinkEntryWithExecutor(BlobPtr_t pblob, void MKLDNNDeviceContext::BlockNextCacheClearing() { std::lock_guard lock(*p_mutex_); - VLOG(3) << "Next DNNL cache clearing has been blocked."; - block_next_cache_clearing_ = true; + ++block_next_cache_clearing_; + VLOG(3) << "Next DNNL cache clearing has been blocked. Updated " + "block_next_cache_clearing_ : " + << block_next_cache_clearing_; } size_t MKLDNNDeviceContext::GetShapeBlobSize() const { diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h index 2b53ecf86a6..a63d41405f1 100644 --- a/paddle/fluid/platform/device_context.h +++ b/paddle/fluid/platform/device_context.h @@ -850,7 +850,8 @@ class MKLDNNDeviceContext : public CPUDeviceContext { // to erase std::shared_ptr p_exec_items_; std::shared_ptr p_mutex_; - bool block_next_cache_clearing_ = false; + // 0 - clearing is allowed. x > 0 do not clear. + unsigned int block_next_cache_clearing_ = 0; }; #endif diff --git a/paddle/fluid/platform/mkldnn_helper.h b/paddle/fluid/platform/mkldnn_helper.h index 94c0124440e..5e770469629 100644 --- a/paddle/fluid/platform/mkldnn_helper.h +++ b/paddle/fluid/platform/mkldnn_helper.h @@ -148,8 +148,6 @@ inline void ClearMKLDNNCache(const platform::Place& place, platform::MKLDNNDeviceContext* dev_ctx = (platform::MKLDNNDeviceContext*)pool.Get(place); dev_ctx->ResetBlobMap(ptr); - platform::MKLDNNDeviceContext::tls().set_cur_paddle_data_layout( - paddle::framework::DataLayout::kNCHW); } } -- GitLab