diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index 18287f0c7a4eec2c93b049251ddc0b0863604828..d8eab0e9a729744d8c3608891cb2f636414821a6 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -1908,7 +1908,8 @@ Scope* OperatorWithKernel::PrepareData( (var->IsType() == true) && (expected_kernel_key.data_layout_ != DataLayout::kMKLDNN) && (paddle::platform::MKLDNNDeviceContext::tls() - .get_cur_paddle_data_layout() == DataLayout::kNHWC)) { + .get_cur_paddle_data_layout() == DataLayout::kNHWC) && + (tensor_in->dims().size() >= 3)) { // Mixed execution : MKL-DNN and GPU is not supported! if (!new_scope) { new_scope = &scope.NewScope(); diff --git a/paddle/fluid/operators/controlflow/conditional_block_op.cc b/paddle/fluid/operators/controlflow/conditional_block_op.cc index 6bf419c47a5669b87c0b47d48259362a66a23239..fd06e33a6bb6e03b8e90c47ae3edbb9ce18e0e85 100644 --- a/paddle/fluid/operators/controlflow/conditional_block_op.cc +++ b/paddle/fluid/operators/controlflow/conditional_block_op.cc @@ -17,6 +17,10 @@ limitations under the License. */ #include "paddle/fluid/operators/assign_op.h" #include "paddle/phi/kernels/funcs/math_function.h" +#ifdef PADDLE_WITH_MKLDNN +#include "paddle/fluid/platform/mkldnn_helper.h" +#endif + namespace paddle { namespace operators { @@ -65,6 +69,12 @@ class ConditionalBlockOp : public ConditionalOp { scopes->resize(1); scopes->front() = &scope.NewScope(); auto &cur_scope = *scopes->front(); +#ifdef PADDLE_WITH_MKLDNN + // (jczaja) Executor on being destroyed clears oneDNN cache and + // reset registered model data layout. This is unwanted for nested + // Executors (executors declared inside control ops) + platform::DontClearMKLDNNCache(dev_place); +#endif framework::Executor exec(dev_place); auto *block = Attr("sub_block"); VLOG(3) << "Conditional block.idx = " << block->ID() diff --git a/paddle/fluid/operators/controlflow/while_op.cc b/paddle/fluid/operators/controlflow/while_op.cc index eb44655c88f1848e26219fd87e03e9a4e8e587ae..d8daa25f31be8d0eac90667daed40367cc037d35 100644 --- a/paddle/fluid/operators/controlflow/while_op.cc +++ b/paddle/fluid/operators/controlflow/while_op.cc @@ -17,6 +17,9 @@ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/operators/controlflow/while_op_helper.h" +#ifdef PADDLE_WITH_MKLDNN +#include "paddle/fluid/platform/mkldnn_helper.h" +#endif namespace paddle { namespace framework { class InferShapeContext; @@ -66,6 +69,12 @@ class WhileOp : public framework::OperatorBase { "the Condition's shape is ", cond.dims().to_str(), ".\n")); +#ifdef PADDLE_WITH_MKLDNN + // (jczaja) Executor on being destroyed clears oneDNN cache and + // resets registered model data layout. This is unwanted for nested + // Executors (executors declared inside control ops) + platform::DontClearMKLDNNCache(dev_place); +#endif framework::Executor executor(dev_place); auto *block = Attr(kStepBlock); diff --git a/paddle/fluid/operators/crop_op.cc b/paddle/fluid/operators/crop_op.cc index f2beb4cec212e245a7e8a825b7b660c8633b2acb..9de5bc6ea36368e51f52916f50443a6a732c5b68 100644 --- a/paddle/fluid/operators/crop_op.cc +++ b/paddle/fluid/operators/crop_op.cc @@ -97,7 +97,7 @@ Crop Operator. Crop input into output, as specified by offsets and shape. There are two ways to set the offsets: -1. In runtime: Using the input 'Offsets', which is a Vairbale and can be +1. In runtime: Using the input 'Offsets', which is a Variable and can be output of other operators. This way is suitable for dynamic offsets. 2. In network configuration: Using the attribute 'offsets', which will be diff --git a/paddle/fluid/operators/mkldnn/nhwc_op_tests.cmake b/paddle/fluid/operators/mkldnn/nhwc_op_tests.cmake index 3ebfbdc50caab4cbe4f026f1ecdba2da6e5ce44d..8bad3e86b2934e51d763246ce1d4e1330da6e54a 100644 --- a/paddle/fluid/operators/mkldnn/nhwc_op_tests.cmake +++ b/paddle/fluid/operators/mkldnn/nhwc_op_tests.cmake @@ -1 +1 @@ -cc_test(test_mkldnn_op_nhwc SRCS mkldnn/test_mkldnn_op_nhwc.cc DEPS op_registry pool_op shape_op activation_op pooling transpose_op scope device_context enforce executor) +cc_test(test_mkldnn_op_nhwc SRCS mkldnn/test_mkldnn_op_nhwc.cc DEPS op_registry pool_op shape_op crop_op activation_op pooling transpose_op scope device_context enforce executor) diff --git a/paddle/fluid/operators/mkldnn/test_mkldnn_op_nhwc.cc b/paddle/fluid/operators/mkldnn/test_mkldnn_op_nhwc.cc index 4ff93ee3cd6241a5fe4b2190575472974845a4ac..b9866ba8c364794f67935c2855ae2b51c4b443b5 100644 --- a/paddle/fluid/operators/mkldnn/test_mkldnn_op_nhwc.cc +++ b/paddle/fluid/operators/mkldnn/test_mkldnn_op_nhwc.cc @@ -34,6 +34,8 @@ USE_OP_ITSELF(transpose); USE_OP_DEVICE_KERNEL(transpose, MKLDNN); USE_OP_ITSELF(shape); USE_OP_DEVICE_KERNEL(shape, MKLDNN); +USE_OP_ITSELF(crop); +USE_OP_DEVICE_KERNEL(crop, CPU); PD_DECLARE_KERNEL(pool2d, CPU, ALL_LAYOUT); PD_DECLARE_KERNEL(relu, CPU, ALL_LAYOUT); @@ -211,5 +213,68 @@ TEST(test_pool2d_shape_nhwc, cpu_place) { "Computed shape does not match expected shape")); } +TEST(test_pool2d_crop_nhwc, cpu_place) { + framework::DDim dims({1, 4, 8, 512}); // NHWC shape + framework::DDim expected_dims({1, 3, 7, 512}); // NCHW expected shape + platform::CPUPlace p; + framework::Scope scope; + + InputVars input_name = {"x", + scope.Var("x")->GetMutable()}; + InputVars second_crop_input_name = { + "v", scope.Var("v")->GetMutable()}; + // Initialize input data + std::uniform_real_distribution dist(10.0f, 20.0f); + std::mt19937 engine; + size_t numel = static_cast(phi::product(dims)); + input_name.tensor->Resize(dims); + auto data_ptr = input_name.tensor->mutable_data(p); + for (size_t i = 0; i < numel; ++i) { + data_ptr[i] = dist(engine); + } + // Second input (Y) to crop is having no buffer + // but as it is MKLDNN then its shape order should be NCHW + auto expected_dims_nchw = phi::vectorize(expected_dims); + std::rotate(expected_dims_nchw.begin() + 1, expected_dims_nchw.end() - 1, + expected_dims_nchw.end()); + second_crop_input_name.tensor->Resize(phi::make_ddim(expected_dims_nchw)); + const auto second_crop_input_md = + dnnl::memory::desc(expected_dims_nchw, dnnl::memory::data_type::f32, + dnnl::memory::format_tag::nhwc); + second_crop_input_name.tensor->set_mem_desc(second_crop_input_md); + + scope.Var("y")->GetMutable(); + auto *z = scope.Var("z")->GetMutable(); + + auto &pool = platform::DeviceContextPool::Instance(); + + // Make pool2d followed by crop. crop may have Y input as + // non buffered so the path to be executed is handling oneDNN kernel + // that is followed by CPU kernel with non-buffered Input + + auto ksize = std::vector(2, 2); + auto op_pool = framework::OpRegistry::CreateOp( + "pool2d", {{"X", {"x"}}}, {{"Out", {"y"}}}, + {{"pooling_type", {std::string("max")}}, + {"ksize", {ksize}}, + {"data_format", {std::string("NHWC")}}, + {"use_mkldnn", {true}}}); + + std::vector offsets{0, 0, 0, 0}; + auto op_crop = framework::OpRegistry::CreateOp( + "crop", {{"X", {"y"}}, {"Y", {"v"}}}, {{"Out", {"z"}}}, + {{"offsets", {offsets}}}); + + op_pool->Run(scope, p); + op_crop->Run(scope, p); + + pool.Get(p)->Wait(); + + // Verify shape of output + PADDLE_ENFORCE_EQ(z->dims(), expected_dims, + platform::errors::InvalidArgument( + "Output shape does not match expected output shape")); +} + } // namespace operators } // namespace paddle diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc index 0bf5ca7f8f52572943c711eb6b960aedb7b13acd..09a29c3429cba5ce71da7138097bded7572913f5 100644 --- a/paddle/fluid/platform/device_context.cc +++ b/paddle/fluid/platform/device_context.cc @@ -750,7 +750,7 @@ dnnl::stream& MKLDNNDeviceContextThreadLocals::Body::get_stream(void) { void MKLDNNDeviceContext::ResetBlobMap(void* ptr) { VLOG(4) << tls().get_curr_exec() << " " << ptr; std::lock_guard lock(*p_mutex_); - if (!block_next_cache_clearing_) { + if (block_next_cache_clearing_ == 0) { VLOG(3) << "Clearing DNNL cache."; // If no specific executor pointer then clear // everything. For executor pointer then clear only @@ -768,9 +768,20 @@ void MKLDNNDeviceContext::ResetBlobMap(void* ptr) { s.second->erase(ptr); } } + // Reset paddle layout to NCHW + VLOG(3) << "Resetting Paddle data layout to NCHW."; + platform::MKLDNNDeviceContext::tls().set_cur_paddle_data_layout( + paddle::framework::DataLayout::kNCHW); } else { - VLOG(3) << "Prevented Clearing DNNL cache."; - block_next_cache_clearing_ = false; + --block_next_cache_clearing_; + VLOG(3) << "Prevented Clearing DNNL cache. Updated " + "block_next_cache_clearing_ : " + << block_next_cache_clearing_; + PADDLE_ENFORCE_GE(block_next_cache_clearing_, 0, + platform::errors::InvalidArgument( + "Cache clearing mark should be non-negative " + ". But received %d.", + block_next_cache_clearing_)); } } @@ -796,8 +807,10 @@ void MKLDNNDeviceContext::LinkEntryWithExecutor(BlobPtr_t pblob, void MKLDNNDeviceContext::BlockNextCacheClearing() { std::lock_guard lock(*p_mutex_); - VLOG(3) << "Next DNNL cache clearing has been blocked."; - block_next_cache_clearing_ = true; + ++block_next_cache_clearing_; + VLOG(3) << "Next DNNL cache clearing has been blocked. Updated " + "block_next_cache_clearing_ : " + << block_next_cache_clearing_; } size_t MKLDNNDeviceContext::GetShapeBlobSize() const { diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h index 2b53ecf86a641f3579f6ff6f6c78f6c2c55d83da..a63d41405f1b2dac733d419e954d74ba35f63252 100644 --- a/paddle/fluid/platform/device_context.h +++ b/paddle/fluid/platform/device_context.h @@ -850,7 +850,8 @@ class MKLDNNDeviceContext : public CPUDeviceContext { // to erase std::shared_ptr p_exec_items_; std::shared_ptr p_mutex_; - bool block_next_cache_clearing_ = false; + // 0 - clearing is allowed. x > 0 do not clear. + unsigned int block_next_cache_clearing_ = 0; }; #endif diff --git a/paddle/fluid/platform/mkldnn_helper.h b/paddle/fluid/platform/mkldnn_helper.h index 94c0124440ea9e671b0a9b2e17bdccd6923b20f5..5e77046962931302f06080a5dd1275d7fd2b0d7a 100644 --- a/paddle/fluid/platform/mkldnn_helper.h +++ b/paddle/fluid/platform/mkldnn_helper.h @@ -148,8 +148,6 @@ inline void ClearMKLDNNCache(const platform::Place& place, platform::MKLDNNDeviceContext* dev_ctx = (platform::MKLDNNDeviceContext*)pool.Get(place); dev_ctx->ResetBlobMap(ptr); - platform::MKLDNNDeviceContext::tls().set_cur_paddle_data_layout( - paddle::framework::DataLayout::kNCHW); } }