diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt
index 6009f0d2d0cf59097eea81c682849264b1a41cce..4cb141c421a8826412d53b7b11156241e75c3bc5 100644
--- a/paddle/fluid/operators/CMakeLists.txt
+++ b/paddle/fluid/operators/CMakeLists.txt
@@ -155,6 +155,7 @@ cc_test(op_debug_string_test SRCS op_debug_string_test.cc DEPS elementwise_add_o
 
 if(WITH_MKLDNN)
 include(mkldnn/inplace_op_tests.cmake)
+include(mkldnn/caching_tests.cmake)
 include(mkldnn/nhwc_op_tests.cmake)
 endif()
 
diff --git a/paddle/fluid/operators/mkldnn/caching_tests.cmake b/paddle/fluid/operators/mkldnn/caching_tests.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..ff910a18767dc86d179fe13d53d53f0596192b95
--- /dev/null
+++ b/paddle/fluid/operators/mkldnn/caching_tests.cmake
@@ -0,0 +1 @@
+cc_test(test_mkldnn_caching SRCS mkldnn/test_mkldnn_caching.cc DEPS op_registry elementwise_add_op activation_op softmax_op softmax scope device_context enforce)
diff --git a/paddle/fluid/operators/mkldnn/test_mkldnn_caching.cc b/paddle/fluid/operators/mkldnn/test_mkldnn_caching.cc
new file mode 100644
index 0000000000000000000000000000000000000000..f88b0d56218b5f7231fbebbd9c58d5e7d5b1ca3c
--- /dev/null
+++ b/paddle/fluid/operators/mkldnn/test_mkldnn_caching.cc
@@ -0,0 +1,180 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <map>
+#include <random>
+#include <string>
+#include <vector>
+#include "gtest/gtest.h"
+#include "paddle/fluid/framework/lod_tensor.h"
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/operator.h"
+#include "paddle/fluid/framework/scope.h"
+#include "paddle/fluid/platform/device_context.h"
+#include "paddle/fluid/platform/enforce.h"
+#include "paddle/fluid/platform/place.h"
+
+USE_OP(elementwise_add);
+USE_OP_DEVICE_KERNEL(elementwise_add, MKLDNN);
+USE_OP(relu);
+USE_OP_DEVICE_KERNEL(relu, MKLDNN);
+USE_OP(softmax);
+USE_OP_DEVICE_KERNEL(softmax, MKLDNN);
+
+namespace paddle {
+namespace operators {
+
+// Pairs a scope variable name with the tensor stored under that name.
+struct InputVars {
+  std::string name;
+  framework::LoDTensor *tensor;
+};
+
+// Resets the oneDNN blob cache on construction and afterwards reports whether
+// the number of cached objects equals an expected value.
+class CacheTester {
+ public:
+  CacheTester() {
+    // Clear oneDNN cache
+    auto &pool = platform::DeviceContextPool::Instance();
+    platform::CPUPlace place;
+    onednn_dev_ctx_ =
+        dynamic_cast<platform::MKLDNNDeviceContext *>(pool.Get(place));
+    // dynamic_cast yields nullptr when the CPU context is not a oneDNN one
+    PADDLE_ENFORCE_EQ(onednn_dev_ctx_ != nullptr, true,
+                      platform::errors::InvalidArgument(
+                          "CPU device context is not a MKLDNNDeviceContext"));
+    onednn_dev_ctx_->ResetBlobMap();
+  }
+
+  bool Analyze(unsigned short int num_entries) const {
+    // Number of created objects in cache should be as expected (num_entries)
+    return onednn_dev_ctx_->GetCachedObjectsNumber() == num_entries;
+  }
+
+ private:
+  platform::MKLDNNDeviceContext *onednn_dev_ctx_;
+};
+
+// Runs `op_type` once on `place` with inputs of shape `dims` filled with values
+// drawn from [10, 20). With `inplace` the first input is the output variable.
+template <typename T>
+void RunOperator(const platform::Place &place, const std::string &op_type,
+                 const framework::DDim &dims, const std::string &output_name,
+                 bool inplace = false) {
+  framework::Scope scope;
+
+  const std::map<std::string, int> num_inputs = {
+      {"softmax", 1}, {"relu", 1}, {"elementwise_add", 2}};
+  // at() throws for an unlisted operator instead of silently inserting 0
+  const int input_count = num_inputs.at(op_type);
+
+  std::string first_input = inplace == true ? output_name : "x";
+
+  std::vector<InputVars> input_names = {
+      {first_input, scope.Var(first_input)->GetMutable<framework::LoDTensor>()},
+      {"x1", input_count > 1
+                 ? scope.Var("x1")->GetMutable<framework::LoDTensor>()
+                 : nullptr},
+      {"x2", input_count > 2
+                 ? scope.Var("x2")->GetMutable<framework::LoDTensor>()
+                 : nullptr},
+      {"x3", input_count > 3
+                 ? scope.Var("x3")->GetMutable<framework::LoDTensor>()
+                 : nullptr},
+      {"x4", input_count > 4
+                 ? scope.Var("x4")->GetMutable<framework::LoDTensor>()
+                 : nullptr}};
+  auto *y = scope.Var(output_name)->GetMutable<framework::LoDTensor>();
+
+  // Initialize input data
+  std::uniform_real_distribution<T> dist(static_cast<T>(10.0),
+                                         static_cast<T>(20.0));
+  std::mt19937 engine;
+  size_t numel = static_cast<size_t>(framework::product(dims));
+  for (int i = 0; i < input_count; ++i) {
+    input_names[i].tensor->Resize(dims);
+    auto data_ptr = input_names[i].tensor->mutable_data<T>(place);
+    for (size_t j = 0; j < numel; ++j) {
+      data_ptr[j] = dist(engine);
+    }
+  }
+
+  // Initialize output
+  y->Resize(dims);
+  auto y_ptr = y->mutable_data<T>(place);
+  for (size_t i = 0; i < numel; ++i) {
+    y_ptr[i] = static_cast<T>(0);
+  }
+
+  auto &pool = platform::DeviceContextPool::Instance();
+
+  auto op = input_count > 1
+                ? framework::OpRegistry::CreateOp(
+                      op_type, {{"X", {first_input}}, {"Y", {"x1"}}},
+                      {{"Out", {output_name}}}, {{"use_mkldnn", {true}}})
+                : framework::OpRegistry::CreateOp(
+                      op_type, {{"X", {first_input}}}, {{"Out", {output_name}}},
+                      {{"use_mkldnn", {true}}});
+
+  op->Run(scope, place);
+  pool.Get(place)->Wait();
+}
+
+TEST(test_softmax_reuse_cache, cpu_place) {
+  framework::DDim dims({32, 64});
+  platform::CPUPlace p;
+  CacheTester ct;
+  RunOperator<float>(p, "softmax", dims, "softmax_out");
+  RunOperator<float>(p, "softmax", dims, "softmax_out");
+  PADDLE_ENFORCE_EQ(ct.Analyze(4), true,
+                    platform::errors::InvalidArgument(
+                        "Wrong number of cached oneDNN objects"));
+}
+
+TEST(test_softmax_noreuse_cache, cpu_place) {
+  framework::DDim dims({32, 64});
+  platform::CPUPlace p;
+  CacheTester ct;
+  RunOperator<float>(p, "softmax", dims, "softmax_out");
+  RunOperator<float>(p, "softmax", dims, "softmax_out2");
+  PADDLE_ENFORCE_EQ(ct.Analyze(8), true,
+                    platform::errors::InvalidArgument(
+                        "Wrong number of cached oneDNN objects"));
+}
+
+TEST(test_softmax_inplace_cache, cpu_place) {
+  framework::DDim dims({32, 64});
+  platform::CPUPlace p;
+  CacheTester ct;
+  RunOperator<float>(p, "softmax", dims, "softmax_out");
+  RunOperator<float>(p, "softmax", dims, "softmax_out", true);
+  PADDLE_ENFORCE_EQ(ct.Analyze(4), true,
+                    platform::errors::InvalidArgument(
+                        "Wrong number of cached oneDNN objects"));
+}
+
+TEST(test_elementwise_add_reuse_cache, cpu_place) {
+  framework::DDim dims({32, 64});
+  platform::CPUPlace p;
+  CacheTester ct;
+  RunOperator<float>(p, "elementwise_add", dims, "elementwise_add_out");
+  RunOperator<float>(p, "relu", dims, "elementwise_add_out", true);
+  PADDLE_ENFORCE_EQ(ct.Analyze(8), true,
+                    platform::errors::InvalidArgument(
+                        "Wrong number of cached oneDNN objects"));
+}
+
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc
index 61a60383b939483df73c4cf7f13c5ef051bfd171..8aa67c877ab58cf2f01b34b792eeb905b8995cd2 100644
--- a/paddle/fluid/platform/device_context.cc
+++ b/paddle/fluid/platform/device_context.cc
@@ -581,6 +581,20 @@ void MKLDNNDeviceContext::SetBlob(const std::string& name,
   return;
 }
 
+// Returns the number of cached oneDNN objects: for every entry of the blob map
+// and every entry nested under it, adds the size of the innermost container.
+// NOTE(review): no lock is taken here - confirm it is only called while no
+// other thread is modifying the cache.
+unsigned int MKLDNNDeviceContext::GetCachedObjectsNumber() const {
+  unsigned int num_entries = 0;
+  for (auto const& l3 : *p_blobmap_) {
+    for (auto const& l2 : *(l3.second)) {
+      num_entries += (l2.second)->size();
+    }
+  }
+  return num_entries;
+}
+
 MKLDNNDeviceContext::BlobPtr_t MKLDNNDeviceContext::GetBlob(
     const std::string& name) const {
   BlobMap* pMap = p_blobmap_.get();
diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h
index f0ce89aa5efd86b5f6b11a04388acb8d4166e302..2fefb3c041fb3fc6ba051c51100c5a70b8b1773e 100644
--- a/paddle/fluid/platform/device_context.h
+++ b/paddle/fluid/platform/device_context.h
@@ -564,6 +564,9 @@ class MKLDNNDeviceContext : public CPUDeviceContext {
   // Set data to blob (i.e. name/data pair). Create blob if not existing
   void SetBlob(const std::string& name, std::shared_ptr<void> data) const;
 
+  // Calculate number of oneDNN objects cached
+  unsigned int GetCachedObjectsNumber() const;
+
   // Find a saved blob. Return nullptr if not found
   std::shared_ptr<void> GetBlob(const std::string& name) const;
 