From 6588d0e039b36be9febd51683b6cad17264628ab Mon Sep 17 00:00:00 2001 From: Michal Gallus Date: Mon, 13 Aug 2018 12:20:06 +0200 Subject: [PATCH] Update MKLDNN to 0.15, fix conv integration --- cmake/external/mkldnn.cmake | 2 +- paddle/fluid/framework/tensor.cc | 9 ++++---- paddle/fluid/framework/tensor.h | 14 +++++++----- paddle/fluid/framework/tensor_impl.h | 9 ++++---- paddle/fluid/operators/conv_mkldnn_op.cc | 28 +++++++++++++++++------- 5 files changed, 39 insertions(+), 23 deletions(-) diff --git a/cmake/external/mkldnn.cmake b/cmake/external/mkldnn.cmake index 260985cc8aa..baf253df275 100644 --- a/cmake/external/mkldnn.cmake +++ b/cmake/external/mkldnn.cmake @@ -54,7 +54,7 @@ ExternalProject_Add( ${EXTERNAL_PROJECT_LOG_ARGS} DEPENDS ${MKLDNN_DEPENDS} GIT_REPOSITORY "https://github.com/01org/mkl-dnn.git" - GIT_TAG "a29d8487a63afca3d5b8c5bbdbb473cf8ccc6e51" + GIT_TAG "64e03a1939e0d526aa8e9f2e3f7dc0ad8d372944" PREFIX ${MKLDNN_SOURCES_DIR} UPDATE_COMMAND "" CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} diff --git a/paddle/fluid/framework/tensor.cc b/paddle/fluid/framework/tensor.cc index 56bb9142dab..222a51672fc 100644 --- a/paddle/fluid/framework/tensor.cc +++ b/paddle/fluid/framework/tensor.cc @@ -31,7 +31,8 @@ size_t Tensor::memory_size() const { return holder_ == nullptr ? 0UL : holder_->size() - offset_; } -void* Tensor::mutable_data(platform::Place place, std::type_index type) { +void* Tensor::mutable_data(platform::Place place, std::type_index type, + int64_t requested_size) { if (holder_ != nullptr) { holder_->set_type(type); } @@ -39,7 +40,7 @@ void* Tensor::mutable_data(platform::Place place, std::type_index type) { "When calling this method, the Tensor's numel must be " "equal or larger than zero. " "Please check Tensor::Resize has been called first."); - int64_t size = numel() * SizeOfType(type); + int64_t size = requested_size ? requested_size : numel() * SizeOfType(type); /* some versions of boost::variant don't have operator!= */ if (holder_ == nullptr || !(holder_->place() == place) || holder_->size() < size + offset_) { @@ -68,10 +69,10 @@ void* Tensor::mutable_data(platform::Place place, std::type_index type) { offset_); } -void* Tensor::mutable_data(platform::Place place) { +void* Tensor::mutable_data(platform::Place place, int64_t requested_size) { PADDLE_ENFORCE(this->holder_ != nullptr, "Cannot invoke mutable data if current hold nothing."); - return mutable_data(place, holder_->type()); + return mutable_data(place, holder_->type(), requested_size); } Tensor& Tensor::ShareDataWith(const Tensor& src) { diff --git a/paddle/fluid/framework/tensor.h b/paddle/fluid/framework/tensor.h index 0bbfd66148e..a4454c90b06 100644 --- a/paddle/fluid/framework/tensor.h +++ b/paddle/fluid/framework/tensor.h @@ -89,22 +89,24 @@ class Tensor { * @note If not exist, then allocation. */ template - T* mutable_data(platform::Place place); + T* mutable_data(platform::Place place, int64_t requested_size = 0); - void* mutable_data(platform::Place place, std::type_index type); + void* mutable_data(platform::Place place, std::type_index type, + int64_t requested_size = 0); - void* mutable_data(platform::Place place); + void* mutable_data(platform::Place place, int64_t requested_size = 0); /** * @brief Return a pointer to mutable memory block. * - * @param[in] dims The dimensions of the memory block. - * @param[in] place The place of the memory block. + * @param[in] dims The dimensions of the memory block. + * @param[in] place The place of the memory block. + * @param[in] requested_size The size of the block in bytes. * * @note If not exist, then allocation. */ template - T* mutable_data(DDim dims, platform::Place place); + T* mutable_data(DDim dims, platform::Place place, int64_t requested_size = 0); /*! Return the dimensions of the memory block. */ const DDim& dims() const; diff --git a/paddle/fluid/framework/tensor_impl.h b/paddle/fluid/framework/tensor_impl.h index b7b62eef23e..ea10c9a2658 100644 --- a/paddle/fluid/framework/tensor_impl.h +++ b/paddle/fluid/framework/tensor_impl.h @@ -46,16 +46,17 @@ inline T* Tensor::data() { } template -inline T* Tensor::mutable_data(DDim dims, platform::Place place) { +inline T* Tensor::mutable_data(DDim dims, platform::Place place, + int64_t requested_size) { static_assert(std::is_pod::value, "T must be POD"); Resize(dims); - return mutable_data(place); + return mutable_data(place, requested_size); } template -inline T* Tensor::mutable_data(platform::Place place) { +inline T* Tensor::mutable_data(platform::Place place, int64_t requested_size) { static_assert(std::is_pod::value, "T must be POD"); - return reinterpret_cast(mutable_data(place, typeid(T))); + return reinterpret_cast(mutable_data(place, typeid(T), requested_size)); } inline Tensor ReshapeToMatrix(const Tensor& src, int num_col_dims) { diff --git a/paddle/fluid/operators/conv_mkldnn_op.cc b/paddle/fluid/operators/conv_mkldnn_op.cc index f07ab5a33b8..77d0cf07a86 100644 --- a/paddle/fluid/operators/conv_mkldnn_op.cc +++ b/paddle/fluid/operators/conv_mkldnn_op.cc @@ -53,6 +53,18 @@ class ConvMKLDNNHandler : public platform::MKLDNNHandler { key_ += "-BWD"; } + size_t GetDstMemorySize() { + return conv_pd_->dst_primitive_desc().get_size(); + } + + size_t GetDiffWeightsMemorySize() { + return conv_bwd_weights_pd_->diff_weights_primitive_desc().get_size(); + } + + size_t GetDiffSourceMemorySize() { + return conv_bwd_data_pd_->diff_src_primitive_desc().get_size(); + } + std::shared_ptr AcquireSrcMemoryFromWeightsPrimitive( const std::shared_ptr user_memory_p, std::vector& pipeline) { // NOLINT @@ -251,7 +263,6 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel { const T* input_data = input->data(); const T* filter_data = filter->data(); - T* output_data = output->mutable_data(ctx.GetPlace()); PADDLE_ENFORCE(input->dims().size() == 4, "Input must be with 4 dimensions, i.e. NCHW"); @@ -306,6 +317,8 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel { auto user_weights_memory_p = handler.AcquireWeightsMemory( user_weights_md, to_void_cast(filter_data)); + T* output_data = + output->mutable_data(ctx.GetPlace(), handler.GetDstMemorySize()); // create reorder primitive if the input format is not the preferred one auto src_memory_p = handler.AcquireSrcMemoryFromPrimitive(user_src_memory_p, pipeline); @@ -393,13 +406,6 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel { T* input_grad_data = nullptr; T* filter_grad_data = nullptr; - if (input_grad) { - input_grad_data = input_grad->mutable_data(ctx.GetPlace()); - } - if (filter_grad) { - filter_grad_data = filter_grad->mutable_data(ctx.GetPlace()); - } - std::vector src_tz = paddle::framework::vectorize2int(input->dims()); std::vector weights_tz = paddle::framework::vectorize2int(filter->dims()); @@ -485,6 +491,9 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel { handler.AcquireDiffDstMemoryFromWeightsPrimitive( user_diff_dst_memory_p, pipeline); + size_t size = handler.GetDiffWeightsMemorySize(); + filter_grad_data = filter_grad->mutable_data(ctx.GetPlace(), size); + auto diff_weights_memory_p = handler.AcquireDiffWeightsMemoryFromWeightsPrimitive( reinterpret_cast(filter_grad_data)); @@ -507,6 +516,9 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel { handler.AcquireDiffDstMemoryFromDataPrimitive(user_diff_dst_memory_p, pipeline); + size_t size = handler.GetDiffSourceMemorySize(); + input_grad_data = input_grad->mutable_data(ctx.GetPlace(), size); + auto diff_src_memory_p = handler.AcquireDiffSrcMemoryFromDataPrimitive( reinterpret_cast(input_grad_data)); -- GitLab