diff --git a/cmake/external/mkldnn.cmake b/cmake/external/mkldnn.cmake index 260985cc8aa4ad0f231798666c048703b64c6d15..baf253df2755657b01b67c410f63b7d8422d4df3 100644 --- a/cmake/external/mkldnn.cmake +++ b/cmake/external/mkldnn.cmake @@ -54,7 +54,7 @@ ExternalProject_Add( ${EXTERNAL_PROJECT_LOG_ARGS} DEPENDS ${MKLDNN_DEPENDS} GIT_REPOSITORY "https://github.com/01org/mkl-dnn.git" - GIT_TAG "a29d8487a63afca3d5b8c5bbdbb473cf8ccc6e51" + GIT_TAG "64e03a1939e0d526aa8e9f2e3f7dc0ad8d372944" PREFIX ${MKLDNN_SOURCES_DIR} UPDATE_COMMAND "" CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} diff --git a/paddle/fluid/framework/tensor.cc b/paddle/fluid/framework/tensor.cc index 56bb9142dabe0d5546e321e675a5acba7bf4d306..222a51672fc827b39b5f6ba2d5e5e3384997e567 100644 --- a/paddle/fluid/framework/tensor.cc +++ b/paddle/fluid/framework/tensor.cc @@ -31,7 +31,8 @@ size_t Tensor::memory_size() const { return holder_ == nullptr ? 0UL : holder_->size() - offset_; } -void* Tensor::mutable_data(platform::Place place, std::type_index type) { +void* Tensor::mutable_data(platform::Place place, std::type_index type, + int64_t requested_size) { if (holder_ != nullptr) { holder_->set_type(type); } @@ -39,7 +40,7 @@ void* Tensor::mutable_data(platform::Place place, std::type_index type) { "When calling this method, the Tensor's numel must be " "equal or larger than zero. " "Please check Tensor::Resize has been called first."); - int64_t size = numel() * SizeOfType(type); + int64_t size = requested_size ? requested_size : numel() * SizeOfType(type); /* some versions of boost::variant don't have operator!= */ if (holder_ == nullptr || !(holder_->place() == place) || holder_->size() < size + offset_) { @@ -68,10 +69,10 @@ void* Tensor::mutable_data(platform::Place place, std::type_index type) { offset_); } -void* Tensor::mutable_data(platform::Place place) { +void* Tensor::mutable_data(platform::Place place, int64_t requested_size) { PADDLE_ENFORCE(this->holder_ != nullptr, "Cannot invoke mutable data if current hold nothing."); - return mutable_data(place, holder_->type()); + return mutable_data(place, holder_->type(), requested_size); } Tensor& Tensor::ShareDataWith(const Tensor& src) { diff --git a/paddle/fluid/framework/tensor.h b/paddle/fluid/framework/tensor.h index 0bbfd66148e9bc9080654bf1b0b34477115a0e6b..a4454c90b06f07de204af2083a06cf3f426e7856 100644 --- a/paddle/fluid/framework/tensor.h +++ b/paddle/fluid/framework/tensor.h @@ -89,22 +89,24 @@ class Tensor { * @note If not exist, then allocation. */ template - T* mutable_data(platform::Place place); + T* mutable_data(platform::Place place, int64_t requested_size = 0); - void* mutable_data(platform::Place place, std::type_index type); + void* mutable_data(platform::Place place, std::type_index type, + int64_t requested_size = 0); - void* mutable_data(platform::Place place); + void* mutable_data(platform::Place place, int64_t requested_size = 0); /** * @brief Return a pointer to mutable memory block. * - * @param[in] dims The dimensions of the memory block. - * @param[in] place The place of the memory block. + * @param[in] dims The dimensions of the memory block. + * @param[in] place The place of the memory block. + * @param[in] requested_size The size of the block in bytes. * * @note If not exist, then allocation. */ template - T* mutable_data(DDim dims, platform::Place place); + T* mutable_data(DDim dims, platform::Place place, int64_t requested_size = 0); /*! Return the dimensions of the memory block. */ const DDim& dims() const; diff --git a/paddle/fluid/framework/tensor_impl.h b/paddle/fluid/framework/tensor_impl.h index b7b62eef23ec351686378c913d18fc72308fd7b2..ea10c9a2658cbd8334d2c50e87c55967dbf0db65 100644 --- a/paddle/fluid/framework/tensor_impl.h +++ b/paddle/fluid/framework/tensor_impl.h @@ -46,16 +46,17 @@ inline T* Tensor::data() { } template -inline T* Tensor::mutable_data(DDim dims, platform::Place place) { +inline T* Tensor::mutable_data(DDim dims, platform::Place place, + int64_t requested_size) { static_assert(std::is_pod::value, "T must be POD"); Resize(dims); - return mutable_data(place); + return mutable_data(place, requested_size); } template -inline T* Tensor::mutable_data(platform::Place place) { +inline T* Tensor::mutable_data(platform::Place place, int64_t requested_size) { static_assert(std::is_pod::value, "T must be POD"); - return reinterpret_cast(mutable_data(place, typeid(T))); + return reinterpret_cast(mutable_data(place, typeid(T), requested_size)); } inline Tensor ReshapeToMatrix(const Tensor& src, int num_col_dims) { diff --git a/paddle/fluid/operators/conv_mkldnn_op.cc b/paddle/fluid/operators/conv_mkldnn_op.cc index f07ab5a33b87d7945e5fcdf8f3644f0711ce643b..77d0cf07a869d1e997227ae82e2c697825a2aa48 100644 --- a/paddle/fluid/operators/conv_mkldnn_op.cc +++ b/paddle/fluid/operators/conv_mkldnn_op.cc @@ -53,6 +53,18 @@ class ConvMKLDNNHandler : public platform::MKLDNNHandler { key_ += "-BWD"; } + size_t GetDstMemorySize() { + return conv_pd_->dst_primitive_desc().get_size(); + } + + size_t GetDiffWeightsMemorySize() { + return conv_bwd_weights_pd_->diff_weights_primitive_desc().get_size(); + } + + size_t GetDiffSourceMemorySize() { + return conv_bwd_data_pd_->diff_src_primitive_desc().get_size(); + } + std::shared_ptr AcquireSrcMemoryFromWeightsPrimitive( const std::shared_ptr user_memory_p, std::vector& pipeline) { // NOLINT @@ -251,7 +263,6 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel { const T* input_data = input->data(); const T* filter_data = filter->data(); - T* output_data = output->mutable_data(ctx.GetPlace()); PADDLE_ENFORCE(input->dims().size() == 4, "Input must be with 4 dimensions, i.e. NCHW"); @@ -306,6 +317,8 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel { auto user_weights_memory_p = handler.AcquireWeightsMemory( user_weights_md, to_void_cast(filter_data)); + T* output_data = + output->mutable_data(ctx.GetPlace(), handler.GetDstMemorySize()); // create reorder primitive if the input format is not the preferred one auto src_memory_p = handler.AcquireSrcMemoryFromPrimitive(user_src_memory_p, pipeline); @@ -393,13 +406,6 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel { T* input_grad_data = nullptr; T* filter_grad_data = nullptr; - if (input_grad) { - input_grad_data = input_grad->mutable_data(ctx.GetPlace()); - } - if (filter_grad) { - filter_grad_data = filter_grad->mutable_data(ctx.GetPlace()); - } - std::vector src_tz = paddle::framework::vectorize2int(input->dims()); std::vector weights_tz = paddle::framework::vectorize2int(filter->dims()); @@ -485,6 +491,9 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel { handler.AcquireDiffDstMemoryFromWeightsPrimitive( user_diff_dst_memory_p, pipeline); + size_t size = handler.GetDiffWeightsMemorySize(); + filter_grad_data = filter_grad->mutable_data(ctx.GetPlace(), size); + auto diff_weights_memory_p = handler.AcquireDiffWeightsMemoryFromWeightsPrimitive( reinterpret_cast(filter_grad_data)); @@ -507,6 +516,9 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel { handler.AcquireDiffDstMemoryFromDataPrimitive(user_diff_dst_memory_p, pipeline); + size_t size = handler.GetDiffSourceMemorySize(); + input_grad_data = input_grad->mutable_data(ctx.GetPlace(), size); + auto diff_src_memory_p = handler.AcquireDiffSrcMemoryFromDataPrimitive( reinterpret_cast(input_grad_data));