From 9ec1432df691ee6734d3a0c61d81d7da1b2b4e0c Mon Sep 17 00:00:00 2001 From: Feiyu Chan Date: Mon, 22 Nov 2021 20:04:46 +0800 Subject: [PATCH] disable copying of datatype when sharing buffer between two tensors. (#37247) * disable copying of datatype when sharing buffer between two tensors. * fix for mkldnn operator kernels (elementwise_add, sum, softplus, softmax, scale, activation), manually set the data type when reusing memory by ShareBufferWith. --- paddle/fluid/framework/tensor.h | 5 ++++- .../mkldnn/elementwise_mkldnn_op.h | 19 +++++++++++++++--- .../operators/mkldnn/activation_mkldnn_op.cc | 8 +++++++- .../fluid/operators/mkldnn/scale_mkldnn_op.cc | 9 +++++++-- .../operators/mkldnn/softmax_mkldnn_op.cc | 10 +++++++--- .../operators/mkldnn/softplus_mkldnn_op.h | 20 ++++++++++++------- .../fluid/operators/mkldnn/sum_mkldnn_op.cc | 9 +++++++-- 7 files changed, 61 insertions(+), 19 deletions(-) diff --git a/paddle/fluid/framework/tensor.h b/paddle/fluid/framework/tensor.h index e889de8552d..ec990213433 100644 --- a/paddle/fluid/framework/tensor.h +++ b/paddle/fluid/framework/tensor.h @@ -254,7 +254,10 @@ class Tensor { void ShareBufferWith(const Tensor& tensor) { holder_ = tensor.holder_; offset_ = tensor.offset_; - type_ = tensor.type_; + // NOTE(chenfeiyu): when sharing buffer, by definition only the holder + // of the memory allocation and the offset should be shared. Shape, + // data type, layout, and other metadata associated with a Tensor + // should not be copied. 
} bool IsSharedBufferWith(const Tensor& src) const { diff --git a/paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h b/paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h index 131ea3901da..1439cccc3bf 100644 --- a/paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h +++ b/paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h @@ -62,9 +62,22 @@ class EltwiseMKLDNNKernel : public framework::OpKernel { // and they share a buffer (of // shape x) then this buffer is not big enough to hold result of elementwise // operation. - auto dst_memory = (x->numel() == z->numel() && x->IsSharedBufferWith(*z)) - ? src_x_memory - : handler.AcquireDstMemory(z); + const bool reuse_x_memory = + x->numel() == z->numel() && x->IsSharedBufferWith(*z); + std::shared_ptr<dnnl::memory> dst_memory = nullptr; + if (reuse_x_memory) { + dst_memory = src_x_memory; + // NOTE(chenfeiyu): when the output reuses memory from another tensor rather + // than allocating its own, it still needs to take care of its data type. + // Unfortunately, paddle's operator only infers the output's shape, but not + // the data type. mutable_data takes care of allocation and data type + // normally, but if the memory is already allocated and there is no need + // to re-allocate, it just sets the data type. So it is added here to + // get the right data type. + z->mutable_data<T>(ctx.GetPlace()); + } else { + dst_memory = handler.AcquireDstMemory(z); + } const auto binary_prim = handler.AcquireForwardPrimitive(); diff --git a/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc index 29e18da590e..4bde641d2c1 100644 --- a/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc @@ -91,7 +91,13 @@ void eltwise_forward(const framework::ExecutionContext &ctx, ctx.GetPlace(), x); auto src_memory_p = handler.AcquireSrcMemory(x); - auto dst_memory_p = is_inplaced ? 
src_memory_p : handler.AcquireDstMemory(y); + std::shared_ptr<dnnl::memory> dst_memory_p = nullptr; + if (is_inplaced) { + dst_memory_p = src_memory_p; + y->mutable_data<T>(ctx.GetPlace()); + } else { + dst_memory_p = handler.AcquireDstMemory(y); + } auto activation_p = handler.AcquireForwardPrimitive(); auto &astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream(); diff --git a/paddle/fluid/operators/mkldnn/scale_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/scale_mkldnn_op.cc index 84ac14d04b8..b8b735e96d2 100644 --- a/paddle/fluid/operators/mkldnn/scale_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/scale_mkldnn_op.cc @@ -41,8 +41,13 @@ class ScaleMKLDNNKernel : public framework::OpKernel { x); auto src_memory_p = handler.AcquireSrcMemory(x); - auto dst_memory_p = - is_inplaced ? src_memory_p : handler.AcquireDstMemory(out); + std::shared_ptr<dnnl::memory> dst_memory_p = nullptr; + if (is_inplaced) { + dst_memory_p = src_memory_p; + out->mutable_data<T>(ctx.GetPlace()); + } else { + dst_memory_p = handler.AcquireDstMemory(out); + } auto activation_p = handler.AcquireForwardPrimitive(); auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream(); diff --git a/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc index b0f27719bf9..c26c017596d 100644 --- a/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc @@ -103,9 +103,13 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel { auto softmax_src_memory_p = handler.AcquireSrcMemory(input); // For Inplace src and and dst are the same memory object - auto softmax_dst_memory_p = - is_inplaced ? 
softmax_src_memory_p : handler.AcquireDstMemory(output); - + std::shared_ptr<dnnl::memory> softmax_dst_memory_p = nullptr; + if (is_inplaced) { + softmax_dst_memory_p = softmax_src_memory_p; + output->mutable_data<T>(ctx.GetPlace()); + } else { + softmax_dst_memory_p = handler.AcquireDstMemory(output); + } auto softmax_p = handler.AcquireForwardPrimitive(); auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream(); diff --git a/paddle/fluid/operators/mkldnn/softplus_mkldnn_op.h b/paddle/fluid/operators/mkldnn/softplus_mkldnn_op.h index 60ea5136905..c8c539a9565 100644 --- a/paddle/fluid/operators/mkldnn/softplus_mkldnn_op.h +++ b/paddle/fluid/operators/mkldnn/softplus_mkldnn_op.h @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#pragma once #include "paddle/fluid/platform/mkldnn_reuse.h" namespace paddle { @@ -43,7 +44,7 @@ class SoftplusMKLDNNHandler 1.0f / beta, 0.0f); } - AppendFusedActivationIfExists(ctx, post_ops); + AppendFusedActivationIfExists(ctx, &post_ops); dnnl::primitive_attr attrs; attrs.set_post_ops(post_ops); @@ -59,16 +60,16 @@ class SoftplusMKLDNNHandler private: void AppendFusedActivationIfExists(const framework::ExecutionContext& ctx, - dnnl::post_ops& post_ops) { + dnnl::post_ops* post_ops) { const auto& fused_activation_type = algo_map.find(ctx.Attr("fuse_activation_type")); if (fused_activation_type != algo_map.end()) { auto scale_out = ctx.Attr("fuse_activation_scale"); // for future int8 support - post_ops.append_eltwise(scale_out, fused_activation_type->second, - ctx.Attr("fuse_activation_alpha"), - ctx.Attr("fuse_activation_beta")); + post_ops->append_eltwise(scale_out, fused_activation_type->second, + ctx.Attr<float>("fuse_activation_alpha"), + ctx.Attr<float>("fuse_activation_beta")); } } @@ -109,8 +110,13 @@ void custom_softplus_eltwise_forward(const framework::ExecutionContext& ctx) { auto src_memory_p = 
handler.AcquireSrcMemory(x); auto beta_memory_p = handler.AcquireBetaMemory(&beta); - auto dst_memory_p = - is_inplaced ? src_memory_p : handler.AcquireDstMemory(out); + std::shared_ptr<dnnl::memory> dst_memory_p = nullptr; + if (is_inplaced) { + dst_memory_p = src_memory_p; + out->mutable_data<T>(ctx.GetPlace()); + } else { + dst_memory_p = handler.AcquireDstMemory(out); + } auto binary_p = handler.AcquireForwardPrimitive(); auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream(); diff --git a/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc index 8208a484b4a..2760bcecd5b 100644 --- a/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc @@ -137,8 +137,13 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel { ++input_index; } - auto dst_mem = in_place ? handler.AcquireDstMemory() - : handler.AcquireDstMemory(output); + std::shared_ptr<dnnl::memory> dst_mem = nullptr; + if (in_place) { + dst_mem = handler.AcquireDstMemory(); + output->mutable_data<T>(ctx.GetPlace()); + } else { + dst_mem = handler.AcquireDstMemory(output); + } auto sum_p = handler.AcquireForwardPrimitive(); -- GitLab