From de74ee378a7e38348add50fa9afd0606aaca9d3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=9F=B3=E6=99=93=E4=BC=9F?= <39303645+Shixiaowei02@users.noreply.github.com> Date: Tue, 17 Mar 2020 15:51:59 +0800 Subject: [PATCH] feature: buffer sharing, test=develop (#3161) * feature: buffer sharing, test=develop * fix the warning of buffer.h, test=develop * test cuda only if necessary, test=develop --- lite/core/lite_tensor_test.cc | 42 ++++++++++++++++++++++++++++++----- lite/core/memory.h | 12 +++++++++- lite/core/tensor.cc | 15 +++++++++++++ lite/core/tensor.h | 10 ++++++++- 4 files changed, 71 insertions(+), 8 deletions(-) diff --git a/lite/core/lite_tensor_test.cc b/lite/core/lite_tensor_test.cc index d667a9f885..500dae3e28 100644 --- a/lite/core/lite_tensor_test.cc +++ b/lite/core/lite_tensor_test.cc @@ -13,19 +13,49 @@ // limitations under the License. #include +#include #include "lite/core/tensor.h" namespace paddle { namespace lite { -TEST(tensor, test) { - TensorLite tensor; - DDimLite ddim({1, 8}); - tensor.Resize(ddim); +template +void test_shared_memory_tensor() { + const std::vector data({0, 1, 2, 3}); + const std::vector shape({2, 2}); + const size_t size = data.size() * sizeof(Dtype); + TensorLite init_tensor; + init_tensor.Assign(data.data(), + static_cast(shape)); + Dtype* init_raw_data = init_tensor.mutable_data(); - for (int i = 0; i < 8; i++) { - tensor.mutable_data()[i] = i; + TensorLite shared_tensor( + std::make_shared(Buffer(init_raw_data, Target, size))); + Buffer host_buffer; + host_buffer.ResetLazy(TargetType::kHost, size); + if (Target == TargetType::kHost) { + CopySync( + host_buffer.data(), init_raw_data, size, IoDirection::HtoH); + } else { + CopySync( + host_buffer.data(), init_raw_data, size, IoDirection::DtoH); } + EXPECT_EQ(std::memcmp(host_buffer.data(), data.data(), size), 0); + + shared_tensor.Resize({1, 5}); + ASSERT_DEATH(shared_tensor.mutable_data(), ""); +} + +TEST(tensor, shared_memory) { + ::testing::FLAGS_gtest_death_test_style = "threadsafe"; + test_shared_memory_tensor(); + test_shared_memory_tensor(); + test_shared_memory_tensor(); +#ifdef LITE_WITH_CUDA + test_shared_memory_tensor(); + test_shared_memory_tensor(); + test_shared_memory_tensor(); +#endif } } // namespace lite diff --git a/lite/core/memory.h b/lite/core/memory.h index 051d47bdde..71b475078c 100644 --- a/lite/core/memory.h +++ b/lite/core/memory.h @@ -15,6 +15,7 @@ #pragma once #include "lite/api/paddle_place.h" #include "lite/core/target_wrapper.h" +#include "lite/utils/logging.h" #include "lite/utils/macros.h" #ifdef LITE_WITH_OPENCL @@ -81,6 +82,9 @@ void CopySync(void* dst, const void* src, size_t size, IoDirection dir) { TargetWrapper::MemcpySync(dst, src, size, dir); break; #endif + default: + LOG(FATAL) + << "The copy function of this target has not been implemented yet."; } } @@ -89,13 +93,17 @@ class Buffer { public: Buffer() = default; Buffer(TargetType target, size_t size) : space_(size), target_(target) {} + Buffer(void* data, TargetType target, size_t size) + : space_(size), data_(data), own_data_(false), target_(target) {} void* data() const { return data_; } TargetType target() const { return target_; } size_t space() const { return space_; } + bool own_data() const { return own_data_; } void ResetLazy(TargetType target, size_t size) { if (target != target_ || space_ < size) { + CHECK_EQ(own_data_, true) << "Can not reset unowned buffer."; Free(); data_ = TargetMalloc(target, size); target_ = target; @@ -115,6 +123,7 @@ class Buffer { 4; // 4 for RGBA, un-used for opencl Image2D if (target != target_ || cl_image2d_width_ < img_w || cl_image2d_height_ < img_h) { + CHECK_EQ(own_data_, true) << "Can not reset unowned buffer."; Free(); data_ = TargetWrapperCL::MallocImage(img_w, img_h, host_ptr); target_ = target; @@ -126,7 +135,7 @@ class Buffer { #endif void Free() { - if (space_ > 0) { + if (space_ > 0 && own_data_) { TargetFree(target_, data_); } data_ = nullptr; @@ -149,6 +158,7 @@ class Buffer { size_t cl_image2d_width_{0}; // only used for OpenCL Image2D size_t cl_image2d_height_{0}; // only used for OpenCL Image2D void* data_{nullptr}; + bool own_data_{true}; TargetType target_{TargetType::kHost}; }; diff --git a/lite/core/tensor.cc b/lite/core/tensor.cc index 7664633077..ecb9935dfd 100644 --- a/lite/core/tensor.cc +++ b/lite/core/tensor.cc @@ -98,6 +98,21 @@ void *TensorLite::mutable_data(TargetType target, size_t memory_size) { return mutable_data(memory_size); } +void TensorLite::ResetBuffer(std::shared_ptr buffer, + size_t memory_size) { + CHECK_EQ(offset_, 0) + << "Only the offset is supported to zero when the Buffer is reset."; + if (buffer_) { + CHECK_LE(memory_size_, buffer->space()) + << "The space of buffer is not enough to store the tensor."; + CHECK_LE(memory_size, buffer->space()) + << "The buffer is smaller than the specified minimum size."; + } + buffer_ = buffer; + memory_size_ = memory_size; + target_ = buffer->target(); +} + #ifdef LITE_WITH_OPENCL template <> const cl::Image2D *TensorLite::data() const { diff --git a/lite/core/tensor.h b/lite/core/tensor.h index 6e2e771be9..2209e524f4 100644 --- a/lite/core/tensor.h +++ b/lite/core/tensor.h @@ -102,9 +102,10 @@ using LoD = std::vector>; class TensorLite { public: TensorLite() : buffer_(std::make_shared()) {} + explicit TensorLite(std::shared_ptr buffer) : buffer_(buffer) {} template - void Assign(DType *data, const DimT &dim) { + void Assign(const DType *data, const DimT &dim) { Resize(dim); auto *dst = mutable_data(Target); CopySync( @@ -178,6 +179,11 @@ class TensorLite { (static_cast(buffer_->data()) + offset_)); } + void *raw_data() { + return static_cast( + (static_cast(buffer_->data()) + offset_)); + } + void clear() { buffer_->Free(); offset_ = 0; @@ -195,6 +201,8 @@ class TensorLite { void CopyDataFrom(const TensorLite &other); + void ResetBuffer(std::shared_ptr buffer, size_t memory_size); + TargetType target() const { return target_; } template -- GitLab