feature: buffer sharing, test=develop (#3161)

* feature: buffer sharing, test=develop
* fix the warning of buffer.h, test=develop
* test cuda only if necessary, test=develop

feature: buffer sharing, test=develop (#3161)
* feature: buffer sharing, test=develop
* fix the warning of buffer.h, test=develop
* test cuda only if necessary, test=develop
e04399ba · 石晓伟 · GitHub · 774b4652 · e04399ba · e04399ba
Showing with 71 additions and 8 deletions

lite/core/lite_tensor_test.cc lite/core/lite_tensor_test.cc +36 -6

lite/core/memory.h lite/core/memory.h +11 -1

lite/core/tensor.cc lite/core/tensor.cc +15 -0

lite/core/tensor.h lite/core/tensor.h +9 -1

未找到文件。
--- a/lite/core/lite_tensor_test.cc
+++ b/lite/core/lite_tensor_test.cc
@@ -13,19 +13,49 @@
 // limitations under the License.

 #include <gtest/gtest.h>
+#include <cstring>
 #include "lite/core/tensor.h"

 namespace paddle {
 namespace lite {

-TEST(tensor, test) {
-  TensorLite tensor;
-  DDimLite ddim({1, 8});
-  tensor.Resize(ddim);
+template <typename Dtype, TargetType Target>
+void test_shared_memory_tensor() {
+  const std::vector<Dtype> data({0, 1, 2, 3});
+  const std::vector<int64_t> shape({2, 2});
+  const size_t size = data.size() * sizeof(Dtype);
+  TensorLite init_tensor;
+  init_tensor.Assign<Dtype, DDim, Target>(data.data(),
+                                          static_cast<DDim>(shape));
+  Dtype* init_raw_data = init_tensor.mutable_data<Dtype>();

-  for (int i = 0; i < 8; i++) {
-    tensor.mutable_data<int>()[i] = i;
+  TensorLite shared_tensor(
+      std::make_shared<Buffer>(Buffer(init_raw_data, Target, size)));
+  Buffer host_buffer;
+  host_buffer.ResetLazy(TargetType::kHost, size);
+  if (Target == TargetType::kHost) {
+    CopySync<Target>(
+        host_buffer.data(), init_raw_data, size, IoDirection::HtoH);
+  } else {
+    CopySync<Target>(
+        host_buffer.data(), init_raw_data, size, IoDirection::DtoH);
  }
+  EXPECT_EQ(std::memcmp(host_buffer.data(), data.data(), size), 0);
+
+  shared_tensor.Resize({1, 5});
+  ASSERT_DEATH(shared_tensor.mutable_data<Dtype>(), "");
+}
+
+TEST(tensor, shared_memory) {
+  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
+  test_shared_memory_tensor<float, TargetType::kHost>();
+  test_shared_memory_tensor<int64_t, TargetType::kHost>();
+  test_shared_memory_tensor<int8_t, TargetType::kHost>();
+#ifdef LITE_WITH_CUDA
+  test_shared_memory_tensor<float, TargetType::kCUDA>();
+  test_shared_memory_tensor<int64_t, TargetType::kCUDA>();
+  test_shared_memory_tensor<int8_t, TargetType::kCUDA>();
+#endif
 }

 }  // namespace lite

--- a/lite/core/memory.h
+++ b/lite/core/memory.h
@@ -15,6 +15,7 @@
 #pragma once
 #include "lite/api/paddle_place.h"
 #include "lite/core/target_wrapper.h"
+#include "lite/utils/logging.h"
 #include "lite/utils/macros.h"

 #ifdef LITE_WITH_OPENCL
@@ -81,6 +82,9 @@ void CopySync(void* dst, const void* src, size_t size, IoDirection dir) {
      TargetWrapper<TARGET(kBM)>::MemcpySync(dst, src, size, dir);
      break;
 #endif
+    default:
+      LOG(FATAL)
+          << "The copy function of this target has not been implemented yet.";
  }
 }

@@ -89,13 +93,17 @@ class Buffer {
 public:
  Buffer() = default;
  Buffer(TargetType target, size_t size) : space_(size), target_(target) {}
+  // Wraps memory that was allocated elsewhere instead of allocating new
+  // storage. own_data_ is set to false, so Free() skips TargetFree for this
+  // pointer and ResetLazy() fails its CHECK rather than reallocating.
+  //   data:   start of the externally provided memory
+  //   target: target type recorded for this buffer
+  //   size:   value recorded as the buffer's space
+  Buffer(void* data, TargetType target, size_t size)
+      : space_(size),
+        data_(data),
+        own_data_(false),
+        target_(target) {}

  void* data() const { return data_; }
  TargetType target() const { return target_; }
  size_t space() const { return space_; }
+  bool own_data() const { return own_data_; }

  void ResetLazy(TargetType target, size_t size) {
    if (target != target_ || space_ < size) {
+      CHECK_EQ(own_data_, true) << "Can not reset unowned buffer.";
      Free();
      data_ = TargetMalloc(target, size);
      target_ = target;
@@ -115,6 +123,7 @@ class Buffer {
                  4;  // 4 for RGBA, un-used for opencl Image2D
    if (target != target_ || cl_image2d_width_ < img_w ||
        cl_image2d_height_ < img_h) {
+      CHECK_EQ(own_data_, true) << "Can not reset unowned buffer.";
      Free();
      data_ = TargetWrapperCL::MallocImage<T>(img_w, img_h, host_ptr);
      target_ = target;
@@ -126,7 +135,7 @@ class Buffer {
 #endif

  void Free() {
-    if (space_ > 0) {
+    if (space_ > 0 && own_data_) {
      TargetFree(target_, data_);
    }
    data_ = nullptr;
@@ -149,6 +158,7 @@ class Buffer {
  size_t cl_image2d_width_{0};   // only used for OpenCL Image2D
  size_t cl_image2d_height_{0};  // only used for OpenCL Image2D
  void* data_{nullptr};
+  bool own_data_{true};
  TargetType target_{TargetType::kHost};
 };


--- a/lite/core/tensor.cc
+++ b/lite/core/tensor.cc
@@ -98,6 +98,21 @@ void *TensorLite::mutable_data(TargetType target, size_t memory_size) {
  return mutable_data(memory_size);
 }

+// Rebinds this tensor to the supplied buffer.
+//
+//   buffer:      the Buffer the tensor shares from now on; must not be null.
+//   memory_size: stored as the tensor's new memory_size_.
+//
+// A CHECK fails when offset_ is non-zero, when `buffer` is null, or -- if the
+// tensor already holds a buffer -- when the new buffer's space() is smaller
+// than either the tensor's current memory_size_ or the requested memory_size.
+// On success the tensor also takes over the buffer's target.
+void TensorLite::ResetBuffer(std::shared_ptr<Buffer> buffer,
+                             size_t memory_size) {
+  CHECK_EQ(offset_, 0)
+      << "Only the offset is supported to zero when the Buffer is reset.";
+  // `buffer` is dereferenced below (space(), target()); reject a null
+  // pointer explicitly instead of crashing on it.
+  CHECK_EQ(buffer != nullptr, true) << "Can not reset to a null buffer.";
+  if (buffer_) {
+    CHECK_LE(memory_size_, buffer->space())
+        << "The space of buffer is not enough to store the tensor.";
+    CHECK_LE(memory_size, buffer->space())
+        << "The buffer is smaller than the specified minimum size.";
+  }
+  buffer_ = buffer;
+  memory_size_ = memory_size;
+  target_ = buffer->target();
+}
+
 #ifdef LITE_WITH_OPENCL
 template <>
 const cl::Image2D *TensorLite::data<float, cl::Image2D>() const {

--- a/lite/core/tensor.h
+++ b/lite/core/tensor.h
@@ -102,9 +102,10 @@ using LoD = std::vector<std::vector<uint64_t>>;
 class TensorLite {
 public:
  TensorLite() : buffer_(std::make_shared<Buffer>()) {}
+  explicit TensorLite(std::shared_ptr<Buffer> buffer) : buffer_(buffer) {}

  template <typename DType, typename DimT, TargetType Target>
-  void Assign(DType *data, const DimT &dim) {
+  void Assign(const DType *data, const DimT &dim) {
    Resize(dim);
    auto *dst = mutable_data<DType, void>(Target);
    CopySync<Target>(
@@ -178,6 +179,11 @@ class TensorLite {
        (static_cast<char *>(buffer_->data()) + offset_));
  }

+  // Returns an untyped pointer to the tensor's bytes: the start of the
+  // underlying buffer advanced by offset_ bytes.
+  void *raw_data() {
+    char *base = static_cast<char *>(buffer_->data());
+    return base + offset_;
+  }
+
  void clear() {
    buffer_->Free();
    offset_ = 0;
@@ -195,6 +201,8 @@ class TensorLite {

  void CopyDataFrom(const TensorLite &other);

+  void ResetBuffer(std::shared_ptr<Buffer> buffer, size_t memory_size);
+
  TargetType target() const { return target_; }

  template <typename T>