diff --git a/paddle/fluid/framework/tensor_util.h b/paddle/fluid/framework/tensor_util.h
index e782963f1882a4cd3a04d4aef3b1cb90f86f86ae..1df4cf2638c30c437137da5767039625dfa781d9 100644
--- a/paddle/fluid/framework/tensor_util.h
+++ b/paddle/fluid/framework/tensor_util.h
@@ -136,6 +136,7 @@ void TensorFromArray(const T* src, const size_t& array_size,
   }
 #endif
 }
+
 template <typename T>
 void TensorFromVector(const std::vector<T>& src,
                       const platform::DeviceContext& ctx, Tensor* dst) {
@@ -168,6 +169,49 @@ void TensorFromVector(const std::vector<T>& src,
 #endif
 }
 
+// The fully specialized function should be inline to avoid
+// multi-definition.
+template <>
+inline void TensorFromVector(const std::vector<bool>& src,
+                             const platform::DeviceContext& ctx, Tensor* dst) {
+  // vector<bool> has no data() member, use array instead.
+  // See details:
+  // https://stackoverflow.com/questions/46115669/why-does-stdvectorbool-have-no-data/46115714
+  bool* array = new bool[src.size()];
+  for (unsigned int i = 0; i < src.size(); i++) {
+    array[i] = static_cast<bool>(src[i]);
+  }
+
+  auto dst_place = ctx.GetPlace();
+  auto src_ptr = static_cast<const void*>(array);
+  platform::CPUPlace src_place;
+  dst->Resize({static_cast<int64_t>(src.size())});
+  auto dst_ptr = static_cast<void*>(dst->mutable_data<bool>(dst_place));
+  auto size = src.size() * sizeof(bool);
+
+  if (platform::is_cpu_place(dst_place)) {
+    memory::Copy(BOOST_GET_CONST(platform::CPUPlace, dst_place), dst_ptr,
+                 src_place, src_ptr, size);
+  }
+#ifdef PADDLE_WITH_CUDA
+  else if (platform::is_gpu_place(dst_place)) {  // NOLINT
+    memory::Copy(
+        BOOST_GET_CONST(platform::CUDAPlace, dst_place), dst_ptr, src_place,
+        src_ptr, size,
+        reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream());
+  }
+#endif
+#ifdef PADDLE_WITH_ASCEND_CL
+  else if (platform::is_npu_place(dst_place)) {  // NOLINT
+    memory::Copy(
+        BOOST_GET_CONST(platform::NPUPlace, dst_place), dst_ptr, src_place,
+        src_ptr, size,
+        reinterpret_cast<const platform::NPUDeviceContext&>(ctx).stream());
+  }
+#endif
+  delete[] array;
+}
+
 template <typename T>
 void TensorFromVector(const std::vector<T>& src, Tensor* dst) {
   platform::CPUPlace dst_place = platform::CPUPlace();
@@ -180,6 +224,23 @@ void TensorFromVector(const std::vector<T>& src, Tensor* dst) {
   memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size);
 }
 
+template <>
+inline void TensorFromVector(const std::vector<bool>& src, Tensor* dst) {
+  bool* array = new bool[src.size()];
+  for (unsigned int i = 0; i < src.size(); i++) {
+    array[i] = static_cast<bool>(src[i]);
+  }
+  platform::CPUPlace dst_place = platform::CPUPlace();
+  auto src_ptr = static_cast<const void*>(array);
+  platform::CPUPlace src_place;
+  dst->Resize({static_cast<int64_t>(src.size())});
+  auto dst_ptr = static_cast<void*>(dst->mutable_data<bool>(dst_place));
+  auto size = src.size() * sizeof(bool);
+
+  memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size);
+  delete[] array;
+}
+
 template <typename T>
 void TensorToVector(const Tensor& src, const platform::DeviceContext& ctx,
                     std::vector<T>* dst) {
@@ -213,6 +274,46 @@ void TensorToVector(const Tensor& src, const platform::DeviceContext& ctx,
 #endif
 }
 
+template <>
+inline void TensorToVector(const Tensor& src,
+                           const platform::DeviceContext& ctx,
+                           std::vector<bool>* dst) {
+  auto src_ptr = static_cast<const void*>(src.data<bool>());
+  auto size = src.numel() * sizeof(bool);
+
+  bool* array = new bool[src.numel()];
+
+  platform::CPUPlace dst_place;
+  dst->resize(src.numel());
+  auto dst_ptr = static_cast<void*>(array);
+
+  if (platform::is_cpu_place(src.place())) {
+    memory::Copy(dst_place, dst_ptr,
+                 BOOST_GET_CONST(platform::CPUPlace, src.place()), src_ptr,
+                 size);
+  }
+#ifdef PADDLE_WITH_CUDA
+  else if (platform::is_gpu_place(src.place())) {  // NOLINT
+    memory::Copy(
+        dst_place, dst_ptr, BOOST_GET_CONST(platform::CUDAPlace, src.place()),
+        src_ptr, size,
+        reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream());
+  }
+#endif
+#ifdef PADDLE_WITH_ASCEND_CL
+  else if (platform::is_npu_place(src.place())) {  // NOLINT
+    memory::Copy(
+        dst_place, dst_ptr, BOOST_GET_CONST(platform::NPUPlace, src.place()),
+        src_ptr, size,
+        reinterpret_cast<const platform::NPUDeviceContext&>(ctx).stream());
+  }
+#endif
+  for (unsigned int i = 0; i < src.numel(); i++) {
+    (*dst)[i] = static_cast<bool>(array[i]);
+  }
+  delete[] array;
+}
+
 template <typename T>
 void TensorToVector(const Tensor& src, std::vector<T>* dst) {
   auto src_ptr = static_cast<const void*>(src.data<T>());
@@ -232,6 +333,32 @@ void TensorToVector(const Tensor& src, std::vector<T>* dst) {
                BOOST_GET_CONST(platform::CPUPlace, src.place()), src_ptr, size);
 }
 
+template <>
+inline void TensorToVector(const Tensor& src, std::vector<bool>* dst) {
+  auto src_ptr = static_cast<const void*>(src.data<bool>());
+  auto size = src.numel() * sizeof(bool);
+
+  bool* array = new bool[src.numel()];
+
+  platform::CPUPlace dst_place;
+  dst->resize(src.numel());
+  auto dst_ptr = static_cast<void*>(array);
+
+  PADDLE_ENFORCE_EQ(
+      platform::is_cpu_place(src.place()), true,
+      platform::errors::InvalidArgument(
+          "The input tensor should be CPU device, but actually it is in %s.",
+          src.place()));
+
+  memory::Copy(dst_place, dst_ptr,
+               BOOST_GET_CONST(platform::CPUPlace, src.place()), src_ptr, size);
+
+  for (unsigned int i = 0; i < src.numel(); i++) {
+    (*dst)[i] = static_cast<bool>(array[i]);
+  }
+  delete[] array;
+}
+
 std::ostream& operator<<(std::ostream& os, const Tensor& t);
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/fluid/framework/tensor_util_test.cc b/paddle/fluid/framework/tensor_util_test.cc
index e389cb34679a2459b547497a46e97586c7cd72bd..dad014123014f7d85f0636b5e790da072cd2f850 100644
--- a/paddle/fluid/framework/tensor_util_test.cc
+++ b/paddle/fluid/framework/tensor_util_test.cc
@@ -242,6 +242,61 @@ TEST(TensorToVector, Tensor) {
 #endif
 }
 
+TEST(TensorToVector, Tensor_bool) {
+  {
+    paddle::framework::Tensor src;
+    bool* src_ptr =
+        src.mutable_data<bool>({3, 3}, paddle::platform::CPUPlace());
+    for (int i = 0; i < 3 * 3; ++i) {
+      src_ptr[i] = static_cast<bool>(i % 2);
+    }
+
+    paddle::platform::CPUPlace place;
+    std::vector<bool> dst;
+    paddle::framework::TensorToVector<bool>(src, &dst);
+
+    for (int i = 0; i < 3 * 3; ++i) {
+      EXPECT_EQ(src_ptr[i], dst[i]);
+    }
+  }
+#ifdef PADDLE_WITH_CUDA
+  {
+    std::vector<bool> src_vec = {
+        false, true, false, true, false, true, false, true, false,
+    };
+    paddle::framework::Tensor gpu_tensor;
+    paddle::platform::CUDAPlace place;
+    paddle::platform::CUDADeviceContext gpu_ctx(place);
+    paddle::framework::TensorFromVector<bool>(src_vec, gpu_ctx, &gpu_tensor);
+
+    std::vector<bool> dst;
+    paddle::framework::TensorToVector<bool>(gpu_tensor, gpu_ctx, &dst);
+
+    for (int i = 0; i < 3 * 3; ++i) {
+      EXPECT_EQ(src_vec[i], dst[i]);
+    }
+  }
+#endif
+#ifdef PADDLE_WITH_ASCEND_CL
+  {
+    std::vector<bool> src_vec = {
+        false, true, false, true, false, true, false, true, false,
+    };
+    paddle::framework::Tensor npu_tensor;
+    paddle::platform::NPUPlace place(0);
+    paddle::platform::NPUDeviceContext npu_ctx(place);
+    paddle::framework::TensorFromVector<bool>(src_vec, npu_ctx, &npu_tensor);
+
+    std::vector<bool> dst;
+    paddle::framework::TensorToVector<bool>(npu_tensor, npu_ctx, &dst);
+
+    for (int i = 0; i < 3 * 3; ++i) {
+      EXPECT_EQ(src_vec[i], dst[i]);
+    }
+  }
+#endif
+}
+
 TEST(TensorFromDLPack, Tensor) {
   {
     std::vector<int> src_vec = {1, 2, 3, 4, 5, 6, 7, 8, 9};