From a56e16a74513c2b1e2d645f002010d3174064d00 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=9F=B3=E6=99=93=E4=BC=9F?= <39303645+Shixiaowei02@users.noreply.github.com>
Date: Mon, 24 Jan 2022 11:58:41 +0800
Subject: [PATCH] [Refactoring Tensor PR #5] replace storage with pten
 allocation (#39085)

* updates callers, test=develop
* updates tensor, test=develop
* fixes errors, test=develop
* remove some dtypes, test=develop
* fix errors in the base storage modification, test=develop
* fixes a bug, test=develop
* fixes the bugs in push the whole, test=develop
* updates, test=develop
* update
* update, test=develop
* fixes the mac-py3 CI, test=develop
* remove the storage impl, test=develop
* updates some codes, test=develop
* update, test=develop
* updates pten allocation, test=develop
---
 .../accumulation/gradient_accumulation.cc      |   5 +-
 .../accumulation_node_test.cc                  |   6 +-
 .../autograd_meta_test.cc                      |   2 +-
 .../data_structure_tests/eager_tensor_test.cc  |   4 +-
 .../grad_node_info_test.cc                     |   8 +-
 .../data_structure_tests/grad_node_test.h      |   2 +-
 .../grad_tensor_holder_test.cc                 |   4 +-
 .../tensor_wrapper_test.cc                     |   4 +-
 .../tests/task_tests/eager_utils_test.cc       |   6 +-
 .../tests/task_tests/fwd_bwd_joint_test.cc     |   6 +-
 .../fluid/eager/tests/task_tests/hook_test.cc  |   4 +-
 paddle/fluid/eager/tests/test_utils.h          |   4 +-
 paddle/fluid/pybind/eager.cc                   |   1 +
 paddle/pten/api/ext/dispatch.h                 |   6 -
 paddle/pten/api/lib/tensor.cc                  |  26 +-
 paddle/pten/api/lib/utils.cc                   |   1 +
 paddle/pten/api/lib/utils/storage.h            |  18 ++
 paddle/pten/common/scalar.h                    |   3 -
 paddle/pten/core/allocator.h                   | 130 ++++------
 paddle/pten/core/candidate/allocator.h         | 107 ---------
 paddle/pten/core/dense_tensor.cc               | 224 +++++-------------
 paddle/pten/core/dense_tensor.h                |  82 +++----
 paddle/pten/core/device_context.h              |   2 +-
 paddle/pten/core/storage.h                     |  22 +-
 paddle/pten/kernels/cpu/cast_kernel.cc         |   2 +-
 paddle/pten/kernels/cpu/copy_kernel.cc         |  16 +-
 paddle/pten/kernels/cpu/dot_kernel.cc          |   2 +-
 paddle/pten/kernels/cpu/elementwise.h          |  26 +-
 paddle/pten/kernels/cpu/math_kernel.cc         |   4 +-
 paddle/pten/kernels/cpu/reduce.h               |   4 +-
 paddle/pten/kernels/cpu/scale_kernel.cc        |   2 +-
 paddle/pten/kernels/empty_kernel.cc            |   2 +-
 paddle/pten/kernels/funcs/elementwise_base.h   |   4 +-
 paddle/pten/kernels/funcs/transpose.cc         |   5 +-
 paddle/pten/kernels/funcs/transpose.cu         |   5 +-
 paddle/pten/kernels/gpu/cast_kernel.cu         |   2 +-
 paddle/pten/kernels/gpu/copy_kernel.cu         |   2 +-
 paddle/pten/kernels/gpu/dot_kernel.cu          |   2 +-
 paddle/pten/kernels/gpu/elementwise.h          |   2 +-
 paddle/pten/kernels/gpu/math_kernel.cu         |   2 +-
 paddle/pten/kernels/gpu/reduce.h               |   9 +-
 paddle/pten/kernels/gpu/scale_kernel.cu        |   2 +-
 .../pten/kernels/impl/complex_kernel_impl.h    |   2 +-
 .../pten/kernels/impl/dot_grad_kernel_impl.h   |  84 +++----
 paddle/pten/kernels/impl/full_kernel_impl.h    |   2 +-
 .../kernels/impl/matmul_grad_kernel_impl.h     |   8 +-
 paddle/pten/kernels/impl/matmul_kernel_impl.h  |  36 +--
 paddle/pten/kernels/impl/sign_kernel_impl.h    |   2 +-
 paddle/pten/kernels/reshape_kernel.cc          |   7 +-
 paddle/pten/kernels/xpu/copy_kernel.cc         |   2 +-
 paddle/pten/tests/api/test_cast_api.cc         |   3 +-
 paddle/pten/tests/api/test_conj_api.cc         |   3 +-
 paddle/pten/tests/api/test_dot_api.cc          |   6 +-
 paddle/pten/tests/api/test_elementwise_api.cc  |  24 +-
 paddle/pten/tests/api/test_empty_api.cc        |  13 +-
 paddle/pten/tests/api/test_fill_api.cc         |  16 +-
 paddle/pten/tests/api/test_flatten_api.cc      |   3 +-
 paddle/pten/tests/api/test_matmul_api.cc       |  10 +-
 paddle/pten/tests/api/test_mean_api.cc         |   3 +-
 paddle/pten/tests/api/test_pten_tensor.cc      |  25 +-
 paddle/pten/tests/api/test_reshape_api.cc      |   8 +-
 paddle/pten/tests/api/test_sum_api.cc          |   3 +-
 paddle/pten/tests/api/test_to_api.cc           |   3 +-
 paddle/pten/tests/core/test_dense_tensor.cc    |   2 -
 .../pten/tests/kernels/test_cast_dev_api.cc    |   3 +-
 .../pten/tests/kernels/test_conj_dev_api.cc    |   3 +-
 .../pten/tests/kernels/test_copy_dev_api.cc    |   3 +-
 .../tests/kernels/test_creation_dev_api.cc     |   6 +-
 paddle/pten/tests/kernels/test_dot_dev_api.cc  |   6 +-
 .../tests/kernels/test_elementwise_dev_api.cc  |  24 +-
 .../tests/kernels/test_flatten_dev_api.cc      |   3 +-
 .../pten/tests/kernels/test_matmul_dev_api.cc  |   6 +-
 .../pten/tests/kernels/test_mean_dev_api.cc    |   3 +-
 .../tests/kernels/test_reshape_dev_api.cc      |   3 +-
 .../pten/tests/kernels/test_scale_dev_api.cc   |   8 +-
 paddle/pten/tests/kernels/test_sum_dev_api.cc  |   3 +-
 .../fluid/tests/custom_op/attr_test_op.cc      |  34 +--
 .../fluid/tests/custom_op/concat_and_split.h   |   6 +-
 .../fluid/tests/custom_op/custom_conj_op.cc    |   4 +-
 .../fluid/tests/custom_op/dispatch_test_op.cc  |  24 +-
 80 files changed, 490 insertions(+), 684 deletions(-)
 delete mode 100644 paddle/pten/core/candidate/allocator.h

diff --git a/paddle/fluid/eager/accumulation/gradient_accumulation.cc b/paddle/fluid/eager/accumulation/gradient_accumulation.cc
index ffd76c5bda6..826b02b3db0 100644
--- a/paddle/fluid/eager/accumulation/gradient_accumulation.cc
+++ b/paddle/fluid/eager/accumulation/gradient_accumulation.cc
@@ -216,8 +216,9 @@ void TensorAdd(const egr::EagerTensor& src, egr::EagerTensor* dst) {
 #define PADDLE_TENSOR_ADD(cpp_type)                                          \
   if (data_type == paddle::framework::DataTypeTrait<cpp_type>::DataType()) { \
-    TensorAddFunctor<cpp_type> func(numel, src_tensor->data<cpp_type>(),     \
-                                    dst_tensor->mutable_data<cpp_type>());   \
+    TensorAddFunctor<cpp_type> func(                                         \
+        numel, src_tensor->data<cpp_type>(),                                 \
+        dst_tensor->mutable_data<cpp_type>(place));                          \
     paddle::platform::VisitPlace(place, func);                               \
     return;                                                                  \
   }
diff --git a/paddle/fluid/eager/tests/data_structure_tests/accumulation_node_test.cc b/paddle/fluid/eager/tests/data_structure_tests/accumulation_node_test.cc
index cdc97010095..64f980d709a 100644
--- a/paddle/fluid/eager/tests/data_structure_tests/accumulation_node_test.cc
+++ b/paddle/fluid/eager/tests/data_structure_tests/accumulation_node_test.cc
@@ -36,7 +36,8 @@ TEST(AccumulationNode, EagerTensor) {
           paddle::platform::CPUPlace())
           .get(),
       meta);
-  dt0->mutable_data<paddle::platform::float16>()[0] = 10.0;
+  dt0->mutable_data<paddle::platform::float16>(
+      paddle::platform::CPUPlace())[0] = 10.0;
   EagerTensor et0 = EagerTensor(dt0);

   std::shared_ptr<pten::DenseTensor> dt1 = std::make_shared<pten::DenseTensor>(
@@ -45,7 +46,8 @@ TEST(AccumulationNode, EagerTensor) {
           paddle::platform::CPUPlace())
           .get(),
       meta);

-  dt1->mutable_data<paddle::platform::float16>()[0] = 20.0;
+  dt1->mutable_data<paddle::platform::float16>(
+      paddle::platform::CPUPlace())[0] = 20.0;
   EagerTensor et1 = EagerTensor(dt1);

   std::shared_ptr<pten::DenseTensor> grad_dt =
diff --git a/paddle/fluid/eager/tests/data_structure_tests/autograd_meta_test.cc b/paddle/fluid/eager/tests/data_structure_tests/autograd_meta_test.cc
index 3d45dc831d4..1c5102f7a21 100644
--- a/paddle/fluid/eager/tests/data_structure_tests/autograd_meta_test.cc
+++ b/paddle/fluid/eager/tests/data_structure_tests/autograd_meta_test.cc
@@ -46,7 +46,7 @@ TEST(AutogradMeta, MemberFunction) {
           paddle::platform::CPUPlace())
           .get(),
       meta);
-  auto* dt_ptr = dt->mutable_data<float>();
+  auto* dt_ptr = dt->mutable_data<float>(paddle::platform::CPUPlace());
   dt_ptr[0] = 5.0f;
   dt_ptr[1] = 10.0f;
   grad_t->set_impl(dt);
diff --git a/paddle/fluid/eager/tests/data_structure_tests/eager_tensor_test.cc b/paddle/fluid/eager/tests/data_structure_tests/eager_tensor_test.cc
index a483ddb6a98..620fa52cac6 100644
---
a/paddle/fluid/eager/tests/data_structure_tests/eager_tensor_test.cc +++ b/paddle/fluid/eager/tests/data_structure_tests/eager_tensor_test.cc @@ -40,7 +40,7 @@ TEST(EagerTensor, Constructor) { paddle::platform::CPUPlace()) .get(), meta); - auto* dt_ptr = dt->mutable_data(); + auto* dt_ptr = dt->mutable_data(paddle::platform::CPUPlace()); dt_ptr[0] = 5.0f; dt_ptr[1] = 10.0f; egr::EagerTensor et3 = egr::EagerTensor(dt); @@ -70,7 +70,7 @@ TEST(EagerTensor, MemberFunction) { paddle::platform::CPUPlace()) .get(), meta); - auto* dt_ptr = dt->mutable_data(); + auto* dt_ptr = dt->mutable_data(paddle::platform::CPUPlace()); dt_ptr[0] = 5.0f; dt_ptr[1] = 10.0f; VLOG(6) << "Make Dense Tensor"; diff --git a/paddle/fluid/eager/tests/data_structure_tests/grad_node_info_test.cc b/paddle/fluid/eager/tests/data_structure_tests/grad_node_info_test.cc index 19850b3210b..ea4b4a480e2 100644 --- a/paddle/fluid/eager/tests/data_structure_tests/grad_node_info_test.cc +++ b/paddle/fluid/eager/tests/data_structure_tests/grad_node_info_test.cc @@ -45,7 +45,7 @@ TEST(GradNodeInfo, GradNodeBase) { paddle::platform::CPUPlace()) .get(), meta); - auto* dt_ptr = dt->mutable_data(); + auto* dt_ptr = dt->mutable_data(paddle::platform::CPUPlace()); dt_ptr[0] = 5.0f; egr::EagerTensor et1(dt); grads = {{et1}}; @@ -102,7 +102,7 @@ TEST(GradNodeInfo, GradNodeBase) { paddle::platform::CPUPlace()) .get(), meta); - auto* dt_ptr = dt->mutable_data(); + auto* dt_ptr = dt->mutable_data(paddle::platform::CPUPlace()); dt_ptr[0] = 6.0f; auto* et_ptr = std::dynamic_pointer_cast(et.impl())->data(); @@ -121,8 +121,8 @@ TEST(GradNodeInfo, GradNodeBase) { VLOG(6) << "Test Reduce Hook"; auto reduce_hook = [&](void) -> void { - auto* et_ptr = std::dynamic_pointer_cast(et1.impl()) - ->mutable_data(); + auto* et_ptr = + std::dynamic_pointer_cast(et1.impl())->data(); et_ptr[0] = 100.0; VLOG(6) << "Running Reduce Hook"; }; diff --git a/paddle/fluid/eager/tests/data_structure_tests/grad_node_test.h b/paddle/fluid/eager/tests/data_structure_tests/grad_node_test.h index 433a00e27be..bf9c3a93e16 100644 --- a/paddle/fluid/eager/tests/data_structure_tests/grad_node_test.h +++ b/paddle/fluid/eager/tests/data_structure_tests/grad_node_test.h @@ -41,7 +41,7 @@ class GradTestNode : public egr::GradNodeBase { paddle::platform::CPUPlace()) .get(), meta); - auto* dt_ptr = dt->mutable_data(); + auto* dt_ptr = dt->mutable_data(paddle::platform::CPUPlace()); dt_ptr[0] = 6.0f; egr::EagerTensor et1(dt); std::vector> res = {{et1}}; diff --git a/paddle/fluid/eager/tests/data_structure_tests/grad_tensor_holder_test.cc b/paddle/fluid/eager/tests/data_structure_tests/grad_tensor_holder_test.cc index c88a5f5fdce..c2830bf7ef6 100644 --- a/paddle/fluid/eager/tests/data_structure_tests/grad_tensor_holder_test.cc +++ b/paddle/fluid/eager/tests/data_structure_tests/grad_tensor_holder_test.cc @@ -57,7 +57,7 @@ TEST(GradTensorHolder, Interfaces) { paddle::platform::CPUPlace()) .get(), meta); - dt0->mutable_data()[0] = 10.0; + dt0->mutable_data(paddle::platform::CPUPlace())[0] = 10.0; EagerTensor et0 = EagerTensor(dt0); std::shared_ptr dt1 = std::make_shared( @@ -65,7 +65,7 @@ TEST(GradTensorHolder, Interfaces) { paddle::platform::CPUPlace()) .get(), meta); - dt1->mutable_data()[0] = 20.0; + dt1->mutable_data(paddle::platform::CPUPlace())[0] = 20.0; EagerTensor et1 = EagerTensor(dt1); // Constructor empty GradTensorHolder diff --git a/paddle/fluid/eager/tests/data_structure_tests/tensor_wrapper_test.cc b/paddle/fluid/eager/tests/data_structure_tests/tensor_wrapper_test.cc index 
8bc739d455a..742a64ecec2 100644 --- a/paddle/fluid/eager/tests/data_structure_tests/tensor_wrapper_test.cc +++ b/paddle/fluid/eager/tests/data_structure_tests/tensor_wrapper_test.cc @@ -29,7 +29,7 @@ TEST(TensorWrapper, Basic) { paddle::platform::CPUPlace()) .get(), meta); - auto* dt_ptr = dt->mutable_data(); + auto* dt_ptr = dt->mutable_data(paddle::platform::CPUPlace()); dt_ptr[0] = 5.0f; dt_ptr[1] = 10.0f; et1.set_impl(dt); @@ -56,7 +56,7 @@ TEST(TensorWrapper, Basic) { paddle::platform::CPUPlace()) .get(), meta2); - auto* dt_ptr2 = dt->mutable_data(); + auto* dt_ptr2 = dt->mutable_data(paddle::platform::CPUPlace()); dt_ptr2[0] = 6.0f; dt_ptr2[1] = 11.0f; et2.set_impl(dt2); diff --git a/paddle/fluid/eager/tests/task_tests/eager_utils_test.cc b/paddle/fluid/eager/tests/task_tests/eager_utils_test.cc index 1b2f1287b06..3bd5b98a164 100644 --- a/paddle/fluid/eager/tests/task_tests/eager_utils_test.cc +++ b/paddle/fluid/eager/tests/task_tests/eager_utils_test.cc @@ -35,7 +35,7 @@ TEST(EagerUtils, AutoGradMeta) { paddle::platform::CPUPlace()) .get(), meta); - dt0->mutable_data()[0] = 10.0; + dt0->mutable_data(paddle::platform::CPUPlace())[0] = 10.0; EagerTensor et0 = EagerTensor(dt0); std::shared_ptr dt1 = std::make_shared( @@ -43,7 +43,7 @@ TEST(EagerUtils, AutoGradMeta) { paddle::platform::CPUPlace()) .get(), meta); - dt1->mutable_data()[0] = 20.0; + dt1->mutable_data(paddle::platform::CPUPlace())[0] = 20.0; EagerTensor et1 = EagerTensor(dt1); std::vector ets = {et0, et1}; @@ -112,7 +112,7 @@ egr::EagerTensor CreateTestCPUTensor(T val, paddle::platform::CPUPlace()) .get(), meta); - auto* dt_ptr = dt->mutable_data(); + auto* dt_ptr = dt->mutable_data(paddle::platform::CPUPlace()); for (int64_t i = 0; i < dt->numel(); i++) { dt_ptr[i] = val; } diff --git a/paddle/fluid/eager/tests/task_tests/fwd_bwd_joint_test.cc b/paddle/fluid/eager/tests/task_tests/fwd_bwd_joint_test.cc index 1fef0905b4c..45b7b800495 100644 --- a/paddle/fluid/eager/tests/task_tests/fwd_bwd_joint_test.cc +++ b/paddle/fluid/eager/tests/task_tests/fwd_bwd_joint_test.cc @@ -44,8 +44,8 @@ egr::EagerTensor hook_function(const egr::EagerTensor& t) { paddle::memory::Alloc(place, bytes_size)), std::move(ret_meta)); - float* t_ptr = t_dense->mutable_data(); - float* ret_ptr = ret_dense->mutable_data(); + float* t_ptr = t_dense->mutable_data(place); + float* ret_ptr = ret_dense->mutable_data(place); for (int i = 0; i < ret_dense->numel(); i++) { ret_ptr[i] = t_ptr[i] + 5.0; } @@ -184,7 +184,7 @@ TEST(FwdBwdJoint, BranchedNodes) { // Examine Forward Output 2 { auto dense_out = std::dynamic_pointer_cast(out2.impl()); - float* ptr = dense_out->mutable_data(); + float* ptr = dense_out->mutable_data(paddle::platform::CPUPlace()); for (int i = 0; i < 20; i++) { PADDLE_ENFORCE(ptr[i] == 150.0, paddle::platform::errors::Fatal( diff --git a/paddle/fluid/eager/tests/task_tests/hook_test.cc b/paddle/fluid/eager/tests/task_tests/hook_test.cc index 4f4a33b1a74..3d61167c52e 100644 --- a/paddle/fluid/eager/tests/task_tests/hook_test.cc +++ b/paddle/fluid/eager/tests/task_tests/hook_test.cc @@ -45,8 +45,8 @@ egr::EagerTensor hook_function(const egr::EagerTensor& t) { paddle::memory::Alloc(place, bytes_size)), std::move(ret_meta)); - float* t_ptr = t_dense->mutable_data(); - float* ret_ptr = ret_dense->mutable_data(); + float* t_ptr = t_dense->mutable_data(place); + float* ret_ptr = ret_dense->mutable_data(place); for (int i = 0; i < ret_dense->numel(); i++) { ret_ptr[i] = t_ptr[i] + 3.0; } diff --git a/paddle/fluid/eager/tests/test_utils.h 
b/paddle/fluid/eager/tests/test_utils.h index e7f3a89bf06..9c217dff499 100644 --- a/paddle/fluid/eager/tests/test_utils.h +++ b/paddle/fluid/eager/tests/test_utils.h @@ -34,7 +34,7 @@ bool CompareGradTensorWithValue(const egr::EagerTensor& target, T value) { egr::AutogradMeta* meta = egr::EagerUtils::unsafe_autograd_meta(target); auto grad_dense = std::dynamic_pointer_cast(meta->Grad().impl()); - T* ptr = grad_dense->mutable_data(); + T* ptr = grad_dense->data(); std::vector host_data(grad_dense->numel()); if (paddle::platform::is_gpu_place(grad_dense->place())) { @@ -67,7 +67,7 @@ template bool CompareTensorWithValue(const egr::EagerTensor& target, T value) { // TODO(jiabin): Support Selected Rows later auto dense_t = std::dynamic_pointer_cast(target.impl()); - T* ptr = dense_t->mutable_data(); + T* ptr = dense_t->data(); std::vector host_data(dense_t->numel()); if (paddle::platform::is_gpu_place(dense_t->place())) { diff --git a/paddle/fluid/pybind/eager.cc b/paddle/fluid/pybind/eager.cc index 3a7043809d9..607423d64f5 100644 --- a/paddle/fluid/pybind/eager.cc +++ b/paddle/fluid/pybind/eager.cc @@ -80,6 +80,7 @@ void EmptyEagerTensorInitializer( std::make_shared( pten::make_intrusive(place), pten::DenseTensorMeta(pten::TransToPtenDataType(dtype), ddims)); + dense_tensor->mutable_data(place); self->eager_tensor.set_impl(dense_tensor); } else { PADDLE_THROW(platform::errors::InvalidArgument( diff --git a/paddle/pten/api/ext/dispatch.h b/paddle/pten/api/ext/dispatch.h index 07d29ef3e14..945a9557c40 100644 --- a/paddle/pten/api/ext/dispatch.h +++ b/paddle/pten/api/ext/dispatch.h @@ -272,16 +272,10 @@ namespace paddle { NAME, ::pten::DataType::UINT8, uint8_t, __VA_ARGS__) \ PD_PRIVATE_CASE_TYPE( \ NAME, ::pten::DataType::INT16, int16_t, __VA_ARGS__) \ - PD_PRIVATE_CASE_TYPE( \ - NAME, ::pten::DataType::UINT16, uint16_t, __VA_ARGS__) \ PD_PRIVATE_CASE_TYPE( \ NAME, ::pten::DataType::INT32, int32_t, __VA_ARGS__) \ - PD_PRIVATE_CASE_TYPE( \ - NAME, ::pten::DataType::UINT32, uint32_t, __VA_ARGS__) \ PD_PRIVATE_CASE_TYPE( \ NAME, ::pten::DataType::INT64, int64_t, __VA_ARGS__) \ - PD_PRIVATE_CASE_TYPE( \ - NAME, ::pten::DataType::UINT64, uint64_t, __VA_ARGS__) \ PD_PRIVATE_CASE_TYPE(NAME, \ ::pten::DataType::BFLOAT16, \ paddle::experimental::bfloat16, \ diff --git a/paddle/pten/api/lib/tensor.cc b/paddle/pten/api/lib/tensor.cc index 0ccc9c56dbf..3389dacec36 100644 --- a/paddle/pten/api/lib/tensor.cc +++ b/paddle/pten/api/lib/tensor.cc @@ -149,8 +149,8 @@ bool Tensor::is_cuda() const { template T *Tensor::mutable_data() { if (is_dense_tensor()) { - return std::dynamic_pointer_cast(impl_) - ->mutable_data(); + return std::dynamic_pointer_cast(impl_)->mutable_data( + ConvertExtPlaceToInnerPlace(place())); } return nullptr; } @@ -173,12 +173,18 @@ Tensor::mutable_data(); template T *Tensor::mutable_data(const PlaceType &place) { auto inner_place = ConvertExtPlaceToInnerPlace(place); - PADDLE_ENFORCE_EQ( - platform::is_same_place(inner_place, impl_->place()), - true, - platform::errors::Unimplemented("Modification of tensor place through " - "mutable_data is not supported now")); - return mutable_data(); + if (impl_->initialized()) { + PADDLE_ENFORCE_EQ( + platform::is_same_place(inner_place, impl_->place()), + true, + platform::errors::Unimplemented("Modification of tensor place through " + "mutable_data is not supported now")); + } + if (is_dense_tensor()) { + return std::dynamic_pointer_cast(impl_)->mutable_data( + inner_place); + } + return nullptr; } template PADDLE_API float 
*Tensor::mutable_data(const PlaceType &place); @@ -205,7 +211,8 @@ Tensor::mutable_data(const PlaceType &place); template const T *Tensor::data() const { if (is_dense_tensor()) { - return std::dynamic_pointer_cast(impl_)->data(); + return std::dynamic_pointer_cast(impl_)->mutable_data( + ConvertExtPlaceToInnerPlace(place())); } return nullptr; } @@ -217,7 +224,6 @@ template PADDLE_API const int32_t *Tensor::data() const; template PADDLE_API const uint8_t *Tensor::data() const; template PADDLE_API const int8_t *Tensor::data() const; template PADDLE_API const int16_t *Tensor::data() const; -template PADDLE_API const uint16_t *Tensor::data() const; template PADDLE_API const bool *Tensor::data() const; template PADDLE_API const paddle::platform::complex *Tensor::data>() const; diff --git a/paddle/pten/api/lib/utils.cc b/paddle/pten/api/lib/utils.cc index f42f3b37f0a..aacbfb15ed7 100644 --- a/paddle/pten/api/lib/utils.cc +++ b/paddle/pten/api/lib/utils.cc @@ -65,6 +65,7 @@ PADDLE_API Tensor copy_to(const Tensor& x, Backend backend, bool blocking) { pten::make_intrusive( pten::TransToFluidPlace(backend)), std::move(out_meta)); + dense_out->mutable_data(pten::TransToFluidPlace(backend)); kernel_context.EmplaceBackOutput(dense_out.get()); Tensor out; out.set_impl(dense_out); diff --git a/paddle/pten/api/lib/utils/storage.h b/paddle/pten/api/lib/utils/storage.h index e102ecbc5de..a02e5d46a65 100644 --- a/paddle/pten/api/lib/utils/storage.h +++ b/paddle/pten/api/lib/utils/storage.h @@ -39,6 +39,18 @@ class ExternalStorage : public pten::Storage { size_ = 0; } + void set_data_shared( + const std::shared_ptr& holder) override { + CHECK(holder); + data_ = holder; + size_ = holder->size(); + } + + std::shared_ptr&& move_data_shared() override { + size_ = 0; + return std::move(data_); + } + size_t size() const noexcept override { return size_; } const paddle::platform::Place& place() const override { PADDLE_ENFORCE_NOT_NULL( @@ -92,6 +104,12 @@ class SharedStorage : public pten::Storage { } } + std::shared_ptr&& move_data_shared() override { + size_ = 0; + place_ = Place(); + return std::move(data_); + } + size_t size() const noexcept override { return data_ ? data_->size() : size_; } diff --git a/paddle/pten/common/scalar.h b/paddle/pten/common/scalar.h index 36205a0e4c2..5c8fb046330 100644 --- a/paddle/pten/common/scalar.h +++ b/paddle/pten/common/scalar.h @@ -133,9 +133,6 @@ class ScalarBase { case DataType::INT8: data_.i8 = tensor.template data()[0]; break; - case DataType::UINT16: - data_.ui16 = tensor.template data()[0]; - break; case DataType::UINT8: data_.ui8 = tensor.template data()[0]; break; diff --git a/paddle/pten/core/allocator.h b/paddle/pten/core/allocator.h index 2647490c9f5..75d42c4fd15 100644 --- a/paddle/pten/core/allocator.h +++ b/paddle/pten/core/allocator.h @@ -15,46 +15,16 @@ limitations under the License. */ #pragma once #include +#include #include "paddle/fluid/platform/place.h" -#include "paddle/pten/core/candidate/allocator.h" namespace pten { -namespace deprecated { -/// \brief Encapsulates strategies for access/addressing, allocation/ -/// deallocation and construction/destruction of objects. -class RawAllocator { - public: - using Place = paddle::platform::Place; - - /// \brief Default destructor. - virtual ~RawAllocator() = default; - - /// \brief Allocates storage suitable for an array object of n bytes - /// and creates the array, but does not construct array elements. - /// May throw exceptions. - /// \param bytes_size The number of bytes to allocate. 
-  /// \return The first address allocated.
-  virtual void* Allocate(size_t bytes_size) = 0;
-
-  /// \brief Deallocates storage pointed to ptr, which must be a value
-  /// returned by a previous call to allocate that has not been
-  /// invalidated by an intervening call to deallocate. The bytes_size
-  /// must match the value previously passed to allocate.
-  /// \param ptr The first address to deallocate.
-  /// \param bytes_size The number of bytes to deallocate.
-  virtual void Deallocate(void* ptr, size_t bytes_size) = 0;
-
-  /// \brief Get the place value of the allocator and the allocation.
-  /// \return The place value of the allocator and the allocation.
-  virtual const Place& place() const = 0;
-};
-
-/// \brief Fancy pointer with context. The use of this data type
+/// \brief Fancy pointer with deleter. The use of this data type
 /// is to be compatible with allocators from different frameworks
 /// without significant performance loss. This class does not
 /// support being inherited.
-class Allocation final {
+class Allocation {
  public:
   using Place = paddle::platform::Place;
   using DeleterFnPtr = void (*)(Allocation*);
@@ -62,63 +32,54 @@ class Allocation final {
   Allocation() = default;

   // Don't own resources, only provide access.
-  Allocation(void* data, const Place& place) : data_(data), place_(place) {}
+  Allocation(void* data, size_t size, const Place& place)
+      : ptr_(data), size_(size), place_(place) {}

   // Own resources.
-  Allocation(void* data, void* ctx, DeleterFnPtr deleter, const Place& place)
-      : data_(data), ctx_(ctx), deleter_(deleter), place_(place) {}
+  Allocation(void* data, size_t size, DeleterFnPtr deleter, const Place& place)
+      : ptr_(data), size_(size), deleter_(deleter), place_(place) {}

-  Allocation(Allocation&& other) { swap(*this, other); }
-  Allocation& operator=(Allocation&& other) {
+  Allocation(Allocation&& other) noexcept { swap(*this, other); }
+  Allocation& operator=(Allocation&& other) noexcept {
     // Exchange them explicitly to avoid making a move equivalent
     // to a copy.
     swap(*this, other);
     return *this;
   }

-  ~Allocation() { Clear(); }
-  void* ptr() const noexcept { return data_; }
-  void* operator->() const noexcept { return data_; }
-  operator bool() const noexcept { return data_ || ctx_; }
-  const Place& place() const noexcept { return place_; }
-
-  void Clear() {
+  virtual ~Allocation() {
     if (deleter_) {
       deleter_(this);
     }
-    ctx_ = nullptr;
-    deleter_ = nullptr;
-    data_ = nullptr;
   }

+  // Returns the holding pointer.
+  // NOTE: For performance consideration, it is better not to make this method
+  // a virtual method. If we want to implement a `defragmentation` later,
+  // we might need to make the `ptr_` field a protected field, and add a
+  // virtual method like `defragmentation` to change `ptr_`.
+  void* ptr() const noexcept { return ptr_; }
+
+  // Returns the size of this memory buffer, i.e., ptr() + size() - 1 is the
+  // last valid element.
+  //
+  // NOTE: Some allocators might allocate more memory than requested, so the
+  // size could be larger than the request. For example,
+  // the AlignedAllocator will always allocate memory as size + kAlignment.
+  // The raw pointer might not be aligned, so an offset might be added to the
+  // raw pointer. The size of this allocation will be
+  // `size + kAlignment - offset`.
+  size_t size() const noexcept { return size_; }
+
+  void* operator->() const noexcept { return ptr_; }
+  operator bool() const noexcept { return ptr_; }
+  const Place& place() const noexcept { return place_; }
   DeleterFnPtr deleter() const noexcept { return deleter_; }

-  template <typename T>
-  T* CastContextWithoutCheck() const noexcept {
-    return static_cast<T*>(ctx_);
-  }
-
-  /// \brief Statically cast the void pointer of the context object to
-  /// the primitive type. Conversion of any pointer to void* and back
-  /// to pointer to the original cv type preserves its original value.
-  /// \param T The primitive type name of the context pointer.
-  /// \param expected_deleter The destructor passed in to enhance type
-  /// safety checking.
-  template <typename T>
-  T* CastContext(DeleterFnPtr expected_deleter) const {
-    PADDLE_ENFORCE_EQ(
-        deleter_ == expected_deleter,
-        true,
-        paddle::platform::errors::InvalidArgument(
-            "The deleter of the allocation does not match, so the pointer "
-            "cannot be safely removed."));
-    return CastContextWithoutCheck<T>();
-  }
-
- private:
+ protected:
   friend void swap(Allocation& a, Allocation& b) noexcept;
-  void* data_{nullptr};
-  void* ctx_{nullptr};
+  void* ptr_{nullptr};
+  size_t size_{};
   DeleterFnPtr deleter_{nullptr};
   // TODO(Shixiaowei02): Enum needs to be used instead to reduce
   // the construction overhead by more than 50%.
@@ -126,28 +87,21 @@ class Allocation final {
   Place place_;
 };

 inline void swap(Allocation& a, Allocation& b) noexcept {
-  ::std::swap(a.data_, b.data_);
-  ::std::swap(a.ctx_, b.ctx_);
+  ::std::swap(a.ptr_, b.ptr_);
   ::std::swap(a.deleter_, b.deleter_);
   ::std::swap(a.place_, b.place_);
+  ::std::swap(a.size_, b.size_);
 }

-/// \brief Context compatible allocator interface. This allocator is
-/// mainly used for general data structures such as Tensor. The raw
-/// allocator is more universal and efficient.
 class Allocator {
-  using Place = paddle::platform::Place;
-
  public:
+  using DeleterType = std::function<void(Allocation*)>;
+  using AllocationPtr = std::unique_ptr<Allocation, DeleterType>;
+
   virtual ~Allocator() = default;
-  virtual Allocation Allocate(size_t bytes_size) = 0;
-  virtual const Place& place() = 0;
-};
+  virtual AllocationPtr Allocate(size_t bytes_size) = 0;

-inline Allocation Allocate(const std::shared_ptr<Allocator>& a, size_t n) {
-  CHECK(a);
-  return a->Allocate(n);
-}
+  virtual bool IsAllocThreadSafe() const { return false; }
+};

-}  // namespace deprecated
 }  // namespace pten
diff --git a/paddle/pten/core/candidate/allocator.h b/paddle/pten/core/candidate/allocator.h
deleted file mode 100644
index 75d42c4fd15..00000000000
--- a/paddle/pten/core/candidate/allocator.h
+++ /dev/null
@@ -1,107 +0,0 @@
-/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-
-#include <functional>
-#include <memory>
-#include "paddle/fluid/platform/place.h"
-
-namespace pten {
-
-/// \brief Fancy pointer with deleter. The use of this data type
-/// is to be compatible with allocators from different frameworks
-/// without significant performance loss.
This class does not -/// support being inherited. -class Allocation { - public: - using Place = paddle::platform::Place; - using DeleterFnPtr = void (*)(Allocation*); - - Allocation() = default; - - // Don't own resources, only provide access. - Allocation(void* data, size_t size, const Place& place) - : ptr_(data), size_(size), place_(place) {} - - // Own resources. - Allocation(void* data, size_t size, DeleterFnPtr deleter, const Place& place) - : ptr_(data), size_(size), deleter_(deleter), place_(place) {} - - Allocation(Allocation&& other) noexcept { swap(*this, other); } - Allocation& operator=(Allocation&& other) noexcept { - // Exchange them explicitly to avoid moving is equivalent - // to copying. - swap(*this, other); - return *this; - } - - virtual ~Allocation() { - if (deleter_) { - deleter_(this); - } - } - - // Returns the holding pointer. - // NOTE: For performance consideration, it is better not to make this method - // as a virtual method. If we want to implement a `defragmentation` later, - // we might need to make `ptr_` field as a protected field, and add a virtual - // method like `defragmentation` to change `ptr_`. - void* ptr() const noexcept { return ptr_; } - - // Returns the size of this memory buffer, i.e., ptr() + size() - 1 is the - // last valid element. - // - // NOTE: Some allocator might alloc more memory than request. The size - // could larger than its request. For example, - // the AlignedAllocator will always allocate memory as size + kAlignment. - // The raw pointer might not aligned, so an offset might be added to raw - // the pointer. The size of this allocation will be - // `size + kAlignemnt - offset`. - size_t size() const noexcept { return size_; } - - void* operator->() const noexcept { return ptr_; } - operator bool() const noexcept { return ptr_; } - const Place& place() const noexcept { return place_; } - DeleterFnPtr deleter() const noexcept { return deleter_; } - - protected: - friend void swap(Allocation& a, Allocation& b) noexcept; - void* ptr_{nullptr}; - size_t size_{}; - DeleterFnPtr deleter_{nullptr}; - // TODO(Shixiaowei02): Enum needs to be used instead to reduce - // the construction overhead by more than 50%. 
- Place place_; -}; - -inline void swap(Allocation& a, Allocation& b) noexcept { - ::std::swap(a.ptr_, b.ptr_); - ::std::swap(a.deleter_, b.deleter_); - ::std::swap(a.place_, b.place_); - ::std::swap(a.size_, b.size_); -} - -class Allocator { - public: - using DeleterType = std::function; - using AllocationPtr = std::unique_ptr; - - virtual ~Allocator() = default; - virtual AllocationPtr Allocate(size_t bytes_size) = 0; - - virtual bool IsAllocThreadSafe() const { return false; } -}; - -} // namespace pten diff --git a/paddle/pten/core/dense_tensor.cc b/paddle/pten/core/dense_tensor.cc index 4008b6f6cee..b1a5015f010 100644 --- a/paddle/pten/core/dense_tensor.cc +++ b/paddle/pten/core/dense_tensor.cc @@ -33,28 +33,17 @@ extern void TensorCopy(const pten::DenseTensor& src, namespace pten { DenseTensor::DenseTensor(Allocator* a, const DenseTensorMeta& meta) - : meta_(meta), - storage_(make_intrusive(a, SizeOf(dtype()) * numel())) {} + : meta_(meta), holder_(a->Allocate(SizeOf(dtype()) * numel())) {} DenseTensor::DenseTensor(Allocator* a, DenseTensorMeta&& meta) - : meta_(std::move(meta)), - storage_(make_intrusive(a, SizeOf(dtype()) * numel())) {} + : meta_(std::move(meta)), holder_(a->Allocate(SizeOf(dtype()) * numel())) {} -DenseTensor::DenseTensor(intrusive_ptr storage, +DenseTensor::DenseTensor(const std::shared_ptr& holder, const DenseTensorMeta& meta) - : meta_(meta), storage_(std::move(storage)) {} - -DenseTensor::DenseTensor(intrusive_ptr storage, DenseTensorMeta&& meta) - : meta_(std::move(meta)), storage_(std::move(storage)) {} + : meta_(meta), holder_(holder) {} DenseTensor::DenseTensor(const DenseTensor& other) : meta_(other.meta()) { - if (storage_ == nullptr) { - storage_ = make_intrusive( - paddle::platform::CPUPlace()); - } - if (other.storage_ != nullptr && other.storage_->data_shared()) { - storage_->set_data_shared(other.storage_->data_shared()); - } + holder_ = other.holder_; #ifdef PADDLE_WITH_MKLDNN format_ = other.format_; @@ -63,13 +52,7 @@ DenseTensor::DenseTensor(const DenseTensor& other) : meta_(other.meta()) { DenseTensor& DenseTensor::operator=(const DenseTensor& other) { meta_ = other.meta(); - if (storage_ == nullptr) { - storage_ = make_intrusive( - paddle::platform::CPUPlace()); - } - if (other.storage_ != nullptr && other.storage_->data_shared()) { - storage_->set_data_shared(other.storage_->data_shared()); - } + holder_ = other.holder_; #ifdef PADDLE_WITH_MKLDNN format_ = other.format_; #endif @@ -78,7 +61,7 @@ DenseTensor& DenseTensor::operator=(const DenseTensor& other) { DenseTensor& DenseTensor::operator=(DenseTensor&& other) { meta_ = std::move(other.meta_); - storage_.swap(other.storage_); + std::swap(holder_, other.holder_); return *this; } @@ -90,59 +73,7 @@ int64_t DenseTensor::numel() const { } bool DenseTensor::IsSharedWith(const DenseTensor& b) const { - return storage_.get() == b.storage_.get() && storage_.get() != nullptr; -} - -void* DenseTensor::mutable_data(size_t request_bytes) { - PADDLE_ENFORCE( - valid(), - paddle::platform::errors::PreconditionNotMet( - "The meta data must be valid when call the mutable data function.")); - PADDLE_ENFORCE_NOT_NULL( - storage_, - paddle::platform::errors::PreconditionNotMet( - "The storage must be valid when call the mutable data function.")); - size_t bytes = numel() * SizeOf(dtype()); - if (request_bytes) { - PADDLE_ENFORCE_GE(request_bytes, - bytes, - paddle::platform::errors::InvalidArgument( - "The reserved size %d should be enough to meet the " - "volume required by metadata %d.", - request_bytes, 
- bytes)); - bytes = request_bytes; - } - if (!storage_->data() || storage_->size() < bytes + meta_.offset || - storage_->size() == 0) { - VLOG(10) << "mutbale data realloc, original size: " << storage_->size() - << ", new size: " << bytes; - storage_->Realloc(bytes); - meta_.offset = 0; - } - return reinterpret_cast(reinterpret_cast(storage_->data()) + - meta_.offset); -} - -template -T* DenseTensor::mutable_data() { - // In order to be compatible with the original Tensor design and - // execution system, we have to reset the datatype in mutable_data. - // When the compatibility phase is over in the future, we can delete it - if (meta_.dtype == DataType::UNDEFINED) { - VLOG(10) << "change data type in mutbale_data, target dtype - " - << paddle::experimental::CppTypeToDataType::Type(); - const_cast(meta_.dtype) = - paddle::experimental::CppTypeToDataType::Type(); - } - PADDLE_ENFORCE( - (dtype() == paddle::experimental::CppTypeToDataType::Type()), - paddle::platform::errors::InvalidArgument( - "The type of data (%d) we are trying to retrieve does not match the " - "type of data currently contained in the container (%d).", - static_cast(paddle::experimental::CppTypeToDataType::Type()), - static_cast(dtype()))); - return static_cast(mutable_data()); + return holder_ && holder_ == b.Holder(); } template @@ -164,29 +95,27 @@ T* DenseTensor::data() { paddle::platform::errors::InvalidArgument( "The type of data we are trying to retrieve does not match the " "type of data currently contained in the container.")); - PADDLE_ENFORCE_NOT_NULL( - storage_, - paddle::platform::errors::PreconditionNotMet( - "The storage must be valid when call the mutable data function.")); - return reinterpret_cast(data()); + return static_cast(data()); } void* DenseTensor::data() { + check_memory_size(); PADDLE_ENFORCE_NOT_NULL( - storage_, + holder_, paddle::platform::errors::PreconditionNotMet( - "The storage must be valid when call the mutable data function.")); - return reinterpret_cast(reinterpret_cast(storage_->data()) + + "The storage must be valid when call the data function.")); + return reinterpret_cast(reinterpret_cast(holder_->ptr()) + meta_.offset); } const void* DenseTensor::data() const { + check_memory_size(); PADDLE_ENFORCE_NOT_NULL( - storage_, + holder_, paddle::platform::errors::PreconditionNotMet( - "The storage must be valid when call the mutable data function.")); + "The storage must be valid when call the data function.")); return reinterpret_cast( - reinterpret_cast(storage_->data()) + meta_.offset); + reinterpret_cast(holder_->ptr()) + meta_.offset); } void DenseTensor::set_meta(DenseTensorMeta&& meta) { @@ -209,15 +138,14 @@ void DenseTensor::set_meta(DenseTensorMeta&& meta) { */ void DenseTensor::ResizeAndAllocate(const DDim& dims) { meta_.dims = dims; - if (storage_ != nullptr) { - mutable_data(); + if (holder_ != nullptr && place().GetType() != AllocationType::UNDEFINED) { + mutable_data(place()); } } void DenseTensor::ResetLoD(const LoD& lod) { meta_.lod = lod; } #define DATA_MEMBER_FUNC_INSTANTIATION(dtype) \ - template dtype* DenseTensor::mutable_data(); \ template const dtype* DenseTensor::data() const; \ template dtype* DenseTensor::data(); @@ -243,68 +171,47 @@ DATA_MEMBER_FUNC_INSTANTIATION(::paddle::experimental::complex128); /* From framework::Tensor */ /* --------------------------- */ DenseTensor::DenseTensor() { - storage_ = make_intrusive( - paddle::platform::CPUPlace()); inplace_version_counter_ = std::make_shared(0); meta_.dtype = paddle::experimental::DataType::FLOAT32; 
meta_.offset = 0; } -DenseTensor::DenseTensor(const paddle::framework::proto::VarType::Type& dtype) { - storage_ = make_intrusive( - paddle::platform::CPUPlace()); +DenseTensor::DenseTensor(paddle::framework::proto::VarType::Type dtype) { inplace_version_counter_ = std::make_shared(0); meta_.dtype = TransToPtenDataType(dtype); meta_.offset = 0; } size_t DenseTensor::memory_size() const { - if (storage_ == nullptr || storage_->data_shared() == nullptr) { - return 0UL; - } - - return storage_->data_shared()->size() - meta_.offset; + return holder_ == nullptr ? 0UL : holder_->size() - meta_.offset; } void DenseTensor::check_memory_size() const { - PADDLE_ENFORCE_NOT_NULL(storage_, + PADDLE_ENFORCE_NOT_NULL(holder_, paddle::platform::errors::PreconditionNotMet( "Tensor holds no memory. " "Call Tensor::mutable_data firstly.")); - PADDLE_ENFORCE_NOT_NULL(storage_->data_shared(), - paddle::platform::errors::PreconditionNotMet( - "Tensor holds no memory. " - "Call Tensor::mutable_data firstly.")); - size_t size = numel() * SizeOf(dtype()); - PADDLE_ENFORCE_LE( - size, + numel() * SizeOf(dtype()), memory_size(), paddle::platform::errors::PreconditionNotMet( "Tensor's dimension is out of bound." "Tensor's dimension must be equal or less than the size of its " "memory." - "But received Tensor's dimension is d%, memory's size is %d.", - size, + "But received Tensor's dimension is d%, memory's size is %d.", + numel() * SizeOf(dtype()), memory_size())); } const paddle::platform::Place& DenseTensor::place() const { PADDLE_ENFORCE_NOT_NULL( - storage_, + holder_, paddle::platform::errors::PreconditionNotMet( - "Tensor not initialized yet when Tensor::place() is called.")); - if (storage_->data_shared()) { - return storage_->data_shared()->place(); - } - return storage_->place(); + "Tensor not initialized yet when DenseTensor::place() is called.")); + return holder_->place(); } paddle::framework::proto::VarType::Type DenseTensor::type() const { - PADDLE_ENFORCE_NOT_NULL( - storage_, - paddle::platform::errors::PreconditionNotMet( - "Tensor not initialized yet when Tensor::type() is called.")); return TransToProtoVarType(meta_.dtype); } @@ -316,39 +223,31 @@ void DenseTensor::set_layout(const paddle::framework::DataLayout layout) { meta_.layout = layout; } -void DenseTensor::ResetHolder( - const std::shared_ptr& holder) { +void DenseTensor::ResetHolder(const std::shared_ptr& holder) { PADDLE_ENFORCE_EQ( meta_.offset, 0, paddle::platform::errors::Fatal( "Only the offset is supported to zero when the holder is reset.")); - PADDLE_ENFORCE_NOT_NULL( - storage_, - paddle::platform::errors::PreconditionNotMet( - "The storage must be valid when call the mutable data function.")); - - if (storage_->data_shared()) { + if (holder_) { PADDLE_ENFORCE_LE( numel() * SizeOf(dtype()) + meta_.offset, holder->size(), paddle::platform::errors::InvalidArgument( "The size of Holder is not enough to store the Tensor.")); } - - storage_->set_data_shared(holder); + holder_ = holder; } void DenseTensor::ResetHolderWithType( - const std::shared_ptr& holder, - const paddle::framework::proto::VarType::Type& type) { + const std::shared_ptr& holder, + paddle::framework::proto::VarType::Type type) { set_type(type); ResetHolder(holder); } -void DenseTensor::set_type( - const paddle::framework::proto::VarType::Type& type) { +void DenseTensor::set_type(paddle::framework::proto::VarType::Type type) { meta_.dtype = TransToPtenDataType(type); } @@ -369,19 +268,14 @@ void* DenseTensor::mutable_data(const paddle::platform::Place& place, size = 
requested_size; } - if (storage_ == nullptr) { - storage_ = make_intrusive(place); - } - /* some versions of boost::variant don't have operator!= */ - if (storage_->data_shared() == nullptr || - !(storage_->data_shared()->place() == place) || - storage_->data_shared()->size() < size + meta_.offset) { - storage_->Clear(); - storage_->set_data_shared(paddle::memory::AllocShared(place, size)); + if (holder_ == nullptr || !(holder_->place() == place) || + holder_->size() < size + meta_.offset) { + holder_.reset(); + holder_ = paddle::memory::AllocShared(place, size); meta_.offset = 0; } - return reinterpret_cast(reinterpret_cast(storage_->data()) + + return reinterpret_cast(reinterpret_cast(holder_->ptr()) + meta_.offset); } @@ -404,21 +298,16 @@ void* DenseTensor::mutable_data(const paddle::platform::Place& place, "] now")); size_t size = numel() * SizeOf(dtype()); - if (storage_ == nullptr) { - storage_ = make_intrusive(place); - } - /* some versions of boost::variant don't have operator!= */ - if (storage_->data_shared() == nullptr || - !(storage_->data_shared()->place() == place) || - storage_->data_shared()->size() < size + meta_.offset || + if (holder_ == nullptr || !(holder_->place() == place) || + holder_->size() < size + meta_.offset || !(paddle::platform::is_gpu_place(place) && - paddle::memory::InSameStream(storage_->data_shared(), stream))) { - storage_->Clear(); - storage_->set_data_shared(paddle::memory::AllocShared(place, size, stream)); + paddle::memory::InSameStream(holder_, stream))) { + holder_.reset(); + holder_ = paddle::memory::AllocShared(place, size, stream); meta_.offset = 0; } - return reinterpret_cast(reinterpret_cast(storage_->data()) + + return reinterpret_cast(reinterpret_cast(holder_->ptr()) + meta_.offset); } @@ -445,14 +334,9 @@ inline T* DenseTensor::mutable_data(const paddle::platform::Place& place, } void DenseTensor::ShareBufferWith(const DenseTensor& tensor) { - if (storage_ == nullptr) { - storage_ = make_intrusive( - paddle::platform::CPUPlace()); - } - if (storage_ != nullptr && tensor.storage_ != nullptr) { - storage_->set_data_shared(tensor.storage_->data_shared()); - } + holder_ = tensor.holder_; meta_.offset = tensor.meta().offset; + meta_.dtype = tensor.dtype(); } #define LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(dtype) \ @@ -467,7 +351,7 @@ LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(bool) LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(int8_t) LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(uint8_t) LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(int16_t) -LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(int) +LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(int32_t) LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(int64_t) LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(float) LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(double) @@ -482,6 +366,13 @@ LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(::paddle::experimental::complex128) /* From framework::LoDTensor */ /* ------------------------------ */ +DenseTensor::DenseTensor(intrusive_ptr storage, + const DenseTensorMeta& meta) + : meta_(meta), holder_(storage->move_data_shared()) {} + +DenseTensor::DenseTensor(intrusive_ptr storage, DenseTensorMeta&& meta) + : meta_(std::move(meta)), holder_(storage->move_data_shared()) {} + DenseTensor::DenseTensor(const LoD& lod) : DenseTensor() { meta_.lod = lod; } void DenseTensor::set_lod(const LoD& lod) { meta_.lod = lod; } @@ -559,9 +450,8 @@ DenseTensor DenseTensor::Slice(int64_t begin_idx, int64_t end_idx) const { } else { size_t base = numel() / meta_.dims[0]; DenseTensor dst; - dst.storage_ = pten::make_intrusive( - storage_->data_shared()); - 
dst.meta_.layout = meta_.layout; + dst.holder_ = holder_; + dst.set_layout(meta_.layout); dst.meta_.dtype = meta_.dtype; DDim dst_dims = meta_.dims; dst_dims[0] = end_idx - begin_idx; diff --git a/paddle/pten/core/dense_tensor.h b/paddle/pten/core/dense_tensor.h index 216689c9b64..88c459e6d87 100644 --- a/paddle/pten/core/dense_tensor.h +++ b/paddle/pten/core/dense_tensor.h @@ -70,17 +70,8 @@ class DenseTensor : public TensorBase, /// \param meta The meta data of dense tensor. DenseTensor(Allocator* a, DenseTensorMeta&& meta); - /// \brief Use existing storage space to create dense tensor. This interface - /// can be used to deliberately create an uninitialized dense tensor. - /// \param storage The existing storage. - /// \param meta The meta data of dense tensor. - DenseTensor(intrusive_ptr storage, const DenseTensorMeta& meta); - - /// \brief Use existing storage space to create dense tensor. This interface - /// can be used to deliberately create an uninitialized dense tensor. - /// \param storage The existing storage. - /// \param meta The meta data of dense tensor. - DenseTensor(intrusive_ptr storage, DenseTensorMeta&& meta); + DenseTensor(const std::shared_ptr& holder, + const DenseTensorMeta& meta); /// \brief Because dense tensor is a kind of container, we give a default /// constructor to use for stl container. But the dense tensor created with @@ -146,9 +137,7 @@ class DenseTensor : public TensorBase, /// \brief Test whether the storage is allocated. /// return Whether the storage is allocated. - bool initialized() const override { - return storage_ != nullptr && storage_->data() != nullptr; - } + bool initialized() const override { return holder_ && holder_->ptr(); } /// \brief Check if storage is shared with other objects. /// \return Whether the storage is shared with other objects. @@ -170,25 +159,7 @@ class DenseTensor : public TensorBase, /// \brief Returns the actual storage size occupied by tensor, may be larger /// than its shape dims. /// \return The actual storage size occupied by tensor. - size_t capacity() const { return storage_->size(); } - - /// \brief Get the mutable data pointer value of type T. - /// Memory allocation may occur when calling this interface: - /// 1. When the storage size is not enough to meet the current shape of the - /// data. - /// \return The mutable data pointer value of type T. - template - T* mutable_data(); - - /// \brief Get the mutable data pointer value of raw type. - /// Memory allocation may occur when calling this interface: - /// 1. When the storage size is not enough to meet the current shape of the - /// data. - /// 2. When more request_bytes parameters are used to reserve the data - /// storage. - /// param request_bytes The bytes to reserve the data storage. - /// \return The mutable data pointer value of type T. - void* mutable_data(size_t request_bytes = 0); + size_t capacity() const { return holder_->size(); } /// \brief Get the const data pointer value of type T. /// \return The const data pointer value of type T. @@ -204,7 +175,7 @@ class DenseTensor : public TensorBase, protected: DenseTensorMeta meta_; - intrusive_ptr storage_; + std::shared_ptr holder_; /* --------------------------- */ /* From framework::Tensor */ @@ -223,11 +194,21 @@ class DenseTensor : public TensorBase, /* @jim19930609: Remove dependency on protobuf after Tensor Unification. 
*/ - explicit DenseTensor(const paddle::framework::proto::VarType::Type& dtype); + explicit DenseTensor(paddle::framework::proto::VarType::Type dtype); - inline bool IsInitialized() const { - return storage_ != nullptr && storage_->data_shared() != nullptr; - } + /// \brief Use existing storage space to create dense tensor. This interface + /// can be used to deliberately create an uninitialized dense tensor. + /// \param storage The existing storage. + /// \param meta The meta data of dense tensor. + DenseTensor(intrusive_ptr storage, const DenseTensorMeta& meta); + + /// \brief Use existing storage space to create dense tensor. This interface + /// can be used to deliberately create an uninitialized dense tensor. + /// \param storage The existing storage. + /// \param meta The meta data of dense tensor. + DenseTensor(intrusive_ptr storage, DenseTensorMeta&& meta); + + inline bool IsInitialized() const { return holder_ != nullptr; } template T* data(); @@ -270,7 +251,7 @@ class DenseTensor : public TensorBase, void set_layout(const paddle::framework::DataLayout layout); void clear() { - storage_.reset(); + holder_.reset(); meta_.offset = 0; } @@ -281,31 +262,24 @@ class DenseTensor : public TensorBase, } bool IsSharedBufferWith(const DenseTensor& src) const { - if (storage_ == nullptr || src.storage_ == nullptr) return false; - if (storage_->data_shared() == src.storage_->data_shared()) return true; - - return false; + return holder_ && holder_ == src.Holder(); } - const std::shared_ptr Holder() const { - return storage_ == nullptr ? nullptr : std::move(storage_->data_shared()); - } + const std::shared_ptr& Holder() const { return holder_; } void set_offset(size_t offset) { meta_.offset = offset; } size_t offset() const { return meta_.offset; } - std::shared_ptr MoveMemoryHolder() { - return storage_ == nullptr ? nullptr - : std::move(storage_->move_data_shared()); + std::shared_ptr MoveMemoryHolder() { + return std::move(holder_); } - void ResetHolder(const std::shared_ptr& holder); + void ResetHolder(const std::shared_ptr& holder); - void ResetHolderWithType( - const std::shared_ptr& holder, - const paddle::framework::proto::VarType::Type& type); + void ResetHolderWithType(const std::shared_ptr& holder, + paddle::framework::proto::VarType::Type type); - void set_type(const paddle::framework::proto::VarType::Type& type); + void set_type(paddle::framework::proto::VarType::Type type); TensorInplaceVersion& InplaceVersionCounter() { return *inplace_version_counter_; diff --git a/paddle/pten/core/device_context.h b/paddle/pten/core/device_context.h index bb851d954f2..1ee2e21494b 100644 --- a/paddle/pten/core/device_context.h +++ b/paddle/pten/core/device_context.h @@ -19,7 +19,7 @@ limitations under the License. */ // TODO(wilber): Do we need to use place in pten kernel? 
#include "paddle/pten/common/place.h" -#include "paddle/pten/core/candidate/allocator.h" +#include "paddle/pten/core/allocator.h" namespace pten { class TensorBase; diff --git a/paddle/pten/core/storage.h b/paddle/pten/core/storage.h index 97d7f8d0f11..fc8b5dfaab7 100644 --- a/paddle/pten/core/storage.h +++ b/paddle/pten/core/storage.h @@ -56,18 +56,14 @@ class Storage : public intrusive_ref_counter { : nullptr; } - const std::shared_ptr data_shared() const { + const std::shared_ptr& data_shared() const { return data_; } virtual void set_data_shared( - const std::shared_ptr& holder) { - data_ = holder; - } + const std::shared_ptr& holder) = 0; - std::shared_ptr move_data_shared() { - return std::move(data_); - } + virtual std::shared_ptr&& move_data_shared() = 0; virtual void ReallocShared(size_t n) { PADDLE_THROW(paddle::platform::errors::Unimplemented( @@ -123,6 +119,18 @@ class TensorStorage : public Storage { bool OwnsMemory() const noexcept override { return true; } + void set_data_shared( + const std::shared_ptr& holder) override { + CHECK(holder); + data_ = holder; + size_ = holder->size(); + } + + std::shared_ptr&& move_data_shared() override { + size_ = 0; + return std::move(data_); + } + private: Allocator* alloc_; int64_t size_{0}; diff --git a/paddle/pten/kernels/cpu/cast_kernel.cc b/paddle/pten/kernels/cpu/cast_kernel.cc index a0006f49a2b..edb8f59e267 100644 --- a/paddle/pten/kernels/cpu/cast_kernel.cc +++ b/paddle/pten/kernels/cpu/cast_kernel.cc @@ -36,7 +36,7 @@ void CastKernelImpl(const CPUContext& dev_ctx, auto numel = x.numel(); auto* in_end = in_begin + numel; - auto* out_begin = out->mutable_data(); + auto* out_begin = out->mutable_data(dev_ctx.GetPlace()); paddle::platform::Transform trans; trans(dev_ctx, diff --git a/paddle/pten/kernels/cpu/copy_kernel.cc b/paddle/pten/kernels/cpu/copy_kernel.cc index 1889838e253..be5170f4d05 100644 --- a/paddle/pten/kernels/cpu/copy_kernel.cc +++ b/paddle/pten/kernels/cpu/copy_kernel.cc @@ -32,17 +32,16 @@ void Copy(const Context& dev_ctx, DenseTensor* dst) { auto* src_ptr = src.data(); const auto& src_place = src.place(); - const auto& dst_place = dst->place(); VLOG(3) << "TensorCopy " << src.dims() << " from " << src.place() << " to " - << dst_place; + << src_place; - dst->ResizeAndAllocate(src.dims()); - auto* dst_ptr = dst->mutable_data(); + dst->Resize(src.dims()); + auto* dst_ptr = dst->mutable_data(src_place); - if (src_ptr == dst_ptr && src_place == dst_place) { + if (src_ptr == dst_ptr) { VLOG(3) << "Skip copy the same data async from " << src_place << " to " - << dst_place; + << src_place; return; } VLOG(4) << "src:" << src_ptr << ", dst:" << dst_ptr; @@ -51,9 +50,8 @@ void Copy(const Context& dev_ctx, auto size = src.numel() * paddle::framework::SizeOfType(TransToProtoVarType(src.dtype())); - if (paddle::platform::is_cpu_place(src_place) && - paddle::platform::is_cpu_place(dst_place)) { - paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size); + if (paddle::platform::is_cpu_place(src_place)) { + paddle::memory::Copy(src_place, dst_ptr, src_place, src_ptr, size); } } diff --git a/paddle/pten/kernels/cpu/dot_kernel.cc b/paddle/pten/kernels/cpu/dot_kernel.cc index 5745737bbae..e6ffd3b5000 100644 --- a/paddle/pten/kernels/cpu/dot_kernel.cc +++ b/paddle/pten/kernels/cpu/dot_kernel.cc @@ -29,7 +29,7 @@ void DotKernel(const Context& dev_ctx, DenseTensor* out) { auto const *x_ptr = x.data(), *x_ptr_ = &x_ptr[0]; auto const *y_ptr = y.data(), *y_ptr_ = &y_ptr[0]; - auto* z = out->mutable_data(); + auto* z = 
out->mutable_data(dev_ctx.GetPlace()); // Loop over the total N elements of both operands while sum-reducing every // B pairs along the way where B is the dimension of the least ordered axis diff --git a/paddle/pten/kernels/cpu/elementwise.h b/paddle/pten/kernels/cpu/elementwise.h index e4f426d3f8e..6bfde977ce5 100644 --- a/paddle/pten/kernels/cpu/elementwise.h +++ b/paddle/pten/kernels/cpu/elementwise.h @@ -45,7 +45,10 @@ struct SameDimsAddFunctor< const DenseTensor& y, DenseTensor* z) { auto blas = paddle::operators::math::GetBlas(dev_ctx); - blas.VADD(x.numel(), x.data(), y.data(), z->mutable_data()); + blas.VADD(x.numel(), + x.data(), + y.data(), + z->mutable_data(dev_ctx.GetPlace())); } }; @@ -58,7 +61,7 @@ struct SameDimsAddFunctor< const DenseTensor& x, const DenseTensor& y, DenseTensor* z) { - z->mutable_data(); + z->mutable_data(dev_ctx.GetPlace()); auto eigen_x = pten::EigenVector::Flatten(x); auto eigen_y = pten::EigenVector::Flatten(y); auto eigen_z = pten::EigenVector::Flatten(*z); @@ -86,7 +89,10 @@ struct SameDimsSubtractFunctor< const DenseTensor& y, DenseTensor* z) { auto blas = paddle::operators::math::GetBlas(dev_ctx); - blas.VSUB(x.numel(), x.data(), y.data(), z->mutable_data()); + blas.VSUB(x.numel(), + x.data(), + y.data(), + z->mutable_data(dev_ctx.GetPlace())); } }; @@ -141,7 +147,10 @@ struct SameDimsDivideFunctor< const DenseTensor& y, DenseTensor* z) { auto blas = paddle::operators::math::GetBlas(dev_ctx); - blas.VDIV(x.numel(), x.data(), y.data(), z->mutable_data()); + blas.VDIV(x.numel(), + x.data(), + y.data(), + z->mutable_data(dev_ctx.GetPlace())); } }; @@ -164,7 +173,10 @@ struct SameDimsMultiplyFunctor< const DenseTensor& y, DenseTensor* z) { auto blas = paddle::operators::math::GetBlas(dev_ctx); - blas.VMUL(x.numel(), x.data(), y.data(), z->mutable_data()); + blas.VMUL(x.numel(), + x.data(), + y.data(), + z->mutable_data(dev_ctx.GetPlace())); } }; @@ -280,7 +292,7 @@ void CommonForwardBroadcastCPU(const DenseTensor& x, PADDLE_ENFORCE_NOT_NULL(y_data, paddle::platform::errors::InvalidArgument( "The input Y should not be empty.")); - OutType* out_data = z->mutable_data(); + OutType* out_data = z->mutable_data(ctx.GetPlace()); const int out_size = std::accumulate( out_dims_array, out_dims_array + max_dim, 1, std::multiplies()); @@ -361,7 +373,7 @@ void ElementwiseCompute(const CPUContext& dev_ctx, int axis, Functor func, DenseTensor* z) { - z->mutable_data(); + z->mutable_data(dev_ctx.GetPlace()); auto x_dims = x.dims(); auto y_dims = y.dims(); bool is_xsize_larger = true; diff --git a/paddle/pten/kernels/cpu/math_kernel.cc b/paddle/pten/kernels/cpu/math_kernel.cc index 706a40936a3..6d76626605c 100644 --- a/paddle/pten/kernels/cpu/math_kernel.cc +++ b/paddle/pten/kernels/cpu/math_kernel.cc @@ -37,7 +37,7 @@ namespace pten { const DenseTensor& y, \ int axis, \ DenseTensor* out) { \ - out->mutable_data(); \ + out->mutable_data(dev_ctx.GetPlace()); \ if (x.dims() == y.dims()) { \ SameDimsElementwiseCompute>()( \ dev_ctx, x, y, out); \ @@ -85,7 +85,7 @@ void DivideRawKernel(const Context& dev_ctx, int axis, DenseTensor* out) { // allocate memory for out - out->mutable_data(); + out->mutable_data(dev_ctx.GetPlace()); if (x.dims() == y.dims() && std::is_floating_point::value) { SameDimsElementwiseCompute>()( dev_ctx, x, y, out); diff --git a/paddle/pten/kernels/cpu/reduce.h b/paddle/pten/kernels/cpu/reduce.h index 86443c254bf..8f84bd0515b 100644 --- a/paddle/pten/kernels/cpu/reduce.h +++ b/paddle/pten/kernels/cpu/reduce.h @@ -119,7 +119,7 @@ void 
GetShuffledInput(const DeviceContext& dev_ctx, GetShuffledDim(input.dims(), &shuffled_dims, dims, &perm_axis); shuffled_input->ResizeAndAllocate(shuffled_dims); - shuffled_input->mutable_data(); + shuffled_input->mutable_data(dev_ctx.GetPlace()); pten::math::TransposeNormal trans; trans(dev_ctx, input, shuffled_input, perm_axis); @@ -158,7 +158,7 @@ void ReduceKernelImpl(const DeviceContext& dev_ctx, const std::vector& dims, bool keep_dim, bool reduce_all) { - output->mutable_data(); + output->mutable_data(dev_ctx.GetPlace()); if (reduce_all) { // Flatten and reduce 1-D tensor diff --git a/paddle/pten/kernels/cpu/scale_kernel.cc b/paddle/pten/kernels/cpu/scale_kernel.cc index 52949b58969..774d3891b03 100644 --- a/paddle/pten/kernels/cpu/scale_kernel.cc +++ b/paddle/pten/kernels/cpu/scale_kernel.cc @@ -33,7 +33,7 @@ void ScaleKernel(const Context& dev_ctx, bool bias_after_scale, DenseTensor* out) { // calc - out->mutable_data(); + out->mutable_data(dev_ctx.GetPlace()); auto eigen_out = pten::EigenVector::Flatten(*out); auto eigen_x = pten::EigenVector::Flatten(x); auto& dev = *dev_ctx.eigen_device(); diff --git a/paddle/pten/kernels/empty_kernel.cc b/paddle/pten/kernels/empty_kernel.cc index 2deac0146c5..6ce49982879 100644 --- a/paddle/pten/kernels/empty_kernel.cc +++ b/paddle/pten/kernels/empty_kernel.cc @@ -29,7 +29,7 @@ void EmptyKernel(const Context& dev_ctx, template void EmptyLikeKernel(const Context& dev_ctx, DenseTensor* out) { - out->mutable_data(); + out->mutable_data(dev_ctx.GetPlace()); } } // namespace pten diff --git a/paddle/pten/kernels/funcs/elementwise_base.h b/paddle/pten/kernels/funcs/elementwise_base.h index 47924c4e2ae..1c18e9f7998 100644 --- a/paddle/pten/kernels/funcs/elementwise_base.h +++ b/paddle/pten/kernels/funcs/elementwise_base.h @@ -227,7 +227,7 @@ class TransformFunctor { const bool is_xsize_larger = true) : x_(x.data()), y_(y.data()), - z_(z->mutable_data()), + z_(z->mutable_data(ctx.GetPlace())), nx_(x.numel()), ctx_(ctx), func_(func), @@ -585,7 +585,7 @@ void ElementwiseCudaKernel(const KPDevice &ctx, ins_data[i] = ins[i]->data(); } for (int i = 0; i < NumOuts; ++i) { - outs_data[i] = (*outs)[i]->mutable_data(); + outs_data[i] = (*outs)[i]->mutable_data(ctx.GetPlace()); } #ifdef PADDLE_WITH_XPU2 int block_size = 64; diff --git a/paddle/pten/kernels/funcs/transpose.cc b/paddle/pten/kernels/funcs/transpose.cc index 90a6859a850..13cfaedb33d 100644 --- a/paddle/pten/kernels/funcs/transpose.cc +++ b/paddle/pten/kernels/funcs/transpose.cc @@ -36,7 +36,7 @@ struct TransposeNormal { auto in_stride = pten::framework::stride(in.dims()); auto out_stride = pten::framework::stride(out->dims()); const T* in_ptr = in.data(); - T* out_ptr = out->mutable_data(); + T* out_ptr = out->mutable_data(dev_ctx.GetPlace()); auto transpose_helper = [&](int64_t beg, int64_t end) { for (int64_t out_idx = beg; out_idx < end; ++out_idx) { @@ -63,11 +63,8 @@ DEFINE_CPU_TRANS_NORMAL(bool); DEFINE_CPU_TRANS_NORMAL(int8_t); DEFINE_CPU_TRANS_NORMAL(uint8_t); DEFINE_CPU_TRANS_NORMAL(int16_t); -DEFINE_CPU_TRANS_NORMAL(uint16_t); DEFINE_CPU_TRANS_NORMAL(int32_t); -DEFINE_CPU_TRANS_NORMAL(uint32_t); DEFINE_CPU_TRANS_NORMAL(int64_t); -DEFINE_CPU_TRANS_NORMAL(uint64_t); DEFINE_CPU_TRANS_NORMAL(float); DEFINE_CPU_TRANS_NORMAL(double); DEFINE_CPU_TRANS_NORMAL(paddle::platform::float16); diff --git a/paddle/pten/kernels/funcs/transpose.cu b/paddle/pten/kernels/funcs/transpose.cu index 474a7c4ea4d..24d72ca3d81 100644 --- a/paddle/pten/kernels/funcs/transpose.cu +++ 
b/paddle/pten/kernels/funcs/transpose.cu @@ -61,7 +61,7 @@ struct TransposeNormal { auto in_stride = pten::framework::stride(in.dims()); auto out_stride = pten::framework::stride(out->dims()); auto* in_ptr = in.data(); - auto* out_ptr = out->mutable_data(); + auto* out_ptr = out->mutable_data(dev_ctx.GetPlace()); // copy in_stride, out_stride, axis to gpu device const paddle::platform::CUDAPlace& cuda_place = dev_ctx.GetPlace(); @@ -110,11 +110,8 @@ DEFINE_GPU_TRANS_NORMAL(bool); DEFINE_GPU_TRANS_NORMAL(int8_t); DEFINE_GPU_TRANS_NORMAL(uint8_t); DEFINE_GPU_TRANS_NORMAL(int16_t); -DEFINE_GPU_TRANS_NORMAL(uint16_t); DEFINE_GPU_TRANS_NORMAL(int32_t); -DEFINE_GPU_TRANS_NORMAL(uint32_t); DEFINE_GPU_TRANS_NORMAL(int64_t); -DEFINE_GPU_TRANS_NORMAL(uint64_t); DEFINE_GPU_TRANS_NORMAL(float); DEFINE_GPU_TRANS_NORMAL(double); DEFINE_GPU_TRANS_NORMAL(paddle::platform::float16); diff --git a/paddle/pten/kernels/gpu/cast_kernel.cu b/paddle/pten/kernels/gpu/cast_kernel.cu index 3774c56370b..12f246c3238 100644 --- a/paddle/pten/kernels/gpu/cast_kernel.cu +++ b/paddle/pten/kernels/gpu/cast_kernel.cu @@ -43,7 +43,7 @@ void CastCUDAKernelImpl(const GPUContext& dev_ctx, std::vector outputs; inputs.emplace_back(&x); outputs.emplace_back(out); - out->mutable_data(); + out->mutable_data(dev_ctx.GetPlace()); pten::funcs::LaunchSameDimsElementwiseCudaKernel( diff --git a/paddle/pten/kernels/gpu/copy_kernel.cu b/paddle/pten/kernels/gpu/copy_kernel.cu index 1f7a08e8254..d2578723158 100644 --- a/paddle/pten/kernels/gpu/copy_kernel.cu +++ b/paddle/pten/kernels/gpu/copy_kernel.cu @@ -43,7 +43,7 @@ void Copy(const Context& dev_ctx, << dst_place; dst->ResizeAndAllocate(src.dims()); - auto* dst_ptr = dst->mutable_data(); + auto* dst_ptr = dst->mutable_data(dst_place); if (src_ptr == dst_ptr && src_place == dst_place) { VLOG(3) << "Skip copy the same data async from " << src_place << " to " diff --git a/paddle/pten/kernels/gpu/dot_kernel.cu b/paddle/pten/kernels/gpu/dot_kernel.cu index 5fe397e1283..75aacc8d3d1 100644 --- a/paddle/pten/kernels/gpu/dot_kernel.cu +++ b/paddle/pten/kernels/gpu/dot_kernel.cu @@ -29,7 +29,7 @@ void DotKernel(const Context& dev_ctx, const DenseTensor& x, const DenseTensor& y, DenseTensor* out) { - out->mutable_data(); + out->mutable_data(dev_ctx.GetPlace()); if (1 == out->dims().size()) { auto eigen_out = pten::EigenScalar::From(*out); auto eigen_x = pten::EigenVector::Flatten(x); diff --git a/paddle/pten/kernels/gpu/elementwise.h b/paddle/pten/kernels/gpu/elementwise.h index def54e24840..f4d8e442fcd 100644 --- a/paddle/pten/kernels/gpu/elementwise.h +++ b/paddle/pten/kernels/gpu/elementwise.h @@ -350,7 +350,7 @@ void LaunchKernel(const KPDevice &ctx, pten::framework::Array<_ptr_ OutT *, NumOuts> outs_data; for (int i = 0; i < NumOuts; ++i) { - outs_data[i] = (*outs)[i]->mutable_data(); + outs_data[i] = (*outs)[i]->mutable_data(ctx.GetPlace()); } for (int i = 0; i < Arity; i++) { diff --git a/paddle/pten/kernels/gpu/math_kernel.cu b/paddle/pten/kernels/gpu/math_kernel.cu index 6b6383f8106..d06dc1c43f6 100644 --- a/paddle/pten/kernels/gpu/math_kernel.cu +++ b/paddle/pten/kernels/gpu/math_kernel.cu @@ -47,7 +47,7 @@ namespace pten { inputs.emplace_back(&x); \ inputs.emplace_back(&y); \ outputs.emplace_back(out); \ - out->mutable_data(); \ + out->mutable_data(dev_ctx.GetPlace()); \ LaunchElementwiseCudaKernel( \ dev_ctx, inputs, &outputs, axis, funcs::name##Functor()); \ } diff --git a/paddle/pten/kernels/gpu/reduce.h b/paddle/pten/kernels/gpu/reduce.h index e247f786cc6..26f17bc0050 100644 --- 
a/paddle/pten/kernels/gpu/reduce.h +++ b/paddle/pten/kernels/gpu/reduce.h @@ -328,7 +328,7 @@ struct ReduceConfig { if (should_reduce_again) { tmp->ResizeAndAllocate(pten::framework::make_ddim( {static_cast(left_num * grid.z * grid.y * sizeof(Ty))})); - output_data = tmp->mutable_data(); + output_data = tmp->mutable_data(place); } else { output_data = y_data; } @@ -1032,7 +1032,7 @@ static pten::framework::make_ddim( {static_cast(temp_storage_bytes)}))); - auto* temp_storage = tmp.mutable_data(); + auto* temp_storage = tmp.mutable_data(place); cub::DeviceReduce::Reduce(temp_storage, temp_storage_bytes, @@ -1070,8 +1070,7 @@ void TensorReduceFunctorImpl(const pten::DenseTensor& x, const TransformOp& transform, const std::vector& origin_reduce_dims, gpuStream_t stream) { - // Allocate memory - y->mutable_data(); + y->mutable_data(x.place()); auto x_dim = pten::framework::vectorize(x.dims()); auto config = ReduceConfig(origin_reduce_dims, x_dim); @@ -1088,7 +1087,7 @@ void TensorReduceFunctorImpl(const pten::DenseTensor& x, pten::DenseTensorMeta(y->dtype(), tmp_ddim, y->layout())); auto x_data = x.data(); - auto y_data = y->mutable_data(); + auto y_data = y->data(); auto* dev_ctx = static_cast( paddle::platform::DeviceContextPool::Instance().Get(x.place())); diff --git a/paddle/pten/kernels/gpu/scale_kernel.cu b/paddle/pten/kernels/gpu/scale_kernel.cu index e729dad3b36..dd7c2f242ea 100644 --- a/paddle/pten/kernels/gpu/scale_kernel.cu +++ b/paddle/pten/kernels/gpu/scale_kernel.cu @@ -54,7 +54,7 @@ void ScaleKernel(const Context& dev_ctx, std::vector outputs; inputs.emplace_back(&x); outputs.emplace_back(out); - out->mutable_data(); + out->mutable_data(dev_ctx.GetPlace()); pten::funcs::LaunchSameDimsElementwiseCudaKernel( diff --git a/paddle/pten/kernels/impl/complex_kernel_impl.h b/paddle/pten/kernels/impl/complex_kernel_impl.h index d7132b05f7f..aa878f7e9eb 100644 --- a/paddle/pten/kernels/impl/complex_kernel_impl.h +++ b/paddle/pten/kernels/impl/complex_kernel_impl.h @@ -26,7 +26,7 @@ void ConjKernel(const Context& dev_ctx, DenseTensor* out) { auto numel = x.numel(); auto* x_data = x.data(); - auto* out_data = out->mutable_data(); + auto* out_data = out->mutable_data(dev_ctx.GetPlace()); paddle::platform::ForRange for_range(dev_ctx, numel); paddle::operators::math::ConjFunctor functor(x_data, numel, out_data); diff --git a/paddle/pten/kernels/impl/dot_grad_kernel_impl.h b/paddle/pten/kernels/impl/dot_grad_kernel_impl.h index 557f6fae7b7..d0c6cf6793e 100644 --- a/paddle/pten/kernels/impl/dot_grad_kernel_impl.h +++ b/paddle/pten/kernels/impl/dot_grad_kernel_impl.h @@ -73,7 +73,7 @@ struct DotGradFunction::From(*tensor_dout); if (tensor_dx) { - tensor_dx->mutable_data(); + tensor_dx->mutable_data(ctx.GetPlace()); auto y = EigenMatrix::From(*tensor_y); auto& dev = *ctx.eigen_device(); Eigen::DSizes size(1, tensor_dx->dims()[1]); @@ -85,7 +85,7 @@ struct DotGradFunctionmutable_data(); + tensor_dy->mutable_data(ctx.GetPlace()); auto x = EigenMatrix::From(*tensor_x); auto& dev = *ctx.eigen_device(); Eigen::DSizes size(1, tensor_dy->dims()[1]); @@ -100,7 +100,7 @@ struct DotGradFunctiondata(); if (tensor_dx) { - auto* data_dx = tensor_dx->mutable_data(); + auto* data_dx = tensor_dx->mutable_data(ctx.GetPlace()); const auto* data_y = tensor_y->data(); const DDim& dim = tensor_x->dims(); size_t N = static_cast(pten::framework::product(dim)); @@ -115,7 +115,7 @@ struct DotGradFunctionmutable_data(); + auto* data_dy = tensor_dy->mutable_data(ctx.GetPlace()); const auto* data_x = tensor_x->data(); const 
DDim& dim = tensor_y->dims(); size_t N = static_cast(pten::framework::product(dim)); @@ -164,7 +164,7 @@ struct DotGradFunction::From(*tensor_dout); if (tensor_dx) { - tensor_dx->mutable_data(); + tensor_dx->mutable_data(ctx.GetPlace()); auto y = EigenMatrix::From(*tensor_y); auto dx = EigenMatrix::From(*tensor_dx); auto& dev = *ctx.eigen_device(); @@ -173,7 +173,7 @@ struct DotGradFunctionmutable_data(); + tensor_dy->mutable_data(ctx.GetPlace()); auto x = EigenMatrix::From(*tensor_x); auto dy = EigenMatrix::From(*tensor_dy); auto& dev = *ctx.eigen_device(); @@ -189,7 +189,7 @@ struct DotGradFunctionmutable_data(); + auto* dx = tensor_dx->mutable_data(ctx.GetPlace()); for (auto j = 0; j < N / B; ++j) { auto const ss = dz[j]; for (auto i = 0; i < B; ++i) *dx++ = *y++ * ss; @@ -197,7 +197,7 @@ struct DotGradFunctionmutable_data(); + auto* dy = tensor_dy->mutable_data(ctx.GetPlace()); for (auto j = 0; j < N / B; ++j) { auto const ss = dz[j]; for (auto i = 0; i < B; i++) *dy++ = *x++ * ss; @@ -272,7 +272,7 @@ struct DotDoubleGradFunctiondata(); if (tensor_dx) { - auto* data_dx = tensor_dx->mutable_data(); + auto* data_dx = tensor_dx->mutable_data(ctx.GetPlace()); const auto* data_ddy = tensor_ddy->data(); const DDim& dim = tensor_dx->dims(); size_t N = static_cast(product(dim)); @@ -287,7 +287,7 @@ struct DotDoubleGradFunctionmutable_data(); + auto* data_dy = tensor_dy->mutable_data(ctx.GetPlace()); const auto* data_ddx = tensor_ddx->data(); const DDim& dim = tensor_dy->dims(); size_t N = static_cast(product(dim)); @@ -302,7 +302,7 @@ struct DotDoubleGradFunctionmutable_data(); + auto* data_ddout = tensor_ddout->mutable_data(ctx.GetPlace()); auto* data_x = tensor_x->data(); auto* data_y = tensor_y->data(); auto* data_ddx = tensor_ddx->data(); @@ -351,7 +351,7 @@ struct DotDoubleGradFunction::Flatten(*tensor_dout); if (tensor_dx) { - tensor_dx->mutable_data(); + tensor_dx->mutable_data(ctx.GetPlace()); auto ddy = EigenVector::Flatten(*tensor_ddy); Eigen::DSizes size(tensor_ddy->numel()); auto dx = EigenVector::Flatten(*tensor_dx); @@ -359,7 +359,7 @@ struct DotDoubleGradFunctionmutable_data(); + tensor_dy->mutable_data(ctx.GetPlace()); auto ddx = EigenVector::Flatten(*tensor_ddx); Eigen::DSizes size(tensor_ddx->numel()); @@ -368,7 +368,7 @@ struct DotDoubleGradFunctionmutable_data(); + tensor_ddout->mutable_data(ctx.GetPlace()); auto x = EigenVector::Flatten(*tensor_x); auto y = EigenVector::Flatten(*tensor_y); auto ddx = EigenVector::Flatten(*tensor_ddx); @@ -381,7 +381,7 @@ struct DotDoubleGradFunctiondata(); if (tensor_dx) { - auto* data_dx = tensor_dx->mutable_data(); + auto* data_dx = tensor_dx->mutable_data(ctx.GetPlace()); const auto* data_ddy = tensor_ddy->data(); const DDim& dim = tensor_dx->dims(); size_t N = static_cast(product(dim)); @@ -396,7 +396,7 @@ struct DotDoubleGradFunctionmutable_data(); + auto* data_dy = tensor_dy->mutable_data(ctx.GetPlace()); const auto* data_ddx = tensor_ddx->data(); const DDim& dim = tensor_dy->dims(); size_t N = static_cast(product(dim)); @@ -411,7 +411,7 @@ struct DotDoubleGradFunctionmutable_data(); + auto* data_ddout = tensor_ddout->mutable_data(ctx.GetPlace()); auto* data_x = tensor_x->data(); auto* data_y = tensor_y->data(); auto* data_ddx = tensor_ddx->data(); @@ -552,7 +552,7 @@ struct DotTripleGradFunctiondata(); if (out_tensor_d_x) { - auto* data_d_x = out_tensor_d_x->mutable_data(); + auto* data_d_x = out_tensor_d_x->mutable_data(ctx.GetPlace()); const auto* data_ddy = in_tensor_ddy->data(); const DDim& dim = out_tensor_d_x->dims(); @@ 
-567,7 +567,7 @@ struct DotTripleGradFunctionmutable_data(); + auto* data_d_y = out_tensor_d_y->mutable_data(ctx.GetPlace()); const auto* data_ddx = in_tensor_ddx->data(); const DDim& dim = out_tensor_d_y->dims(); @@ -582,7 +582,7 @@ struct DotTripleGradFunctionmutable_data(); + auto* data_d_dout = out_tensor_d_dout->mutable_data(ctx.GetPlace()); auto* data_ddx = in_tensor_ddx->data(); auto* data_ddy = in_tensor_ddy->data(); auto* data_d_dx = in_tensor_d_dx->data(); @@ -613,7 +613,7 @@ struct DotTripleGradFunctionmutable_data(); + auto* data_d_ddx = out_tensor_d_ddx->mutable_data(ctx.GetPlace()); auto* data_dout = in_tensor_dout->data(); auto* data_d_dy = in_tensor_d_dy->data(); auto* data_y = in_tensor_y->data(); @@ -633,7 +633,7 @@ struct DotTripleGradFunctionmutable_data(); + auto* data_d_ddy = out_tensor_d_ddy->mutable_data(ctx.GetPlace()); auto* data_dout = in_tensor_dout->data(); auto* data_d_dx = in_tensor_d_dx->data(); auto* data_x = in_tensor_x->data(); @@ -678,7 +678,7 @@ struct DotTripleGradFunction::Flatten(*in_tensor_d_ddout); if (out_tensor_d_x) { - out_tensor_d_x->mutable_data(); + out_tensor_d_x->mutable_data(ctx.GetPlace()); auto ddy = EigenVector::Flatten(*in_tensor_ddy); Eigen::DSizes size(in_tensor_ddy->numel()); auto d_x = EigenVector::Flatten(*out_tensor_d_x); @@ -686,7 +686,7 @@ struct DotTripleGradFunctionmutable_data(); + out_tensor_d_y->mutable_data(ctx.GetPlace()); auto ddx = EigenVector::Flatten(*in_tensor_ddx); Eigen::DSizes size(in_tensor_ddx->numel()); @@ -695,7 +695,7 @@ struct DotTripleGradFunctionmutable_data(); + out_tensor_d_dout->mutable_data(ctx.GetPlace()); auto ddx = EigenVector::Flatten(*in_tensor_ddx); auto ddy = EigenVector::Flatten(*in_tensor_ddy); auto d_dx = EigenVector::Flatten(*in_tensor_d_dx); @@ -705,7 +705,7 @@ struct DotTripleGradFunctionmutable_data(); + out_tensor_d_ddx->mutable_data(ctx.GetPlace()); auto dout = EigenVector::Flatten(*in_tensor_dout); auto y = EigenVector::Flatten(*in_tensor_y); auto d_ddout = EigenVector::Flatten(*in_tensor_d_ddout); @@ -717,7 +717,7 @@ struct DotTripleGradFunctionmutable_data(); + out_tensor_d_ddy->mutable_data(ctx.GetPlace()); auto dout = EigenVector::Flatten(*in_tensor_dout); auto x = EigenVector::Flatten(*in_tensor_x); auto d_ddout = EigenVector::Flatten(*in_tensor_d_ddout); @@ -732,7 +732,7 @@ struct DotTripleGradFunctiondata(); if (out_tensor_d_x) { - auto* data_d_x = out_tensor_d_x->mutable_data(); + auto* data_d_x = out_tensor_d_x->mutable_data(ctx.GetPlace()); const auto* data_ddy = in_tensor_ddy->data(); const DDim& dim = out_tensor_d_x->dims(); @@ -747,7 +747,7 @@ struct DotTripleGradFunctionmutable_data(); + auto* data_d_y = out_tensor_d_y->mutable_data(ctx.GetPlace()); const auto* data_ddx = in_tensor_ddx->data(); const DDim& dim = out_tensor_d_y->dims(); @@ -762,7 +762,7 @@ struct DotTripleGradFunctionmutable_data(); + auto* data_d_dout = out_tensor_d_dout->mutable_data(ctx.GetPlace()); auto* data_ddx = in_tensor_ddx->data(); auto* data_ddy = in_tensor_ddy->data(); auto* data_d_dx = in_tensor_d_dx->data(); @@ -790,7 +790,7 @@ struct DotTripleGradFunctionmutable_data(); + auto* data_d_ddx = out_tensor_d_ddx->mutable_data(ctx.GetPlace()); auto* data_dout = in_tensor_dout->data(); auto* data_d_dy = in_tensor_d_dy->data(); auto* data_y = in_tensor_y->data(); @@ -809,7 +809,7 @@ struct DotTripleGradFunctionmutable_data(); + auto* data_d_ddy = out_tensor_d_ddy->mutable_data(ctx.GetPlace()); auto* data_dout = in_tensor_dout->data(); auto* data_d_dx = in_tensor_d_dx->data(); auto* data_x = 
in_tensor_x->data(); @@ -838,10 +838,10 @@ void DotGradKernel(const Context& dev_ctx, DenseTensor* dx, DenseTensor* dy) { if (dx) { - dx->mutable_data(); + dx->mutable_data(dev_ctx.GetPlace()); } if (dy) { - dy->mutable_data(); + dy->mutable_data(dev_ctx.GetPlace()); } DotGradFunction()(dev_ctx, &x, &y, &dout, dx, dy); } @@ -857,13 +857,13 @@ void DotDoubleGradKernel(const Context& dev_ctx, DenseTensor* dy, DenseTensor* ddout) { if (dx) { - dx->mutable_data(); + dx->mutable_data(dev_ctx.GetPlace()); } if (dy) { - dy->mutable_data(); + dy->mutable_data(dev_ctx.GetPlace()); } if (ddout) { - ddout->mutable_data(); + ddout->mutable_data(dev_ctx.GetPlace()); } DotDoubleGradFunction()( dev_ctx, &x, &y, &dout, ddx, ddy, dx, dy, ddout); @@ -885,19 +885,19 @@ void DotTripleGradKernel(const Context& dev_ctx, DenseTensor* d_ddy, DenseTensor* d_dout) { if (d_x) { - d_x->mutable_data(); + d_x->mutable_data(dev_ctx.GetPlace()); } if (d_y) { - d_y->mutable_data(); + d_y->mutable_data(dev_ctx.GetPlace()); } if (d_ddx) { - d_ddx->mutable_data(); + d_ddx->mutable_data(dev_ctx.GetPlace()); } if (d_ddy) { - d_ddy->mutable_data(); + d_ddy->mutable_data(dev_ctx.GetPlace()); } if (d_dout) { - d_dout->mutable_data(); + d_dout->mutable_data(dev_ctx.GetPlace()); } DotTripleGradFunction()(dev_ctx, diff --git a/paddle/pten/kernels/impl/full_kernel_impl.h b/paddle/pten/kernels/impl/full_kernel_impl.h index 2900e2e83bd..4fee23e175c 100644 --- a/paddle/pten/kernels/impl/full_kernel_impl.h +++ b/paddle/pten/kernels/impl/full_kernel_impl.h @@ -26,7 +26,7 @@ namespace pten { template void FullValue(const Context& dev_ctx, DenseTensor* tensor, VType val) { - tensor->mutable_data(); + tensor->mutable_data(dev_ctx.GetPlace()); auto t = pten::EigenVector::Flatten(*tensor); t.device(*dev_ctx.eigen_device()) = t.constant(static_cast(val)); } diff --git a/paddle/pten/kernels/impl/matmul_grad_kernel_impl.h b/paddle/pten/kernels/impl/matmul_grad_kernel_impl.h index 71fadfae7de..fbcb073150c 100644 --- a/paddle/pten/kernels/impl/matmul_grad_kernel_impl.h +++ b/paddle/pten/kernels/impl/matmul_grad_kernel_impl.h @@ -105,7 +105,7 @@ void MatMul(const Context& dev_ctx, bool trans_b, DenseTensor* out, bool flag = false) { - out->mutable_data(); + out->mutable_data(dev_ctx.GetPlace()); auto blas = paddle::operators::math::GetBlas(dev_ctx); auto mat_dim_a = paddle::operators::math::CreateMatrixDescriptor(a.dims(), 0, trans_a); @@ -123,7 +123,7 @@ void MatMul(const Context& dev_ctx, b.data(), mat_dim_b, static_cast(1), - out->mutable_data(), + out->data(), static_cast(flag)); } @@ -242,8 +242,8 @@ void MatmulGradKernel(const Context& dev_ctx, // Case1 : x's or y's dim = 1 if (x_ndim == 1 && y_ndim == 1) { - if (dx) dx->mutable_data(); - if (dy) dy->mutable_data(); + if (dx) dx->mutable_data(dev_ctx.GetPlace()); + if (dy) dy->mutable_data(dev_ctx.GetPlace()); if (out_grad.numel() == 1) { DotGradFunction()(dev_ctx, &x, &y, &out_grad, dx, dy); return; diff --git a/paddle/pten/kernels/impl/matmul_kernel_impl.h b/paddle/pten/kernels/impl/matmul_kernel_impl.h index afe6bf71e2f..e59a54c703a 100644 --- a/paddle/pten/kernels/impl/matmul_kernel_impl.h +++ b/paddle/pten/kernels/impl/matmul_kernel_impl.h @@ -118,7 +118,7 @@ void MatMulFunction(const Context& dev_ctx, N)); VLOG(3) << "MatMul's case 1"; Out->Resize({1}); - Out->mutable_data(); + Out->mutable_data(dev_ctx.GetPlace()); blas.GEMM(CblasNoTrans, CblasTrans, 1, @@ -128,7 +128,7 @@ void MatMulFunction(const Context& dev_ctx, y_data, x_data, static_cast(flag), - Out->mutable_data()); + 
Out->data()); return; } @@ -165,7 +165,7 @@ void MatMulFunction(const Context& dev_ctx, out_dims.back() = y_dims.back(); } Out->ResizeAndAllocate(pten::framework::make_ddim(out_dims)); - Out->mutable_data(); + Out->mutable_data(dev_ctx.GetPlace()); if (trans_y) { const int M = Y.numel() / N; VLOG(3) << "MatMul's case 2"; @@ -176,7 +176,7 @@ void MatMulFunction(const Context& dev_ctx, y_data, x_data, static_cast(flag), - Out->mutable_data()); + Out->data()); } else { const int M = y_dims[y_ndim - 1]; const int batch_size = Y.numel() / (M * N); @@ -189,7 +189,7 @@ void MatMulFunction(const Context& dev_ctx, y_data, x_data, static_cast(flag), - Out->mutable_data()); + Out->data()); } else { VLOG(3) << "MatMul's case 4"; blas.BatchedGEMM(CblasTrans, @@ -201,7 +201,7 @@ void MatMulFunction(const Context& dev_ctx, y_data, x_data, static_cast(flag), - Out->mutable_data(), + Out->data(), batch_size, M * N, 0); @@ -243,7 +243,7 @@ void MatMulFunction(const Context& dev_ctx, std::copy_n(x_dims.cbegin(), x_ndim - 1, out_dims.begin()); } Out->ResizeAndAllocate(pten::framework::make_ddim(out_dims)); - Out->mutable_data(); + Out->mutable_data(dev_ctx.GetPlace()); if (trans_x) { const int M = x_dims[x_ndim - 1]; @@ -257,7 +257,7 @@ void MatMulFunction(const Context& dev_ctx, x_data, y_data, static_cast(flag), - Out->mutable_data()); + Out->data()); } else { VLOG(3) << "MatMul's case 6"; blas.BatchedGEMM(CblasTrans, @@ -269,7 +269,7 @@ void MatMulFunction(const Context& dev_ctx, x_data, y_data, static_cast(flag), - Out->mutable_data(), + Out->data(), batch_size, M * N, 0); @@ -284,7 +284,7 @@ void MatMulFunction(const Context& dev_ctx, x_data, y_data, static_cast(flag), - Out->mutable_data()); + Out->data()); } return; } @@ -331,7 +331,7 @@ void MatMulFunction(const Context& dev_ctx, out_broadcast_dims[ndim - 1] = N; Out->ResizeAndAllocate(pten::framework::make_ddim(out_broadcast_dims)); - Out->mutable_data(); + Out->mutable_data(dev_ctx.GetPlace()); const int batch_dim = ndim - 2; // broadcast message @@ -367,7 +367,7 @@ void MatMulFunction(const Context& dev_ctx, x_data, y_data, static_cast(flag), - Out->mutable_data()); + Out->data()); } else if (x_batch_size == 1) { if (M == 1 && trans_y) { VLOG(3) << "MatMul's case 9"; @@ -378,7 +378,7 @@ void MatMulFunction(const Context& dev_ctx, y_data, x_data, static_cast(flag), - Out->mutable_data()); + Out->data()); } else { VLOG(3) << "MatMul's case 10"; blas.BatchedGEMM(trans_x ? 
CblasTrans : CblasNoTrans,
@@ -390,7 +390,7 @@ void MatMulFunction(const Context& dev_ctx,
                        x_data,
                        y_data,
                        static_cast<T>(flag),
-                       Out->mutable_data<T>(),
+                       Out->data<T>(),
                        out_batch_size,
                        0,
                        K * N);
@@ -407,7 +407,7 @@ void MatMulFunction(const Context& dev_ctx,
                   x_data,
                   y_data,
                   static_cast<T>(flag),
-                  Out->mutable_data<T>());
+                  Out->data<T>());
     } else {
       VLOG(3) << "MatMul's case 12";
       blas.BatchedGEMM(CblasTrans,
@@ -419,7 +419,7 @@ void MatMulFunction(const Context& dev_ctx,
                        x_data,
                        y_data,
                        static_cast<T>(flag),
-                       Out->mutable_data<T>(),
+                       Out->data<T>(),
                        out_batch_size,
                        M * K,
                        0);
@@ -435,7 +435,7 @@ void MatMulFunction(const Context& dev_ctx,
                        x_data,
                        y_data,
                        static_cast<T>(flag),
-                       Out->mutable_data<T>(),
+                       Out->data<T>(),
                        out_batch_size,
                        M * K,
                        K * N);
@@ -454,7 +454,7 @@ void MatMulFunction(const Context& dev_ctx,
     x_ptr[i] = x_data + x_index * M * K;
     y_ptr[i] = y_data + y_index * K * N;
-    out_ptr[i] = Out->mutable_data<T>() + i * M * N;
+    out_ptr[i] = Out->data<T>() + i * M * N;
     IndexIncreaseFromDims(batch_dim, out_broadcast_dims.data(), index.data());
   }
   VLOG(3) << "MatMul's case 14";
diff --git a/paddle/pten/kernels/impl/sign_kernel_impl.h b/paddle/pten/kernels/impl/sign_kernel_impl.h
index 655cda762ee..54c1464c9e0 100644
--- a/paddle/pten/kernels/impl/sign_kernel_impl.h
+++ b/paddle/pten/kernels/impl/sign_kernel_impl.h
@@ -26,7 +26,7 @@ template <typename T, typename Context>
 void SignKernel(const Context& dev_ctx,
                 const DenseTensor& x,
                 DenseTensor* out) {
-  out->mutable_data<T>();
+  out->mutable_data<T>(dev_ctx.GetPlace());
   auto eigen_out = pten::EigenVector<T>::Flatten(*out);
   auto eigen_x = pten::EigenVector<T>::Flatten(x);
diff --git a/paddle/pten/kernels/reshape_kernel.cc b/paddle/pten/kernels/reshape_kernel.cc
index 7f58bbbd373..9bfad22374c 100644
--- a/paddle/pten/kernels/reshape_kernel.cc
+++ b/paddle/pten/kernels/reshape_kernel.cc
@@ -27,12 +27,15 @@ void ReshapeKernel(const Context& dev_ctx,
                    const ScalarArray& shape,
                    DenseTensor* out) {
   auto out_meta = InferMetaFromVecValue(x.meta(), shape.GetData());
-  if (x.data() == out->data() && x.numel() == out->numel()) {
+  if (x.initialized() && x.Holder() == out->Holder()) {
     out->ResizeAndAllocate(out_meta.dims);
     return;
   }
+
+  out->Resize(x.dims());
+  out->mutable_data(x.place());
   pten::Copy(dev_ctx, x, false, out);
-  out->ResizeAndAllocate(out_meta.dims);
+  out->Resize(out_meta.dims);
   out->ResetLoD(x.lod());
 }
diff --git a/paddle/pten/kernels/xpu/copy_kernel.cc b/paddle/pten/kernels/xpu/copy_kernel.cc
index 3287fa1f7a8..56b79061f75 100644
--- a/paddle/pten/kernels/xpu/copy_kernel.cc
+++ b/paddle/pten/kernels/xpu/copy_kernel.cc
@@ -30,7 +30,7 @@ void Copy(const Context& dev_ctx,
           bool blocking,
           DenseTensor* dst) {
   auto* src_ptr = src.data();
-  auto* dst_ptr = dst->mutable_data();
+  auto* dst_ptr = dst->mutable_data(dev_ctx.GetPlace());
   const auto& src_place = src.place();
   const auto& dst_place = dst->place();
diff --git a/paddle/pten/tests/api/test_cast_api.cc b/paddle/pten/tests/api/test_cast_api.cc
index 0a3b56e3f18..b87bebacab7 100644
--- a/paddle/pten/tests/api/test_cast_api.cc
+++ b/paddle/pten/tests/api/test_cast_api.cc
@@ -37,7 +37,8 @@ TEST(API, cast) {
       pten::DenseTensorMeta(pten::DataType::FLOAT32,
                             framework::make_ddim({3, 4}),
                             pten::DataLayout::NCHW));
-  auto* dense_x_data = dense_x->mutable_data<float>();
+  auto* dense_x_data =
+      dense_x->mutable_data<float>(paddle::platform::CPUPlace());
 
   for (int i = 0; i < dense_x->numel(); i++) {
     dense_x_data[i] = i;
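The reshape_kernel.cc hunk above changes the in-place test from a pointer-and-numel comparison to comparing the underlying holders, and splits allocation out of the copy path. A hedged sketch of the resulting control flow; names follow the hunk, but `ReshapeSketch` is a hypothetical free function, not the full kernel with its dispatch plumbing:

// Sketch of the control flow introduced in reshape_kernel.cc above,
// assuming the pten headers from this patch.
template <typename Context>
void ReshapeSketch(const Context& dev_ctx,
                   const pten::DenseTensor& x,
                   const pten::framework::DDim& out_dims,
                   pten::DenseTensor* out) {
  if (x.initialized() && x.Holder() == out->Holder()) {
    // x and out already share one allocation: reshape is metadata-only.
    out->ResizeAndAllocate(out_dims);
    return;
  }
  // Otherwise materialize out at x's place first, copy, then fix the
  // shape, so the copy sees buffers sized to x rather than stale output
  // metadata.
  out->Resize(x.dims());
  out->mutable_data(x.place());
  pten::Copy(dev_ctx, x, false, out);
  out->Resize(out_dims);
  out->ResetLoD(x.lod());
}

Comparing holders rather than raw data pointers also covers the case where two tensors alias the same allocation at different offsets or sizes.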
diff --git a/paddle/pten/tests/api/test_conj_api.cc b/paddle/pten/tests/api/test_conj_api.cc
index c17b0f23f4f..0273737347e 100644
--- a/paddle/pten/tests/api/test_conj_api.cc
+++ b/paddle/pten/tests/api/test_conj_api.cc
@@ -37,7 +37,8 @@ TEST(API, conj) {
       pten::DenseTensorMeta(pten::DataType::COMPLEX64,
                             framework::make_ddim({3, 10}),
                             pten::DataLayout::NCHW));
-  auto* dense_x_data = dense_x->mutable_data<paddle::complex64>();
+  auto* dense_x_data =
+      dense_x->mutable_data<paddle::complex64>(paddle::platform::CPUPlace());
 
   for (size_t i = 0; i < 3; ++i) {
     for (size_t j = 0; j < 10; ++j) {
diff --git a/paddle/pten/tests/api/test_dot_api.cc b/paddle/pten/tests/api/test_dot_api.cc
index 97616d0cbcd..6de8943a467 100644
--- a/paddle/pten/tests/api/test_dot_api.cc
+++ b/paddle/pten/tests/api/test_dot_api.cc
@@ -37,14 +37,16 @@ TEST(API, dot) {
       pten::DenseTensorMeta(pten::DataType::FLOAT32,
                             framework::make_ddim({3, 10}),
                             pten::DataLayout::NCHW));
-  auto* dense_x_data = dense_x->mutable_data<float>();
+  auto* dense_x_data =
+      dense_x->mutable_data<float>(paddle::platform::CPUPlace());
 
   auto dense_y = std::make_shared<pten::DenseTensor>(
       alloc.get(),
       pten::DenseTensorMeta(pten::DataType::FLOAT32,
                             framework::make_ddim({3, 10}),
                             pten::DataLayout::NCHW));
-  auto* dense_y_data = dense_y->mutable_data<float>();
+  auto* dense_y_data =
+      dense_y->mutable_data<float>(paddle::platform::CPUPlace());
 
   float sum[3] = {0.0, 0.0, 0.0};
   for (size_t i = 0; i < 3; ++i) {
diff --git a/paddle/pten/tests/api/test_elementwise_api.cc b/paddle/pten/tests/api/test_elementwise_api.cc
index 17a6ffde9df..df1c6278d96 100644
--- a/paddle/pten/tests/api/test_elementwise_api.cc
+++ b/paddle/pten/tests/api/test_elementwise_api.cc
@@ -37,14 +37,16 @@ TEST(API, add) {
       pten::DenseTensorMeta(pten::DataType::FLOAT32,
                             framework::make_ddim({3, 10}),
                             pten::DataLayout::NCHW));
-  auto* dense_x_data = dense_x->mutable_data<float>();
+  auto* dense_x_data =
+      dense_x->mutable_data<float>(paddle::platform::CPUPlace());
 
   auto dense_y = std::make_shared<pten::DenseTensor>(
       alloc.get(),
       pten::DenseTensorMeta(pten::DataType::FLOAT32,
                             framework::make_ddim({10}),
                             pten::DataLayout::NCHW));
-  auto* dense_y_data = dense_y->mutable_data<float>();
+  auto* dense_y_data =
+      dense_y->mutable_data<float>(paddle::platform::CPUPlace());
 
   float sum[3][10] = {0.0};
   for (size_t i = 0; i < 3; ++i) {
@@ -91,14 +93,16 @@ TEST(API, subtract) {
       pten::DenseTensorMeta(pten::DataType::FLOAT32,
                             framework::make_ddim({3, 10}),
                             pten::DataLayout::NCHW));
-  auto* dense_x_data = dense_x->mutable_data<float>();
+  auto* dense_x_data =
+      dense_x->mutable_data<float>(paddle::platform::CPUPlace());
 
   auto dense_y = std::make_shared<pten::DenseTensor>(
       alloc.get(),
       pten::DenseTensorMeta(pten::DataType::FLOAT32,
                             framework::make_ddim({10}),
                             pten::DataLayout::NCHW));
-  auto* dense_y_data = dense_y->mutable_data<float>();
+  auto* dense_y_data =
+      dense_y->mutable_data<float>(paddle::platform::CPUPlace());
 
   float sub[3][10] = {0.0};
   for (size_t i = 0; i < 3; ++i) {
@@ -145,14 +149,16 @@ TEST(API, divide) {
       pten::DenseTensorMeta(pten::DataType::FLOAT32,
                             framework::make_ddim({3, 10}),
                             pten::DataLayout::NCHW));
-  auto* dense_x_data = dense_x->mutable_data<float>();
+  auto* dense_x_data =
+      dense_x->mutable_data<float>(paddle::platform::CPUPlace());
 
   auto dense_y = std::make_shared<pten::DenseTensor>(
       alloc.get(),
       pten::DenseTensorMeta(pten::DataType::FLOAT32,
                             framework::make_ddim({10}),
                             pten::DataLayout::NCHW));
-  auto* dense_y_data = dense_y->mutable_data<float>();
+  auto* dense_y_data =
+      dense_y->mutable_data<float>(paddle::platform::CPUPlace());
 
   float div[3][10] = {0.0};
   for (size_t i = 0; i < 3; ++i) {
@@ -199,14 +205,16 @@ TEST(API, multiply) {
       pten::DenseTensorMeta(pten::DataType::FLOAT32,
                             framework::make_ddim({3, 10}),
                             pten::DataLayout::NCHW));
-  auto* dense_x_data = dense_x->mutable_data<float>();
+  auto* dense_x_data =
dense_x->mutable_data(paddle::platform::CPUPlace()); auto dense_y = std::make_shared( alloc.get(), pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({10}), pten::DataLayout::NCHW)); - auto* dense_y_data = dense_y->mutable_data(); + auto* dense_y_data = + dense_y->mutable_data(paddle::platform::CPUPlace()); float mul[3][10] = {0.0}; for (size_t i = 0; i < 3; ++i) { diff --git a/paddle/pten/tests/api/test_empty_api.cc b/paddle/pten/tests/api/test_empty_api.cc index f38e91b02b7..72f9100f7b3 100644 --- a/paddle/pten/tests/api/test_empty_api.cc +++ b/paddle/pten/tests/api/test_empty_api.cc @@ -47,10 +47,8 @@ TEST(API, empty_like) { ASSERT_EQ(out.dims().size(), 2); ASSERT_EQ(out.dims()[0], 3); ASSERT_EQ(out.numel(), 6); - ASSERT_EQ(out.is_cpu(), true); ASSERT_EQ(out.type(), pten::DataType::FLOAT32); ASSERT_EQ(out.layout(), pten::DataLayout::NCHW); - ASSERT_EQ(out.initialized(), true); } TEST(API, empty1) { @@ -63,7 +61,8 @@ TEST(API, empty1) { pten::DenseTensorMeta(pten::DataType::INT64, framework::make_ddim({2}), pten::DataLayout::NCHW)); - auto* shape_data = dense_shape->mutable_data(); + auto* shape_data = + dense_shape->mutable_data(paddle::platform::CPUPlace()); shape_data[0] = 2; shape_data[1] = 3; @@ -76,10 +75,8 @@ TEST(API, empty1) { ASSERT_EQ(out.shape().size(), 2UL); ASSERT_EQ(out.shape()[0], 2); ASSERT_EQ(out.numel(), 6); - ASSERT_EQ(out.is_cpu(), true); ASSERT_EQ(out.type(), pten::DataType::FLOAT32); ASSERT_EQ(out.layout(), pten::DataLayout::NCHW); - ASSERT_EQ(out.initialized(), true); } TEST(API, empty2) { @@ -91,7 +88,7 @@ TEST(API, empty2) { pten::DenseTensorMeta(pten::DataType::INT32, framework::make_ddim({1}), pten::DataLayout::NCHW)); - dense_scalar->mutable_data()[0] = 2; + dense_scalar->mutable_data(paddle::platform::CPUPlace())[0] = 2; paddle::experimental::Tensor shape_scalar1(dense_scalar); paddle::experimental::Tensor shape_scalar2(dense_scalar); @@ -103,10 +100,8 @@ TEST(API, empty2) { ASSERT_EQ(out.shape().size(), 2UL); ASSERT_EQ(out.shape()[0], 2); ASSERT_EQ(out.numel(), 4); - ASSERT_EQ(out.is_cpu(), true); ASSERT_EQ(out.type(), pten::DataType::FLOAT32); ASSERT_EQ(out.layout(), pten::DataLayout::NCHW); - ASSERT_EQ(out.initialized(), true); } TEST(API, empty3) { @@ -117,10 +112,8 @@ TEST(API, empty3) { ASSERT_EQ(out.shape().size(), 2UL); ASSERT_EQ(out.shape()[0], 2); ASSERT_EQ(out.numel(), 6); - ASSERT_EQ(out.is_cpu(), true); ASSERT_EQ(out.type(), pten::DataType::INT32); ASSERT_EQ(out.layout(), pten::DataLayout::NCHW); - ASSERT_EQ(out.initialized(), true); } } // namespace tests diff --git a/paddle/pten/tests/api/test_fill_api.cc b/paddle/pten/tests/api/test_fill_api.cc index 7910cc840f5..4b78d142aef 100644 --- a/paddle/pten/tests/api/test_fill_api.cc +++ b/paddle/pten/tests/api/test_fill_api.cc @@ -37,7 +37,8 @@ TEST(API, full_like) { pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({3, 2}), pten::DataLayout::NCHW)); - auto* dense_x_data = dense_x->mutable_data(); + auto* dense_x_data = + dense_x->mutable_data(paddle::platform::CPUPlace()); dense_x_data[0] = 0; float val = 1.0; @@ -72,7 +73,8 @@ TEST(API, zeros_like) { pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({3, 2}), pten::DataLayout::NCHW)); - auto* dense_x_data = dense_x->mutable_data(); + auto* dense_x_data = + dense_x->mutable_data(paddle::platform::CPUPlace()); dense_x_data[0] = 1; paddle::experimental::Tensor x(dense_x); @@ -105,7 +107,8 @@ TEST(API, ones_like) { pten::DenseTensorMeta(pten::DataType::INT32, framework::make_ddim({3, 2}), 
pten::DataLayout::NCHW)); - auto* dense_x_data = dense_x->mutable_data(); + auto* dense_x_data = + dense_x->mutable_data(paddle::platform::CPUPlace()); dense_x_data[0] = 0; paddle::experimental::Tensor x(dense_x); @@ -139,7 +142,8 @@ TEST(API, full1) { pten::DenseTensorMeta(pten::DataType::INT64, framework::make_ddim({2}), pten::DataLayout::NCHW)); - auto* shape_data = dense_shape->mutable_data(); + auto* shape_data = + dense_shape->mutable_data(paddle::platform::CPUPlace()); shape_data[0] = 2; shape_data[1] = 3; @@ -148,7 +152,7 @@ TEST(API, full1) { pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({1}), pten::DataLayout::NCHW)); - dense_scalar->mutable_data()[0] = 1.0; + dense_scalar->mutable_data(paddle::platform::CPUPlace())[0] = 1.0; paddle::experimental::Tensor value(dense_scalar); @@ -185,7 +189,7 @@ TEST(API, full2) { pten::DenseTensorMeta(pten::DataType::INT32, framework::make_ddim({1}), pten::DataLayout::NCHW)); - dense_scalar->mutable_data()[0] = 2; + dense_scalar->mutable_data(paddle::platform::CPUPlace())[0] = 2; paddle::experimental::Tensor shape_scalar1(dense_scalar); paddle::experimental::Tensor shape_scalar2(dense_scalar); diff --git a/paddle/pten/tests/api/test_flatten_api.cc b/paddle/pten/tests/api/test_flatten_api.cc index cf8fa9cb189..f3b80f7db57 100644 --- a/paddle/pten/tests/api/test_flatten_api.cc +++ b/paddle/pten/tests/api/test_flatten_api.cc @@ -37,7 +37,8 @@ TEST(API, flatten) { pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({3, 2, 2, 3}), pten::DataLayout::NCHW)); - auto* dense_x_data = dense_x->mutable_data(); + auto* dense_x_data = + dense_x->mutable_data(paddle::platform::CPUPlace()); for (int i = 0; i < dense_x->numel(); i++) { dense_x_data[i] = i; diff --git a/paddle/pten/tests/api/test_matmul_api.cc b/paddle/pten/tests/api/test_matmul_api.cc index 08e0e888b99..7342916c514 100644 --- a/paddle/pten/tests/api/test_matmul_api.cc +++ b/paddle/pten/tests/api/test_matmul_api.cc @@ -38,14 +38,16 @@ TEST(API, matmul_cpu) { framework::make_ddim({3, 3}), pten::DataLayout::NCHW)); - auto* dense_x_data = dense_x->mutable_data(); + auto* dense_x_data = + dense_x->mutable_data(paddle::platform::CPUPlace()); auto dense_y = std::make_shared( alloc.get(), pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({3, 3}), pten::DataLayout::NCHW)); - auto* dense_y_data = dense_y->mutable_data(); + auto* dense_y_data = + dense_y->mutable_data(paddle::platform::CPUPlace()); for (size_t i = 0; i < 9; ++i) { dense_x_data[i] = 1.0; @@ -87,14 +89,14 @@ TEST(API, matmul_cuda) { framework::make_ddim({3, 3}), pten::DataLayout::NCHW)); - auto* ref_x_data = ref_x->mutable_data(); + auto* ref_x_data = ref_x->mutable_data(paddle::platform::CPUPlace()); auto ref_y = std::make_shared( alloc_cpu.get(), pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({3, 3}), pten::DataLayout::NCHW)); - auto* ref_y_data = ref_y->mutable_data(); + auto* ref_y_data = ref_y->mutable_data(paddle::platform::CPUPlace()); for (size_t i = 0; i < 9; ++i) { ref_x_data[i] = 1.0; diff --git a/paddle/pten/tests/api/test_mean_api.cc b/paddle/pten/tests/api/test_mean_api.cc index a7b85cff12c..046db05ca2b 100644 --- a/paddle/pten/tests/api/test_mean_api.cc +++ b/paddle/pten/tests/api/test_mean_api.cc @@ -37,7 +37,8 @@ TEST(API, mean) { pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({3, 4}), pten::DataLayout::NCHW)); - auto* dense_x_data = dense_x->mutable_data(); + auto* dense_x_data = + 
dense_x->mutable_data(paddle::platform::CPUPlace()); float sum = 0.0; for (size_t i = 0; i < 12; ++i) { diff --git a/paddle/pten/tests/api/test_pten_tensor.cc b/paddle/pten/tests/api/test_pten_tensor.cc index a28f7ca2ca2..e6e2730a94c 100644 --- a/paddle/pten/tests/api/test_pten_tensor.cc +++ b/paddle/pten/tests/api/test_pten_tensor.cc @@ -58,11 +58,11 @@ void TestAPIPlace() { std::vector tensor_shape = {5, 5}; #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) auto t1 = experimental::Tensor(paddle::PlaceType::kGPU, tensor_shape); - t1.mutable_data(); + t1.mutable_data(paddle::PlaceType::kGPU); CHECK((paddle::PlaceType::kGPU == t1.place())); #endif auto t2 = experimental::Tensor(paddle::PlaceType::kCPU, tensor_shape); - t2.mutable_data(); + t2.mutable_data(paddle::PlaceType::kCPU); CHECK((paddle::PlaceType::kCPU == t2.place())); } @@ -80,29 +80,30 @@ void TestAPISlice() { std::vector tensor_shape_sub2 = {1, 5, 5}; #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) auto t1 = experimental::Tensor(paddle::PlaceType::kGPU, tensor_shape_origin1); - t1.mutable_data(); + t1.mutable_data(paddle::PlaceType::kGPU); CHECK(t1.slice(0, 5).shape() == tensor_shape_origin1); CHECK(t1.slice(0, 3).shape() == tensor_shape_sub1); auto t2 = experimental::Tensor(paddle::PlaceType::kGPU, tensor_shape_origin2); - t2.mutable_data(); + t2.mutable_data(paddle::PlaceType::kGPU); CHECK(t2.slice(4, 5).shape() == tensor_shape_sub2); #endif auto t3 = experimental::Tensor(paddle::PlaceType::kCPU, tensor_shape_origin1); - t3.mutable_data(); + t3.mutable_data(paddle::PlaceType::kCPU); CHECK(t3.slice(0, 5).shape() == tensor_shape_origin1); CHECK(t3.slice(0, 3).shape() == tensor_shape_sub1); auto t4 = experimental::Tensor(paddle::PlaceType::kCPU, tensor_shape_origin2); - t4.mutable_data(); + t4.mutable_data(paddle::PlaceType::kCPU); CHECK(t4.slice(4, 5).shape() == tensor_shape_sub2); // Test writing function for sliced tensor auto t = InitCPUTensorForTest(); auto t_sliced = t.slice(0, 1); - auto* t_sliced_data_ptr = t_sliced.mutable_data(); + auto* t_sliced_data_ptr = + t_sliced.mutable_data(paddle::PlaceType::kCPU); for (int64_t i = 0; i < t_sliced.size(); i++) { t_sliced_data_ptr[i] += static_cast(5); } - auto* t_data_ptr = t.mutable_data(); + auto* t_data_ptr = t.mutable_data(paddle::PlaceType::kCPU); for (int64_t i = 0; i < t_sliced.size(); i++) { CHECK_EQ(t_data_ptr[i], static_cast(10)); } @@ -112,7 +113,7 @@ template paddle::DataType TestDtype() { std::vector tensor_shape = {5, 5}; auto t1 = experimental::Tensor(paddle::PlaceType::kCPU, tensor_shape); - t1.template mutable_data(); + t1.template mutable_data(paddle::PlaceType::kCPU); return t1.type(); } @@ -120,13 +121,13 @@ template void TestCast(paddle::DataType data_type) { std::vector tensor_shape = {5, 5}; auto t1 = experimental::Tensor(paddle::PlaceType::kCPU, tensor_shape); - t1.template mutable_data(); + t1.template mutable_data(paddle::PlaceType::kCPU); auto t2 = t1.cast(data_type); CHECK(t2.type() == data_type); #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) auto tg1 = experimental::Tensor(paddle::PlaceType::kGPU); tg1.reshape(tensor_shape); - tg1.template mutable_data(); + tg1.template mutable_data(paddle::PlaceType::kGPU); auto tg2 = tg1.cast(data_type); CHECK(tg2.type() == data_type); #endif @@ -194,7 +195,7 @@ void GroupTestDtype() { void TestInitilized() { experimental::Tensor test_tensor(paddle::PlaceType::kCPU, {1, 1}); CHECK(test_tensor.is_initialized() == false); - test_tensor.mutable_data(); + 
test_tensor.mutable_data(paddle::PlaceType::kCPU); CHECK(test_tensor.is_initialized() == true); float* tensor_data = test_tensor.mutable_data(); for (int i = 0; i < test_tensor.size(); i++) { diff --git a/paddle/pten/tests/api/test_reshape_api.cc b/paddle/pten/tests/api/test_reshape_api.cc index bfd1ea84144..1f0d734a7ec 100644 --- a/paddle/pten/tests/api/test_reshape_api.cc +++ b/paddle/pten/tests/api/test_reshape_api.cc @@ -37,7 +37,8 @@ TEST(API, reshape) { pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({3, 2, 2, 3}), pten::DataLayout::NCHW)); - auto* dense_x_data = dense_x->mutable_data(); + auto* dense_x_data = + dense_x->mutable_data(paddle::platform::CPUPlace()); for (int i = 0; i < dense_x->numel(); i++) { dense_x_data[i] = i; @@ -69,14 +70,15 @@ TEST(API, reshape) { TEST(Tensor, old_reshape) { paddle::experimental::Tensor x(paddle::PlaceType::kCPU); x.reshape({3, 4}); + x.mutable_data(paddle::PlaceType::kCPU); ASSERT_EQ(x.shape()[0], 3); ASSERT_EQ(x.shape()[1], 4); ASSERT_EQ(x.numel(), 12); ASSERT_EQ(x.is_cpu(), true); - ASSERT_EQ(x.type(), pten::DataType::UNDEFINED); + ASSERT_EQ(x.type(), pten::DataType::FLOAT32); ASSERT_EQ(x.layout(), pten::DataLayout::NCHW); - ASSERT_EQ(x.initialized(), false); + ASSERT_EQ(x.initialized(), true); } } // namespace tests diff --git a/paddle/pten/tests/api/test_sum_api.cc b/paddle/pten/tests/api/test_sum_api.cc index c0d5a89eeb7..385d18aa784 100644 --- a/paddle/pten/tests/api/test_sum_api.cc +++ b/paddle/pten/tests/api/test_sum_api.cc @@ -37,7 +37,8 @@ TEST(API, sum) { pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({3, 4}), pten::DataLayout::NCHW)); - auto* dense_x_data = dense_x->mutable_data(); + auto* dense_x_data = + dense_x->mutable_data(paddle::platform::CPUPlace()); float sum = 0.0; for (size_t i = 0; i < 12; ++i) { diff --git a/paddle/pten/tests/api/test_to_api.cc b/paddle/pten/tests/api/test_to_api.cc index fa999aace66..11636e1c014 100644 --- a/paddle/pten/tests/api/test_to_api.cc +++ b/paddle/pten/tests/api/test_to_api.cc @@ -35,7 +35,8 @@ paddle::experimental::Tensor CreateInputTensor() { pten::DenseTensorMeta(pten::DataType::INT64, framework::make_ddim({3, 4}), pten::DataLayout::NCHW)); - auto* dense_x_data = dense_x->mutable_data(); + auto* dense_x_data = + dense_x->mutable_data(paddle::platform::CPUPlace()); for (int64_t i = 0; i < 12; ++i) { dense_x_data[i] = i; diff --git a/paddle/pten/tests/core/test_dense_tensor.cc b/paddle/pten/tests/core/test_dense_tensor.cc index 56722d35f32..ff337aa5e8d 100644 --- a/paddle/pten/tests/core/test_dense_tensor.cc +++ b/paddle/pten/tests/core/test_dense_tensor.cc @@ -112,8 +112,6 @@ TEST(dense_tensor, resize) { CHECK_EQ(tensor_0.capacity(), 2u); tensor_0.ResizeAndAllocate({1, 2, 3}); CHECK_EQ(tensor_0.capacity(), 6u); - tensor_0.mutable_data(); - CHECK_EQ(tensor_0.capacity(), 6u); } TEST(dense_tensor, shallow_copy) { diff --git a/paddle/pten/tests/kernels/test_cast_dev_api.cc b/paddle/pten/tests/kernels/test_cast_dev_api.cc index 3b1412a8e5f..c9d376b81a6 100644 --- a/paddle/pten/tests/kernels/test_cast_dev_api.cc +++ b/paddle/pten/tests/kernels/test_cast_dev_api.cc @@ -38,7 +38,8 @@ TEST(DEV_API, cast) { pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({3, 4}), pten::DataLayout::NCHW)); - auto* dense_x_data = dense_x.mutable_data(); + auto* dense_x_data = + dense_x.mutable_data(paddle::platform::CPUPlace()); float sum = 0.0; for (size_t i = 0; i < 12; ++i) { diff --git a/paddle/pten/tests/kernels/test_conj_dev_api.cc 
b/paddle/pten/tests/kernels/test_conj_dev_api.cc index 51066d8ae47..6714b57105b 100644 --- a/paddle/pten/tests/kernels/test_conj_dev_api.cc +++ b/paddle/pten/tests/kernels/test_conj_dev_api.cc @@ -37,7 +37,8 @@ TEST(DEV_API, conj) { framework::make_ddim({3, 4}), pten::DataLayout::NCHW)); - auto* dense_x_data = dense_x.mutable_data(); + auto* dense_x_data = + dense_x.mutable_data(paddle::platform::CPUPlace()); for (size_t i = 0; i < 12; ++i) { dense_x_data[i] = paddle::complex64(i * 1.0, i * 1.0); } diff --git a/paddle/pten/tests/kernels/test_copy_dev_api.cc b/paddle/pten/tests/kernels/test_copy_dev_api.cc index 4f8bd727716..01dfa925d6c 100644 --- a/paddle/pten/tests/kernels/test_copy_dev_api.cc +++ b/paddle/pten/tests/kernels/test_copy_dev_api.cc @@ -39,7 +39,8 @@ TEST(DEV_API, copy) { pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({2, 3}), pten::DataLayout::NCHW)); - auto* dense_x_data = dense_src->mutable_data(); + auto* dense_x_data = + dense_src->mutable_data(paddle::platform::CPUPlace()); auto dense_dst = std::make_shared( alloc.get(), diff --git a/paddle/pten/tests/kernels/test_creation_dev_api.cc b/paddle/pten/tests/kernels/test_creation_dev_api.cc index 1aa21b847fa..17416d33473 100644 --- a/paddle/pten/tests/kernels/test_creation_dev_api.cc +++ b/paddle/pten/tests/kernels/test_creation_dev_api.cc @@ -52,7 +52,8 @@ TEST(DEV_API, empty_like) { pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({3, 2}), pten::DataLayout::NCHW)); - auto* dense_x_data = dense_x.mutable_data(); + auto* dense_x_data = + dense_x.mutable_data(paddle::platform::CPUPlace()); dense_x_data[0] = 0; // 2. test API @@ -96,7 +97,8 @@ TEST(DEV_API, full_like) { pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({3, 2}), pten::DataLayout::NCHW)); - auto* dense_x_data = dense_x.mutable_data(); + auto* dense_x_data = + dense_x.mutable_data(paddle::platform::CPUPlace()); dense_x_data[0] = 0; float val = 1.0; diff --git a/paddle/pten/tests/kernels/test_dot_dev_api.cc b/paddle/pten/tests/kernels/test_dot_dev_api.cc index e4978d84c83..27fecd3fcd9 100644 --- a/paddle/pten/tests/kernels/test_dot_dev_api.cc +++ b/paddle/pten/tests/kernels/test_dot_dev_api.cc @@ -36,13 +36,15 @@ TEST(DEV_API, dot) { pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({3, 10}), pten::DataLayout::NCHW)); - auto* dense_x_data = dense_x.mutable_data(); + auto* dense_x_data = + dense_x.mutable_data(paddle::platform::CPUPlace()); pten::DenseTensor dense_y(alloc.get(), pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({3, 10}), pten::DataLayout::NCHW)); - auto* dense_y_data = dense_y.mutable_data(); + auto* dense_y_data = + dense_y.mutable_data(paddle::platform::CPUPlace()); float sum[3] = {0.0, 0.0, 0.0}; for (size_t i = 0; i < 3; ++i) { diff --git a/paddle/pten/tests/kernels/test_elementwise_dev_api.cc b/paddle/pten/tests/kernels/test_elementwise_dev_api.cc index e5d9b05eec7..b3948843ee8 100644 --- a/paddle/pten/tests/kernels/test_elementwise_dev_api.cc +++ b/paddle/pten/tests/kernels/test_elementwise_dev_api.cc @@ -36,13 +36,15 @@ TEST(DEV_API, add) { pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({3, 10}), pten::DataLayout::NCHW)); - auto* dense_x_data = dense_x.mutable_data(); + auto* dense_x_data = + dense_x.mutable_data(paddle::platform::CPUPlace()); pten::DenseTensor dense_y(alloc.get(), pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({10}), pten::DataLayout::NCHW)); - auto* dense_y_data = dense_y.mutable_data(); + 
auto* dense_y_data = + dense_y.mutable_data(paddle::platform::CPUPlace()); float sum[3][10] = {0.0}; for (size_t i = 0; i < 3; ++i) { @@ -82,13 +84,15 @@ TEST(DEV_API, subtract) { pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({3, 10}), pten::DataLayout::NCHW)); - auto* dense_x_data = dense_x.mutable_data(); + auto* dense_x_data = + dense_x.mutable_data(paddle::platform::CPUPlace()); pten::DenseTensor dense_y(alloc.get(), pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({10}), pten::DataLayout::NCHW)); - auto* dense_y_data = dense_y.mutable_data(); + auto* dense_y_data = + dense_y.mutable_data(paddle::platform::CPUPlace()); float sub[3][10] = {0.0}; for (size_t i = 0; i < 3; ++i) { @@ -128,13 +132,15 @@ TEST(DEV_API, divide) { pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({3, 10}), pten::DataLayout::NCHW)); - auto* dense_x_data = dense_x.mutable_data(); + auto* dense_x_data = + dense_x.mutable_data(paddle::platform::CPUPlace()); pten::DenseTensor dense_y(alloc.get(), pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({10}), pten::DataLayout::NCHW)); - auto* dense_y_data = dense_y.mutable_data(); + auto* dense_y_data = + dense_y.mutable_data(paddle::platform::CPUPlace()); float div[3][10] = {0.0}; for (size_t i = 0; i < 3; ++i) { @@ -174,13 +180,15 @@ TEST(DEV_API, multiply) { pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({3, 10}), pten::DataLayout::NCHW)); - auto* dense_x_data = dense_x.mutable_data(); + auto* dense_x_data = + dense_x.mutable_data(paddle::platform::CPUPlace()); pten::DenseTensor dense_y(alloc.get(), pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({10}), pten::DataLayout::NCHW)); - auto* dense_y_data = dense_y.mutable_data(); + auto* dense_y_data = + dense_y.mutable_data(paddle::platform::CPUPlace()); float mul[3][10] = {0.0}; for (size_t i = 0; i < 3; ++i) { diff --git a/paddle/pten/tests/kernels/test_flatten_dev_api.cc b/paddle/pten/tests/kernels/test_flatten_dev_api.cc index 78cd6261c3a..fc463d1ff1e 100644 --- a/paddle/pten/tests/kernels/test_flatten_dev_api.cc +++ b/paddle/pten/tests/kernels/test_flatten_dev_api.cc @@ -47,7 +47,8 @@ TEST(DEV_API, flatten) { pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({3, 2, 2, 3}), pten::DataLayout::NCHW)); - auto* dense_x_data = dense_x.mutable_data(); + auto* dense_x_data = + dense_x.mutable_data(paddle::platform::CPUPlace()); for (int i = 0; i < dense_x.numel(); i++) { dense_x_data[i] = i; diff --git a/paddle/pten/tests/kernels/test_matmul_dev_api.cc b/paddle/pten/tests/kernels/test_matmul_dev_api.cc index 76f77503192..40419ecb3ad 100644 --- a/paddle/pten/tests/kernels/test_matmul_dev_api.cc +++ b/paddle/pten/tests/kernels/test_matmul_dev_api.cc @@ -36,13 +36,15 @@ TEST(DEV_API, dot) { framework::make_ddim({3, 3}), pten::DataLayout::NCHW)); - auto* dense_x_data = dense_x.mutable_data(); + auto* dense_x_data = + dense_x.mutable_data(paddle::platform::CPUPlace()); DenseTensor dense_y(alloc.get(), pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({3, 3}), pten::DataLayout::NCHW)); - auto* dense_y_data = dense_y.mutable_data(); + auto* dense_y_data = + dense_y.mutable_data(paddle::platform::CPUPlace()); for (size_t i = 0; i < 9; ++i) { dense_x_data[i] = 1.0; diff --git a/paddle/pten/tests/kernels/test_mean_dev_api.cc b/paddle/pten/tests/kernels/test_mean_dev_api.cc index 07ec30afad5..786492d3a1b 100644 --- a/paddle/pten/tests/kernels/test_mean_dev_api.cc +++ 
b/paddle/pten/tests/kernels/test_mean_dev_api.cc @@ -35,7 +35,8 @@ TEST(DEV_API, mean) { pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({3, 4}), pten::DataLayout::NCHW)); - auto* dense_x_data = dense_x.mutable_data(); + auto* dense_x_data = + dense_x.mutable_data(paddle::platform::CPUPlace()); float sum = 0.0; for (size_t i = 0; i < 12; ++i) { diff --git a/paddle/pten/tests/kernels/test_reshape_dev_api.cc b/paddle/pten/tests/kernels/test_reshape_dev_api.cc index dc90043305c..ac2bb60cf9f 100644 --- a/paddle/pten/tests/kernels/test_reshape_dev_api.cc +++ b/paddle/pten/tests/kernels/test_reshape_dev_api.cc @@ -37,7 +37,8 @@ TEST(DEV_API, reshape) { pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({3, 2, 2, 3}), pten::DataLayout::NCHW)); - auto* dense_x_data = dense_x.mutable_data(); + auto* dense_x_data = + dense_x.mutable_data(paddle::platform::CPUPlace()); for (int i = 0; i < dense_x.numel(); i++) { dense_x_data[i] = i; diff --git a/paddle/pten/tests/kernels/test_scale_dev_api.cc b/paddle/pten/tests/kernels/test_scale_dev_api.cc index 106835a204c..abb592cde3f 100644 --- a/paddle/pten/tests/kernels/test_scale_dev_api.cc +++ b/paddle/pten/tests/kernels/test_scale_dev_api.cc @@ -36,7 +36,8 @@ TEST(DEV_API, scale) { framework::make_ddim({3, 4}), pten::DataLayout::NCHW)); - auto* dense_x_data = dense_x.mutable_data(); + auto* dense_x_data = + dense_x.mutable_data(paddle::platform::CPUPlace()); for (size_t i = 0; i < 12; ++i) { dense_x_data[i] = i * 1.0; } @@ -68,7 +69,8 @@ TEST(DEV_API, scale_host) { pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({3, 4}), pten::DataLayout::NCHW)); - auto* dense_x_data = dense_x.mutable_data(); + auto* dense_x_data = + dense_x.mutable_data(paddle::platform::CPUPlace()); for (size_t i = 0; i < 12; ++i) { dense_x_data[i] = i * 1.0; } @@ -77,7 +79,7 @@ TEST(DEV_API, scale_host) { pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({1}), pten::DataLayout::NCHW)); - scale.mutable_data()[0] = 2; + scale.data()[0] = 2; float bias = 1; bool bias_after_scale = true; diff --git a/paddle/pten/tests/kernels/test_sum_dev_api.cc b/paddle/pten/tests/kernels/test_sum_dev_api.cc index 41d694a025f..595f0b96920 100644 --- a/paddle/pten/tests/kernels/test_sum_dev_api.cc +++ b/paddle/pten/tests/kernels/test_sum_dev_api.cc @@ -35,7 +35,8 @@ TEST(DEV_API, sum) { pten::DenseTensorMeta(pten::DataType::FLOAT32, framework::make_ddim({3, 4}), pten::DataLayout::NCHW)); - auto* dense_x_data = dense_x.mutable_data(); + auto* dense_x_data = + dense_x.mutable_data(paddle::platform::CPUPlace()); float sum = 0.0; for (size_t i = 0; i < 12; ++i) { diff --git a/python/paddle/fluid/tests/custom_op/attr_test_op.cc b/python/paddle/fluid/tests/custom_op/attr_test_op.cc index 14cb0aa7c71..1c79d9a26ae 100644 --- a/python/paddle/fluid/tests/custom_op/attr_test_op.cc +++ b/python/paddle/fluid/tests/custom_op/attr_test_op.cc @@ -137,7 +137,9 @@ std::vector AttrTestForward( PD_DISPATCH_FLOATING_TYPES( x.type(), "assign_cpu_kernel", ([&] { assign_cpu_kernel( - x.data(), out.mutable_data(), x.size()); + x.data(), + out.mutable_data(paddle::PlaceType::kCPU), + x.size()); })); // Check attrs value @@ -175,12 +177,13 @@ std::vector AttrTestBackward( const std::vector& str_vec_attr) { auto grad_x = paddle::Tensor(paddle::PlaceType::kCPU, grad_out.shape()); - PD_DISPATCH_FLOATING_TYPES(grad_out.type(), "assign_cpu_kernel", ([&] { - assign_cpu_kernel( - grad_out.data(), - grad_x.mutable_data(), - grad_out.size()); - })); + 
PD_DISPATCH_FLOATING_TYPES(
+      grad_out.type(), "assign_cpu_kernel", ([&] {
+        assign_cpu_kernel<data_t>(
+            grad_out.data<data_t>(),
+            grad_x.mutable_data<data_t>(paddle::PlaceType::kCPU),
+            grad_out.size());
+      }));
 
   CheckAllBackwardAttrs(int_attr, float_vec_attr, str_vec_attr);
 
@@ -203,7 +206,9 @@ std::vector<paddle::Tensor> ConstAttrTestForward(
   PD_DISPATCH_FLOATING_TYPES(
       x.type(), "assign_cpu_kernel", ([&] {
         assign_cpu_kernel<data_t>(
-            x.data<data_t>(), out.mutable_data<data_t>(), x.size());
+            x.data<data_t>(),
+            out.mutable_data<data_t>(paddle::PlaceType::kCPU),
+            x.size());
       }));
 
   // Check attrs value
@@ -241,12 +246,13 @@ std::vector<paddle::Tensor> ConstAttrTestBackward(
     const std::vector<std::string>& str_vec_attr) {
   auto grad_x = paddle::Tensor(paddle::PlaceType::kCPU, grad_out.shape());
 
-  PD_DISPATCH_FLOATING_TYPES(grad_out.type(), "assign_cpu_kernel", ([&] {
-                               assign_cpu_kernel<data_t>(
-                                   grad_out.data<data_t>(),
-                                   grad_x.mutable_data<data_t>(),
-                                   grad_out.size());
-                             }));
+  PD_DISPATCH_FLOATING_TYPES(
+      grad_out.type(), "assign_cpu_kernel", ([&] {
+        assign_cpu_kernel<data_t>(
+            grad_out.data<data_t>(),
+            grad_x.mutable_data<data_t>(paddle::PlaceType::kCPU),
+            grad_out.size());
+      }));
 
   CheckAllBackwardAttrs(int_attr, float_vec_attr, str_vec_attr);
 
diff --git a/python/paddle/fluid/tests/custom_op/concat_and_split.h b/python/paddle/fluid/tests/custom_op/concat_and_split.h
index 9f24cc43699..cbec4653a20 100644
--- a/python/paddle/fluid/tests/custom_op/concat_and_split.h
+++ b/python/paddle/fluid/tests/custom_op/concat_and_split.h
@@ -47,7 +47,7 @@ void ConcatCpuKernel(const std::vector<paddle::Tensor>& ins,
   int64_t out_cols = 0;
   auto ins_cols = GetCols(ins, out_rows, &out_cols);
 
-  auto* out_data = out->mutable_data<data_t>();
+  auto* out_data = out->mutable_data<data_t>(paddle::PlaceType::kCPU);
   int64_t col_idx = 0;
   for (size_t i = 0; i < num; ++i) {
     int64_t col_len = ins_cols[i];
@@ -76,7 +76,9 @@ void SplitCpuKernel(const paddle::Tensor& in,
   int64_t col_idx = 0;
   for (size_t j = 0; j < num; ++j) {
     int64_t col_len = out_cols[j];
-    auto* out_data = outs->at(j).mutable_data<data_t>() + i * col_len;
+    auto* out_data =
+        outs->at(j).mutable_data<data_t>(paddle::PlaceType::kCPU) +
+        i * col_len;
     std::memcpy(out_data, in_data + col_idx, sizeof(data_t) * col_len);
     col_idx += col_len;
   }
diff --git a/python/paddle/fluid/tests/custom_op/custom_conj_op.cc b/python/paddle/fluid/tests/custom_op/custom_conj_op.cc
index b9c10f479e0..ae60799d239 100644
--- a/python/paddle/fluid/tests/custom_op/custom_conj_op.cc
+++ b/python/paddle/fluid/tests/custom_op/custom_conj_op.cc
@@ -76,7 +76,9 @@ std::vector<paddle::Tensor> ConjFunction(const paddle::Tensor& x) {
   PD_DISPATCH_FLOATING_AND_COMPLEX_TYPES(
       x.type(), "ConjCPUKernel", ([&] {
         ConjCPUKernel<data_t>(
-            x.data<data_t>(), x.size(), out.mutable_data<data_t>());
+            x.data<data_t>(),
+            x.size(),
+            out.mutable_data<data_t>(paddle::PlaceType::kCPU));
       }));
 
   return {out};
diff --git a/python/paddle/fluid/tests/custom_op/dispatch_test_op.cc b/python/paddle/fluid/tests/custom_op/dispatch_test_op.cc
index 0f7d323b545..d5f161fc5b7 100644
--- a/python/paddle/fluid/tests/custom_op/dispatch_test_op.cc
+++ b/python/paddle/fluid/tests/custom_op/dispatch_test_op.cc
@@ -32,7 +32,9 @@ std::vector<paddle::Tensor> DispatchTestInterger(const paddle::Tensor& x) {
   PD_DISPATCH_INTEGRAL_TYPES(
       x.type(), "assign_cpu_kernel", ([&] {
         assign_cpu_kernel<data_t>(
-            x.data<data_t>(), out.mutable_data<data_t>(), x.size());
+            x.data<data_t>(),
+            out.mutable_data<data_t>(paddle::PlaceType::kCPU),
+            x.size());
       }));
 
   return {out};
@@ -50,7 +52,9 @@ std::vector<paddle::Tensor> DispatchTestFloatAndInteger(
   PD_DISPATCH_FLOATING_AND_INTEGRAL_TYPES(
       x.type(), "assign_cpu_kernel", ([&] {
         assign_cpu_kernel<data_t>(
-            x.data<data_t>(), out.mutable_data<data_t>(), x.size());
+            x.data<data_t>(),
+            out.mutable_data<data_t>(paddle::PlaceType::kCPU),
+            x.size());
       }));
 
   return {out};
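Every dispatch_test_op.cc hunk in this file applies the same reflow. For reference, a self-contained sketch of the post-patch shape of a custom-op forward; `AssignForward` is hypothetical, and `assign_cpu_kernel` mirrors the helper defined earlier in that file, repeated here only so the sketch compiles on its own:

// Post-patch pattern for a CPU custom op using the dispatch macros.
#include "paddle/extension.h"

template <typename data_t>
void assign_cpu_kernel(const data_t* x_data, data_t* out_data, int64_t n) {
  for (int64_t i = 0; i < n; ++i) {
    out_data[i] = x_data[i];
  }
}

std::vector<paddle::Tensor> AssignForward(const paddle::Tensor& x) {
  auto out = paddle::Tensor(paddle::PlaceType::kCPU, x.shape());
  PD_DISPATCH_FLOATING_TYPES(
      x.type(), "assign_cpu_kernel", ([&] {
        assign_cpu_kernel<data_t>(
            x.data<data_t>(),
            // The place argument is now required at the allocation site.
            out.mutable_data<data_t>(paddle::PlaceType::kCPU),
            x.size());
      }));
  return {out};
}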
@@ -67,7 +71,9 @@ std::vector<paddle::Tensor> DispatchTestComplex(const paddle::Tensor& x) {
   PD_DISPATCH_COMPLEX_TYPES(
       x.type(), "assign_cpu_kernel", ([&] {
         assign_cpu_kernel<data_t>(
-            x.data<data_t>(), out.mutable_data<data_t>(), x.size());
+            x.data<data_t>(),
+            out.mutable_data<data_t>(paddle::PlaceType::kCPU),
+            x.size());
       }));
 
   return {out};
@@ -85,7 +91,9 @@ std::vector<paddle::Tensor> DispatchTestFloatAndComplex(
   PD_DISPATCH_FLOATING_AND_COMPLEX_TYPES(
       x.type(), "assign_cpu_kernel", ([&] {
         assign_cpu_kernel<data_t>(
-            x.data<data_t>(), out.mutable_data<data_t>(), x.size());
+            x.data<data_t>(),
+            out.mutable_data<data_t>(paddle::PlaceType::kCPU),
+            x.size());
       }));
 
   return {out};
@@ -103,7 +111,9 @@ std::vector<paddle::Tensor> DispatchTestFloatAndIntegerAndComplex(
   PD_DISPATCH_FLOATING_AND_INTEGRAL_AND_COMPLEX_TYPES(
       x.type(), "assign_cpu_kernel", ([&] {
         assign_cpu_kernel<data_t>(
-            x.data<data_t>(), out.mutable_data<data_t>(), x.size());
+            x.data<data_t>(),
+            out.mutable_data<data_t>(paddle::PlaceType::kCPU),
+            x.size());
       }));
 
   return {out};
@@ -120,7 +130,9 @@ std::vector<paddle::Tensor> DispatchTestFloatAndHalf(const paddle::Tensor& x) {
   PD_DISPATCH_FLOATING_AND_HALF_TYPES(
       x.type(), "assign_cpu_kernel", ([&] {
         assign_cpu_kernel<data_t>(
-            x.data<data_t>(), out.mutable_data<data_t>(), x.size());
+            x.data<data_t>(),
+            out.mutable_data<data_t>(paddle::PlaceType::kCPU),
+            x.size());
       }));
 
   return {out};
-- 
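Appendix note (below the patch trailer, so it is ignored by git am): the storage.h hunks at the top of this section make `set_data_shared` and `move_data_shared` pure virtual on `Storage`. A hedged sketch of what that contract asks of a subclass; `SketchStorage` is hypothetical, it assumes `data_` is the protected holder member shown in the hunk, and the other pure-virtual members of `Storage` are omitted for brevity:

// Hypothetical Storage subclass satisfying the tightened contract in
// paddle/pten/core/storage.h above; not part of this patch.
#include "paddle/pten/core/storage.h"

class SketchStorage : public pten::Storage {
 public:
  void set_data_shared(
      const std::shared_ptr<paddle::memory::allocation::Allocation>& holder)
      override {
    CHECK(holder);           // the base class no longer accepts null holders
    data_ = holder;
    size_ = holder->size();  // cached size must track the live allocation
  }

  std::shared_ptr<paddle::memory::allocation::Allocation>&& move_data_shared()
      override {
    size_ = 0;               // moving out relinquishes ownership entirely
    return std::move(data_);
  }

  // Remaining pure-virtual members of Storage omitted for brevity.

 private:
  int64_t size_{0};
};

Keeping the size bookkeeping inside the overrides, as `TensorStorage` does above, is what lets the base class drop its default (and previously size-unaware) implementations.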