Unverified commit a56e16a7, authored by 石晓伟, committed by GitHub

[Refactoring Tensor PR #5] replace storage with pten allocation (#39085)

* updates callers, test=develop

* updates tensor, test=develop

* fixes errors, test=develop

* remove some dtypes, test=develop

* fix errors in the base storage modification, test=develop

* fixes a bug, test=develop

* fixes the bugs in push the whole, test=develop

* updates, test=develop

* update

* update, test=develop

* fixes the mac-py3 CI, test=develop

* remove the storage impl, test=develop

* updates some codes, test=develop

* update, test=develop

* updates pten allocation, test=develop
Parent commit: 95b081ef
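The caller-facing change repeated throughout the hunks below is that DenseTensor::mutable_data&lt;T&gt;() no longer allocates lazily against an internal Storage object: callers now pass the target place explicitly, and purely read-only accesses switch to data&lt;T&gt;(). A minimal sketch of the new pattern, mirroring the updated test sites (the allocator and meta setup are assumptions taken from the visible test context, not part of this diff):

    // Sketch only: build a small CPU DenseTensor and write through mutable_data(place).
    pten::DenseTensorMeta meta(pten::DataType::FLOAT32,
                               paddle::framework::make_ddim({2}));
    auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
        paddle::platform::CPUPlace());
    auto dt = std::make_shared<pten::DenseTensor>(alloc.get(), meta);
    // Before this PR: float* ptr = dt->mutable_data<float>();
    // After this PR: the place is passed explicitly and allocation happens here.
    float* ptr = dt->mutable_data<float>(paddle::platform::CPUPlace());
    ptr[0] = 1.0f;
    float first = dt->data<float>()[0];  // read-only access still takes no place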
@@ -216,8 +216,9 @@ void TensorAdd(const egr::EagerTensor& src, egr::EagerTensor* dst) {
 #define PADDLE_TENSOR_ADD(cpp_type)                                          \
   if (data_type == paddle::framework::DataTypeTrait<cpp_type>::DataType()) { \
-    TensorAddFunctor<cpp_type> func(numel, src_tensor->data<cpp_type>(),     \
-                                    dst_tensor->mutable_data<cpp_type>());   \
+    TensorAddFunctor<cpp_type> func(                                         \
+        numel, src_tensor->data<cpp_type>(),                                 \
+        dst_tensor->mutable_data<cpp_type>(place));                          \
     paddle::platform::VisitPlace(place, func);                               \
     return;                                                                  \
   }
...
@@ -36,7 +36,8 @@ TEST(AccumulationNode, EagerTensor) {
           paddle::platform::CPUPlace())
           .get(),
       meta);
-  dt0->mutable_data<paddle::platform::float16>()[0] = 10.0;
+  dt0->mutable_data<paddle::platform::float16>(
+      paddle::platform::CPUPlace())[0] = 10.0;
   EagerTensor et0 = EagerTensor(dt0);
   std::shared_ptr<pten::DenseTensor> dt1 = std::make_shared<pten::DenseTensor>(
@@ -45,7 +46,8 @@ TEST(AccumulationNode, EagerTensor) {
           .get(),
       meta);
-  dt1->mutable_data<paddle::platform::float16>()[0] = 20.0;
+  dt1->mutable_data<paddle::platform::float16>(
+      paddle::platform::CPUPlace())[0] = 20.0;
   EagerTensor et1 = EagerTensor(dt1);
   std::shared_ptr<pten::DenseTensor> grad_dt =
...
@@ -46,7 +46,7 @@ TEST(AutogradMeta, MemberFunction) {
           paddle::platform::CPUPlace())
           .get(),
       meta);
-  auto* dt_ptr = dt->mutable_data<float>();
+  auto* dt_ptr = dt->mutable_data<float>(paddle::platform::CPUPlace());
   dt_ptr[0] = 5.0f;
   dt_ptr[1] = 10.0f;
   grad_t->set_impl(dt);
...
@@ -40,7 +40,7 @@ TEST(EagerTensor, Constructor) {
           paddle::platform::CPUPlace())
           .get(),
       meta);
-  auto* dt_ptr = dt->mutable_data<float>();
+  auto* dt_ptr = dt->mutable_data<float>(paddle::platform::CPUPlace());
   dt_ptr[0] = 5.0f;
   dt_ptr[1] = 10.0f;
   egr::EagerTensor et3 = egr::EagerTensor(dt);
@@ -70,7 +70,7 @@ TEST(EagerTensor, MemberFunction) {
           paddle::platform::CPUPlace())
           .get(),
       meta);
-  auto* dt_ptr = dt->mutable_data<float>();
+  auto* dt_ptr = dt->mutable_data<float>(paddle::platform::CPUPlace());
   dt_ptr[0] = 5.0f;
   dt_ptr[1] = 10.0f;
   VLOG(6) << "Make Dense Tensor";
...
@@ -45,7 +45,7 @@ TEST(GradNodeInfo, GradNodeBase) {
           paddle::platform::CPUPlace())
           .get(),
       meta);
-  auto* dt_ptr = dt->mutable_data<float>();
+  auto* dt_ptr = dt->mutable_data<float>(paddle::platform::CPUPlace());
   dt_ptr[0] = 5.0f;
   egr::EagerTensor et1(dt);
   grads = {{et1}};
@@ -102,7 +102,7 @@ TEST(GradNodeInfo, GradNodeBase) {
           paddle::platform::CPUPlace())
           .get(),
       meta);
-  auto* dt_ptr = dt->mutable_data<float>();
+  auto* dt_ptr = dt->mutable_data<float>(paddle::platform::CPUPlace());
   dt_ptr[0] = 6.0f;
   auto* et_ptr =
       std::dynamic_pointer_cast<pten::DenseTensor>(et.impl())->data<float>();
@@ -121,8 +121,8 @@ TEST(GradNodeInfo, GradNodeBase) {
   VLOG(6) << "Test Reduce Hook";
   auto reduce_hook = [&](void) -> void {
-    auto* et_ptr = std::dynamic_pointer_cast<pten::DenseTensor>(et1.impl())
-                       ->mutable_data<float>();
+    auto* et_ptr =
+        std::dynamic_pointer_cast<pten::DenseTensor>(et1.impl())->data<float>();
     et_ptr[0] = 100.0;
     VLOG(6) << "Running Reduce Hook";
   };
...
@@ -41,7 +41,7 @@ class GradTestNode : public egr::GradNodeBase {
            paddle::platform::CPUPlace())
            .get(),
        meta);
-    auto* dt_ptr = dt->mutable_data<float>();
+    auto* dt_ptr = dt->mutable_data<float>(paddle::platform::CPUPlace());
    dt_ptr[0] = 6.0f;
    egr::EagerTensor et1(dt);
    std::vector<std::vector<egr::EagerTensor>> res = {{et1}};
...
@@ -57,7 +57,7 @@ TEST(GradTensorHolder, Interfaces) {
           paddle::platform::CPUPlace())
           .get(),
       meta);
-  dt0->mutable_data<float>()[0] = 10.0;
+  dt0->mutable_data<float>(paddle::platform::CPUPlace())[0] = 10.0;
   EagerTensor et0 = EagerTensor(dt0);
   std::shared_ptr<pten::DenseTensor> dt1 = std::make_shared<pten::DenseTensor>(
@@ -65,7 +65,7 @@ TEST(GradTensorHolder, Interfaces) {
           paddle::platform::CPUPlace())
           .get(),
       meta);
-  dt1->mutable_data<float>()[0] = 20.0;
+  dt1->mutable_data<float>(paddle::platform::CPUPlace())[0] = 20.0;
   EagerTensor et1 = EagerTensor(dt1);
   // Constructor empty GradTensorHolder
...
@@ -29,7 +29,7 @@ TEST(TensorWrapper, Basic) {
           paddle::platform::CPUPlace())
           .get(),
       meta);
-  auto* dt_ptr = dt->mutable_data<float>();
+  auto* dt_ptr = dt->mutable_data<float>(paddle::platform::CPUPlace());
   dt_ptr[0] = 5.0f;
   dt_ptr[1] = 10.0f;
   et1.set_impl(dt);
@@ -56,7 +56,7 @@ TEST(TensorWrapper, Basic) {
           paddle::platform::CPUPlace())
           .get(),
       meta2);
-  auto* dt_ptr2 = dt->mutable_data<float>();
+  auto* dt_ptr2 = dt->mutable_data<float>(paddle::platform::CPUPlace());
   dt_ptr2[0] = 6.0f;
   dt_ptr2[1] = 11.0f;
   et2.set_impl(dt2);
...
@@ -35,7 +35,7 @@ TEST(EagerUtils, AutoGradMeta) {
           paddle::platform::CPUPlace())
           .get(),
       meta);
-  dt0->mutable_data<float>()[0] = 10.0;
+  dt0->mutable_data<float>(paddle::platform::CPUPlace())[0] = 10.0;
   EagerTensor et0 = EagerTensor(dt0);
   std::shared_ptr<pten::DenseTensor> dt1 = std::make_shared<pten::DenseTensor>(
@@ -43,7 +43,7 @@ TEST(EagerUtils, AutoGradMeta) {
           paddle::platform::CPUPlace())
           .get(),
       meta);
-  dt1->mutable_data<float>()[0] = 20.0;
+  dt1->mutable_data<float>(paddle::platform::CPUPlace())[0] = 20.0;
   EagerTensor et1 = EagerTensor(dt1);
   std::vector<EagerTensor> ets = {et0, et1};
@@ -112,7 +112,7 @@ egr::EagerTensor CreateTestCPUTensor(T val,
           paddle::platform::CPUPlace())
           .get(),
       meta);
-  auto* dt_ptr = dt->mutable_data<T>();
+  auto* dt_ptr = dt->mutable_data<T>(paddle::platform::CPUPlace());
   for (int64_t i = 0; i < dt->numel(); i++) {
     dt_ptr[i] = val;
   }
...
@@ -44,8 +44,8 @@ egr::EagerTensor hook_function(const egr::EagerTensor& t) {
           paddle::memory::Alloc(place, bytes_size)),
       std::move(ret_meta));
-  float* t_ptr = t_dense->mutable_data<float>();
-  float* ret_ptr = ret_dense->mutable_data<float>();
+  float* t_ptr = t_dense->mutable_data<float>(place);
+  float* ret_ptr = ret_dense->mutable_data<float>(place);
   for (int i = 0; i < ret_dense->numel(); i++) {
     ret_ptr[i] = t_ptr[i] + 5.0;
   }
@@ -184,7 +184,7 @@ TEST(FwdBwdJoint, BranchedNodes) {
   // Examine Forward Output 2
   {
     auto dense_out = std::dynamic_pointer_cast<pten::DenseTensor>(out2.impl());
-    float* ptr = dense_out->mutable_data<float>();
+    float* ptr = dense_out->mutable_data<float>(paddle::platform::CPUPlace());
     for (int i = 0; i < 20; i++) {
       PADDLE_ENFORCE(ptr[i] == 150.0,
                      paddle::platform::errors::Fatal(
...
@@ -45,8 +45,8 @@ egr::EagerTensor hook_function(const egr::EagerTensor& t) {
           paddle::memory::Alloc(place, bytes_size)),
       std::move(ret_meta));
-  float* t_ptr = t_dense->mutable_data<float>();
-  float* ret_ptr = ret_dense->mutable_data<float>();
+  float* t_ptr = t_dense->mutable_data<float>(place);
+  float* ret_ptr = ret_dense->mutable_data<float>(place);
   for (int i = 0; i < ret_dense->numel(); i++) {
     ret_ptr[i] = t_ptr[i] + 3.0;
   }
...
@@ -34,7 +34,7 @@ bool CompareGradTensorWithValue(const egr::EagerTensor& target, T value) {
   egr::AutogradMeta* meta = egr::EagerUtils::unsafe_autograd_meta(target);
   auto grad_dense =
       std::dynamic_pointer_cast<pten::DenseTensor>(meta->Grad().impl());
-  T* ptr = grad_dense->mutable_data<T>();
+  T* ptr = grad_dense->data<T>();
   std::vector<T> host_data(grad_dense->numel());
   if (paddle::platform::is_gpu_place(grad_dense->place())) {
@@ -67,7 +67,7 @@ template <typename T>
 bool CompareTensorWithValue(const egr::EagerTensor& target, T value) {
   // TODO(jiabin): Support Selected Rows later
   auto dense_t = std::dynamic_pointer_cast<pten::DenseTensor>(target.impl());
-  T* ptr = dense_t->mutable_data<T>();
+  T* ptr = dense_t->data<T>();
   std::vector<T> host_data(dense_t->numel());
   if (paddle::platform::is_gpu_place(dense_t->place())) {
...
@@ -80,6 +80,7 @@ void EmptyEagerTensorInitializer(
         std::make_shared<pten::DenseTensor>(
             pten::make_intrusive<paddle::experimental::SharedStorage>(place),
             pten::DenseTensorMeta(pten::TransToPtenDataType(dtype), ddims));
+    dense_tensor->mutable_data(place);
     self->eager_tensor.set_impl(dense_tensor);
   } else {
     PADDLE_THROW(platform::errors::InvalidArgument(
...
@@ -272,16 +272,10 @@ namespace paddle {
       NAME, ::pten::DataType::UINT8, uint8_t, __VA_ARGS__)        \
   PD_PRIVATE_CASE_TYPE(                                           \
       NAME, ::pten::DataType::INT16, int16_t, __VA_ARGS__)        \
-  PD_PRIVATE_CASE_TYPE(                                           \
-      NAME, ::pten::DataType::UINT16, uint16_t, __VA_ARGS__)      \
   PD_PRIVATE_CASE_TYPE(                                           \
       NAME, ::pten::DataType::INT32, int32_t, __VA_ARGS__)        \
-  PD_PRIVATE_CASE_TYPE(                                           \
-      NAME, ::pten::DataType::UINT32, uint32_t, __VA_ARGS__)      \
   PD_PRIVATE_CASE_TYPE(                                           \
       NAME, ::pten::DataType::INT64, int64_t, __VA_ARGS__)        \
-  PD_PRIVATE_CASE_TYPE(                                           \
-      NAME, ::pten::DataType::UINT64, uint64_t, __VA_ARGS__)      \
   PD_PRIVATE_CASE_TYPE(NAME,                                      \
                        ::pten::DataType::BFLOAT16,                \
                        paddle::experimental::bfloat16,            \
...
@@ -149,8 +149,8 @@ bool Tensor::is_cuda() const {
 template <typename T>
 T *Tensor::mutable_data() {
   if (is_dense_tensor()) {
-    return std::dynamic_pointer_cast<pten::DenseTensor>(impl_)
-        ->mutable_data<T>();
+    return std::dynamic_pointer_cast<pten::DenseTensor>(impl_)->mutable_data<T>(
+        ConvertExtPlaceToInnerPlace(place()));
   }
   return nullptr;
 }
@@ -173,12 +173,18 @@ Tensor::mutable_data<paddle::platform::float16>();
 template <typename T>
 T *Tensor::mutable_data(const PlaceType &place) {
   auto inner_place = ConvertExtPlaceToInnerPlace(place);
-  PADDLE_ENFORCE_EQ(
-      platform::is_same_place(inner_place, impl_->place()),
-      true,
-      platform::errors::Unimplemented("Modification of tensor place through "
-                                      "mutable_data is not supported now"));
-  return mutable_data<T>();
+  if (impl_->initialized()) {
+    PADDLE_ENFORCE_EQ(
+        platform::is_same_place(inner_place, impl_->place()),
+        true,
+        platform::errors::Unimplemented("Modification of tensor place through "
+                                        "mutable_data is not supported now"));
+  }
+  if (is_dense_tensor()) {
+    return std::dynamic_pointer_cast<pten::DenseTensor>(impl_)->mutable_data<T>(
+        inner_place);
+  }
+  return nullptr;
 }
 template PADDLE_API float *Tensor::mutable_data<float>(const PlaceType &place);
@@ -205,7 +211,8 @@ Tensor::mutable_data<paddle::platform::float16>(const PlaceType &place);
 template <typename T>
 const T *Tensor::data() const {
   if (is_dense_tensor()) {
-    return std::dynamic_pointer_cast<pten::DenseTensor>(impl_)->data<T>();
+    return std::dynamic_pointer_cast<pten::DenseTensor>(impl_)->mutable_data<T>(
+        ConvertExtPlaceToInnerPlace(place()));
   }
   return nullptr;
 }
@@ -217,7 +224,6 @@ template PADDLE_API const int32_t *Tensor::data<int32_t>() const;
 template PADDLE_API const uint8_t *Tensor::data<uint8_t>() const;
 template PADDLE_API const int8_t *Tensor::data<int8_t>() const;
 template PADDLE_API const int16_t *Tensor::data<int16_t>() const;
-template PADDLE_API const uint16_t *Tensor::data<uint16_t>() const;
 template PADDLE_API const bool *Tensor::data<bool>() const;
 template PADDLE_API const paddle::platform::complex<float>
     *Tensor::data<paddle::platform::complex<float>>() const;
...
@@ -65,6 +65,7 @@ PADDLE_API Tensor copy_to(const Tensor& x, Backend backend, bool blocking) {
       pten::make_intrusive<paddle::experimental::SharedStorage>(
           pten::TransToFluidPlace(backend)),
       std::move(out_meta));
+  dense_out->mutable_data(pten::TransToFluidPlace(backend));
   kernel_context.EmplaceBackOutput(dense_out.get());
   Tensor out;
   out.set_impl(dense_out);
...
@@ -39,6 +39,18 @@ class ExternalStorage : public pten::Storage {
     size_ = 0;
   }
+  void set_data_shared(
+      const std::shared_ptr<paddle::memory::Allocation>& holder) override {
+    CHECK(holder);
+    data_ = holder;
+    size_ = holder->size();
+  }
+
+  std::shared_ptr<paddle::memory::Allocation>&& move_data_shared() override {
+    size_ = 0;
+    return std::move(data_);
+  }
   size_t size() const noexcept override { return size_; }
   const paddle::platform::Place& place() const override {
     PADDLE_ENFORCE_NOT_NULL(
@@ -92,6 +104,12 @@ class SharedStorage : public pten::Storage {
     }
   }
+  std::shared_ptr<paddle::memory::Allocation>&& move_data_shared() override {
+    size_ = 0;
+    place_ = Place();
+    return std::move(data_);
+  }
   size_t size() const noexcept override {
     return data_ ? data_->size() : size_;
   }
...
@@ -133,9 +133,6 @@ class ScalarBase {
       case DataType::INT8:
         data_.i8 = tensor.template data<int8_t>()[0];
         break;
-      case DataType::UINT16:
-        data_.ui16 = tensor.template data<uint16_t>()[0];
-        break;
       case DataType::UINT8:
         data_.ui8 = tensor.template data<uint8_t>()[0];
         break;
...
@@ -15,46 +15,16 @@ limitations under the License. */
 #pragma once
 #include <cstdint>
+#include <functional>
 #include "paddle/fluid/platform/place.h"
-#include "paddle/pten/core/candidate/allocator.h"
 namespace pten {
-namespace deprecated {
-/// \brief Encapsulates strategies for access/addressing, allocation/
-/// deallocation and construction/destruction of objects.
-class RawAllocator {
- public:
-  using Place = paddle::platform::Place;
-  /// \brief Default destructor.
-  virtual ~RawAllocator() = default;
-  /// \brief Allocates storage suitable for an array object of n bytes
-  /// and creates the array, but does not construct array elements.
-  /// May throw exceptions.
-  /// \param bytes_size The number of bytes to allocate.
-  /// \return The first address allocated.
-  virtual void* Allocate(size_t bytes_size) = 0;
-  /// \brief Deallocates storage pointed to ptr, which must be a value
-  /// returned by a previous call to allocate that has not been
-  /// invalidated by an intervening call to deallocate. The bytes_size
-  /// must match the value previously passed to allocate.
-  /// \param ptr The first address to deallocate.
-  /// \param bytes_size The number of bytes to deallocate.
-  virtual void Deallocate(void* ptr, size_t bytes_size) = 0;
-  /// \brief Get the place value of the allocator and the allocation.
-  /// \return The place value of the allocator and the allocation.
-  virtual const Place& place() const = 0;
-};
-/// \brief Fancy pointer with context. The use of this data type
+/// \brief Fancy pointer with deleter. The use of this data type
 /// is to be compatible with allocators from different frameworks
 /// without significant performance loss. This class does not
 /// support being inherited.
-class Allocation final {
+class Allocation {
  public:
  using Place = paddle::platform::Place;
  using DeleterFnPtr = void (*)(Allocation*);
@@ -62,63 +32,54 @@ class Allocation final {
   Allocation() = default;
   // Don't own resources, only provide access.
-  Allocation(void* data, const Place& place) : data_(data), place_(place) {}
+  Allocation(void* data, size_t size, const Place& place)
+      : ptr_(data), size_(size), place_(place) {}
   // Own resources.
-  Allocation(void* data, void* ctx, DeleterFnPtr deleter, const Place& place)
-      : data_(data), ctx_(ctx), deleter_(deleter), place_(place) {}
-  Allocation(Allocation&& other) { swap(*this, other); }
-  Allocation& operator=(Allocation&& other) {
+  Allocation(void* data, size_t size, DeleterFnPtr deleter, const Place& place)
+      : ptr_(data), size_(size), deleter_(deleter), place_(place) {}
+  Allocation(Allocation&& other) noexcept { swap(*this, other); }
+  Allocation& operator=(Allocation&& other) noexcept {
     // Exchange them explicitly to avoid moving is equivalent
     // to copying.
     swap(*this, other);
     return *this;
   }
-  ~Allocation() { Clear(); }
-  void* ptr() const noexcept { return data_; }
-  void* operator->() const noexcept { return data_; }
-  operator bool() const noexcept { return data_ || ctx_; }
-  const Place& place() const noexcept { return place_; }
-  void Clear() {
+  virtual ~Allocation() {
     if (deleter_) {
       deleter_(this);
     }
-    ctx_ = nullptr;
-    deleter_ = nullptr;
-    data_ = nullptr;
   }
+  // Returns the holding pointer.
+  // NOTE: For performance consideration, it is better not to make this method
+  // as a virtual method. If we want to implement a `defragmentation` later,
+  // we might need to make `ptr_` field as a protected field, and add a virtual
+  // method like `defragmentation` to change `ptr_`.
+  void* ptr() const noexcept { return ptr_; }
+  // Returns the size of this memory buffer, i.e., ptr() + size() - 1 is the
+  // last valid element.
+  //
+  // NOTE: Some allocator might alloc more memory than request. The size
+  // could larger than its request. For example,
+  // the AlignedAllocator will always allocate memory as size + kAlignment.
+  // The raw pointer might not aligned, so an offset might be added to raw
+  // the pointer. The size of this allocation will be
+  // `size + kAlignemnt - offset`.
+  size_t size() const noexcept { return size_; }
+  void* operator->() const noexcept { return ptr_; }
+  operator bool() const noexcept { return ptr_; }
+  const Place& place() const noexcept { return place_; }
   DeleterFnPtr deleter() const noexcept { return deleter_; }
-  template <typename T>
-  T* CastContextWithoutCheck() const noexcept {
-    return static_cast<T*>(ctx_);
-  }
-  /// \brief Statically cast the void pointer of the context object to
-  /// the primitive type. Conversion of any pointer to void* and back
-  /// to pointer to the original cv type preserves its original value.
-  /// \param T The primitive type name of the context pointer.
-  /// \param expected_deleter The destructor passed in to enhance type
-  /// safety checking.
-  template <typename T>
-  T* CastContext(DeleterFnPtr expected_deleter) const {
-    PADDLE_ENFORCE_EQ(
-        deleter_ == expected_deleter,
-        true,
-        paddle::platform::errors::InvalidArgument(
-            "The deleter of the allocation does not match, so the pointer "
-            "cannot be safely removed."));
-    return CastContextWithoutCheck<T>();
-  }
- private:
+ protected:
   friend void swap(Allocation& a, Allocation& b) noexcept;
-  void* data_{nullptr};
-  void* ctx_{nullptr};
+  void* ptr_{nullptr};
+  size_t size_{};
   DeleterFnPtr deleter_{nullptr};
   // TODO(Shixiaowei02): Enum needs to be used instead to reduce
   // the construction overhead by more than 50%.
@@ -126,28 +87,21 @@ class Allocation final {
 };
 inline void swap(Allocation& a, Allocation& b) noexcept {
-  ::std::swap(a.data_, b.data_);
-  ::std::swap(a.ctx_, b.ctx_);
+  ::std::swap(a.ptr_, b.ptr_);
   ::std::swap(a.deleter_, b.deleter_);
   ::std::swap(a.place_, b.place_);
+  ::std::swap(a.size_, b.size_);
 }
-/// \brief Context compatible allocator interface. This allocator is
-/// mainly used for general data structures such as Tensor. The raw
-/// allocator is more universal and efficient.
 class Allocator {
-  using Place = paddle::platform::Place;
  public:
+  using DeleterType = std::function<void(Allocation*)>;
+  using AllocationPtr = std::unique_ptr<Allocation, DeleterType>;
   virtual ~Allocator() = default;
-  virtual Allocation Allocate(size_t bytes_size) = 0;
-  virtual const Place& place() = 0;
-};
-inline Allocation Allocate(const std::shared_ptr<Allocator>& a, size_t n) {
-  CHECK(a);
-  return a->Allocate(n);
-}
-}  // namespace deprecated
+  virtual AllocationPtr Allocate(size_t bytes_size) = 0;
+  virtual bool IsAllocThreadSafe() const { return false; }
+};
 }  // namespace pten
...
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <cstdint>
#include <functional>
#include "paddle/fluid/platform/place.h"
namespace pten {
/// \brief Fancy pointer with deleter. The use of this data type
/// is to be compatible with allocators from different frameworks
/// without significant performance loss. This class does not
/// support being inherited.
class Allocation {
public:
using Place = paddle::platform::Place;
using DeleterFnPtr = void (*)(Allocation*);
Allocation() = default;
// Don't own resources, only provide access.
Allocation(void* data, size_t size, const Place& place)
: ptr_(data), size_(size), place_(place) {}
// Own resources.
Allocation(void* data, size_t size, DeleterFnPtr deleter, const Place& place)
: ptr_(data), size_(size), deleter_(deleter), place_(place) {}
Allocation(Allocation&& other) noexcept { swap(*this, other); }
Allocation& operator=(Allocation&& other) noexcept {
// Exchange them explicitly to avoid moving is equivalent
// to copying.
swap(*this, other);
return *this;
}
virtual ~Allocation() {
if (deleter_) {
deleter_(this);
}
}
// Returns the holding pointer.
// NOTE: For performance consideration, it is better not to make this method
// as a virtual method. If we want to implement a `defragmentation` later,
// we might need to make `ptr_` field as a protected field, and add a virtual
// method like `defragmentation` to change `ptr_`.
void* ptr() const noexcept { return ptr_; }
// Returns the size of this memory buffer, i.e., ptr() + size() - 1 is the
// last valid element.
//
// NOTE: Some allocator might alloc more memory than request. The size
// could larger than its request. For example,
// the AlignedAllocator will always allocate memory as size + kAlignment.
// The raw pointer might not aligned, so an offset might be added to raw
// the pointer. The size of this allocation will be
// `size + kAlignemnt - offset`.
size_t size() const noexcept { return size_; }
void* operator->() const noexcept { return ptr_; }
operator bool() const noexcept { return ptr_; }
const Place& place() const noexcept { return place_; }
DeleterFnPtr deleter() const noexcept { return deleter_; }
protected:
friend void swap(Allocation& a, Allocation& b) noexcept;
void* ptr_{nullptr};
size_t size_{};
DeleterFnPtr deleter_{nullptr};
// TODO(Shixiaowei02): Enum needs to be used instead to reduce
// the construction overhead by more than 50%.
Place place_;
};
inline void swap(Allocation& a, Allocation& b) noexcept {
::std::swap(a.ptr_, b.ptr_);
::std::swap(a.deleter_, b.deleter_);
::std::swap(a.place_, b.place_);
::std::swap(a.size_, b.size_);
}
class Allocator {
public:
using DeleterType = std::function<void(Allocation*)>;
using AllocationPtr = std::unique_ptr<Allocation, DeleterType>;
virtual ~Allocator() = default;
virtual AllocationPtr Allocate(size_t bytes_size) = 0;
virtual bool IsAllocThreadSafe() const { return false; }
};
} // namespace pten
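The relocated allocator header keeps the "fancy pointer with deleter" design described above. For reference, a minimal hedged sketch of wrapping an externally owned host buffer in a pten::Allocation (the helper names and the use of malloc/free from <cstdlib> are illustrative assumptions, not part of this PR):

    // Sketch only: the deleter receives the Allocation itself and frees its buffer.
    static void FreeHostBuffer(pten::Allocation* allocation) {
      std::free(allocation->ptr());  // assumes <cstdlib> is included
    }

    std::shared_ptr<pten::Allocation> WrapHostBuffer(size_t bytes) {
      void* data = std::malloc(bytes);
      return std::make_shared<pten::Allocation>(
          data, bytes, &FreeHostBuffer, paddle::platform::CPUPlace());
    }

Such a shared allocation can then be handed to the new DenseTensor(const std::shared_ptr<pten::Allocation>&, const DenseTensorMeta&) constructor introduced in the dense_tensor changes below.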
@@ -33,28 +33,17 @@ extern void TensorCopy(const pten::DenseTensor& src,
 namespace pten {
 DenseTensor::DenseTensor(Allocator* a, const DenseTensorMeta& meta)
-    : meta_(meta),
-      storage_(make_intrusive<TensorStorage>(a, SizeOf(dtype()) * numel())) {}
+    : meta_(meta), holder_(a->Allocate(SizeOf(dtype()) * numel())) {}
 DenseTensor::DenseTensor(Allocator* a, DenseTensorMeta&& meta)
-    : meta_(std::move(meta)),
-      storage_(make_intrusive<TensorStorage>(a, SizeOf(dtype()) * numel())) {}
+    : meta_(std::move(meta)), holder_(a->Allocate(SizeOf(dtype()) * numel())) {}
-DenseTensor::DenseTensor(intrusive_ptr<Storage> storage,
-                         const DenseTensorMeta& meta)
-    : meta_(meta), storage_(std::move(storage)) {}
-DenseTensor::DenseTensor(intrusive_ptr<Storage> storage, DenseTensorMeta&& meta)
-    : meta_(std::move(meta)), storage_(std::move(storage)) {}
+DenseTensor::DenseTensor(const std::shared_ptr<pten::Allocation>& holder,
+                         const DenseTensorMeta& meta)
+    : meta_(meta), holder_(holder) {}
 DenseTensor::DenseTensor(const DenseTensor& other) : meta_(other.meta()) {
-  if (storage_ == nullptr) {
-    storage_ = make_intrusive<paddle::experimental::SharedStorage>(
-        paddle::platform::CPUPlace());
-  }
-  if (other.storage_ != nullptr && other.storage_->data_shared()) {
-    storage_->set_data_shared(other.storage_->data_shared());
-  }
+  holder_ = other.holder_;
 #ifdef PADDLE_WITH_MKLDNN
   format_ = other.format_;
@@ -63,13 +52,7 @@ DenseTensor::DenseTensor(const DenseTensor& other) : meta_(other.meta()) {
 DenseTensor& DenseTensor::operator=(const DenseTensor& other) {
   meta_ = other.meta();
-  if (storage_ == nullptr) {
-    storage_ = make_intrusive<paddle::experimental::SharedStorage>(
-        paddle::platform::CPUPlace());
-  }
-  if (other.storage_ != nullptr && other.storage_->data_shared()) {
-    storage_->set_data_shared(other.storage_->data_shared());
-  }
+  holder_ = other.holder_;
 #ifdef PADDLE_WITH_MKLDNN
   format_ = other.format_;
 #endif
@@ -78,7 +61,7 @@ DenseTensor& DenseTensor::operator=(const DenseTensor& other) {
 DenseTensor& DenseTensor::operator=(DenseTensor&& other) {
   meta_ = std::move(other.meta_);
-  storage_.swap(other.storage_);
+  std::swap(holder_, other.holder_);
   return *this;
 }
@@ -90,59 +73,7 @@ int64_t DenseTensor::numel() const {
 }
 bool DenseTensor::IsSharedWith(const DenseTensor& b) const {
-  return storage_.get() == b.storage_.get() && storage_.get() != nullptr;
-}
-
-void* DenseTensor::mutable_data(size_t request_bytes) {
-  PADDLE_ENFORCE(
-      valid(),
-      paddle::platform::errors::PreconditionNotMet(
-          "The meta data must be valid when call the mutable data function."));
-  PADDLE_ENFORCE_NOT_NULL(
-      storage_,
-      paddle::platform::errors::PreconditionNotMet(
-          "The storage must be valid when call the mutable data function."));
-  size_t bytes = numel() * SizeOf(dtype());
-  if (request_bytes) {
-    PADDLE_ENFORCE_GE(request_bytes,
-                      bytes,
-                      paddle::platform::errors::InvalidArgument(
-                          "The reserved size %d should be enough to meet the "
-                          "volume required by metadata %d.",
-                          request_bytes,
-                          bytes));
-    bytes = request_bytes;
-  }
-  if (!storage_->data() || storage_->size() < bytes + meta_.offset ||
-      storage_->size() == 0) {
-    VLOG(10) << "mutbale data realloc, original size: " << storage_->size()
-             << ", new size: " << bytes;
-    storage_->Realloc(bytes);
-    meta_.offset = 0;
-  }
-  return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(storage_->data()) +
-                                 meta_.offset);
-}
-
-template <typename T>
-T* DenseTensor::mutable_data() {
-  // In order to be compatible with the original Tensor design and
-  // execution system, we have to reset the datatype in mutable_data<T>.
-  // When the compatibility phase is over in the future, we can delete it
-  if (meta_.dtype == DataType::UNDEFINED) {
-    VLOG(10) << "change data type in mutbale_data, target dtype - "
-             << paddle::experimental::CppTypeToDataType<T>::Type();
-    const_cast<DataType&>(meta_.dtype) =
-        paddle::experimental::CppTypeToDataType<T>::Type();
-  }
-  PADDLE_ENFORCE(
-      (dtype() == paddle::experimental::CppTypeToDataType<T>::Type()),
-      paddle::platform::errors::InvalidArgument(
-          "The type of data (%d) we are trying to retrieve does not match the "
-          "type of data currently contained in the container (%d).",
-          static_cast<int>(paddle::experimental::CppTypeToDataType<T>::Type()),
-          static_cast<int>(dtype())));
-  return static_cast<T*>(mutable_data());
+  return holder_ && holder_ == b.Holder();
 }
 template <typename T>
@@ -164,29 +95,27 @@ T* DenseTensor::data() {
       paddle::platform::errors::InvalidArgument(
           "The type of data we are trying to retrieve does not match the "
          "type of data currently contained in the container."));
-  PADDLE_ENFORCE_NOT_NULL(
-      storage_,
-      paddle::platform::errors::PreconditionNotMet(
-          "The storage must be valid when call the mutable data function."));
-  return reinterpret_cast<T*>(data());
+  return static_cast<T*>(data());
 }
 void* DenseTensor::data() {
+  check_memory_size();
   PADDLE_ENFORCE_NOT_NULL(
-      storage_,
+      holder_,
       paddle::platform::errors::PreconditionNotMet(
-          "The storage must be valid when call the mutable data function."));
-  return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(storage_->data()) +
+          "The storage must be valid when call the data function."));
+  return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(holder_->ptr()) +
                                  meta_.offset);
 }
 const void* DenseTensor::data() const {
+  check_memory_size();
   PADDLE_ENFORCE_NOT_NULL(
-      storage_,
+      holder_,
       paddle::platform::errors::PreconditionNotMet(
-          "The storage must be valid when call the mutable data function."));
+          "The storage must be valid when call the data function."));
   return reinterpret_cast<const void*>(
-      reinterpret_cast<uintptr_t>(storage_->data()) + meta_.offset);
+      reinterpret_cast<uintptr_t>(holder_->ptr()) + meta_.offset);
 }
 void DenseTensor::set_meta(DenseTensorMeta&& meta) {
@@ -209,15 +138,14 @@ void DenseTensor::set_meta(DenseTensorMeta&& meta) {
  */
 void DenseTensor::ResizeAndAllocate(const DDim& dims) {
   meta_.dims = dims;
-  if (storage_ != nullptr) {
-    mutable_data();
+  if (holder_ != nullptr && place().GetType() != AllocationType::UNDEFINED) {
+    mutable_data(place());
   }
 }
 void DenseTensor::ResetLoD(const LoD& lod) { meta_.lod = lod; }
 #define DATA_MEMBER_FUNC_INSTANTIATION(dtype)  \
-  template dtype* DenseTensor::mutable_data(); \
   template const dtype* DenseTensor::data() const; \
   template dtype* DenseTensor::data();
@@ -243,68 +171,47 @@ DATA_MEMBER_FUNC_INSTANTIATION(::paddle::experimental::complex128);
 /* From framework::Tensor    */
 /* --------------------------- */
 DenseTensor::DenseTensor() {
-  storage_ = make_intrusive<paddle::experimental::SharedStorage>(
-      paddle::platform::CPUPlace());
   inplace_version_counter_ = std::make_shared<TensorInplaceVersion>(0);
   meta_.dtype = paddle::experimental::DataType::FLOAT32;
   meta_.offset = 0;
 }
-DenseTensor::DenseTensor(const paddle::framework::proto::VarType::Type& dtype) {
-  storage_ = make_intrusive<paddle::experimental::SharedStorage>(
-      paddle::platform::CPUPlace());
+DenseTensor::DenseTensor(paddle::framework::proto::VarType::Type dtype) {
   inplace_version_counter_ = std::make_shared<TensorInplaceVersion>(0);
   meta_.dtype = TransToPtenDataType(dtype);
   meta_.offset = 0;
 }
 size_t DenseTensor::memory_size() const {
-  if (storage_ == nullptr || storage_->data_shared() == nullptr) {
-    return 0UL;
-  }
-  return storage_->data_shared()->size() - meta_.offset;
+  return holder_ == nullptr ? 0UL : holder_->size() - meta_.offset;
 }
 void DenseTensor::check_memory_size() const {
-  PADDLE_ENFORCE_NOT_NULL(storage_,
+  PADDLE_ENFORCE_NOT_NULL(holder_,
                           paddle::platform::errors::PreconditionNotMet(
                               "Tensor holds no memory. "
                               "Call Tensor::mutable_data firstly."));
-  PADDLE_ENFORCE_NOT_NULL(storage_->data_shared(),
-                          paddle::platform::errors::PreconditionNotMet(
-                              "Tensor holds no memory. "
-                              "Call Tensor::mutable_data firstly."));
-  size_t size = numel() * SizeOf(dtype());
   PADDLE_ENFORCE_LE(
-      size,
+      numel() * SizeOf(dtype()),
      memory_size(),
      paddle::platform::errors::PreconditionNotMet(
          "Tensor's dimension is out of bound."
          "Tensor's dimension must be equal or less than the size of its "
          "memory."
          "But received Tensor's dimension is d%, memory's size is %d.",
-          size,
+          numel() * SizeOf(dtype()),
          memory_size()));
 }
 const paddle::platform::Place& DenseTensor::place() const {
   PADDLE_ENFORCE_NOT_NULL(
-      storage_,
+      holder_,
       paddle::platform::errors::PreconditionNotMet(
-          "Tensor not initialized yet when Tensor::place() is called."));
-  if (storage_->data_shared()) {
-    return storage_->data_shared()->place();
-  }
-  return storage_->place();
+          "Tensor not initialized yet when DenseTensor::place() is called."));
+  return holder_->place();
 }
 paddle::framework::proto::VarType::Type DenseTensor::type() const {
-  PADDLE_ENFORCE_NOT_NULL(
-      storage_,
-      paddle::platform::errors::PreconditionNotMet(
-          "Tensor not initialized yet when Tensor::type() is called."));
   return TransToProtoVarType(meta_.dtype);
 }
@@ -316,39 +223,31 @@ void DenseTensor::set_layout(const paddle::framework::DataLayout layout) {
   meta_.layout = layout;
 }
-void DenseTensor::ResetHolder(
-    const std::shared_ptr<paddle::memory::Allocation>& holder) {
+void DenseTensor::ResetHolder(const std::shared_ptr<pten::Allocation>& holder) {
   PADDLE_ENFORCE_EQ(
      meta_.offset,
      0,
      paddle::platform::errors::Fatal(
          "Only the offset is supported to zero when the holder is reset."));
-  PADDLE_ENFORCE_NOT_NULL(
-      storage_,
-      paddle::platform::errors::PreconditionNotMet(
-          "The storage must be valid when call the mutable data function."));
-  if (storage_->data_shared()) {
+  if (holder_) {
     PADDLE_ENFORCE_LE(
        numel() * SizeOf(dtype()) + meta_.offset,
        holder->size(),
        paddle::platform::errors::InvalidArgument(
            "The size of Holder is not enough to store the Tensor."));
   }
-  storage_->set_data_shared(holder);
+  holder_ = holder;
 }
 void DenseTensor::ResetHolderWithType(
-    const std::shared_ptr<paddle::memory::Allocation>& holder,
-    const paddle::framework::proto::VarType::Type& type) {
+    const std::shared_ptr<pten::Allocation>& holder,
+    paddle::framework::proto::VarType::Type type) {
   set_type(type);
   ResetHolder(holder);
 }
-void DenseTensor::set_type(
-    const paddle::framework::proto::VarType::Type& type) {
+void DenseTensor::set_type(paddle::framework::proto::VarType::Type type) {
   meta_.dtype = TransToPtenDataType(type);
 }
@@ -369,19 +268,14 @@ void* DenseTensor::mutable_data(const paddle::platform::Place& place,
     size = requested_size;
   }
-  if (storage_ == nullptr) {
-    storage_ = make_intrusive<paddle::experimental::SharedStorage>(place);
-  }
   /* some versions of boost::variant don't have operator!= */
-  if (storage_->data_shared() == nullptr ||
-      !(storage_->data_shared()->place() == place) ||
-      storage_->data_shared()->size() < size + meta_.offset) {
-    storage_->Clear();
-    storage_->set_data_shared(paddle::memory::AllocShared(place, size));
+  if (holder_ == nullptr || !(holder_->place() == place) ||
+      holder_->size() < size + meta_.offset) {
+    holder_.reset();
+    holder_ = paddle::memory::AllocShared(place, size);
     meta_.offset = 0;
   }
-  return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(storage_->data()) +
+  return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(holder_->ptr()) +
                                  meta_.offset);
 }
@@ -404,21 +298,16 @@ void* DenseTensor::mutable_data(const paddle::platform::Place& place,
                        "] now"));
   size_t size = numel() * SizeOf(dtype());
-  if (storage_ == nullptr) {
-    storage_ = make_intrusive<paddle::experimental::SharedStorage>(place);
-  }
   /* some versions of boost::variant don't have operator!= */
-  if (storage_->data_shared() == nullptr ||
-      !(storage_->data_shared()->place() == place) ||
-      storage_->data_shared()->size() < size + meta_.offset ||
+  if (holder_ == nullptr || !(holder_->place() == place) ||
+      holder_->size() < size + meta_.offset ||
       !(paddle::platform::is_gpu_place(place) &&
-        paddle::memory::InSameStream(storage_->data_shared(), stream))) {
-    storage_->Clear();
-    storage_->set_data_shared(paddle::memory::AllocShared(place, size, stream));
+        paddle::memory::InSameStream(holder_, stream))) {
+    holder_.reset();
+    holder_ = paddle::memory::AllocShared(place, size, stream);
     meta_.offset = 0;
   }
-  return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(storage_->data()) +
+  return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(holder_->ptr()) +
                                  meta_.offset);
 }
@@ -445,14 +334,9 @@ inline T* DenseTensor::mutable_data(const paddle::platform::Place& place,
 }
 void DenseTensor::ShareBufferWith(const DenseTensor& tensor) {
-  if (storage_ == nullptr) {
-    storage_ = make_intrusive<paddle::experimental::SharedStorage>(
-        paddle::platform::CPUPlace());
-  }
-  if (storage_ != nullptr && tensor.storage_ != nullptr) {
-    storage_->set_data_shared(tensor.storage_->data_shared());
-  }
+  holder_ = tensor.holder_;
   meta_.offset = tensor.meta().offset;
+  meta_.dtype = tensor.dtype();
 }
 #define LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(dtype) \
@@ -467,7 +351,7 @@ LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(bool)
 LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(int8_t)
 LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(uint8_t)
 LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(int16_t)
-LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(int)
+LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(int32_t)
 LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(int64_t)
 LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(float)
 LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(double)
@@ -482,6 +366,13 @@ LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(::paddle::experimental::complex128)
 /* From framework::LoDTensor */
 /* ------------------------------ */
+DenseTensor::DenseTensor(intrusive_ptr<Storage> storage,
+                         const DenseTensorMeta& meta)
+    : meta_(meta), holder_(storage->move_data_shared()) {}
+
+DenseTensor::DenseTensor(intrusive_ptr<Storage> storage, DenseTensorMeta&& meta)
+    : meta_(std::move(meta)), holder_(storage->move_data_shared()) {}
+
 DenseTensor::DenseTensor(const LoD& lod) : DenseTensor() { meta_.lod = lod; }
 void DenseTensor::set_lod(const LoD& lod) { meta_.lod = lod; }
@@ -559,9 +450,8 @@ DenseTensor DenseTensor::Slice(int64_t begin_idx, int64_t end_idx) const {
   } else {
     size_t base = numel() / meta_.dims[0];
     DenseTensor dst;
-    dst.storage_ = pten::make_intrusive<paddle::experimental::SharedStorage>(
-        storage_->data_shared());
-    dst.meta_.layout = meta_.layout;
+    dst.holder_ = holder_;
+    dst.set_layout(meta_.layout);
     dst.meta_.dtype = meta_.dtype;
     DDim dst_dims = meta_.dims;
     dst_dims[0] = end_idx - begin_idx;
...
...@@ -70,17 +70,8 @@ class DenseTensor : public TensorBase, ...@@ -70,17 +70,8 @@ class DenseTensor : public TensorBase,
/// \param meta The meta data of dense tensor. /// \param meta The meta data of dense tensor.
DenseTensor(Allocator* a, DenseTensorMeta&& meta); DenseTensor(Allocator* a, DenseTensorMeta&& meta);
/// \brief Use existing storage space to create dense tensor. This interface DenseTensor(const std::shared_ptr<pten::Allocation>& holder,
/// can be used to deliberately create an uninitialized dense tensor. const DenseTensorMeta& meta);
/// \param storage The existing storage.
/// \param meta The meta data of dense tensor.
DenseTensor(intrusive_ptr<Storage> storage, const DenseTensorMeta& meta);
/// \brief Use existing storage space to create dense tensor. This interface
/// can be used to deliberately create an uninitialized dense tensor.
/// \param storage The existing storage.
/// \param meta The meta data of dense tensor.
DenseTensor(intrusive_ptr<Storage> storage, DenseTensorMeta&& meta);
/// \brief Because dense tensor is a kind of container, we give a default /// \brief Because dense tensor is a kind of container, we give a default
/// constructor to use for stl container. But the dense tensor created with /// constructor to use for stl container. But the dense tensor created with
...@@ -146,9 +137,7 @@ class DenseTensor : public TensorBase, ...@@ -146,9 +137,7 @@ class DenseTensor : public TensorBase,
/// \brief Test whether the storage is allocated. /// \brief Test whether the storage is allocated.
/// return Whether the storage is allocated. /// return Whether the storage is allocated.
bool initialized() const override { bool initialized() const override { return holder_ && holder_->ptr(); }
return storage_ != nullptr && storage_->data() != nullptr;
}
/// \brief Check if storage is shared with other objects. /// \brief Check if storage is shared with other objects.
/// \return Whether the storage is shared with other objects. /// \return Whether the storage is shared with other objects.
...@@ -170,25 +159,7 @@ class DenseTensor : public TensorBase, ...@@ -170,25 +159,7 @@ class DenseTensor : public TensorBase,
/// \brief Returns the actual storage size occupied by tensor, may be larger /// \brief Returns the actual storage size occupied by tensor, may be larger
/// than its shape dims. /// than its shape dims.
/// \return The actual storage size occupied by tensor. /// \return The actual storage size occupied by tensor.
size_t capacity() const { return storage_->size(); } size_t capacity() const { return holder_->size(); }
/// \brief Get the mutable data pointer value of type T.
/// Memory allocation may occur when calling this interface:
/// 1. When the storage size is not enough to meet the current shape of the
/// data.
/// \return The mutable data pointer value of type T.
template <typename T>
T* mutable_data();
/// \brief Get the mutable data pointer value of raw type.
/// Memory allocation may occur when calling this interface:
/// 1. When the storage size is not enough to meet the current shape of the
/// data.
/// 2. When more request_bytes parameters are used to reserve the data
/// storage.
/// param request_bytes The bytes to reserve the data storage.
/// \return The mutable data pointer value of type T.
void* mutable_data(size_t request_bytes = 0);
/// \brief Get the const data pointer value of type T. /// \brief Get the const data pointer value of type T.
/// \return The const data pointer value of type T. /// \return The const data pointer value of type T.
...@@ -204,7 +175,7 @@ class DenseTensor : public TensorBase, ...@@ -204,7 +175,7 @@ class DenseTensor : public TensorBase,
protected: protected:
DenseTensorMeta meta_; DenseTensorMeta meta_;
intrusive_ptr<Storage> storage_; std::shared_ptr<pten::Allocation> holder_;
/* --------------------------- */ /* --------------------------- */
/* From framework::Tensor */ /* From framework::Tensor */
...@@ -223,11 +194,21 @@ class DenseTensor : public TensorBase, ...@@ -223,11 +194,21 @@ class DenseTensor : public TensorBase,
/* @jim19930609: Remove dependency on protobuf after Tensor Unification. /* @jim19930609: Remove dependency on protobuf after Tensor Unification.
*/ */
explicit DenseTensor(const paddle::framework::proto::VarType::Type& dtype); explicit DenseTensor(paddle::framework::proto::VarType::Type dtype);
inline bool IsInitialized() const { /// \brief Use existing storage space to create dense tensor. This interface
return storage_ != nullptr && storage_->data_shared() != nullptr; /// can be used to deliberately create an uninitialized dense tensor.
} /// \param storage The existing storage.
/// \param meta The meta data of dense tensor.
DenseTensor(intrusive_ptr<Storage> storage, const DenseTensorMeta& meta);
/// \brief Use existing storage space to create dense tensor. This interface
/// can be used to deliberately create an uninitialized dense tensor.
/// \param storage The existing storage.
/// \param meta The meta data of dense tensor.
DenseTensor(intrusive_ptr<Storage> storage, DenseTensorMeta&& meta);
inline bool IsInitialized() const { return holder_ != nullptr; }
template <typename T> template <typename T>
T* data(); T* data();
...@@ -270,7 +251,7 @@ class DenseTensor : public TensorBase, ...@@ -270,7 +251,7 @@ class DenseTensor : public TensorBase,
void set_layout(const paddle::framework::DataLayout layout); void set_layout(const paddle::framework::DataLayout layout);
void clear() { void clear() {
storage_.reset(); holder_.reset();
meta_.offset = 0; meta_.offset = 0;
} }
...@@ -281,31 +262,24 @@ class DenseTensor : public TensorBase, ...@@ -281,31 +262,24 @@ class DenseTensor : public TensorBase,
} }
bool IsSharedBufferWith(const DenseTensor& src) const { bool IsSharedBufferWith(const DenseTensor& src) const {
if (storage_ == nullptr || src.storage_ == nullptr) return false; return holder_ && holder_ == src.Holder();
if (storage_->data_shared() == src.storage_->data_shared()) return true;
return false;
} }
const std::shared_ptr<paddle::memory::Allocation> Holder() const { const std::shared_ptr<pten::Allocation>& Holder() const { return holder_; }
return storage_ == nullptr ? nullptr : std::move(storage_->data_shared());
}
void set_offset(size_t offset) { meta_.offset = offset; } void set_offset(size_t offset) { meta_.offset = offset; }
size_t offset() const { return meta_.offset; } size_t offset() const { return meta_.offset; }
std::shared_ptr<paddle::memory::Allocation> MoveMemoryHolder() { std::shared_ptr<pten::Allocation> MoveMemoryHolder() {
return storage_ == nullptr ? nullptr return std::move(holder_);
: std::move(storage_->move_data_shared());
} }
void ResetHolder(const std::shared_ptr<paddle::memory::Allocation>& holder); void ResetHolder(const std::shared_ptr<pten::Allocation>& holder);
void ResetHolderWithType( void ResetHolderWithType(const std::shared_ptr<pten::Allocation>& holder,
const std::shared_ptr<paddle::memory::Allocation>& holder, paddle::framework::proto::VarType::Type type);
const paddle::framework::proto::VarType::Type& type);
void set_type(const paddle::framework::proto::VarType::Type& type); void set_type(paddle::framework::proto::VarType::Type type);
TensorInplaceVersion& InplaceVersionCounter() { TensorInplaceVersion& InplaceVersionCounter() {
return *inplace_version_counter_; return *inplace_version_counter_;
......
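
The hunk above replaces the storage_ member with a plain std::shared_ptr<pten::Allocation> holder_, so Holder() now returns a reference, IsInitialized() and IsSharedBufferWith() test the holder directly, and MoveMemoryHolder() leaves the tensor uninitialized. The helper below is only a caller-side sketch of those accessors; it is not part of this commit and the include path is an assumption.

#include "paddle/pten/core/dense_tensor.h"  // include path assumed

// Illustrative helper, not from this commit: hand src's buffer over to dst.
void MoveBufferTo(pten::DenseTensor* src, pten::DenseTensor* dst) {
  if (!src->IsInitialized()) return;          // holder_ == nullptr
  if (src->IsSharedBufferWith(*dst)) return;  // already the same allocation
  // Holder() now returns a const reference to the shared_ptr<pten::Allocation>
  // instead of rebuilding one from the old Storage object.
  const std::shared_ptr<pten::Allocation>& holder = src->Holder();
  (void)holder;
  // MoveMemoryHolder() std::moves holder_ out, leaving src uninitialized.
  dst->ResetHolder(src->MoveMemoryHolder());
}
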
...@@ -19,7 +19,7 @@ limitations under the License. */ ...@@ -19,7 +19,7 @@ limitations under the License. */
// TODO(wilber): Do we need to use place in pten kernel? // TODO(wilber): Do we need to use place in pten kernel?
#include "paddle/pten/common/place.h" #include "paddle/pten/common/place.h"
#include "paddle/pten/core/candidate/allocator.h" #include "paddle/pten/core/allocator.h"
namespace pten { namespace pten {
class TensorBase; class TensorBase;
......
...@@ -56,18 +56,14 @@ class Storage : public intrusive_ref_counter<Storage> { ...@@ -56,18 +56,14 @@ class Storage : public intrusive_ref_counter<Storage> {
: nullptr; : nullptr;
} }
const std::shared_ptr<paddle::memory::Allocation> data_shared() const { const std::shared_ptr<paddle::memory::Allocation>& data_shared() const {
return data_; return data_;
} }
virtual void set_data_shared( virtual void set_data_shared(
const std::shared_ptr<paddle::memory::Allocation>& holder) { const std::shared_ptr<paddle::memory::Allocation>& holder) = 0;
data_ = holder;
}
std::shared_ptr<paddle::memory::Allocation> move_data_shared() { virtual std::shared_ptr<paddle::memory::Allocation>&& move_data_shared() = 0;
return std::move(data_);
}
virtual void ReallocShared(size_t n) { virtual void ReallocShared(size_t n) {
PADDLE_THROW(paddle::platform::errors::Unimplemented( PADDLE_THROW(paddle::platform::errors::Unimplemented(
...@@ -123,6 +119,18 @@ class TensorStorage : public Storage { ...@@ -123,6 +119,18 @@ class TensorStorage : public Storage {
bool OwnsMemory() const noexcept override { return true; } bool OwnsMemory() const noexcept override { return true; }
void set_data_shared(
const std::shared_ptr<paddle::memory::Allocation>& holder) override {
CHECK(holder);
data_ = holder;
size_ = holder->size();
}
std::shared_ptr<paddle::memory::Allocation>&& move_data_shared() override {
size_ = 0;
return std::move(data_);
}
private: private:
Allocator* alloc_; Allocator* alloc_;
int64_t size_{0}; int64_t size_{0};
......
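
In the storage hunk above, set_data_shared() becomes pure virtual on Storage, move_data_shared() returns an rvalue reference, and TensorStorage keeps size_ in sync in both overrides. The standalone mock below has no Paddle dependencies and uses invented names; it only mirrors that contract.

#include <cassert>
#include <memory>

// Standalone mock, not the real pten::TensorStorage.
struct MockAllocation {
  explicit MockAllocation(size_t b) : bytes(b) {}
  size_t size() const { return bytes; }
  size_t bytes;
};

struct MockTensorStorage {
  std::shared_ptr<MockAllocation> data_;
  int64_t size_{0};

  void set_data_shared(const std::shared_ptr<MockAllocation>& holder) {
    assert(holder);          // CHECK(holder) in the real override
    data_ = holder;
    size_ = holder->size();  // keep size_ in sync with the new holder
  }
  std::shared_ptr<MockAllocation>&& move_data_shared() {
    size_ = 0;               // the storage gives up its bytes
    return std::move(data_);
  }
};

int main() {
  MockTensorStorage s;
  s.set_data_shared(std::make_shared<MockAllocation>(64));
  auto moved = s.move_data_shared();  // s.data_ is now empty, s.size_ == 0
  return moved->size() == 64 ? 0 : 1;
}
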
...@@ -36,7 +36,7 @@ void CastKernelImpl(const CPUContext& dev_ctx, ...@@ -36,7 +36,7 @@ void CastKernelImpl(const CPUContext& dev_ctx,
auto numel = x.numel(); auto numel = x.numel();
auto* in_end = in_begin + numel; auto* in_end = in_begin + numel;
auto* out_begin = out->mutable_data<OutT>(); auto* out_begin = out->mutable_data<OutT>(dev_ctx.GetPlace());
paddle::platform::Transform<CPUContext> trans; paddle::platform::Transform<CPUContext> trans;
trans(dev_ctx, trans(dev_ctx,
......
...@@ -32,17 +32,16 @@ void Copy(const Context& dev_ctx, ...@@ -32,17 +32,16 @@ void Copy(const Context& dev_ctx,
DenseTensor* dst) { DenseTensor* dst) {
auto* src_ptr = src.data(); auto* src_ptr = src.data();
const auto& src_place = src.place(); const auto& src_place = src.place();
const auto& dst_place = dst->place();
VLOG(3) << "TensorCopy " << src.dims() << " from " << src.place() << " to " VLOG(3) << "TensorCopy " << src.dims() << " from " << src.place() << " to "
<< dst_place; << src_place;
dst->ResizeAndAllocate(src.dims()); dst->Resize(src.dims());
auto* dst_ptr = dst->mutable_data(); auto* dst_ptr = dst->mutable_data(src_place);
if (src_ptr == dst_ptr && src_place == dst_place) { if (src_ptr == dst_ptr) {
VLOG(3) << "Skip copy the same data async from " << src_place << " to " VLOG(3) << "Skip copy the same data async from " << src_place << " to "
<< dst_place; << src_place;
return; return;
} }
VLOG(4) << "src:" << src_ptr << ", dst:" << dst_ptr; VLOG(4) << "src:" << src_ptr << ", dst:" << dst_ptr;
...@@ -51,9 +50,8 @@ void Copy(const Context& dev_ctx, ...@@ -51,9 +50,8 @@ void Copy(const Context& dev_ctx,
auto size = src.numel() * auto size = src.numel() *
paddle::framework::SizeOfType(TransToProtoVarType(src.dtype())); paddle::framework::SizeOfType(TransToProtoVarType(src.dtype()));
if (paddle::platform::is_cpu_place(src_place) && if (paddle::platform::is_cpu_place(src_place)) {
paddle::platform::is_cpu_place(dst_place)) { paddle::memory::Copy(src_place, dst_ptr, src_place, src_ptr, size);
paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size);
} }
} }
......
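
With this change the CPU Copy kernel takes over sizing and allocation of dst on src's place, so callers no longer pre-allocate the destination. A hedged usage sketch, assuming pten::CPUContext and the four-argument pten::Copy signature that ReshapeKernel uses later in this diff:

// Illustrative only: copy src into dst, which may still be unallocated.
void CopyOnCpu(const pten::CPUContext& dev_ctx,
               const pten::DenseTensor& src,
               pten::DenseTensor* dst) {
  // The kernel now resizes dst and calls dst->mutable_data(src.place())
  // itself, so the caller no longer pre-allocates the destination.
  pten::Copy(dev_ctx, src, /*blocking=*/false, dst);
}
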
...@@ -29,7 +29,7 @@ void DotKernel(const Context& dev_ctx, ...@@ -29,7 +29,7 @@ void DotKernel(const Context& dev_ctx,
DenseTensor* out) { DenseTensor* out) {
auto const *x_ptr = x.data<T>(), *x_ptr_ = &x_ptr[0]; auto const *x_ptr = x.data<T>(), *x_ptr_ = &x_ptr[0];
auto const *y_ptr = y.data<T>(), *y_ptr_ = &y_ptr[0]; auto const *y_ptr = y.data<T>(), *y_ptr_ = &y_ptr[0];
auto* z = out->mutable_data<T>(); auto* z = out->mutable_data<T>(dev_ctx.GetPlace());
// Loop over the total N elements of both operands while sum-reducing every // Loop over the total N elements of both operands while sum-reducing every
// B pairs along the way where B is the dimension of the least ordered axis // B pairs along the way where B is the dimension of the least ordered axis
......
...@@ -45,7 +45,10 @@ struct SameDimsAddFunctor< ...@@ -45,7 +45,10 @@ struct SameDimsAddFunctor<
const DenseTensor& y, const DenseTensor& y,
DenseTensor* z) { DenseTensor* z) {
auto blas = paddle::operators::math::GetBlas<DevCtx, T>(dev_ctx); auto blas = paddle::operators::math::GetBlas<DevCtx, T>(dev_ctx);
blas.VADD(x.numel(), x.data<T>(), y.data<T>(), z->mutable_data<T>()); blas.VADD(x.numel(),
x.data<T>(),
y.data<T>(),
z->mutable_data<T>(dev_ctx.GetPlace()));
} }
}; };
...@@ -58,7 +61,7 @@ struct SameDimsAddFunctor< ...@@ -58,7 +61,7 @@ struct SameDimsAddFunctor<
const DenseTensor& x, const DenseTensor& x,
const DenseTensor& y, const DenseTensor& y,
DenseTensor* z) { DenseTensor* z) {
z->mutable_data<T>(); z->mutable_data<T>(dev_ctx.GetPlace());
auto eigen_x = pten::EigenVector<T>::Flatten(x); auto eigen_x = pten::EigenVector<T>::Flatten(x);
auto eigen_y = pten::EigenVector<T>::Flatten(y); auto eigen_y = pten::EigenVector<T>::Flatten(y);
auto eigen_z = pten::EigenVector<T>::Flatten(*z); auto eigen_z = pten::EigenVector<T>::Flatten(*z);
...@@ -86,7 +89,10 @@ struct SameDimsSubtractFunctor< ...@@ -86,7 +89,10 @@ struct SameDimsSubtractFunctor<
const DenseTensor& y, const DenseTensor& y,
DenseTensor* z) { DenseTensor* z) {
auto blas = paddle::operators::math::GetBlas<DevCtx, T>(dev_ctx); auto blas = paddle::operators::math::GetBlas<DevCtx, T>(dev_ctx);
blas.VSUB(x.numel(), x.data<T>(), y.data<T>(), z->mutable_data<T>()); blas.VSUB(x.numel(),
x.data<T>(),
y.data<T>(),
z->mutable_data<T>(dev_ctx.GetPlace()));
} }
}; };
...@@ -141,7 +147,10 @@ struct SameDimsDivideFunctor< ...@@ -141,7 +147,10 @@ struct SameDimsDivideFunctor<
const DenseTensor& y, const DenseTensor& y,
DenseTensor* z) { DenseTensor* z) {
auto blas = paddle::operators::math::GetBlas<DevCtx, T>(dev_ctx); auto blas = paddle::operators::math::GetBlas<DevCtx, T>(dev_ctx);
blas.VDIV(x.numel(), x.data<T>(), y.data<T>(), z->mutable_data<T>()); blas.VDIV(x.numel(),
x.data<T>(),
y.data<T>(),
z->mutable_data<T>(dev_ctx.GetPlace()));
} }
}; };
...@@ -164,7 +173,10 @@ struct SameDimsMultiplyFunctor< ...@@ -164,7 +173,10 @@ struct SameDimsMultiplyFunctor<
const DenseTensor& y, const DenseTensor& y,
DenseTensor* z) { DenseTensor* z) {
auto blas = paddle::operators::math::GetBlas<DevCtx, T>(dev_ctx); auto blas = paddle::operators::math::GetBlas<DevCtx, T>(dev_ctx);
blas.VMUL(x.numel(), x.data<T>(), y.data<T>(), z->mutable_data<T>()); blas.VMUL(x.numel(),
x.data<T>(),
y.data<T>(),
z->mutable_data<T>(dev_ctx.GetPlace()));
} }
}; };
...@@ -280,7 +292,7 @@ void CommonForwardBroadcastCPU(const DenseTensor& x, ...@@ -280,7 +292,7 @@ void CommonForwardBroadcastCPU(const DenseTensor& x,
PADDLE_ENFORCE_NOT_NULL(y_data, PADDLE_ENFORCE_NOT_NULL(y_data,
paddle::platform::errors::InvalidArgument( paddle::platform::errors::InvalidArgument(
"The input Y should not be empty.")); "The input Y should not be empty."));
OutType* out_data = z->mutable_data<OutType>(); OutType* out_data = z->mutable_data<OutType>(ctx.GetPlace());
const int out_size = std::accumulate( const int out_size = std::accumulate(
out_dims_array, out_dims_array + max_dim, 1, std::multiplies<int>()); out_dims_array, out_dims_array + max_dim, 1, std::multiplies<int>());
...@@ -361,7 +373,7 @@ void ElementwiseCompute(const CPUContext& dev_ctx, ...@@ -361,7 +373,7 @@ void ElementwiseCompute(const CPUContext& dev_ctx,
int axis, int axis,
Functor func, Functor func,
DenseTensor* z) { DenseTensor* z) {
z->mutable_data<OutType>(); z->mutable_data<OutType>(dev_ctx.GetPlace());
auto x_dims = x.dims(); auto x_dims = x.dims();
auto y_dims = y.dims(); auto y_dims = y.dims();
bool is_xsize_larger = true; bool is_xsize_larger = true;
......
...@@ -37,7 +37,7 @@ namespace pten { ...@@ -37,7 +37,7 @@ namespace pten {
const DenseTensor& y, \ const DenseTensor& y, \
int axis, \ int axis, \
DenseTensor* out) { \ DenseTensor* out) { \
out->mutable_data<T>(); \ out->mutable_data<T>(dev_ctx.GetPlace()); \
if (x.dims() == y.dims()) { \ if (x.dims() == y.dims()) { \
SameDimsElementwiseCompute<SameDims##name##Functor<CPUContext, T>>()( \ SameDimsElementwiseCompute<SameDims##name##Functor<CPUContext, T>>()( \
dev_ctx, x, y, out); \ dev_ctx, x, y, out); \
...@@ -85,7 +85,7 @@ void DivideRawKernel(const Context& dev_ctx, ...@@ -85,7 +85,7 @@ void DivideRawKernel(const Context& dev_ctx,
int axis, int axis,
DenseTensor* out) { DenseTensor* out) {
// allocate memory for out // allocate memory for out
out->mutable_data<T>(); out->mutable_data<T>(dev_ctx.GetPlace());
if (x.dims() == y.dims() && std::is_floating_point<T>::value) { if (x.dims() == y.dims() && std::is_floating_point<T>::value) {
SameDimsElementwiseCompute<SameDimsDivideFunctor<CPUContext, T>>()( SameDimsElementwiseCompute<SameDimsDivideFunctor<CPUContext, T>>()(
dev_ctx, x, y, out); dev_ctx, x, y, out);
......
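
Every kernel touched in these files now allocates its output with mutable_data<T>(dev_ctx.GetPlace()) before computing. The toy kernel below is not part of the commit; it only sketches that convention for a CPU kernel, with the include path assumed.

#include "paddle/pten/core/dense_tensor.h"  // include path assumed

namespace pten {

// Illustrative kernel, not added by this commit.
template <typename T, typename Context>
void AddOneKernel(const Context& dev_ctx, const DenseTensor& x,
                  DenseTensor* out) {
  // Allocate the output on the place the kernel runs on, replacing the
  // zero-argument mutable_data<T>() calls used before this commit.
  T* out_data = out->mutable_data<T>(dev_ctx.GetPlace());
  const T* x_data = x.data<T>();
  for (int64_t i = 0; i < x.numel(); ++i) {
    out_data[i] = x_data[i] + static_cast<T>(1);
  }
}

}  // namespace pten
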
...@@ -119,7 +119,7 @@ void GetShuffledInput(const DeviceContext& dev_ctx, ...@@ -119,7 +119,7 @@ void GetShuffledInput(const DeviceContext& dev_ctx,
GetShuffledDim(input.dims(), &shuffled_dims, dims, &perm_axis); GetShuffledDim(input.dims(), &shuffled_dims, dims, &perm_axis);
shuffled_input->ResizeAndAllocate(shuffled_dims); shuffled_input->ResizeAndAllocate(shuffled_dims);
shuffled_input->mutable_data<OutT>(); shuffled_input->mutable_data<OutT>(dev_ctx.GetPlace());
pten::math::TransposeNormal<DeviceContext, OutT> trans; pten::math::TransposeNormal<DeviceContext, OutT> trans;
trans(dev_ctx, input, shuffled_input, perm_axis); trans(dev_ctx, input, shuffled_input, perm_axis);
...@@ -158,7 +158,7 @@ void ReduceKernelImpl(const DeviceContext& dev_ctx, ...@@ -158,7 +158,7 @@ void ReduceKernelImpl(const DeviceContext& dev_ctx,
const std::vector<int64_t>& dims, const std::vector<int64_t>& dims,
bool keep_dim, bool keep_dim,
bool reduce_all) { bool reduce_all) {
output->mutable_data<OutT>(); output->mutable_data<OutT>(dev_ctx.GetPlace());
if (reduce_all) { if (reduce_all) {
// Flatten and reduce 1-D tensor // Flatten and reduce 1-D tensor
......
...@@ -33,7 +33,7 @@ void ScaleKernel(const Context& dev_ctx, ...@@ -33,7 +33,7 @@ void ScaleKernel(const Context& dev_ctx,
bool bias_after_scale, bool bias_after_scale,
DenseTensor* out) { DenseTensor* out) {
// calc // calc
out->mutable_data<T>(); out->mutable_data<T>(dev_ctx.GetPlace());
auto eigen_out = pten::EigenVector<T>::Flatten(*out); auto eigen_out = pten::EigenVector<T>::Flatten(*out);
auto eigen_x = pten::EigenVector<T>::Flatten(x); auto eigen_x = pten::EigenVector<T>::Flatten(x);
auto& dev = *dev_ctx.eigen_device(); auto& dev = *dev_ctx.eigen_device();
......
...@@ -29,7 +29,7 @@ void EmptyKernel(const Context& dev_ctx, ...@@ -29,7 +29,7 @@ void EmptyKernel(const Context& dev_ctx,
template <typename T, typename Context> template <typename T, typename Context>
void EmptyLikeKernel(const Context& dev_ctx, DenseTensor* out) { void EmptyLikeKernel(const Context& dev_ctx, DenseTensor* out) {
out->mutable_data<T>(); out->mutable_data<T>(dev_ctx.GetPlace());
} }
} // namespace pten } // namespace pten
......
...@@ -227,7 +227,7 @@ class TransformFunctor { ...@@ -227,7 +227,7 @@ class TransformFunctor {
const bool is_xsize_larger = true) const bool is_xsize_larger = true)
: x_(x.data<T>()), : x_(x.data<T>()),
y_(y.data<T>()), y_(y.data<T>()),
z_(z->mutable_data<OutType>()), z_(z->mutable_data<OutType>(ctx.GetPlace())),
nx_(x.numel()), nx_(x.numel()),
ctx_(ctx), ctx_(ctx),
func_(func), func_(func),
...@@ -585,7 +585,7 @@ void ElementwiseCudaKernel(const KPDevice &ctx, ...@@ -585,7 +585,7 @@ void ElementwiseCudaKernel(const KPDevice &ctx,
ins_data[i] = ins[i]->data<InT>(); ins_data[i] = ins[i]->data<InT>();
} }
for (int i = 0; i < NumOuts; ++i) { for (int i = 0; i < NumOuts; ++i) {
outs_data[i] = (*outs)[i]->mutable_data<OutT>(); outs_data[i] = (*outs)[i]->mutable_data<OutT>(ctx.GetPlace());
} }
#ifdef PADDLE_WITH_XPU2 #ifdef PADDLE_WITH_XPU2
int block_size = 64; int block_size = 64;
......
...@@ -36,7 +36,7 @@ struct TransposeNormal<CPUContext, T> { ...@@ -36,7 +36,7 @@ struct TransposeNormal<CPUContext, T> {
auto in_stride = pten::framework::stride(in.dims()); auto in_stride = pten::framework::stride(in.dims());
auto out_stride = pten::framework::stride(out->dims()); auto out_stride = pten::framework::stride(out->dims());
const T* in_ptr = in.data<T>(); const T* in_ptr = in.data<T>();
T* out_ptr = out->mutable_data<T>(); T* out_ptr = out->mutable_data<T>(dev_ctx.GetPlace());
auto transpose_helper = [&](int64_t beg, int64_t end) { auto transpose_helper = [&](int64_t beg, int64_t end) {
for (int64_t out_idx = beg; out_idx < end; ++out_idx) { for (int64_t out_idx = beg; out_idx < end; ++out_idx) {
...@@ -63,11 +63,8 @@ DEFINE_CPU_TRANS_NORMAL(bool); ...@@ -63,11 +63,8 @@ DEFINE_CPU_TRANS_NORMAL(bool);
DEFINE_CPU_TRANS_NORMAL(int8_t); DEFINE_CPU_TRANS_NORMAL(int8_t);
DEFINE_CPU_TRANS_NORMAL(uint8_t); DEFINE_CPU_TRANS_NORMAL(uint8_t);
DEFINE_CPU_TRANS_NORMAL(int16_t); DEFINE_CPU_TRANS_NORMAL(int16_t);
DEFINE_CPU_TRANS_NORMAL(uint16_t);
DEFINE_CPU_TRANS_NORMAL(int32_t); DEFINE_CPU_TRANS_NORMAL(int32_t);
DEFINE_CPU_TRANS_NORMAL(uint32_t);
DEFINE_CPU_TRANS_NORMAL(int64_t); DEFINE_CPU_TRANS_NORMAL(int64_t);
DEFINE_CPU_TRANS_NORMAL(uint64_t);
DEFINE_CPU_TRANS_NORMAL(float); DEFINE_CPU_TRANS_NORMAL(float);
DEFINE_CPU_TRANS_NORMAL(double); DEFINE_CPU_TRANS_NORMAL(double);
DEFINE_CPU_TRANS_NORMAL(paddle::platform::float16); DEFINE_CPU_TRANS_NORMAL(paddle::platform::float16);
......
...@@ -61,7 +61,7 @@ struct TransposeNormal<GPUContext, T> { ...@@ -61,7 +61,7 @@ struct TransposeNormal<GPUContext, T> {
auto in_stride = pten::framework::stride(in.dims()); auto in_stride = pten::framework::stride(in.dims());
auto out_stride = pten::framework::stride(out->dims()); auto out_stride = pten::framework::stride(out->dims());
auto* in_ptr = in.data<T>(); auto* in_ptr = in.data<T>();
auto* out_ptr = out->mutable_data<T>(); auto* out_ptr = out->mutable_data<T>(dev_ctx.GetPlace());
// copy in_stride, out_stride, axis to gpu device // copy in_stride, out_stride, axis to gpu device
const paddle::platform::CUDAPlace& cuda_place = dev_ctx.GetPlace(); const paddle::platform::CUDAPlace& cuda_place = dev_ctx.GetPlace();
...@@ -110,11 +110,8 @@ DEFINE_GPU_TRANS_NORMAL(bool); ...@@ -110,11 +110,8 @@ DEFINE_GPU_TRANS_NORMAL(bool);
DEFINE_GPU_TRANS_NORMAL(int8_t); DEFINE_GPU_TRANS_NORMAL(int8_t);
DEFINE_GPU_TRANS_NORMAL(uint8_t); DEFINE_GPU_TRANS_NORMAL(uint8_t);
DEFINE_GPU_TRANS_NORMAL(int16_t); DEFINE_GPU_TRANS_NORMAL(int16_t);
DEFINE_GPU_TRANS_NORMAL(uint16_t);
DEFINE_GPU_TRANS_NORMAL(int32_t); DEFINE_GPU_TRANS_NORMAL(int32_t);
DEFINE_GPU_TRANS_NORMAL(uint32_t);
DEFINE_GPU_TRANS_NORMAL(int64_t); DEFINE_GPU_TRANS_NORMAL(int64_t);
DEFINE_GPU_TRANS_NORMAL(uint64_t);
DEFINE_GPU_TRANS_NORMAL(float); DEFINE_GPU_TRANS_NORMAL(float);
DEFINE_GPU_TRANS_NORMAL(double); DEFINE_GPU_TRANS_NORMAL(double);
DEFINE_GPU_TRANS_NORMAL(paddle::platform::float16); DEFINE_GPU_TRANS_NORMAL(paddle::platform::float16);
......
...@@ -43,7 +43,7 @@ void CastCUDAKernelImpl(const GPUContext& dev_ctx, ...@@ -43,7 +43,7 @@ void CastCUDAKernelImpl(const GPUContext& dev_ctx,
std::vector<DenseTensor*> outputs; std::vector<DenseTensor*> outputs;
inputs.emplace_back(&x); inputs.emplace_back(&x);
outputs.emplace_back(out); outputs.emplace_back(out);
out->mutable_data<OutT>(); out->mutable_data<OutT>(dev_ctx.GetPlace());
pten::funcs::LaunchSameDimsElementwiseCudaKernel<ElementwiseType::kUnary, pten::funcs::LaunchSameDimsElementwiseCudaKernel<ElementwiseType::kUnary,
InT, InT,
OutT>( OutT>(
......
...@@ -43,7 +43,7 @@ void Copy(const Context& dev_ctx, ...@@ -43,7 +43,7 @@ void Copy(const Context& dev_ctx,
<< dst_place; << dst_place;
dst->ResizeAndAllocate(src.dims()); dst->ResizeAndAllocate(src.dims());
auto* dst_ptr = dst->mutable_data(); auto* dst_ptr = dst->mutable_data(dst_place);
if (src_ptr == dst_ptr && src_place == dst_place) { if (src_ptr == dst_ptr && src_place == dst_place) {
VLOG(3) << "Skip copy the same data async from " << src_place << " to " VLOG(3) << "Skip copy the same data async from " << src_place << " to "
......
...@@ -29,7 +29,7 @@ void DotKernel(const Context& dev_ctx, ...@@ -29,7 +29,7 @@ void DotKernel(const Context& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
const DenseTensor& y, const DenseTensor& y,
DenseTensor* out) { DenseTensor* out) {
out->mutable_data<T>(); out->mutable_data<T>(dev_ctx.GetPlace());
if (1 == out->dims().size()) { if (1 == out->dims().size()) {
auto eigen_out = pten::EigenScalar<T>::From(*out); auto eigen_out = pten::EigenScalar<T>::From(*out);
auto eigen_x = pten::EigenVector<T>::Flatten(x); auto eigen_x = pten::EigenVector<T>::Flatten(x);
......
...@@ -350,7 +350,7 @@ void LaunchKernel(const KPDevice &ctx, ...@@ -350,7 +350,7 @@ void LaunchKernel(const KPDevice &ctx,
pten::framework::Array<_ptr_ OutT *, NumOuts> outs_data; pten::framework::Array<_ptr_ OutT *, NumOuts> outs_data;
for (int i = 0; i < NumOuts; ++i) { for (int i = 0; i < NumOuts; ++i) {
outs_data[i] = (*outs)[i]->mutable_data<OutT>(); outs_data[i] = (*outs)[i]->mutable_data<OutT>(ctx.GetPlace());
} }
for (int i = 0; i < Arity; i++) { for (int i = 0; i < Arity; i++) {
......
...@@ -47,7 +47,7 @@ namespace pten { ...@@ -47,7 +47,7 @@ namespace pten {
inputs.emplace_back(&x); \ inputs.emplace_back(&x); \
inputs.emplace_back(&y); \ inputs.emplace_back(&y); \
outputs.emplace_back(out); \ outputs.emplace_back(out); \
out->mutable_data<T>(); \ out->mutable_data<T>(dev_ctx.GetPlace()); \
LaunchElementwiseCudaKernel<ElementwiseType::kBinary, T, T>( \ LaunchElementwiseCudaKernel<ElementwiseType::kBinary, T, T>( \
dev_ctx, inputs, &outputs, axis, funcs::name##Functor<T>()); \ dev_ctx, inputs, &outputs, axis, funcs::name##Functor<T>()); \
} }
......
...@@ -328,7 +328,7 @@ struct ReduceConfig { ...@@ -328,7 +328,7 @@ struct ReduceConfig {
if (should_reduce_again) { if (should_reduce_again) {
tmp->ResizeAndAllocate(pten::framework::make_ddim( tmp->ResizeAndAllocate(pten::framework::make_ddim(
{static_cast<int64_t>(left_num * grid.z * grid.y * sizeof(Ty))})); {static_cast<int64_t>(left_num * grid.z * grid.y * sizeof(Ty))}));
output_data = tmp->mutable_data<Ty>(); output_data = tmp->mutable_data<Ty>(place);
} else { } else {
output_data = y_data; output_data = y_data;
} }
...@@ -1032,7 +1032,7 @@ static ...@@ -1032,7 +1032,7 @@ static
pten::framework::make_ddim( pten::framework::make_ddim(
{static_cast<int64_t>(temp_storage_bytes)}))); {static_cast<int64_t>(temp_storage_bytes)})));
auto* temp_storage = tmp.mutable_data<uint8_t>(); auto* temp_storage = tmp.mutable_data<uint8_t>(place);
cub::DeviceReduce::Reduce(temp_storage, cub::DeviceReduce::Reduce(temp_storage,
temp_storage_bytes, temp_storage_bytes,
...@@ -1070,8 +1070,7 @@ void TensorReduceFunctorImpl(const pten::DenseTensor& x, ...@@ -1070,8 +1070,7 @@ void TensorReduceFunctorImpl(const pten::DenseTensor& x,
const TransformOp& transform, const TransformOp& transform,
const std::vector<int>& origin_reduce_dims, const std::vector<int>& origin_reduce_dims,
gpuStream_t stream) { gpuStream_t stream) {
// Allocate memory y->mutable_data<Ty>(x.place());
y->mutable_data<Ty>();
auto x_dim = pten::framework::vectorize<int>(x.dims()); auto x_dim = pten::framework::vectorize<int>(x.dims());
auto config = ReduceConfig<Ty>(origin_reduce_dims, x_dim); auto config = ReduceConfig<Ty>(origin_reduce_dims, x_dim);
...@@ -1088,7 +1087,7 @@ void TensorReduceFunctorImpl(const pten::DenseTensor& x, ...@@ -1088,7 +1087,7 @@ void TensorReduceFunctorImpl(const pten::DenseTensor& x,
pten::DenseTensorMeta(y->dtype(), tmp_ddim, y->layout())); pten::DenseTensorMeta(y->dtype(), tmp_ddim, y->layout()));
auto x_data = x.data<Tx>(); auto x_data = x.data<Tx>();
auto y_data = y->mutable_data<Ty>(); auto y_data = y->data<Ty>();
auto* dev_ctx = static_cast<paddle::platform::CUDADeviceContext*>( auto* dev_ctx = static_cast<paddle::platform::CUDADeviceContext*>(
paddle::platform::DeviceContextPool::Instance().Get(x.place())); paddle::platform::DeviceContextPool::Instance().Get(x.place()));
......
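
The reduce changes above, like the matmul changes later in this diff, settle on an allocate-once pattern: mutable_data<Ty>() is called a single time with a place (here x.place(), since the functor only receives a stream), and every later pointer lookup goes through data<Ty>(), which never allocates. A small sketch of that pattern, with an invented function name:

// Illustrative only.
template <typename T, typename Context>
T* AllocateOutputOnce(const Context& dev_ctx, pten::DenseTensor* out) {
  // Single allocating call, made where the kernel knows its place.
  T* ptr = out->mutable_data<T>(dev_ctx.GetPlace());
  (void)ptr;
  // Later reads use data<T>() instead of a second mutable_data<T>() call,
  // matching the y->data<Ty>() and Out->data<T>() changes in this commit.
  return out->data<T>();
}
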
...@@ -54,7 +54,7 @@ void ScaleKernel(const Context& dev_ctx, ...@@ -54,7 +54,7 @@ void ScaleKernel(const Context& dev_ctx,
std::vector<DenseTensor*> outputs; std::vector<DenseTensor*> outputs;
inputs.emplace_back(&x); inputs.emplace_back(&x);
outputs.emplace_back(out); outputs.emplace_back(out);
out->mutable_data<T>(); out->mutable_data<T>(dev_ctx.GetPlace());
pten::funcs::LaunchSameDimsElementwiseCudaKernel<ElementwiseType::kUnary, pten::funcs::LaunchSameDimsElementwiseCudaKernel<ElementwiseType::kUnary,
T, T,
T>( T>(
......
...@@ -26,7 +26,7 @@ void ConjKernel(const Context& dev_ctx, ...@@ -26,7 +26,7 @@ void ConjKernel(const Context& dev_ctx,
DenseTensor* out) { DenseTensor* out) {
auto numel = x.numel(); auto numel = x.numel();
auto* x_data = x.data<T>(); auto* x_data = x.data<T>();
auto* out_data = out->mutable_data<T>(); auto* out_data = out->mutable_data<T>(dev_ctx.GetPlace());
paddle::platform::ForRange<Context> for_range(dev_ctx, numel); paddle::platform::ForRange<Context> for_range(dev_ctx, numel);
paddle::operators::math::ConjFunctor<T> functor(x_data, numel, out_data); paddle::operators::math::ConjFunctor<T> functor(x_data, numel, out_data);
......
...@@ -73,7 +73,7 @@ struct DotGradFunction<DeviceContext, ...@@ -73,7 +73,7 @@ struct DotGradFunction<DeviceContext,
auto dout = EigenMatrix<T>::From(*tensor_dout); auto dout = EigenMatrix<T>::From(*tensor_dout);
if (tensor_dx) { if (tensor_dx) {
tensor_dx->mutable_data<T>(); tensor_dx->mutable_data<T>(ctx.GetPlace());
auto y = EigenMatrix<T>::From(*tensor_y); auto y = EigenMatrix<T>::From(*tensor_y);
auto& dev = *ctx.eigen_device(); auto& dev = *ctx.eigen_device();
Eigen::DSizes<int, 2> size(1, tensor_dx->dims()[1]); Eigen::DSizes<int, 2> size(1, tensor_dx->dims()[1]);
...@@ -85,7 +85,7 @@ struct DotGradFunction<DeviceContext, ...@@ -85,7 +85,7 @@ struct DotGradFunction<DeviceContext,
} }
if (tensor_dy) { if (tensor_dy) {
tensor_dy->mutable_data<T>(); tensor_dy->mutable_data<T>(ctx.GetPlace());
auto x = EigenMatrix<T>::From(*tensor_x); auto x = EigenMatrix<T>::From(*tensor_x);
auto& dev = *ctx.eigen_device(); auto& dev = *ctx.eigen_device();
Eigen::DSizes<int, 2> size(1, tensor_dy->dims()[1]); Eigen::DSizes<int, 2> size(1, tensor_dy->dims()[1]);
...@@ -100,7 +100,7 @@ struct DotGradFunction<DeviceContext, ...@@ -100,7 +100,7 @@ struct DotGradFunction<DeviceContext,
const auto* data_dout = tensor_dout->data<T>(); const auto* data_dout = tensor_dout->data<T>();
if (tensor_dx) { if (tensor_dx) {
auto* data_dx = tensor_dx->mutable_data<T>(); auto* data_dx = tensor_dx->mutable_data<T>(ctx.GetPlace());
const auto* data_y = tensor_y->data<T>(); const auto* data_y = tensor_y->data<T>();
const DDim& dim = tensor_x->dims(); const DDim& dim = tensor_x->dims();
size_t N = static_cast<size_t>(pten::framework::product(dim)); size_t N = static_cast<size_t>(pten::framework::product(dim));
...@@ -115,7 +115,7 @@ struct DotGradFunction<DeviceContext, ...@@ -115,7 +115,7 @@ struct DotGradFunction<DeviceContext,
} }
if (tensor_dy) { if (tensor_dy) {
auto* data_dy = tensor_dy->mutable_data<T>(); auto* data_dy = tensor_dy->mutable_data<T>(ctx.GetPlace());
const auto* data_x = tensor_x->data<T>(); const auto* data_x = tensor_x->data<T>();
const DDim& dim = tensor_y->dims(); const DDim& dim = tensor_y->dims();
size_t N = static_cast<size_t>(pten::framework::product(dim)); size_t N = static_cast<size_t>(pten::framework::product(dim));
...@@ -164,7 +164,7 @@ struct DotGradFunction<DeviceContext, ...@@ -164,7 +164,7 @@ struct DotGradFunction<DeviceContext,
auto dout = EigenMatrix<T>::From(*tensor_dout); auto dout = EigenMatrix<T>::From(*tensor_dout);
if (tensor_dx) { if (tensor_dx) {
tensor_dx->mutable_data<T>(); tensor_dx->mutable_data<T>(ctx.GetPlace());
auto y = EigenMatrix<T>::From(*tensor_y); auto y = EigenMatrix<T>::From(*tensor_y);
auto dx = EigenMatrix<T>::From(*tensor_dx); auto dx = EigenMatrix<T>::From(*tensor_dx);
auto& dev = *ctx.eigen_device(); auto& dev = *ctx.eigen_device();
...@@ -173,7 +173,7 @@ struct DotGradFunction<DeviceContext, ...@@ -173,7 +173,7 @@ struct DotGradFunction<DeviceContext,
} }
if (tensor_dy) { if (tensor_dy) {
tensor_dy->mutable_data<T>(); tensor_dy->mutable_data<T>(ctx.GetPlace());
auto x = EigenMatrix<T>::From(*tensor_x); auto x = EigenMatrix<T>::From(*tensor_x);
auto dy = EigenMatrix<T>::From(*tensor_dy); auto dy = EigenMatrix<T>::From(*tensor_dy);
auto& dev = *ctx.eigen_device(); auto& dev = *ctx.eigen_device();
...@@ -189,7 +189,7 @@ struct DotGradFunction<DeviceContext, ...@@ -189,7 +189,7 @@ struct DotGradFunction<DeviceContext,
auto const B = d[d.size() - 1]; auto const B = d[d.size() - 1];
if (tensor_dx) { if (tensor_dx) {
auto* dx = tensor_dx->mutable_data<T>(); auto* dx = tensor_dx->mutable_data<T>(ctx.GetPlace());
for (auto j = 0; j < N / B; ++j) { for (auto j = 0; j < N / B; ++j) {
auto const ss = dz[j]; auto const ss = dz[j];
for (auto i = 0; i < B; ++i) *dx++ = *y++ * ss; for (auto i = 0; i < B; ++i) *dx++ = *y++ * ss;
...@@ -197,7 +197,7 @@ struct DotGradFunction<DeviceContext, ...@@ -197,7 +197,7 @@ struct DotGradFunction<DeviceContext,
} }
if (tensor_dy) { if (tensor_dy) {
auto* dy = tensor_dy->mutable_data<T>(); auto* dy = tensor_dy->mutable_data<T>(ctx.GetPlace());
for (auto j = 0; j < N / B; ++j) { for (auto j = 0; j < N / B; ++j) {
auto const ss = dz[j]; auto const ss = dz[j];
for (auto i = 0; i < B; i++) *dy++ = *x++ * ss; for (auto i = 0; i < B; i++) *dy++ = *x++ * ss;
...@@ -272,7 +272,7 @@ struct DotDoubleGradFunction<DeviceContext, ...@@ -272,7 +272,7 @@ struct DotDoubleGradFunction<DeviceContext,
const auto* data_dout = tensor_dout->data<T>(); const auto* data_dout = tensor_dout->data<T>();
if (tensor_dx) { if (tensor_dx) {
auto* data_dx = tensor_dx->mutable_data<T>(); auto* data_dx = tensor_dx->mutable_data<T>(ctx.GetPlace());
const auto* data_ddy = tensor_ddy->data<T>(); const auto* data_ddy = tensor_ddy->data<T>();
const DDim& dim = tensor_dx->dims(); const DDim& dim = tensor_dx->dims();
size_t N = static_cast<size_t>(product(dim)); size_t N = static_cast<size_t>(product(dim));
...@@ -287,7 +287,7 @@ struct DotDoubleGradFunction<DeviceContext, ...@@ -287,7 +287,7 @@ struct DotDoubleGradFunction<DeviceContext,
} }
if (tensor_dy) { if (tensor_dy) {
auto* data_dy = tensor_dy->mutable_data<T>(); auto* data_dy = tensor_dy->mutable_data<T>(ctx.GetPlace());
const auto* data_ddx = tensor_ddx->data<T>(); const auto* data_ddx = tensor_ddx->data<T>();
const DDim& dim = tensor_dy->dims(); const DDim& dim = tensor_dy->dims();
size_t N = static_cast<size_t>(product(dim)); size_t N = static_cast<size_t>(product(dim));
...@@ -302,7 +302,7 @@ struct DotDoubleGradFunction<DeviceContext, ...@@ -302,7 +302,7 @@ struct DotDoubleGradFunction<DeviceContext,
} }
if (tensor_ddout) { if (tensor_ddout) {
auto* data_ddout = tensor_ddout->mutable_data<T>(); auto* data_ddout = tensor_ddout->mutable_data<T>(ctx.GetPlace());
auto* data_x = tensor_x->data<T>(); auto* data_x = tensor_x->data<T>();
auto* data_y = tensor_y->data<T>(); auto* data_y = tensor_y->data<T>();
auto* data_ddx = tensor_ddx->data<T>(); auto* data_ddx = tensor_ddx->data<T>();
...@@ -351,7 +351,7 @@ struct DotDoubleGradFunction<DeviceContext, ...@@ -351,7 +351,7 @@ struct DotDoubleGradFunction<DeviceContext,
auto& dev = *ctx.eigen_device(); auto& dev = *ctx.eigen_device();
auto dout = EigenVector<T>::Flatten(*tensor_dout); auto dout = EigenVector<T>::Flatten(*tensor_dout);
if (tensor_dx) { if (tensor_dx) {
tensor_dx->mutable_data<T>(); tensor_dx->mutable_data<T>(ctx.GetPlace());
auto ddy = EigenVector<T>::Flatten(*tensor_ddy); auto ddy = EigenVector<T>::Flatten(*tensor_ddy);
Eigen::DSizes<int, 1> size(tensor_ddy->numel()); Eigen::DSizes<int, 1> size(tensor_ddy->numel());
auto dx = EigenVector<T>::Flatten(*tensor_dx); auto dx = EigenVector<T>::Flatten(*tensor_dx);
...@@ -359,7 +359,7 @@ struct DotDoubleGradFunction<DeviceContext, ...@@ -359,7 +359,7 @@ struct DotDoubleGradFunction<DeviceContext,
} }
if (tensor_dy) { if (tensor_dy) {
tensor_dy->mutable_data<T>(); tensor_dy->mutable_data<T>(ctx.GetPlace());
auto ddx = EigenVector<T>::Flatten(*tensor_ddx); auto ddx = EigenVector<T>::Flatten(*tensor_ddx);
Eigen::DSizes<int, 1> size(tensor_ddx->numel()); Eigen::DSizes<int, 1> size(tensor_ddx->numel());
...@@ -368,7 +368,7 @@ struct DotDoubleGradFunction<DeviceContext, ...@@ -368,7 +368,7 @@ struct DotDoubleGradFunction<DeviceContext,
} }
if (tensor_ddout) { if (tensor_ddout) {
tensor_ddout->mutable_data<T>(); tensor_ddout->mutable_data<T>(ctx.GetPlace());
auto x = EigenVector<T>::Flatten(*tensor_x); auto x = EigenVector<T>::Flatten(*tensor_x);
auto y = EigenVector<T>::Flatten(*tensor_y); auto y = EigenVector<T>::Flatten(*tensor_y);
auto ddx = EigenVector<T>::Flatten(*tensor_ddx); auto ddx = EigenVector<T>::Flatten(*tensor_ddx);
...@@ -381,7 +381,7 @@ struct DotDoubleGradFunction<DeviceContext, ...@@ -381,7 +381,7 @@ struct DotDoubleGradFunction<DeviceContext,
const auto* data_dout = tensor_dout->data<T>(); const auto* data_dout = tensor_dout->data<T>();
if (tensor_dx) { if (tensor_dx) {
auto* data_dx = tensor_dx->mutable_data<T>(); auto* data_dx = tensor_dx->mutable_data<T>(ctx.GetPlace());
const auto* data_ddy = tensor_ddy->data<T>(); const auto* data_ddy = tensor_ddy->data<T>();
const DDim& dim = tensor_dx->dims(); const DDim& dim = tensor_dx->dims();
size_t N = static_cast<size_t>(product(dim)); size_t N = static_cast<size_t>(product(dim));
...@@ -396,7 +396,7 @@ struct DotDoubleGradFunction<DeviceContext, ...@@ -396,7 +396,7 @@ struct DotDoubleGradFunction<DeviceContext,
} }
if (tensor_dy) { if (tensor_dy) {
auto* data_dy = tensor_dy->mutable_data<T>(); auto* data_dy = tensor_dy->mutable_data<T>(ctx.GetPlace());
const auto* data_ddx = tensor_ddx->data<T>(); const auto* data_ddx = tensor_ddx->data<T>();
const DDim& dim = tensor_dy->dims(); const DDim& dim = tensor_dy->dims();
size_t N = static_cast<size_t>(product(dim)); size_t N = static_cast<size_t>(product(dim));
...@@ -411,7 +411,7 @@ struct DotDoubleGradFunction<DeviceContext, ...@@ -411,7 +411,7 @@ struct DotDoubleGradFunction<DeviceContext,
} }
if (tensor_ddout) { if (tensor_ddout) {
auto* data_ddout = tensor_ddout->mutable_data<T>(); auto* data_ddout = tensor_ddout->mutable_data<T>(ctx.GetPlace());
auto* data_x = tensor_x->data<T>(); auto* data_x = tensor_x->data<T>();
auto* data_y = tensor_y->data<T>(); auto* data_y = tensor_y->data<T>();
auto* data_ddx = tensor_ddx->data<T>(); auto* data_ddx = tensor_ddx->data<T>();
...@@ -552,7 +552,7 @@ struct DotTripleGradFunction<DeviceContext, ...@@ -552,7 +552,7 @@ struct DotTripleGradFunction<DeviceContext,
const auto* data_d_ddout = in_tensor_d_ddout->data<T>(); const auto* data_d_ddout = in_tensor_d_ddout->data<T>();
if (out_tensor_d_x) { if (out_tensor_d_x) {
auto* data_d_x = out_tensor_d_x->mutable_data<T>(); auto* data_d_x = out_tensor_d_x->mutable_data<T>(ctx.GetPlace());
const auto* data_ddy = in_tensor_ddy->data<T>(); const auto* data_ddy = in_tensor_ddy->data<T>();
const DDim& dim = out_tensor_d_x->dims(); const DDim& dim = out_tensor_d_x->dims();
...@@ -567,7 +567,7 @@ struct DotTripleGradFunction<DeviceContext, ...@@ -567,7 +567,7 @@ struct DotTripleGradFunction<DeviceContext,
} }
if (out_tensor_d_y) { if (out_tensor_d_y) {
auto* data_d_y = out_tensor_d_y->mutable_data<T>(); auto* data_d_y = out_tensor_d_y->mutable_data<T>(ctx.GetPlace());
const auto* data_ddx = in_tensor_ddx->data<T>(); const auto* data_ddx = in_tensor_ddx->data<T>();
const DDim& dim = out_tensor_d_y->dims(); const DDim& dim = out_tensor_d_y->dims();
...@@ -582,7 +582,7 @@ struct DotTripleGradFunction<DeviceContext, ...@@ -582,7 +582,7 @@ struct DotTripleGradFunction<DeviceContext,
} }
if (out_tensor_d_dout) { if (out_tensor_d_dout) {
auto* data_d_dout = out_tensor_d_dout->mutable_data<T>(); auto* data_d_dout = out_tensor_d_dout->mutable_data<T>(ctx.GetPlace());
auto* data_ddx = in_tensor_ddx->data<T>(); auto* data_ddx = in_tensor_ddx->data<T>();
auto* data_ddy = in_tensor_ddy->data<T>(); auto* data_ddy = in_tensor_ddy->data<T>();
auto* data_d_dx = in_tensor_d_dx->data<T>(); auto* data_d_dx = in_tensor_d_dx->data<T>();
...@@ -613,7 +613,7 @@ struct DotTripleGradFunction<DeviceContext, ...@@ -613,7 +613,7 @@ struct DotTripleGradFunction<DeviceContext,
} }
if (out_tensor_d_ddx) { if (out_tensor_d_ddx) {
auto* data_d_ddx = out_tensor_d_ddx->mutable_data<T>(); auto* data_d_ddx = out_tensor_d_ddx->mutable_data<T>(ctx.GetPlace());
auto* data_dout = in_tensor_dout->data<T>(); auto* data_dout = in_tensor_dout->data<T>();
auto* data_d_dy = in_tensor_d_dy->data<T>(); auto* data_d_dy = in_tensor_d_dy->data<T>();
auto* data_y = in_tensor_y->data<T>(); auto* data_y = in_tensor_y->data<T>();
...@@ -633,7 +633,7 @@ struct DotTripleGradFunction<DeviceContext, ...@@ -633,7 +633,7 @@ struct DotTripleGradFunction<DeviceContext,
} }
if (out_tensor_d_ddy) { if (out_tensor_d_ddy) {
auto* data_d_ddy = out_tensor_d_ddy->mutable_data<T>(); auto* data_d_ddy = out_tensor_d_ddy->mutable_data<T>(ctx.GetPlace());
auto* data_dout = in_tensor_dout->data<T>(); auto* data_dout = in_tensor_dout->data<T>();
auto* data_d_dx = in_tensor_d_dx->data<T>(); auto* data_d_dx = in_tensor_d_dx->data<T>();
auto* data_x = in_tensor_x->data<T>(); auto* data_x = in_tensor_x->data<T>();
...@@ -678,7 +678,7 @@ struct DotTripleGradFunction<DeviceContext, ...@@ -678,7 +678,7 @@ struct DotTripleGradFunction<DeviceContext,
auto& dev = *ctx.eigen_device(); auto& dev = *ctx.eigen_device();
auto d_ddout = EigenVector<T>::Flatten(*in_tensor_d_ddout); auto d_ddout = EigenVector<T>::Flatten(*in_tensor_d_ddout);
if (out_tensor_d_x) { if (out_tensor_d_x) {
out_tensor_d_x->mutable_data<T>(); out_tensor_d_x->mutable_data<T>(ctx.GetPlace());
auto ddy = EigenVector<T>::Flatten(*in_tensor_ddy); auto ddy = EigenVector<T>::Flatten(*in_tensor_ddy);
Eigen::DSizes<int, 1> size(in_tensor_ddy->numel()); Eigen::DSizes<int, 1> size(in_tensor_ddy->numel());
auto d_x = EigenVector<T>::Flatten(*out_tensor_d_x); auto d_x = EigenVector<T>::Flatten(*out_tensor_d_x);
...@@ -686,7 +686,7 @@ struct DotTripleGradFunction<DeviceContext, ...@@ -686,7 +686,7 @@ struct DotTripleGradFunction<DeviceContext,
} }
if (out_tensor_d_y) { if (out_tensor_d_y) {
out_tensor_d_y->mutable_data<T>(); out_tensor_d_y->mutable_data<T>(ctx.GetPlace());
auto ddx = EigenVector<T>::Flatten(*in_tensor_ddx); auto ddx = EigenVector<T>::Flatten(*in_tensor_ddx);
Eigen::DSizes<int, 1> size(in_tensor_ddx->numel()); Eigen::DSizes<int, 1> size(in_tensor_ddx->numel());
...@@ -695,7 +695,7 @@ struct DotTripleGradFunction<DeviceContext, ...@@ -695,7 +695,7 @@ struct DotTripleGradFunction<DeviceContext,
} }
if (out_tensor_d_dout) { if (out_tensor_d_dout) {
out_tensor_d_dout->mutable_data<T>(); out_tensor_d_dout->mutable_data<T>(ctx.GetPlace());
auto ddx = EigenVector<T>::Flatten(*in_tensor_ddx); auto ddx = EigenVector<T>::Flatten(*in_tensor_ddx);
auto ddy = EigenVector<T>::Flatten(*in_tensor_ddy); auto ddy = EigenVector<T>::Flatten(*in_tensor_ddy);
auto d_dx = EigenVector<T>::Flatten(*in_tensor_d_dx); auto d_dx = EigenVector<T>::Flatten(*in_tensor_d_dx);
...@@ -705,7 +705,7 @@ struct DotTripleGradFunction<DeviceContext, ...@@ -705,7 +705,7 @@ struct DotTripleGradFunction<DeviceContext,
} }
if (out_tensor_d_ddx) { if (out_tensor_d_ddx) {
out_tensor_d_ddx->mutable_data<T>(); out_tensor_d_ddx->mutable_data<T>(ctx.GetPlace());
auto dout = EigenVector<T>::Flatten(*in_tensor_dout); auto dout = EigenVector<T>::Flatten(*in_tensor_dout);
auto y = EigenVector<T>::Flatten(*in_tensor_y); auto y = EigenVector<T>::Flatten(*in_tensor_y);
auto d_ddout = EigenVector<T>::Flatten(*in_tensor_d_ddout); auto d_ddout = EigenVector<T>::Flatten(*in_tensor_d_ddout);
...@@ -717,7 +717,7 @@ struct DotTripleGradFunction<DeviceContext, ...@@ -717,7 +717,7 @@ struct DotTripleGradFunction<DeviceContext,
} }
if (out_tensor_d_ddy) { if (out_tensor_d_ddy) {
out_tensor_d_ddy->mutable_data<T>(); out_tensor_d_ddy->mutable_data<T>(ctx.GetPlace());
auto dout = EigenVector<T>::Flatten(*in_tensor_dout); auto dout = EigenVector<T>::Flatten(*in_tensor_dout);
auto x = EigenVector<T>::Flatten(*in_tensor_x); auto x = EigenVector<T>::Flatten(*in_tensor_x);
auto d_ddout = EigenVector<T>::Flatten(*in_tensor_d_ddout); auto d_ddout = EigenVector<T>::Flatten(*in_tensor_d_ddout);
...@@ -732,7 +732,7 @@ struct DotTripleGradFunction<DeviceContext, ...@@ -732,7 +732,7 @@ struct DotTripleGradFunction<DeviceContext,
const auto* data_d_ddout = in_tensor_d_ddout->data<T>(); const auto* data_d_ddout = in_tensor_d_ddout->data<T>();
if (out_tensor_d_x) { if (out_tensor_d_x) {
auto* data_d_x = out_tensor_d_x->mutable_data<T>(); auto* data_d_x = out_tensor_d_x->mutable_data<T>(ctx.GetPlace());
const auto* data_ddy = in_tensor_ddy->data<T>(); const auto* data_ddy = in_tensor_ddy->data<T>();
const DDim& dim = out_tensor_d_x->dims(); const DDim& dim = out_tensor_d_x->dims();
...@@ -747,7 +747,7 @@ struct DotTripleGradFunction<DeviceContext, ...@@ -747,7 +747,7 @@ struct DotTripleGradFunction<DeviceContext,
} }
if (out_tensor_d_y) { if (out_tensor_d_y) {
auto* data_d_y = out_tensor_d_y->mutable_data<T>(); auto* data_d_y = out_tensor_d_y->mutable_data<T>(ctx.GetPlace());
const auto* data_ddx = in_tensor_ddx->data<T>(); const auto* data_ddx = in_tensor_ddx->data<T>();
const DDim& dim = out_tensor_d_y->dims(); const DDim& dim = out_tensor_d_y->dims();
...@@ -762,7 +762,7 @@ struct DotTripleGradFunction<DeviceContext, ...@@ -762,7 +762,7 @@ struct DotTripleGradFunction<DeviceContext,
} }
if (out_tensor_d_dout) { if (out_tensor_d_dout) {
auto* data_d_dout = out_tensor_d_dout->mutable_data<T>(); auto* data_d_dout = out_tensor_d_dout->mutable_data<T>(ctx.GetPlace());
auto* data_ddx = in_tensor_ddx->data<T>(); auto* data_ddx = in_tensor_ddx->data<T>();
auto* data_ddy = in_tensor_ddy->data<T>(); auto* data_ddy = in_tensor_ddy->data<T>();
auto* data_d_dx = in_tensor_d_dx->data<T>(); auto* data_d_dx = in_tensor_d_dx->data<T>();
...@@ -790,7 +790,7 @@ struct DotTripleGradFunction<DeviceContext, ...@@ -790,7 +790,7 @@ struct DotTripleGradFunction<DeviceContext,
} }
if (out_tensor_d_ddx) { if (out_tensor_d_ddx) {
auto* data_d_ddx = out_tensor_d_ddx->mutable_data<T>(); auto* data_d_ddx = out_tensor_d_ddx->mutable_data<T>(ctx.GetPlace());
auto* data_dout = in_tensor_dout->data<T>(); auto* data_dout = in_tensor_dout->data<T>();
auto* data_d_dy = in_tensor_d_dy->data<T>(); auto* data_d_dy = in_tensor_d_dy->data<T>();
auto* data_y = in_tensor_y->data<T>(); auto* data_y = in_tensor_y->data<T>();
...@@ -809,7 +809,7 @@ struct DotTripleGradFunction<DeviceContext, ...@@ -809,7 +809,7 @@ struct DotTripleGradFunction<DeviceContext,
} }
if (out_tensor_d_ddy) { if (out_tensor_d_ddy) {
auto* data_d_ddy = out_tensor_d_ddy->mutable_data<T>(); auto* data_d_ddy = out_tensor_d_ddy->mutable_data<T>(ctx.GetPlace());
auto* data_dout = in_tensor_dout->data<T>(); auto* data_dout = in_tensor_dout->data<T>();
auto* data_d_dx = in_tensor_d_dx->data<T>(); auto* data_d_dx = in_tensor_d_dx->data<T>();
auto* data_x = in_tensor_x->data<T>(); auto* data_x = in_tensor_x->data<T>();
...@@ -838,10 +838,10 @@ void DotGradKernel(const Context& dev_ctx, ...@@ -838,10 +838,10 @@ void DotGradKernel(const Context& dev_ctx,
DenseTensor* dx, DenseTensor* dx,
DenseTensor* dy) { DenseTensor* dy) {
if (dx) { if (dx) {
dx->mutable_data<T>(); dx->mutable_data<T>(dev_ctx.GetPlace());
} }
if (dy) { if (dy) {
dy->mutable_data<T>(); dy->mutable_data<T>(dev_ctx.GetPlace());
} }
DotGradFunction<Context, T>()(dev_ctx, &x, &y, &dout, dx, dy); DotGradFunction<Context, T>()(dev_ctx, &x, &y, &dout, dx, dy);
} }
...@@ -857,13 +857,13 @@ void DotDoubleGradKernel(const Context& dev_ctx, ...@@ -857,13 +857,13 @@ void DotDoubleGradKernel(const Context& dev_ctx,
DenseTensor* dy, DenseTensor* dy,
DenseTensor* ddout) { DenseTensor* ddout) {
if (dx) { if (dx) {
dx->mutable_data<T>(); dx->mutable_data<T>(dev_ctx.GetPlace());
} }
if (dy) { if (dy) {
dy->mutable_data<T>(); dy->mutable_data<T>(dev_ctx.GetPlace());
} }
if (ddout) { if (ddout) {
ddout->mutable_data<T>(); ddout->mutable_data<T>(dev_ctx.GetPlace());
} }
DotDoubleGradFunction<Context, T>()( DotDoubleGradFunction<Context, T>()(
dev_ctx, &x, &y, &dout, ddx, ddy, dx, dy, ddout); dev_ctx, &x, &y, &dout, ddx, ddy, dx, dy, ddout);
...@@ -885,19 +885,19 @@ void DotTripleGradKernel(const Context& dev_ctx, ...@@ -885,19 +885,19 @@ void DotTripleGradKernel(const Context& dev_ctx,
DenseTensor* d_ddy, DenseTensor* d_ddy,
DenseTensor* d_dout) { DenseTensor* d_dout) {
if (d_x) { if (d_x) {
d_x->mutable_data<T>(); d_x->mutable_data<T>(dev_ctx.GetPlace());
} }
if (d_y) { if (d_y) {
d_y->mutable_data<T>(); d_y->mutable_data<T>(dev_ctx.GetPlace());
} }
if (d_ddx) { if (d_ddx) {
d_ddx->mutable_data<T>(); d_ddx->mutable_data<T>(dev_ctx.GetPlace());
} }
if (d_ddy) { if (d_ddy) {
d_ddy->mutable_data<T>(); d_ddy->mutable_data<T>(dev_ctx.GetPlace());
} }
if (d_dout) { if (d_dout) {
d_dout->mutable_data<T>(); d_dout->mutable_data<T>(dev_ctx.GetPlace());
} }
DotTripleGradFunction<Context, T>()(dev_ctx, DotTripleGradFunction<Context, T>()(dev_ctx,
......
...@@ -26,7 +26,7 @@ namespace pten { ...@@ -26,7 +26,7 @@ namespace pten {
template <typename T, typename Context, typename VType> template <typename T, typename Context, typename VType>
void FullValue(const Context& dev_ctx, DenseTensor* tensor, VType val) { void FullValue(const Context& dev_ctx, DenseTensor* tensor, VType val) {
tensor->mutable_data<T>(); tensor->mutable_data<T>(dev_ctx.GetPlace());
auto t = pten::EigenVector<T>::Flatten(*tensor); auto t = pten::EigenVector<T>::Flatten(*tensor);
t.device(*dev_ctx.eigen_device()) = t.constant(static_cast<T>(val)); t.device(*dev_ctx.eigen_device()) = t.constant(static_cast<T>(val));
} }
......
...@@ -105,7 +105,7 @@ void MatMul(const Context& dev_ctx, ...@@ -105,7 +105,7 @@ void MatMul(const Context& dev_ctx,
bool trans_b, bool trans_b,
DenseTensor* out, DenseTensor* out,
bool flag = false) { bool flag = false) {
out->mutable_data<T>(); out->mutable_data<T>(dev_ctx.GetPlace());
auto blas = paddle::operators::math::GetBlas<Context, T>(dev_ctx); auto blas = paddle::operators::math::GetBlas<Context, T>(dev_ctx);
auto mat_dim_a = auto mat_dim_a =
paddle::operators::math::CreateMatrixDescriptor(a.dims(), 0, trans_a); paddle::operators::math::CreateMatrixDescriptor(a.dims(), 0, trans_a);
...@@ -123,7 +123,7 @@ void MatMul(const Context& dev_ctx, ...@@ -123,7 +123,7 @@ void MatMul(const Context& dev_ctx,
b.data<T>(), b.data<T>(),
mat_dim_b, mat_dim_b,
static_cast<T>(1), static_cast<T>(1),
out->mutable_data<T>(), out->data<T>(),
static_cast<T>(flag)); static_cast<T>(flag));
} }
...@@ -242,8 +242,8 @@ void MatmulGradKernel(const Context& dev_ctx, ...@@ -242,8 +242,8 @@ void MatmulGradKernel(const Context& dev_ctx,
// Case1 : x's or y's dim = 1 // Case1 : x's or y's dim = 1
if (x_ndim == 1 && y_ndim == 1) { if (x_ndim == 1 && y_ndim == 1) {
if (dx) dx->mutable_data<T>(); if (dx) dx->mutable_data<T>(dev_ctx.GetPlace());
if (dy) dy->mutable_data<T>(); if (dy) dy->mutable_data<T>(dev_ctx.GetPlace());
if (out_grad.numel() == 1) { if (out_grad.numel() == 1) {
DotGradFunction<Context, T>()(dev_ctx, &x, &y, &out_grad, dx, dy); DotGradFunction<Context, T>()(dev_ctx, &x, &y, &out_grad, dx, dy);
return; return;
......
...@@ -118,7 +118,7 @@ void MatMulFunction(const Context& dev_ctx, ...@@ -118,7 +118,7 @@ void MatMulFunction(const Context& dev_ctx,
N)); N));
VLOG(3) << "MatMul's case 1"; VLOG(3) << "MatMul's case 1";
Out->Resize({1}); Out->Resize({1});
Out->mutable_data<T>(); Out->mutable_data<T>(dev_ctx.GetPlace());
blas.GEMM(CblasNoTrans, blas.GEMM(CblasNoTrans,
CblasTrans, CblasTrans,
1, 1,
...@@ -128,7 +128,7 @@ void MatMulFunction(const Context& dev_ctx, ...@@ -128,7 +128,7 @@ void MatMulFunction(const Context& dev_ctx,
y_data, y_data,
x_data, x_data,
static_cast<T>(flag), static_cast<T>(flag),
Out->mutable_data<T>()); Out->data<T>());
return; return;
} }
...@@ -165,7 +165,7 @@ void MatMulFunction(const Context& dev_ctx, ...@@ -165,7 +165,7 @@ void MatMulFunction(const Context& dev_ctx,
out_dims.back() = y_dims.back(); out_dims.back() = y_dims.back();
} }
Out->ResizeAndAllocate(pten::framework::make_ddim(out_dims)); Out->ResizeAndAllocate(pten::framework::make_ddim(out_dims));
Out->mutable_data<T>(); Out->mutable_data<T>(dev_ctx.GetPlace());
if (trans_y) { if (trans_y) {
const int M = Y.numel() / N; const int M = Y.numel() / N;
VLOG(3) << "MatMul's case 2"; VLOG(3) << "MatMul's case 2";
...@@ -176,7 +176,7 @@ void MatMulFunction(const Context& dev_ctx, ...@@ -176,7 +176,7 @@ void MatMulFunction(const Context& dev_ctx,
y_data, y_data,
x_data, x_data,
static_cast<T>(flag), static_cast<T>(flag),
Out->mutable_data<T>()); Out->data<T>());
} else { } else {
const int M = y_dims[y_ndim - 1]; const int M = y_dims[y_ndim - 1];
const int batch_size = Y.numel() / (M * N); const int batch_size = Y.numel() / (M * N);
...@@ -189,7 +189,7 @@ void MatMulFunction(const Context& dev_ctx, ...@@ -189,7 +189,7 @@ void MatMulFunction(const Context& dev_ctx,
y_data, y_data,
x_data, x_data,
static_cast<T>(flag), static_cast<T>(flag),
Out->mutable_data<T>()); Out->data<T>());
} else { } else {
VLOG(3) << "MatMul's case 4"; VLOG(3) << "MatMul's case 4";
blas.BatchedGEMM(CblasTrans, blas.BatchedGEMM(CblasTrans,
...@@ -201,7 +201,7 @@ void MatMulFunction(const Context& dev_ctx, ...@@ -201,7 +201,7 @@ void MatMulFunction(const Context& dev_ctx,
y_data, y_data,
x_data, x_data,
static_cast<T>(flag), static_cast<T>(flag),
Out->mutable_data<T>(), Out->data<T>(),
batch_size, batch_size,
M * N, M * N,
0); 0);
...@@ -243,7 +243,7 @@ void MatMulFunction(const Context& dev_ctx, ...@@ -243,7 +243,7 @@ void MatMulFunction(const Context& dev_ctx,
std::copy_n(x_dims.cbegin(), x_ndim - 1, out_dims.begin()); std::copy_n(x_dims.cbegin(), x_ndim - 1, out_dims.begin());
} }
Out->ResizeAndAllocate(pten::framework::make_ddim(out_dims)); Out->ResizeAndAllocate(pten::framework::make_ddim(out_dims));
Out->mutable_data<T>(); Out->mutable_data<T>(dev_ctx.GetPlace());
if (trans_x) { if (trans_x) {
const int M = x_dims[x_ndim - 1]; const int M = x_dims[x_ndim - 1];
...@@ -257,7 +257,7 @@ void MatMulFunction(const Context& dev_ctx, ...@@ -257,7 +257,7 @@ void MatMulFunction(const Context& dev_ctx,
x_data, x_data,
y_data, y_data,
static_cast<T>(flag), static_cast<T>(flag),
Out->mutable_data<T>()); Out->data<T>());
} else { } else {
VLOG(3) << "MatMul's case 6"; VLOG(3) << "MatMul's case 6";
blas.BatchedGEMM(CblasTrans, blas.BatchedGEMM(CblasTrans,
...@@ -269,7 +269,7 @@ void MatMulFunction(const Context& dev_ctx, ...@@ -269,7 +269,7 @@ void MatMulFunction(const Context& dev_ctx,
x_data, x_data,
y_data, y_data,
static_cast<T>(flag), static_cast<T>(flag),
Out->mutable_data<T>(), Out->data<T>(),
batch_size, batch_size,
M * N, M * N,
0); 0);
...@@ -284,7 +284,7 @@ void MatMulFunction(const Context& dev_ctx, ...@@ -284,7 +284,7 @@ void MatMulFunction(const Context& dev_ctx,
x_data, x_data,
y_data, y_data,
static_cast<T>(flag), static_cast<T>(flag),
Out->mutable_data<T>()); Out->data<T>());
} }
return; return;
} }
...@@ -331,7 +331,7 @@ void MatMulFunction(const Context& dev_ctx, ...@@ -331,7 +331,7 @@ void MatMulFunction(const Context& dev_ctx,
out_broadcast_dims[ndim - 1] = N; out_broadcast_dims[ndim - 1] = N;
Out->ResizeAndAllocate(pten::framework::make_ddim(out_broadcast_dims)); Out->ResizeAndAllocate(pten::framework::make_ddim(out_broadcast_dims));
Out->mutable_data<T>(); Out->mutable_data<T>(dev_ctx.GetPlace());
const int batch_dim = ndim - 2; const int batch_dim = ndim - 2;
// broadcast message // broadcast message
...@@ -367,7 +367,7 @@ void MatMulFunction(const Context& dev_ctx, ...@@ -367,7 +367,7 @@ void MatMulFunction(const Context& dev_ctx,
x_data, x_data,
y_data, y_data,
static_cast<T>(flag), static_cast<T>(flag),
Out->mutable_data<T>()); Out->data<T>());
} else if (x_batch_size == 1) { } else if (x_batch_size == 1) {
if (M == 1 && trans_y) { if (M == 1 && trans_y) {
VLOG(3) << "MatMul's case 9"; VLOG(3) << "MatMul's case 9";
...@@ -378,7 +378,7 @@ void MatMulFunction(const Context& dev_ctx, ...@@ -378,7 +378,7 @@ void MatMulFunction(const Context& dev_ctx,
y_data, y_data,
x_data, x_data,
static_cast<T>(flag), static_cast<T>(flag),
Out->mutable_data<T>()); Out->data<T>());
} else { } else {
VLOG(3) << "MatMul's case 10"; VLOG(3) << "MatMul's case 10";
blas.BatchedGEMM(trans_x ? CblasTrans : CblasNoTrans, blas.BatchedGEMM(trans_x ? CblasTrans : CblasNoTrans,
...@@ -390,7 +390,7 @@ void MatMulFunction(const Context& dev_ctx, ...@@ -390,7 +390,7 @@ void MatMulFunction(const Context& dev_ctx,
x_data, x_data,
y_data, y_data,
static_cast<T>(flag), static_cast<T>(flag),
Out->mutable_data<T>(), Out->data<T>(),
out_batch_size, out_batch_size,
0, 0,
K * N); K * N);
...@@ -407,7 +407,7 @@ void MatMulFunction(const Context& dev_ctx, ...@@ -407,7 +407,7 @@ void MatMulFunction(const Context& dev_ctx,
x_data, x_data,
y_data, y_data,
static_cast<T>(flag), static_cast<T>(flag),
Out->mutable_data<T>()); Out->data<T>());
} else { } else {
VLOG(3) << "MatMul's case 12"; VLOG(3) << "MatMul's case 12";
blas.BatchedGEMM(CblasTrans, blas.BatchedGEMM(CblasTrans,
...@@ -419,7 +419,7 @@ void MatMulFunction(const Context& dev_ctx, ...@@ -419,7 +419,7 @@ void MatMulFunction(const Context& dev_ctx,
x_data, x_data,
y_data, y_data,
static_cast<T>(flag), static_cast<T>(flag),
Out->mutable_data<T>(), Out->data<T>(),
out_batch_size, out_batch_size,
M * K, M * K,
0); 0);
...@@ -435,7 +435,7 @@ void MatMulFunction(const Context& dev_ctx, ...@@ -435,7 +435,7 @@ void MatMulFunction(const Context& dev_ctx,
x_data, x_data,
y_data, y_data,
static_cast<T>(flag), static_cast<T>(flag),
Out->mutable_data<T>(), Out->data<T>(),
out_batch_size, out_batch_size,
M * K, M * K,
K * N); K * N);
...@@ -454,7 +454,7 @@ void MatMulFunction(const Context& dev_ctx, ...@@ -454,7 +454,7 @@ void MatMulFunction(const Context& dev_ctx,
x_ptr[i] = x_data + x_index * M * K; x_ptr[i] = x_data + x_index * M * K;
y_ptr[i] = y_data + y_index * K * N; y_ptr[i] = y_data + y_index * K * N;
out_ptr[i] = Out->mutable_data<T>() + i * M * N; out_ptr[i] = Out->data<T>() + i * M * N;
IndexIncreaseFromDims(batch_dim, out_broadcast_dims.data(), index.data()); IndexIncreaseFromDims(batch_dim, out_broadcast_dims.data(), index.data());
} }
VLOG(3) << "MatMul's case 14"; VLOG(3) << "MatMul's case 14";
......
...@@ -26,7 +26,7 @@ template <typename T, typename Context> ...@@ -26,7 +26,7 @@ template <typename T, typename Context>
void SignKernel(const Context& dev_ctx, void SignKernel(const Context& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
DenseTensor* out) { DenseTensor* out) {
out->mutable_data<T>(); out->mutable_data<T>(dev_ctx.GetPlace());
auto eigen_out = pten::EigenVector<T>::Flatten(*out); auto eigen_out = pten::EigenVector<T>::Flatten(*out);
auto eigen_x = pten::EigenVector<T>::Flatten(x); auto eigen_x = pten::EigenVector<T>::Flatten(x);
......
...@@ -27,12 +27,15 @@ void ReshapeKernel(const Context& dev_ctx, ...@@ -27,12 +27,15 @@ void ReshapeKernel(const Context& dev_ctx,
const ScalarArray& shape, const ScalarArray& shape,
DenseTensor* out) { DenseTensor* out) {
auto out_meta = InferMetaFromVecValue(x.meta(), shape.GetData()); auto out_meta = InferMetaFromVecValue(x.meta(), shape.GetData());
if (x.data() == out->data() && x.numel() == out->numel()) { if (x.initialized() && x.Holder() == out->Holder()) {
out->ResizeAndAllocate(out_meta.dims); out->ResizeAndAllocate(out_meta.dims);
return; return;
} }
out->Resize(x.dims());
out->mutable_data(x.place());
pten::Copy(dev_ctx, x, false, out); pten::Copy(dev_ctx, x, false, out);
out->ResizeAndAllocate(out_meta.dims); out->Resize(out_meta.dims);
out->ResetLoD(x.lod()); out->ResetLoD(x.lod());
} }
......
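
ReshapeKernel now detects in-place reshape by comparing allocations instead of data pointers, and otherwise allocates out on x's place before copying and resizing to the inferred dims. The helper below only illustrates that check; it is not added by this commit.

// True when both tensors view the same underlying pten::Allocation, even if
// their dims or offsets differ; an uninitialized x never matches.
bool SharesAllocation(const pten::DenseTensor& x, const pten::DenseTensor& out) {
  return x.initialized() && x.Holder() == out.Holder();
}
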
...@@ -30,7 +30,7 @@ void Copy(const Context& dev_ctx, ...@@ -30,7 +30,7 @@ void Copy(const Context& dev_ctx,
bool blocking, bool blocking,
DenseTensor* dst) { DenseTensor* dst) {
auto* src_ptr = src.data(); auto* src_ptr = src.data();
auto* dst_ptr = dst->mutable_data(); auto* dst_ptr = dst->mutable_data(dev_ctx.GetPlace());
const auto& src_place = src.place(); const auto& src_place = src.place();
const auto& dst_place = dst->place(); const auto& dst_place = dst->place();
......
...@@ -37,7 +37,8 @@ TEST(API, cast) { ...@@ -37,7 +37,8 @@ TEST(API, cast) {
pten::DenseTensorMeta(pten::DataType::FLOAT32, pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 4}), framework::make_ddim({3, 4}),
pten::DataLayout::NCHW)); pten::DataLayout::NCHW));
auto* dense_x_data = dense_x->mutable_data<float>(); auto* dense_x_data =
dense_x->mutable_data<float>(paddle::platform::CPUPlace());
for (int i = 0; i < dense_x->numel(); i++) { for (int i = 0; i < dense_x->numel(); i++) {
dense_x_data[i] = i; dense_x_data[i] = i;
......
...@@ -37,7 +37,8 @@ TEST(API, conj) { ...@@ -37,7 +37,8 @@ TEST(API, conj) {
pten::DenseTensorMeta(pten::DataType::COMPLEX64, pten::DenseTensorMeta(pten::DataType::COMPLEX64,
framework::make_ddim({3, 10}), framework::make_ddim({3, 10}),
pten::DataLayout::NCHW)); pten::DataLayout::NCHW));
auto* dense_x_data = dense_x->mutable_data<paddle::complex64>(); auto* dense_x_data =
dense_x->mutable_data<paddle::complex64>(paddle::platform::CPUPlace());
for (size_t i = 0; i < 3; ++i) { for (size_t i = 0; i < 3; ++i) {
for (size_t j = 0; j < 10; ++j) { for (size_t j = 0; j < 10; ++j) {
......
...@@ -37,14 +37,16 @@ TEST(API, dot) { ...@@ -37,14 +37,16 @@ TEST(API, dot) {
pten::DenseTensorMeta(pten::DataType::FLOAT32, pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 10}), framework::make_ddim({3, 10}),
pten::DataLayout::NCHW)); pten::DataLayout::NCHW));
auto* dense_x_data = dense_x->mutable_data<float>(); auto* dense_x_data =
dense_x->mutable_data<float>(paddle::platform::CPUPlace());
auto dense_y = std::make_shared<pten::DenseTensor>( auto dense_y = std::make_shared<pten::DenseTensor>(
alloc.get(), alloc.get(),
pten::DenseTensorMeta(pten::DataType::FLOAT32, pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 10}), framework::make_ddim({3, 10}),
pten::DataLayout::NCHW)); pten::DataLayout::NCHW));
auto* dense_y_data = dense_y->mutable_data<float>(); auto* dense_y_data =
dense_y->mutable_data<float>(paddle::platform::CPUPlace());
float sum[3] = {0.0, 0.0, 0.0}; float sum[3] = {0.0, 0.0, 0.0};
for (size_t i = 0; i < 3; ++i) { for (size_t i = 0; i < 3; ++i) {
......
...@@ -37,14 +37,16 @@ TEST(API, add) { ...@@ -37,14 +37,16 @@ TEST(API, add) {
pten::DenseTensorMeta(pten::DataType::FLOAT32, pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 10}), framework::make_ddim({3, 10}),
pten::DataLayout::NCHW)); pten::DataLayout::NCHW));
auto* dense_x_data = dense_x->mutable_data<float>(); auto* dense_x_data =
dense_x->mutable_data<float>(paddle::platform::CPUPlace());
auto dense_y = std::make_shared<pten::DenseTensor>( auto dense_y = std::make_shared<pten::DenseTensor>(
alloc.get(), alloc.get(),
pten::DenseTensorMeta(pten::DataType::FLOAT32, pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({10}), framework::make_ddim({10}),
pten::DataLayout::NCHW)); pten::DataLayout::NCHW));
auto* dense_y_data = dense_y->mutable_data<float>(); auto* dense_y_data =
dense_y->mutable_data<float>(paddle::platform::CPUPlace());
float sum[3][10] = {0.0}; float sum[3][10] = {0.0};
for (size_t i = 0; i < 3; ++i) { for (size_t i = 0; i < 3; ++i) {
...@@ -91,14 +93,16 @@ TEST(API, subtract) { ...@@ -91,14 +93,16 @@ TEST(API, subtract) {
pten::DenseTensorMeta(pten::DataType::FLOAT32, pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 10}), framework::make_ddim({3, 10}),
pten::DataLayout::NCHW)); pten::DataLayout::NCHW));
auto* dense_x_data = dense_x->mutable_data<float>(); auto* dense_x_data =
dense_x->mutable_data<float>(paddle::platform::CPUPlace());
auto dense_y = std::make_shared<pten::DenseTensor>( auto dense_y = std::make_shared<pten::DenseTensor>(
alloc.get(), alloc.get(),
pten::DenseTensorMeta(pten::DataType::FLOAT32, pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({10}), framework::make_ddim({10}),
pten::DataLayout::NCHW)); pten::DataLayout::NCHW));
auto* dense_y_data = dense_y->mutable_data<float>(); auto* dense_y_data =
dense_y->mutable_data<float>(paddle::platform::CPUPlace());
float sub[3][10] = {0.0}; float sub[3][10] = {0.0};
for (size_t i = 0; i < 3; ++i) { for (size_t i = 0; i < 3; ++i) {
...@@ -145,14 +149,16 @@ TEST(API, divide) { ...@@ -145,14 +149,16 @@ TEST(API, divide) {
pten::DenseTensorMeta(pten::DataType::FLOAT32, pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 10}), framework::make_ddim({3, 10}),
pten::DataLayout::NCHW)); pten::DataLayout::NCHW));
auto* dense_x_data = dense_x->mutable_data<float>(); auto* dense_x_data =
dense_x->mutable_data<float>(paddle::platform::CPUPlace());
auto dense_y = std::make_shared<pten::DenseTensor>( auto dense_y = std::make_shared<pten::DenseTensor>(
alloc.get(), alloc.get(),
pten::DenseTensorMeta(pten::DataType::FLOAT32, pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({10}), framework::make_ddim({10}),
pten::DataLayout::NCHW)); pten::DataLayout::NCHW));
auto* dense_y_data = dense_y->mutable_data<float>(); auto* dense_y_data =
dense_y->mutable_data<float>(paddle::platform::CPUPlace());
float div[3][10] = {0.0}; float div[3][10] = {0.0};
for (size_t i = 0; i < 3; ++i) { for (size_t i = 0; i < 3; ++i) {
...@@ -199,14 +205,16 @@ TEST(API, multiply) { ...@@ -199,14 +205,16 @@ TEST(API, multiply) {
pten::DenseTensorMeta(pten::DataType::FLOAT32, pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 10}), framework::make_ddim({3, 10}),
pten::DataLayout::NCHW)); pten::DataLayout::NCHW));
auto* dense_x_data = dense_x->mutable_data<float>(); auto* dense_x_data =
dense_x->mutable_data<float>(paddle::platform::CPUPlace());
auto dense_y = std::make_shared<pten::DenseTensor>( auto dense_y = std::make_shared<pten::DenseTensor>(
alloc.get(), alloc.get(),
pten::DenseTensorMeta(pten::DataType::FLOAT32, pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({10}), framework::make_ddim({10}),
pten::DataLayout::NCHW)); pten::DataLayout::NCHW));
auto* dense_y_data = dense_y->mutable_data<float>(); auto* dense_y_data =
dense_y->mutable_data<float>(paddle::platform::CPUPlace());
float mul[3][10] = {0.0}; float mul[3][10] = {0.0};
for (size_t i = 0; i < 3; ++i) { for (size_t i = 0; i < 3; ++i) {
......
...@@ -47,10 +47,8 @@ TEST(API, empty_like) { ...@@ -47,10 +47,8 @@ TEST(API, empty_like) {
ASSERT_EQ(out.dims().size(), 2); ASSERT_EQ(out.dims().size(), 2);
ASSERT_EQ(out.dims()[0], 3); ASSERT_EQ(out.dims()[0], 3);
ASSERT_EQ(out.numel(), 6); ASSERT_EQ(out.numel(), 6);
ASSERT_EQ(out.is_cpu(), true);
ASSERT_EQ(out.type(), pten::DataType::FLOAT32); ASSERT_EQ(out.type(), pten::DataType::FLOAT32);
ASSERT_EQ(out.layout(), pten::DataLayout::NCHW); ASSERT_EQ(out.layout(), pten::DataLayout::NCHW);
ASSERT_EQ(out.initialized(), true);
} }
TEST(API, empty1) { TEST(API, empty1) {
...@@ -63,7 +61,8 @@ TEST(API, empty1) { ...@@ -63,7 +61,8 @@ TEST(API, empty1) {
pten::DenseTensorMeta(pten::DataType::INT64, pten::DenseTensorMeta(pten::DataType::INT64,
framework::make_ddim({2}), framework::make_ddim({2}),
pten::DataLayout::NCHW)); pten::DataLayout::NCHW));
auto* shape_data = dense_shape->mutable_data<int64_t>(); auto* shape_data =
dense_shape->mutable_data<int64_t>(paddle::platform::CPUPlace());
shape_data[0] = 2; shape_data[0] = 2;
shape_data[1] = 3; shape_data[1] = 3;
...@@ -76,10 +75,8 @@ TEST(API, empty1) { ...@@ -76,10 +75,8 @@ TEST(API, empty1) {
ASSERT_EQ(out.shape().size(), 2UL); ASSERT_EQ(out.shape().size(), 2UL);
ASSERT_EQ(out.shape()[0], 2); ASSERT_EQ(out.shape()[0], 2);
ASSERT_EQ(out.numel(), 6); ASSERT_EQ(out.numel(), 6);
ASSERT_EQ(out.is_cpu(), true);
ASSERT_EQ(out.type(), pten::DataType::FLOAT32); ASSERT_EQ(out.type(), pten::DataType::FLOAT32);
ASSERT_EQ(out.layout(), pten::DataLayout::NCHW); ASSERT_EQ(out.layout(), pten::DataLayout::NCHW);
ASSERT_EQ(out.initialized(), true);
} }
TEST(API, empty2) { TEST(API, empty2) {
...@@ -91,7 +88,7 @@ TEST(API, empty2) { ...@@ -91,7 +88,7 @@ TEST(API, empty2) {
pten::DenseTensorMeta(pten::DataType::INT32, pten::DenseTensorMeta(pten::DataType::INT32,
framework::make_ddim({1}), framework::make_ddim({1}),
pten::DataLayout::NCHW)); pten::DataLayout::NCHW));
dense_scalar->mutable_data<int32_t>()[0] = 2; dense_scalar->mutable_data<int32_t>(paddle::platform::CPUPlace())[0] = 2;
paddle::experimental::Tensor shape_scalar1(dense_scalar); paddle::experimental::Tensor shape_scalar1(dense_scalar);
paddle::experimental::Tensor shape_scalar2(dense_scalar); paddle::experimental::Tensor shape_scalar2(dense_scalar);
...@@ -103,10 +100,8 @@ TEST(API, empty2) { ...@@ -103,10 +100,8 @@ TEST(API, empty2) {
ASSERT_EQ(out.shape().size(), 2UL); ASSERT_EQ(out.shape().size(), 2UL);
ASSERT_EQ(out.shape()[0], 2); ASSERT_EQ(out.shape()[0], 2);
ASSERT_EQ(out.numel(), 4); ASSERT_EQ(out.numel(), 4);
ASSERT_EQ(out.is_cpu(), true);
ASSERT_EQ(out.type(), pten::DataType::FLOAT32); ASSERT_EQ(out.type(), pten::DataType::FLOAT32);
ASSERT_EQ(out.layout(), pten::DataLayout::NCHW); ASSERT_EQ(out.layout(), pten::DataLayout::NCHW);
ASSERT_EQ(out.initialized(), true);
} }
TEST(API, empty3) { TEST(API, empty3) {
...@@ -117,10 +112,8 @@ TEST(API, empty3) { ...@@ -117,10 +112,8 @@ TEST(API, empty3) {
ASSERT_EQ(out.shape().size(), 2UL); ASSERT_EQ(out.shape().size(), 2UL);
ASSERT_EQ(out.shape()[0], 2); ASSERT_EQ(out.shape()[0], 2);
ASSERT_EQ(out.numel(), 6); ASSERT_EQ(out.numel(), 6);
ASSERT_EQ(out.is_cpu(), true);
ASSERT_EQ(out.type(), pten::DataType::INT32); ASSERT_EQ(out.type(), pten::DataType::INT32);
ASSERT_EQ(out.layout(), pten::DataLayout::NCHW); ASSERT_EQ(out.layout(), pten::DataLayout::NCHW);
ASSERT_EQ(out.initialized(), true);
} }
} // namespace tests } // namespace tests
......
...@@ -37,7 +37,8 @@ TEST(API, full_like) { ...@@ -37,7 +37,8 @@ TEST(API, full_like) {
pten::DenseTensorMeta(pten::DataType::FLOAT32, pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 2}), framework::make_ddim({3, 2}),
pten::DataLayout::NCHW)); pten::DataLayout::NCHW));
auto* dense_x_data = dense_x->mutable_data<float>(); auto* dense_x_data =
dense_x->mutable_data<float>(paddle::platform::CPUPlace());
dense_x_data[0] = 0; dense_x_data[0] = 0;
float val = 1.0; float val = 1.0;
...@@ -72,7 +73,8 @@ TEST(API, zeros_like) { ...@@ -72,7 +73,8 @@ TEST(API, zeros_like) {
pten::DenseTensorMeta(pten::DataType::FLOAT32, pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 2}), framework::make_ddim({3, 2}),
pten::DataLayout::NCHW)); pten::DataLayout::NCHW));
auto* dense_x_data = dense_x->mutable_data<float>(); auto* dense_x_data =
dense_x->mutable_data<float>(paddle::platform::CPUPlace());
dense_x_data[0] = 1; dense_x_data[0] = 1;
paddle::experimental::Tensor x(dense_x); paddle::experimental::Tensor x(dense_x);
...@@ -105,7 +107,8 @@ TEST(API, ones_like) { ...@@ -105,7 +107,8 @@ TEST(API, ones_like) {
pten::DenseTensorMeta(pten::DataType::INT32, pten::DenseTensorMeta(pten::DataType::INT32,
framework::make_ddim({3, 2}), framework::make_ddim({3, 2}),
pten::DataLayout::NCHW)); pten::DataLayout::NCHW));
auto* dense_x_data = dense_x->mutable_data<int32_t>(); auto* dense_x_data =
dense_x->mutable_data<int32_t>(paddle::platform::CPUPlace());
dense_x_data[0] = 0; dense_x_data[0] = 0;
paddle::experimental::Tensor x(dense_x); paddle::experimental::Tensor x(dense_x);
...@@ -139,7 +142,8 @@ TEST(API, full1) { ...@@ -139,7 +142,8 @@ TEST(API, full1) {
pten::DenseTensorMeta(pten::DataType::INT64, pten::DenseTensorMeta(pten::DataType::INT64,
framework::make_ddim({2}), framework::make_ddim({2}),
pten::DataLayout::NCHW)); pten::DataLayout::NCHW));
auto* shape_data = dense_shape->mutable_data<int64_t>(); auto* shape_data =
dense_shape->mutable_data<int64_t>(paddle::platform::CPUPlace());
shape_data[0] = 2; shape_data[0] = 2;
shape_data[1] = 3; shape_data[1] = 3;
...@@ -148,7 +152,7 @@ TEST(API, full1) { ...@@ -148,7 +152,7 @@ TEST(API, full1) {
pten::DenseTensorMeta(pten::DataType::FLOAT32, pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({1}), framework::make_ddim({1}),
pten::DataLayout::NCHW)); pten::DataLayout::NCHW));
dense_scalar->mutable_data<float>()[0] = 1.0; dense_scalar->mutable_data<float>(paddle::platform::CPUPlace())[0] = 1.0;
paddle::experimental::Tensor value(dense_scalar); paddle::experimental::Tensor value(dense_scalar);
...@@ -185,7 +189,7 @@ TEST(API, full2) { ...@@ -185,7 +189,7 @@ TEST(API, full2) {
pten::DenseTensorMeta(pten::DataType::INT32, pten::DenseTensorMeta(pten::DataType::INT32,
framework::make_ddim({1}), framework::make_ddim({1}),
pten::DataLayout::NCHW)); pten::DataLayout::NCHW));
dense_scalar->mutable_data<int32_t>()[0] = 2; dense_scalar->mutable_data<int>(paddle::platform::CPUPlace())[0] = 2;
paddle::experimental::Tensor shape_scalar1(dense_scalar); paddle::experimental::Tensor shape_scalar1(dense_scalar);
paddle::experimental::Tensor shape_scalar2(dense_scalar); paddle::experimental::Tensor shape_scalar2(dense_scalar);
......
...@@ -37,7 +37,8 @@ TEST(API, flatten) { ...@@ -37,7 +37,8 @@ TEST(API, flatten) {
pten::DenseTensorMeta(pten::DataType::FLOAT32, pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 2, 2, 3}), framework::make_ddim({3, 2, 2, 3}),
pten::DataLayout::NCHW)); pten::DataLayout::NCHW));
auto* dense_x_data = dense_x->mutable_data<float>(); auto* dense_x_data =
dense_x->mutable_data<float>(paddle::platform::CPUPlace());
for (int i = 0; i < dense_x->numel(); i++) { for (int i = 0; i < dense_x->numel(); i++) {
dense_x_data[i] = i; dense_x_data[i] = i;
......
...@@ -38,14 +38,16 @@ TEST(API, matmul_cpu) { ...@@ -38,14 +38,16 @@ TEST(API, matmul_cpu) {
framework::make_ddim({3, 3}), framework::make_ddim({3, 3}),
pten::DataLayout::NCHW)); pten::DataLayout::NCHW));
auto* dense_x_data = dense_x->mutable_data<float>(); auto* dense_x_data =
dense_x->mutable_data<float>(paddle::platform::CPUPlace());
auto dense_y = std::make_shared<pten::DenseTensor>( auto dense_y = std::make_shared<pten::DenseTensor>(
alloc.get(), alloc.get(),
pten::DenseTensorMeta(pten::DataType::FLOAT32, pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 3}), framework::make_ddim({3, 3}),
pten::DataLayout::NCHW)); pten::DataLayout::NCHW));
auto* dense_y_data = dense_y->mutable_data<float>(); auto* dense_y_data =
dense_y->mutable_data<float>(paddle::platform::CPUPlace());
for (size_t i = 0; i < 9; ++i) { for (size_t i = 0; i < 9; ++i) {
dense_x_data[i] = 1.0; dense_x_data[i] = 1.0;
...@@ -87,14 +89,14 @@ TEST(API, matmul_cuda) { ...@@ -87,14 +89,14 @@ TEST(API, matmul_cuda) {
framework::make_ddim({3, 3}), framework::make_ddim({3, 3}),
pten::DataLayout::NCHW)); pten::DataLayout::NCHW));
auto* ref_x_data = ref_x->mutable_data<float>(); auto* ref_x_data = ref_x->mutable_data<float>(paddle::platform::CPUPlace());
auto ref_y = std::make_shared<pten::DenseTensor>( auto ref_y = std::make_shared<pten::DenseTensor>(
alloc_cpu.get(), alloc_cpu.get(),
pten::DenseTensorMeta(pten::DataType::FLOAT32, pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 3}), framework::make_ddim({3, 3}),
pten::DataLayout::NCHW)); pten::DataLayout::NCHW));
auto* ref_y_data = ref_y->mutable_data<float>(); auto* ref_y_data = ref_y->mutable_data<float>(paddle::platform::CPUPlace());
for (size_t i = 0; i < 9; ++i) { for (size_t i = 0; i < 9; ++i) {
ref_x_data[i] = 1.0; ref_x_data[i] = 1.0;
......
...@@ -37,7 +37,8 @@ TEST(API, mean) { ...@@ -37,7 +37,8 @@ TEST(API, mean) {
pten::DenseTensorMeta(pten::DataType::FLOAT32, pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 4}), framework::make_ddim({3, 4}),
pten::DataLayout::NCHW)); pten::DataLayout::NCHW));
auto* dense_x_data = dense_x->mutable_data<float>(); auto* dense_x_data =
dense_x->mutable_data<float>(paddle::platform::CPUPlace());
float sum = 0.0; float sum = 0.0;
for (size_t i = 0; i < 12; ++i) { for (size_t i = 0; i < 12; ++i) {
......
...@@ -58,11 +58,11 @@ void TestAPIPlace() { ...@@ -58,11 +58,11 @@ void TestAPIPlace() {
std::vector<int64_t> tensor_shape = {5, 5}; std::vector<int64_t> tensor_shape = {5, 5};
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
auto t1 = experimental::Tensor(paddle::PlaceType::kGPU, tensor_shape); auto t1 = experimental::Tensor(paddle::PlaceType::kGPU, tensor_shape);
t1.mutable_data<float>(); t1.mutable_data<float>(paddle::PlaceType::kGPU);
CHECK((paddle::PlaceType::kGPU == t1.place())); CHECK((paddle::PlaceType::kGPU == t1.place()));
#endif #endif
auto t2 = experimental::Tensor(paddle::PlaceType::kCPU, tensor_shape); auto t2 = experimental::Tensor(paddle::PlaceType::kCPU, tensor_shape);
t2.mutable_data<float>(); t2.mutable_data<float>(paddle::PlaceType::kCPU);
CHECK((paddle::PlaceType::kCPU == t2.place())); CHECK((paddle::PlaceType::kCPU == t2.place()));
} }
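The external experimental::Tensor tests use the PlaceType-based overload instead. A minimal sketch, assuming the same experimental Tensor API exercised by TestAPIPlace above:

// Sketch only; mirrors TestAPIPlace.
std::vector<int64_t> tensor_shape = {5, 5};
auto t = paddle::experimental::Tensor(paddle::PlaceType::kCPU, tensor_shape);
t.mutable_data<float>(paddle::PlaceType::kCPU);  // place type passed explicitly
CHECK(paddle::PlaceType::kCPU == t.place());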
...@@ -80,29 +80,30 @@ void TestAPISlice() { ...@@ -80,29 +80,30 @@ void TestAPISlice() {
std::vector<int64_t> tensor_shape_sub2 = {1, 5, 5}; std::vector<int64_t> tensor_shape_sub2 = {1, 5, 5};
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
auto t1 = experimental::Tensor(paddle::PlaceType::kGPU, tensor_shape_origin1); auto t1 = experimental::Tensor(paddle::PlaceType::kGPU, tensor_shape_origin1);
t1.mutable_data<float>(); t1.mutable_data<float>(paddle::PlaceType::kGPU);
CHECK(t1.slice(0, 5).shape() == tensor_shape_origin1); CHECK(t1.slice(0, 5).shape() == tensor_shape_origin1);
CHECK(t1.slice(0, 3).shape() == tensor_shape_sub1); CHECK(t1.slice(0, 3).shape() == tensor_shape_sub1);
auto t2 = experimental::Tensor(paddle::PlaceType::kGPU, tensor_shape_origin2); auto t2 = experimental::Tensor(paddle::PlaceType::kGPU, tensor_shape_origin2);
t2.mutable_data<float>(); t2.mutable_data<float>(paddle::PlaceType::kGPU);
CHECK(t2.slice(4, 5).shape() == tensor_shape_sub2); CHECK(t2.slice(4, 5).shape() == tensor_shape_sub2);
#endif #endif
auto t3 = experimental::Tensor(paddle::PlaceType::kCPU, tensor_shape_origin1); auto t3 = experimental::Tensor(paddle::PlaceType::kCPU, tensor_shape_origin1);
t3.mutable_data<float>(); t3.mutable_data<float>(paddle::PlaceType::kCPU);
CHECK(t3.slice(0, 5).shape() == tensor_shape_origin1); CHECK(t3.slice(0, 5).shape() == tensor_shape_origin1);
CHECK(t3.slice(0, 3).shape() == tensor_shape_sub1); CHECK(t3.slice(0, 3).shape() == tensor_shape_sub1);
auto t4 = experimental::Tensor(paddle::PlaceType::kCPU, tensor_shape_origin2); auto t4 = experimental::Tensor(paddle::PlaceType::kCPU, tensor_shape_origin2);
t4.mutable_data<float>(); t4.mutable_data<float>(paddle::PlaceType::kCPU);
CHECK(t4.slice(4, 5).shape() == tensor_shape_sub2); CHECK(t4.slice(4, 5).shape() == tensor_shape_sub2);
// Test writing function for sliced tensor // Test writing function for sliced tensor
auto t = InitCPUTensorForTest<float>(); auto t = InitCPUTensorForTest<float>();
auto t_sliced = t.slice(0, 1); auto t_sliced = t.slice(0, 1);
auto* t_sliced_data_ptr = t_sliced.mutable_data<float>(); auto* t_sliced_data_ptr =
t_sliced.mutable_data<float>(paddle::PlaceType::kCPU);
for (int64_t i = 0; i < t_sliced.size(); i++) { for (int64_t i = 0; i < t_sliced.size(); i++) {
t_sliced_data_ptr[i] += static_cast<float>(5); t_sliced_data_ptr[i] += static_cast<float>(5);
} }
auto* t_data_ptr = t.mutable_data<float>(); auto* t_data_ptr = t.mutable_data<float>(paddle::PlaceType::kCPU);
for (int64_t i = 0; i < t_sliced.size(); i++) { for (int64_t i = 0; i < t_sliced.size(); i++) {
CHECK_EQ(t_data_ptr[i], static_cast<float>(10)); CHECK_EQ(t_data_ptr[i], static_cast<float>(10));
} }
...@@ -112,7 +113,7 @@ template <typename T> ...@@ -112,7 +113,7 @@ template <typename T>
paddle::DataType TestDtype() { paddle::DataType TestDtype() {
std::vector<int64_t> tensor_shape = {5, 5}; std::vector<int64_t> tensor_shape = {5, 5};
auto t1 = experimental::Tensor(paddle::PlaceType::kCPU, tensor_shape); auto t1 = experimental::Tensor(paddle::PlaceType::kCPU, tensor_shape);
t1.template mutable_data<T>(); t1.template mutable_data<T>(paddle::PlaceType::kCPU);
return t1.type(); return t1.type();
} }
...@@ -120,13 +121,13 @@ template <typename T> ...@@ -120,13 +121,13 @@ template <typename T>
void TestCast(paddle::DataType data_type) { void TestCast(paddle::DataType data_type) {
std::vector<int64_t> tensor_shape = {5, 5}; std::vector<int64_t> tensor_shape = {5, 5};
auto t1 = experimental::Tensor(paddle::PlaceType::kCPU, tensor_shape); auto t1 = experimental::Tensor(paddle::PlaceType::kCPU, tensor_shape);
t1.template mutable_data<T>(); t1.template mutable_data<T>(paddle::PlaceType::kCPU);
auto t2 = t1.cast(data_type); auto t2 = t1.cast(data_type);
CHECK(t2.type() == data_type); CHECK(t2.type() == data_type);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
auto tg1 = experimental::Tensor(paddle::PlaceType::kGPU); auto tg1 = experimental::Tensor(paddle::PlaceType::kGPU);
tg1.reshape(tensor_shape); tg1.reshape(tensor_shape);
tg1.template mutable_data<T>(); tg1.template mutable_data<T>(paddle::PlaceType::kGPU);
auto tg2 = tg1.cast(data_type); auto tg2 = tg1.cast(data_type);
CHECK(tg2.type() == data_type); CHECK(tg2.type() == data_type);
#endif #endif
...@@ -194,7 +195,7 @@ void GroupTestDtype() { ...@@ -194,7 +195,7 @@ void GroupTestDtype() {
void TestInitilized() { void TestInitilized() {
experimental::Tensor test_tensor(paddle::PlaceType::kCPU, {1, 1}); experimental::Tensor test_tensor(paddle::PlaceType::kCPU, {1, 1});
CHECK(test_tensor.is_initialized() == false); CHECK(test_tensor.is_initialized() == false);
test_tensor.mutable_data<float>(); test_tensor.mutable_data<float>(paddle::PlaceType::kCPU);
CHECK(test_tensor.is_initialized() == true); CHECK(test_tensor.is_initialized() == true);
float* tensor_data = test_tensor.mutable_data<float>(); float* tensor_data = test_tensor.mutable_data<float>();
for (int i = 0; i < test_tensor.size(); i++) { for (int i = 0; i < test_tensor.size(); i++) {
......
...@@ -37,7 +37,8 @@ TEST(API, reshape) { ...@@ -37,7 +37,8 @@ TEST(API, reshape) {
pten::DenseTensorMeta(pten::DataType::FLOAT32, pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 2, 2, 3}), framework::make_ddim({3, 2, 2, 3}),
pten::DataLayout::NCHW)); pten::DataLayout::NCHW));
auto* dense_x_data = dense_x->mutable_data<float>(); auto* dense_x_data =
dense_x->mutable_data<float>(paddle::platform::CPUPlace());
for (int i = 0; i < dense_x->numel(); i++) { for (int i = 0; i < dense_x->numel(); i++) {
dense_x_data[i] = i; dense_x_data[i] = i;
...@@ -69,14 +70,15 @@ TEST(API, reshape) { ...@@ -69,14 +70,15 @@ TEST(API, reshape) {
TEST(Tensor, old_reshape) { TEST(Tensor, old_reshape) {
paddle::experimental::Tensor x(paddle::PlaceType::kCPU); paddle::experimental::Tensor x(paddle::PlaceType::kCPU);
x.reshape({3, 4}); x.reshape({3, 4});
x.mutable_data<float>(paddle::PlaceType::kCPU);
ASSERT_EQ(x.shape()[0], 3); ASSERT_EQ(x.shape()[0], 3);
ASSERT_EQ(x.shape()[1], 4); ASSERT_EQ(x.shape()[1], 4);
ASSERT_EQ(x.numel(), 12); ASSERT_EQ(x.numel(), 12);
ASSERT_EQ(x.is_cpu(), true); ASSERT_EQ(x.is_cpu(), true);
ASSERT_EQ(x.type(), pten::DataType::UNDEFINED); ASSERT_EQ(x.type(), pten::DataType::FLOAT32);
ASSERT_EQ(x.layout(), pten::DataLayout::NCHW); ASSERT_EQ(x.layout(), pten::DataLayout::NCHW);
ASSERT_EQ(x.initialized(), false); ASSERT_EQ(x.initialized(), true);
} }
} // namespace tests } // namespace tests
......
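The old_reshape hunk above also records the behavioural side of the change: the test now calls mutable_data before checking metadata, after which the dtype is FLOAT32 and initialized() returns true. A condensed sketch of the updated expectations, assuming it runs inside a gtest TEST body as in the original file:

// Sketch only; condensed from TEST(Tensor, old_reshape) above.
paddle::experimental::Tensor x(paddle::PlaceType::kCPU);
x.reshape({3, 4});                               // sets the shape
x.mutable_data<float>(paddle::PlaceType::kCPU);  // allocates and fixes the dtype
ASSERT_EQ(x.type(), pten::DataType::FLOAT32);
ASSERT_EQ(x.initialized(), true);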
...@@ -37,7 +37,8 @@ TEST(API, sum) { ...@@ -37,7 +37,8 @@ TEST(API, sum) {
pten::DenseTensorMeta(pten::DataType::FLOAT32, pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 4}), framework::make_ddim({3, 4}),
pten::DataLayout::NCHW)); pten::DataLayout::NCHW));
auto* dense_x_data = dense_x->mutable_data<float>(); auto* dense_x_data =
dense_x->mutable_data<float>(paddle::platform::CPUPlace());
float sum = 0.0; float sum = 0.0;
for (size_t i = 0; i < 12; ++i) { for (size_t i = 0; i < 12; ++i) {
......
...@@ -35,7 +35,8 @@ paddle::experimental::Tensor CreateInputTensor() { ...@@ -35,7 +35,8 @@ paddle::experimental::Tensor CreateInputTensor() {
pten::DenseTensorMeta(pten::DataType::INT64, pten::DenseTensorMeta(pten::DataType::INT64,
framework::make_ddim({3, 4}), framework::make_ddim({3, 4}),
pten::DataLayout::NCHW)); pten::DataLayout::NCHW));
auto* dense_x_data = dense_x->mutable_data<int64_t>(); auto* dense_x_data =
dense_x->mutable_data<int64_t>(paddle::platform::CPUPlace());
for (int64_t i = 0; i < 12; ++i) { for (int64_t i = 0; i < 12; ++i) {
dense_x_data[i] = i; dense_x_data[i] = i;
......
...@@ -112,8 +112,6 @@ TEST(dense_tensor, resize) { ...@@ -112,8 +112,6 @@ TEST(dense_tensor, resize) {
CHECK_EQ(tensor_0.capacity(), 2u); CHECK_EQ(tensor_0.capacity(), 2u);
tensor_0.ResizeAndAllocate({1, 2, 3}); tensor_0.ResizeAndAllocate({1, 2, 3});
CHECK_EQ(tensor_0.capacity(), 6u); CHECK_EQ(tensor_0.capacity(), 6u);
tensor_0.mutable_data<int8_t>();
CHECK_EQ(tensor_0.capacity(), 6u);
} }
TEST(dense_tensor, shallow_copy) { TEST(dense_tensor, shallow_copy) {
......
...@@ -38,7 +38,8 @@ TEST(DEV_API, cast) { ...@@ -38,7 +38,8 @@ TEST(DEV_API, cast) {
pten::DenseTensorMeta(pten::DataType::FLOAT32, pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 4}), framework::make_ddim({3, 4}),
pten::DataLayout::NCHW)); pten::DataLayout::NCHW));
auto* dense_x_data = dense_x.mutable_data<float>(); auto* dense_x_data =
dense_x.mutable_data<float>(paddle::platform::CPUPlace());
float sum = 0.0; float sum = 0.0;
for (size_t i = 0; i < 12; ++i) { for (size_t i = 0; i < 12; ++i) {
......
...@@ -37,7 +37,8 @@ TEST(DEV_API, conj) { ...@@ -37,7 +37,8 @@ TEST(DEV_API, conj) {
framework::make_ddim({3, 4}), framework::make_ddim({3, 4}),
pten::DataLayout::NCHW)); pten::DataLayout::NCHW));
auto* dense_x_data = dense_x.mutable_data<paddle::complex64>(); auto* dense_x_data =
dense_x.mutable_data<paddle::complex64>(paddle::platform::CPUPlace());
for (size_t i = 0; i < 12; ++i) { for (size_t i = 0; i < 12; ++i) {
dense_x_data[i] = paddle::complex64(i * 1.0, i * 1.0); dense_x_data[i] = paddle::complex64(i * 1.0, i * 1.0);
} }
......
...@@ -39,7 +39,8 @@ TEST(DEV_API, copy) { ...@@ -39,7 +39,8 @@ TEST(DEV_API, copy) {
pten::DenseTensorMeta(pten::DataType::FLOAT32, pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({2, 3}), framework::make_ddim({2, 3}),
pten::DataLayout::NCHW)); pten::DataLayout::NCHW));
auto* dense_x_data = dense_src->mutable_data<float>(); auto* dense_x_data =
dense_src->mutable_data<float>(paddle::platform::CPUPlace());
auto dense_dst = std::make_shared<pten::DenseTensor>( auto dense_dst = std::make_shared<pten::DenseTensor>(
alloc.get(), alloc.get(),
......
...@@ -52,7 +52,8 @@ TEST(DEV_API, empty_like) { ...@@ -52,7 +52,8 @@ TEST(DEV_API, empty_like) {
pten::DenseTensorMeta(pten::DataType::FLOAT32, pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 2}), framework::make_ddim({3, 2}),
pten::DataLayout::NCHW)); pten::DataLayout::NCHW));
auto* dense_x_data = dense_x.mutable_data<float>(); auto* dense_x_data =
dense_x.mutable_data<float>(paddle::platform::CPUPlace());
dense_x_data[0] = 0; dense_x_data[0] = 0;
// 2. test API // 2. test API
...@@ -96,7 +97,8 @@ TEST(DEV_API, full_like) { ...@@ -96,7 +97,8 @@ TEST(DEV_API, full_like) {
pten::DenseTensorMeta(pten::DataType::FLOAT32, pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 2}), framework::make_ddim({3, 2}),
pten::DataLayout::NCHW)); pten::DataLayout::NCHW));
auto* dense_x_data = dense_x.mutable_data<float>(); auto* dense_x_data =
dense_x.mutable_data<float>(paddle::platform::CPUPlace());
dense_x_data[0] = 0; dense_x_data[0] = 0;
float val = 1.0; float val = 1.0;
......
...@@ -36,13 +36,15 @@ TEST(DEV_API, dot) { ...@@ -36,13 +36,15 @@ TEST(DEV_API, dot) {
pten::DenseTensorMeta(pten::DataType::FLOAT32, pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 10}), framework::make_ddim({3, 10}),
pten::DataLayout::NCHW)); pten::DataLayout::NCHW));
auto* dense_x_data = dense_x.mutable_data<float>(); auto* dense_x_data =
dense_x.mutable_data<float>(paddle::platform::CPUPlace());
pten::DenseTensor dense_y(alloc.get(), pten::DenseTensor dense_y(alloc.get(),
pten::DenseTensorMeta(pten::DataType::FLOAT32, pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 10}), framework::make_ddim({3, 10}),
pten::DataLayout::NCHW)); pten::DataLayout::NCHW));
auto* dense_y_data = dense_y.mutable_data<float>(); auto* dense_y_data =
dense_y.mutable_data<float>(paddle::platform::CPUPlace());
float sum[3] = {0.0, 0.0, 0.0}; float sum[3] = {0.0, 0.0, 0.0};
for (size_t i = 0; i < 3; ++i) { for (size_t i = 0; i < 3; ++i) {
......
...@@ -36,13 +36,15 @@ TEST(DEV_API, add) { ...@@ -36,13 +36,15 @@ TEST(DEV_API, add) {
pten::DenseTensorMeta(pten::DataType::FLOAT32, pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 10}), framework::make_ddim({3, 10}),
pten::DataLayout::NCHW)); pten::DataLayout::NCHW));
auto* dense_x_data = dense_x.mutable_data<float>(); auto* dense_x_data =
dense_x.mutable_data<float>(paddle::platform::CPUPlace());
pten::DenseTensor dense_y(alloc.get(), pten::DenseTensor dense_y(alloc.get(),
pten::DenseTensorMeta(pten::DataType::FLOAT32, pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({10}), framework::make_ddim({10}),
pten::DataLayout::NCHW)); pten::DataLayout::NCHW));
auto* dense_y_data = dense_y.mutable_data<float>(); auto* dense_y_data =
dense_y.mutable_data<float>(paddle::platform::CPUPlace());
float sum[3][10] = {0.0}; float sum[3][10] = {0.0};
for (size_t i = 0; i < 3; ++i) { for (size_t i = 0; i < 3; ++i) {
...@@ -82,13 +84,15 @@ TEST(DEV_API, subtract) { ...@@ -82,13 +84,15 @@ TEST(DEV_API, subtract) {
pten::DenseTensorMeta(pten::DataType::FLOAT32, pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 10}), framework::make_ddim({3, 10}),
pten::DataLayout::NCHW)); pten::DataLayout::NCHW));
auto* dense_x_data = dense_x.mutable_data<float>(); auto* dense_x_data =
dense_x.mutable_data<float>(paddle::platform::CPUPlace());
pten::DenseTensor dense_y(alloc.get(), pten::DenseTensor dense_y(alloc.get(),
pten::DenseTensorMeta(pten::DataType::FLOAT32, pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({10}), framework::make_ddim({10}),
pten::DataLayout::NCHW)); pten::DataLayout::NCHW));
auto* dense_y_data = dense_y.mutable_data<float>(); auto* dense_y_data =
dense_y.mutable_data<float>(paddle::platform::CPUPlace());
float sub[3][10] = {0.0}; float sub[3][10] = {0.0};
for (size_t i = 0; i < 3; ++i) { for (size_t i = 0; i < 3; ++i) {
...@@ -128,13 +132,15 @@ TEST(DEV_API, divide) { ...@@ -128,13 +132,15 @@ TEST(DEV_API, divide) {
pten::DenseTensorMeta(pten::DataType::FLOAT32, pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 10}), framework::make_ddim({3, 10}),
pten::DataLayout::NCHW)); pten::DataLayout::NCHW));
auto* dense_x_data = dense_x.mutable_data<float>(); auto* dense_x_data =
dense_x.mutable_data<float>(paddle::platform::CPUPlace());
pten::DenseTensor dense_y(alloc.get(), pten::DenseTensor dense_y(alloc.get(),
pten::DenseTensorMeta(pten::DataType::FLOAT32, pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({10}), framework::make_ddim({10}),
pten::DataLayout::NCHW)); pten::DataLayout::NCHW));
auto* dense_y_data = dense_y.mutable_data<float>(); auto* dense_y_data =
dense_y.mutable_data<float>(paddle::platform::CPUPlace());
float div[3][10] = {0.0}; float div[3][10] = {0.0};
for (size_t i = 0; i < 3; ++i) { for (size_t i = 0; i < 3; ++i) {
...@@ -174,13 +180,15 @@ TEST(DEV_API, multiply) { ...@@ -174,13 +180,15 @@ TEST(DEV_API, multiply) {
pten::DenseTensorMeta(pten::DataType::FLOAT32, pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 10}), framework::make_ddim({3, 10}),
pten::DataLayout::NCHW)); pten::DataLayout::NCHW));
auto* dense_x_data = dense_x.mutable_data<float>(); auto* dense_x_data =
dense_x.mutable_data<float>(paddle::platform::CPUPlace());
pten::DenseTensor dense_y(alloc.get(), pten::DenseTensor dense_y(alloc.get(),
pten::DenseTensorMeta(pten::DataType::FLOAT32, pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({10}), framework::make_ddim({10}),
pten::DataLayout::NCHW)); pten::DataLayout::NCHW));
auto* dense_y_data = dense_y.mutable_data<float>(); auto* dense_y_data =
dense_y.mutable_data<float>(paddle::platform::CPUPlace());
float mul[3][10] = {0.0}; float mul[3][10] = {0.0};
for (size_t i = 0; i < 3; ++i) { for (size_t i = 0; i < 3; ++i) {
......
...@@ -47,7 +47,8 @@ TEST(DEV_API, flatten) { ...@@ -47,7 +47,8 @@ TEST(DEV_API, flatten) {
pten::DenseTensorMeta(pten::DataType::FLOAT32, pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 2, 2, 3}), framework::make_ddim({3, 2, 2, 3}),
pten::DataLayout::NCHW)); pten::DataLayout::NCHW));
auto* dense_x_data = dense_x.mutable_data<float>(); auto* dense_x_data =
dense_x.mutable_data<float>(paddle::platform::CPUPlace());
for (int i = 0; i < dense_x.numel(); i++) { for (int i = 0; i < dense_x.numel(); i++) {
dense_x_data[i] = i; dense_x_data[i] = i;
......
...@@ -36,13 +36,15 @@ TEST(DEV_API, dot) { ...@@ -36,13 +36,15 @@ TEST(DEV_API, dot) {
framework::make_ddim({3, 3}), framework::make_ddim({3, 3}),
pten::DataLayout::NCHW)); pten::DataLayout::NCHW));
auto* dense_x_data = dense_x.mutable_data<float>(); auto* dense_x_data =
dense_x.mutable_data<float>(paddle::platform::CPUPlace());
DenseTensor dense_y(alloc.get(), DenseTensor dense_y(alloc.get(),
pten::DenseTensorMeta(pten::DataType::FLOAT32, pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 3}), framework::make_ddim({3, 3}),
pten::DataLayout::NCHW)); pten::DataLayout::NCHW));
auto* dense_y_data = dense_y.mutable_data<float>(); auto* dense_y_data =
dense_y.mutable_data<float>(paddle::platform::CPUPlace());
for (size_t i = 0; i < 9; ++i) { for (size_t i = 0; i < 9; ++i) {
dense_x_data[i] = 1.0; dense_x_data[i] = 1.0;
......
...@@ -35,7 +35,8 @@ TEST(DEV_API, mean) { ...@@ -35,7 +35,8 @@ TEST(DEV_API, mean) {
pten::DenseTensorMeta(pten::DataType::FLOAT32, pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 4}), framework::make_ddim({3, 4}),
pten::DataLayout::NCHW)); pten::DataLayout::NCHW));
auto* dense_x_data = dense_x.mutable_data<float>(); auto* dense_x_data =
dense_x.mutable_data<float>(paddle::platform::CPUPlace());
float sum = 0.0; float sum = 0.0;
for (size_t i = 0; i < 12; ++i) { for (size_t i = 0; i < 12; ++i) {
......
...@@ -37,7 +37,8 @@ TEST(DEV_API, reshape) { ...@@ -37,7 +37,8 @@ TEST(DEV_API, reshape) {
pten::DenseTensorMeta(pten::DataType::FLOAT32, pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 2, 2, 3}), framework::make_ddim({3, 2, 2, 3}),
pten::DataLayout::NCHW)); pten::DataLayout::NCHW));
auto* dense_x_data = dense_x.mutable_data<float>(); auto* dense_x_data =
dense_x.mutable_data<float>(paddle::platform::CPUPlace());
for (int i = 0; i < dense_x.numel(); i++) { for (int i = 0; i < dense_x.numel(); i++) {
dense_x_data[i] = i; dense_x_data[i] = i;
......
...@@ -36,7 +36,8 @@ TEST(DEV_API, scale) { ...@@ -36,7 +36,8 @@ TEST(DEV_API, scale) {
framework::make_ddim({3, 4}), framework::make_ddim({3, 4}),
pten::DataLayout::NCHW)); pten::DataLayout::NCHW));
auto* dense_x_data = dense_x.mutable_data<float>(); auto* dense_x_data =
dense_x.mutable_data<float>(paddle::platform::CPUPlace());
for (size_t i = 0; i < 12; ++i) { for (size_t i = 0; i < 12; ++i) {
dense_x_data[i] = i * 1.0; dense_x_data[i] = i * 1.0;
} }
...@@ -68,7 +69,8 @@ TEST(DEV_API, scale_host) { ...@@ -68,7 +69,8 @@ TEST(DEV_API, scale_host) {
pten::DenseTensorMeta(pten::DataType::FLOAT32, pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 4}), framework::make_ddim({3, 4}),
pten::DataLayout::NCHW)); pten::DataLayout::NCHW));
auto* dense_x_data = dense_x.mutable_data<float>(); auto* dense_x_data =
dense_x.mutable_data<float>(paddle::platform::CPUPlace());
for (size_t i = 0; i < 12; ++i) { for (size_t i = 0; i < 12; ++i) {
dense_x_data[i] = i * 1.0; dense_x_data[i] = i * 1.0;
} }
...@@ -77,7 +79,7 @@ TEST(DEV_API, scale_host) { ...@@ -77,7 +79,7 @@ TEST(DEV_API, scale_host) {
pten::DenseTensorMeta(pten::DataType::FLOAT32, pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({1}), framework::make_ddim({1}),
pten::DataLayout::NCHW)); pten::DataLayout::NCHW));
scale.mutable_data<float>()[0] = 2; scale.data<float>()[0] = 2;
float bias = 1; float bias = 1;
bool bias_after_scale = true; bool bias_after_scale = true;
......
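Note that the scale_host hunk above departs from the common pattern: the scalar scale tensor is now written through data<float>() rather than mutable_data<float>(). The sketch below only restates that call site; the diff itself does not spell out whether data<float>() relies on the tensor's allocator having already materialized the buffer, so treat that as an open assumption.

// Sketch only; restates the scale_host hunk, alloc as in the surrounding test.
pten::DenseTensor scale(alloc.get(),
                        pten::DenseTensorMeta(pten::DataType::FLOAT32,
                                              framework::make_ddim({1}),
                                              pten::DataLayout::NCHW));
scale.data<float>()[0] = 2;  // written via data<float>(), not mutable_data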
...@@ -35,7 +35,8 @@ TEST(DEV_API, sum) { ...@@ -35,7 +35,8 @@ TEST(DEV_API, sum) {
pten::DenseTensorMeta(pten::DataType::FLOAT32, pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 4}), framework::make_ddim({3, 4}),
pten::DataLayout::NCHW)); pten::DataLayout::NCHW));
auto* dense_x_data = dense_x.mutable_data<float>(); auto* dense_x_data =
dense_x.mutable_data<float>(paddle::platform::CPUPlace());
float sum = 0.0; float sum = 0.0;
for (size_t i = 0; i < 12; ++i) { for (size_t i = 0; i < 12; ++i) {
......
...@@ -137,7 +137,9 @@ std::vector<paddle::Tensor> AttrTestForward( ...@@ -137,7 +137,9 @@ std::vector<paddle::Tensor> AttrTestForward(
PD_DISPATCH_FLOATING_TYPES( PD_DISPATCH_FLOATING_TYPES(
x.type(), "assign_cpu_kernel", ([&] { x.type(), "assign_cpu_kernel", ([&] {
assign_cpu_kernel<data_t>( assign_cpu_kernel<data_t>(
x.data<data_t>(), out.mutable_data<data_t>(), x.size()); x.data<data_t>(),
out.mutable_data<data_t>(paddle::PlaceType::kCPU),
x.size());
})); }));
// Check attrs value // Check attrs value
...@@ -175,12 +177,13 @@ std::vector<paddle::Tensor> AttrTestBackward( ...@@ -175,12 +177,13 @@ std::vector<paddle::Tensor> AttrTestBackward(
const std::vector<std::string>& str_vec_attr) { const std::vector<std::string>& str_vec_attr) {
auto grad_x = paddle::Tensor(paddle::PlaceType::kCPU, grad_out.shape()); auto grad_x = paddle::Tensor(paddle::PlaceType::kCPU, grad_out.shape());
PD_DISPATCH_FLOATING_TYPES(grad_out.type(), "assign_cpu_kernel", ([&] { PD_DISPATCH_FLOATING_TYPES(
assign_cpu_kernel<data_t>( grad_out.type(), "assign_cpu_kernel", ([&] {
grad_out.data<data_t>(), assign_cpu_kernel<data_t>(
grad_x.mutable_data<data_t>(), grad_out.data<data_t>(),
grad_out.size()); grad_x.mutable_data<data_t>(paddle::PlaceType::kCPU),
})); grad_out.size());
}));
CheckAllBackwardAttrs(int_attr, float_vec_attr, str_vec_attr); CheckAllBackwardAttrs(int_attr, float_vec_attr, str_vec_attr);
...@@ -203,7 +206,9 @@ std::vector<paddle::Tensor> ConstAttrTestForward( ...@@ -203,7 +206,9 @@ std::vector<paddle::Tensor> ConstAttrTestForward(
PD_DISPATCH_FLOATING_TYPES( PD_DISPATCH_FLOATING_TYPES(
x.type(), "assign_cpu_kernel", ([&] { x.type(), "assign_cpu_kernel", ([&] {
assign_cpu_kernel<data_t>( assign_cpu_kernel<data_t>(
x.data<data_t>(), out.mutable_data<data_t>(), x.size()); x.data<data_t>(),
out.mutable_data<data_t>(paddle::PlaceType::kCPU),
x.size());
})); }));
// Check attrs value // Check attrs value
...@@ -241,12 +246,13 @@ std::vector<paddle::Tensor> ConstAttrTestBackward( ...@@ -241,12 +246,13 @@ std::vector<paddle::Tensor> ConstAttrTestBackward(
const std::vector<std::string>& str_vec_attr) { const std::vector<std::string>& str_vec_attr) {
auto grad_x = paddle::Tensor(paddle::PlaceType::kCPU, grad_out.shape()); auto grad_x = paddle::Tensor(paddle::PlaceType::kCPU, grad_out.shape());
PD_DISPATCH_FLOATING_TYPES(grad_out.type(), "assign_cpu_kernel", ([&] { PD_DISPATCH_FLOATING_TYPES(
assign_cpu_kernel<data_t>( grad_out.type(), "assign_cpu_kernel", ([&] {
grad_out.data<data_t>(), assign_cpu_kernel<data_t>(
grad_x.mutable_data<data_t>(), grad_out.data<data_t>(),
grad_out.size()); grad_x.mutable_data<data_t>(paddle::PlaceType::kCPU),
})); grad_out.size());
}));
CheckAllBackwardAttrs(int_attr, float_vec_attr, str_vec_attr); CheckAllBackwardAttrs(int_attr, float_vec_attr, str_vec_attr);
......
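For the custom-operator test files, the same rule applies inside the PD_DISPATCH_* macros: the output buffer is requested with the target PlaceType. A minimal, self-contained sketch, assuming the paddle/extension.h custom-op API that these tests build against; AssignForward and this assign_cpu_kernel definition are illustrative stand-ins, not part of the PR.

// Sketch only; mirrors the assign_cpu_kernel dispatch in the hunks above.
#include "paddle/extension.h"

template <typename data_t>
void assign_cpu_kernel(const data_t* x_data, data_t* out_data, int64_t numel) {
  for (int64_t i = 0; i < numel; ++i) {
    out_data[i] = x_data[i];
  }
}

std::vector<paddle::Tensor> AssignForward(const paddle::Tensor& x) {
  auto out = paddle::Tensor(paddle::PlaceType::kCPU, x.shape());
  PD_DISPATCH_FLOATING_TYPES(
      x.type(), "assign_cpu_kernel", ([&] {
        assign_cpu_kernel<data_t>(
            x.data<data_t>(),
            out.mutable_data<data_t>(paddle::PlaceType::kCPU),  // place required
            x.size());
      }));
  return {out};
}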
...@@ -47,7 +47,7 @@ void ConcatCpuKernel(const std::vector<paddle::Tensor>& ins, ...@@ -47,7 +47,7 @@ void ConcatCpuKernel(const std::vector<paddle::Tensor>& ins,
int64_t out_cols = 0; int64_t out_cols = 0;
auto ins_cols = GetCols(ins, out_rows, &out_cols); auto ins_cols = GetCols(ins, out_rows, &out_cols);
auto* out_data = out->mutable_data<data_t>(); auto* out_data = out->mutable_data<data_t>(paddle::PlaceType::kCPU);
int64_t col_idx = 0; int64_t col_idx = 0;
for (size_t i = 0; i < num; ++i) { for (size_t i = 0; i < num; ++i) {
int64_t col_len = ins_cols[i]; int64_t col_len = ins_cols[i];
...@@ -76,7 +76,9 @@ void SplitCpuKernel(const paddle::Tensor& in, ...@@ -76,7 +76,9 @@ void SplitCpuKernel(const paddle::Tensor& in,
int64_t col_idx = 0; int64_t col_idx = 0;
for (size_t j = 0; j < num; ++j) { for (size_t j = 0; j < num; ++j) {
int64_t col_len = out_cols[j]; int64_t col_len = out_cols[j];
auto* out_data = outs->at(j).mutable_data<data_t>() + i * col_len; auto* out_data =
outs->at(j).mutable_data<data_t>(paddle::PlaceType::kCPU) +
i * col_len;
std::memcpy(out_data, in_data + col_idx, sizeof(data_t) * col_len); std::memcpy(out_data, in_data + col_idx, sizeof(data_t) * col_len);
col_idx += col_len; col_idx += col_len;
} }
......
...@@ -76,7 +76,9 @@ std::vector<paddle::Tensor> ConjFunction(const paddle::Tensor& x) { ...@@ -76,7 +76,9 @@ std::vector<paddle::Tensor> ConjFunction(const paddle::Tensor& x) {
PD_DISPATCH_FLOATING_AND_COMPLEX_TYPES( PD_DISPATCH_FLOATING_AND_COMPLEX_TYPES(
x.type(), "ConjCPUKernel", ([&] { x.type(), "ConjCPUKernel", ([&] {
ConjCPUKernel<data_t>( ConjCPUKernel<data_t>(
x.data<data_t>(), x.size(), out.mutable_data<data_t>()); x.data<data_t>(),
x.size(),
out.mutable_data<data_t>(paddle::PlaceType::kCPU));
})); }));
return {out}; return {out};
......
...@@ -32,7 +32,9 @@ std::vector<paddle::Tensor> DispatchTestInterger(const paddle::Tensor& x) { ...@@ -32,7 +32,9 @@ std::vector<paddle::Tensor> DispatchTestInterger(const paddle::Tensor& x) {
PD_DISPATCH_INTEGRAL_TYPES( PD_DISPATCH_INTEGRAL_TYPES(
x.type(), "assign_cpu_kernel", ([&] { x.type(), "assign_cpu_kernel", ([&] {
assign_cpu_kernel<data_t>( assign_cpu_kernel<data_t>(
x.data<data_t>(), out.mutable_data<data_t>(), x.size()); x.data<data_t>(),
out.mutable_data<data_t>(paddle::PlaceType::kCPU),
x.size());
})); }));
return {out}; return {out};
...@@ -50,7 +52,9 @@ std::vector<paddle::Tensor> DispatchTestFloatAndInteger( ...@@ -50,7 +52,9 @@ std::vector<paddle::Tensor> DispatchTestFloatAndInteger(
PD_DISPATCH_FLOATING_AND_INTEGRAL_TYPES( PD_DISPATCH_FLOATING_AND_INTEGRAL_TYPES(
x.type(), "assign_cpu_kernel", ([&] { x.type(), "assign_cpu_kernel", ([&] {
assign_cpu_kernel<data_t>( assign_cpu_kernel<data_t>(
x.data<data_t>(), out.mutable_data<data_t>(), x.size()); x.data<data_t>(),
out.mutable_data<data_t>(paddle::PlaceType::kCPU),
x.size());
})); }));
return {out}; return {out};
...@@ -67,7 +71,9 @@ std::vector<paddle::Tensor> DispatchTestComplex(const paddle::Tensor& x) { ...@@ -67,7 +71,9 @@ std::vector<paddle::Tensor> DispatchTestComplex(const paddle::Tensor& x) {
PD_DISPATCH_COMPLEX_TYPES( PD_DISPATCH_COMPLEX_TYPES(
x.type(), "assign_cpu_kernel", ([&] { x.type(), "assign_cpu_kernel", ([&] {
assign_cpu_kernel<data_t>( assign_cpu_kernel<data_t>(
x.data<data_t>(), out.mutable_data<data_t>(), x.size()); x.data<data_t>(),
out.mutable_data<data_t>(paddle::PlaceType::kCPU),
x.size());
})); }));
return {out}; return {out};
...@@ -85,7 +91,9 @@ std::vector<paddle::Tensor> DispatchTestFloatAndComplex( ...@@ -85,7 +91,9 @@ std::vector<paddle::Tensor> DispatchTestFloatAndComplex(
PD_DISPATCH_FLOATING_AND_COMPLEX_TYPES( PD_DISPATCH_FLOATING_AND_COMPLEX_TYPES(
x.type(), "assign_cpu_kernel", ([&] { x.type(), "assign_cpu_kernel", ([&] {
assign_cpu_kernel<data_t>( assign_cpu_kernel<data_t>(
x.data<data_t>(), out.mutable_data<data_t>(), x.size()); x.data<data_t>(),
out.mutable_data<data_t>(paddle::PlaceType::kCPU),
x.size());
})); }));
return {out}; return {out};
...@@ -103,7 +111,9 @@ std::vector<paddle::Tensor> DispatchTestFloatAndIntegerAndComplex( ...@@ -103,7 +111,9 @@ std::vector<paddle::Tensor> DispatchTestFloatAndIntegerAndComplex(
PD_DISPATCH_FLOATING_AND_INTEGRAL_AND_COMPLEX_TYPES( PD_DISPATCH_FLOATING_AND_INTEGRAL_AND_COMPLEX_TYPES(
x.type(), "assign_cpu_kernel", ([&] { x.type(), "assign_cpu_kernel", ([&] {
assign_cpu_kernel<data_t>( assign_cpu_kernel<data_t>(
x.data<data_t>(), out.mutable_data<data_t>(), x.size()); x.data<data_t>(),
out.mutable_data<data_t>(paddle::PlaceType::kCPU),
x.size());
})); }));
return {out}; return {out};
...@@ -120,7 +130,9 @@ std::vector<paddle::Tensor> DispatchTestFloatAndHalf(const paddle::Tensor& x) { ...@@ -120,7 +130,9 @@ std::vector<paddle::Tensor> DispatchTestFloatAndHalf(const paddle::Tensor& x) {
PD_DISPATCH_FLOATING_AND_HALF_TYPES( PD_DISPATCH_FLOATING_AND_HALF_TYPES(
x.type(), "assign_cpu_kernel", ([&] { x.type(), "assign_cpu_kernel", ([&] {
assign_cpu_kernel<data_t>( assign_cpu_kernel<data_t>(
x.data<data_t>(), out.mutable_data<data_t>(), x.size()); x.data<data_t>(),
out.mutable_data<data_t>(paddle::PlaceType::kCPU),
x.size());
})); }));
return {out}; return {out};
......