Unverified commit a56e16a7, authored by 石晓伟, committed by GitHub

[Refactoring Tensor PR #5] replace storage with pten allocation (#39085)

* updates callers, test=develop

* updates tensor, test=develop

* fixes errors, test=develop

* remove some dtypes, test=develop

* fix errors in the base storage modification, test=develop

* fixes a bug, test=develop

* fixes the bugs found when pushing the whole change, test=develop

* updates, test=develop

* update

* update, test=develop

* fixes the mac-py3 CI, test=develop

* remove the storage impl, test=develop

* updates some codes, test=develop

* update, test=develop

* updates pten allocation, test=develop
Parent 95b081ef
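Before the per-file hunks, it helps to see the calling-convention change that most of this diff applies mechanically: DenseTensor::mutable_data&lt;T&gt;() loses its zero-argument form and now takes the target place explicitly. A minimal sketch of the migration (illustrative only, not part of the commit; it assumes a CPU tensor whose meta is already set):

// Hypothetical helper showing the before/after call pattern.
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/fluid/platform/place.h"

void FillWithOnes(pten::DenseTensor* dt) {
  // Old form (removed by this PR):  dt->mutable_data<float>();
  // New form: the allocation place must be passed explicitly.
  float* ptr = dt->mutable_data<float>(paddle::platform::CPUPlace());
  for (int64_t i = 0; i < dt->numel(); ++i) {
    ptr[i] = 1.0f;
  }
}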
......@@ -216,8 +216,9 @@ void TensorAdd(const egr::EagerTensor& src, egr::EagerTensor* dst) {
#define PADDLE_TENSOR_ADD(cpp_type) \
if (data_type == paddle::framework::DataTypeTrait<cpp_type>::DataType()) { \
TensorAddFunctor<cpp_type> func(numel, src_tensor->data<cpp_type>(), \
dst_tensor->mutable_data<cpp_type>()); \
TensorAddFunctor<cpp_type> func( \
numel, src_tensor->data<cpp_type>(), \
dst_tensor->mutable_data<cpp_type>(place)); \
paddle::platform::VisitPlace(place, func); \
return; \
}
......
......@@ -36,7 +36,8 @@ TEST(AccumulationNode, EagerTensor) {
paddle::platform::CPUPlace())
.get(),
meta);
dt0->mutable_data<paddle::platform::float16>()[0] = 10.0;
dt0->mutable_data<paddle::platform::float16>(
paddle::platform::CPUPlace())[0] = 10.0;
EagerTensor et0 = EagerTensor(dt0);
std::shared_ptr<pten::DenseTensor> dt1 = std::make_shared<pten::DenseTensor>(
......@@ -45,7 +46,8 @@ TEST(AccumulationNode, EagerTensor) {
.get(),
meta);
dt1->mutable_data<paddle::platform::float16>()[0] = 20.0;
dt1->mutable_data<paddle::platform::float16>(
paddle::platform::CPUPlace())[0] = 20.0;
EagerTensor et1 = EagerTensor(dt1);
std::shared_ptr<pten::DenseTensor> grad_dt =
......
......@@ -46,7 +46,7 @@ TEST(AutogradMeta, MemberFunction) {
paddle::platform::CPUPlace())
.get(),
meta);
auto* dt_ptr = dt->mutable_data<float>();
auto* dt_ptr = dt->mutable_data<float>(paddle::platform::CPUPlace());
dt_ptr[0] = 5.0f;
dt_ptr[1] = 10.0f;
grad_t->set_impl(dt);
......
......@@ -40,7 +40,7 @@ TEST(EagerTensor, Constructor) {
paddle::platform::CPUPlace())
.get(),
meta);
auto* dt_ptr = dt->mutable_data<float>();
auto* dt_ptr = dt->mutable_data<float>(paddle::platform::CPUPlace());
dt_ptr[0] = 5.0f;
dt_ptr[1] = 10.0f;
egr::EagerTensor et3 = egr::EagerTensor(dt);
......@@ -70,7 +70,7 @@ TEST(EagerTensor, MemberFunction) {
paddle::platform::CPUPlace())
.get(),
meta);
auto* dt_ptr = dt->mutable_data<float>();
auto* dt_ptr = dt->mutable_data<float>(paddle::platform::CPUPlace());
dt_ptr[0] = 5.0f;
dt_ptr[1] = 10.0f;
VLOG(6) << "Make Dense Tensor";
......
......@@ -45,7 +45,7 @@ TEST(GradNodeInfo, GradNodeBase) {
paddle::platform::CPUPlace())
.get(),
meta);
auto* dt_ptr = dt->mutable_data<float>();
auto* dt_ptr = dt->mutable_data<float>(paddle::platform::CPUPlace());
dt_ptr[0] = 5.0f;
egr::EagerTensor et1(dt);
grads = {{et1}};
......@@ -102,7 +102,7 @@ TEST(GradNodeInfo, GradNodeBase) {
paddle::platform::CPUPlace())
.get(),
meta);
auto* dt_ptr = dt->mutable_data<float>();
auto* dt_ptr = dt->mutable_data<float>(paddle::platform::CPUPlace());
dt_ptr[0] = 6.0f;
auto* et_ptr =
std::dynamic_pointer_cast<pten::DenseTensor>(et.impl())->data<float>();
......@@ -121,8 +121,8 @@ TEST(GradNodeInfo, GradNodeBase) {
VLOG(6) << "Test Reduce Hook";
auto reduce_hook = [&](void) -> void {
auto* et_ptr = std::dynamic_pointer_cast<pten::DenseTensor>(et1.impl())
->mutable_data<float>();
auto* et_ptr =
std::dynamic_pointer_cast<pten::DenseTensor>(et1.impl())->data<float>();
et_ptr[0] = 100.0;
VLOG(6) << "Running Reduce Hook";
};
......
......@@ -41,7 +41,7 @@ class GradTestNode : public egr::GradNodeBase {
paddle::platform::CPUPlace())
.get(),
meta);
auto* dt_ptr = dt->mutable_data<float>();
auto* dt_ptr = dt->mutable_data<float>(paddle::platform::CPUPlace());
dt_ptr[0] = 6.0f;
egr::EagerTensor et1(dt);
std::vector<std::vector<egr::EagerTensor>> res = {{et1}};
......
......@@ -57,7 +57,7 @@ TEST(GradTensorHolder, Interfaces) {
paddle::platform::CPUPlace())
.get(),
meta);
dt0->mutable_data<float>()[0] = 10.0;
dt0->mutable_data<float>(paddle::platform::CPUPlace())[0] = 10.0;
EagerTensor et0 = EagerTensor(dt0);
std::shared_ptr<pten::DenseTensor> dt1 = std::make_shared<pten::DenseTensor>(
......@@ -65,7 +65,7 @@ TEST(GradTensorHolder, Interfaces) {
paddle::platform::CPUPlace())
.get(),
meta);
dt1->mutable_data<float>()[0] = 20.0;
dt1->mutable_data<float>(paddle::platform::CPUPlace())[0] = 20.0;
EagerTensor et1 = EagerTensor(dt1);
// Constructor empty GradTensorHolder
......
......@@ -29,7 +29,7 @@ TEST(TensorWrapper, Basic) {
paddle::platform::CPUPlace())
.get(),
meta);
auto* dt_ptr = dt->mutable_data<float>();
auto* dt_ptr = dt->mutable_data<float>(paddle::platform::CPUPlace());
dt_ptr[0] = 5.0f;
dt_ptr[1] = 10.0f;
et1.set_impl(dt);
......@@ -56,7 +56,7 @@ TEST(TensorWrapper, Basic) {
paddle::platform::CPUPlace())
.get(),
meta2);
auto* dt_ptr2 = dt->mutable_data<float>();
auto* dt_ptr2 = dt->mutable_data<float>(paddle::platform::CPUPlace());
dt_ptr2[0] = 6.0f;
dt_ptr2[1] = 11.0f;
et2.set_impl(dt2);
......
......@@ -35,7 +35,7 @@ TEST(EagerUtils, AutoGradMeta) {
paddle::platform::CPUPlace())
.get(),
meta);
dt0->mutable_data<float>()[0] = 10.0;
dt0->mutable_data<float>(paddle::platform::CPUPlace())[0] = 10.0;
EagerTensor et0 = EagerTensor(dt0);
std::shared_ptr<pten::DenseTensor> dt1 = std::make_shared<pten::DenseTensor>(
......@@ -43,7 +43,7 @@ TEST(EagerUtils, AutoGradMeta) {
paddle::platform::CPUPlace())
.get(),
meta);
dt1->mutable_data<float>()[0] = 20.0;
dt1->mutable_data<float>(paddle::platform::CPUPlace())[0] = 20.0;
EagerTensor et1 = EagerTensor(dt1);
std::vector<EagerTensor> ets = {et0, et1};
......@@ -112,7 +112,7 @@ egr::EagerTensor CreateTestCPUTensor(T val,
paddle::platform::CPUPlace())
.get(),
meta);
auto* dt_ptr = dt->mutable_data<T>();
auto* dt_ptr = dt->mutable_data<T>(paddle::platform::CPUPlace());
for (int64_t i = 0; i < dt->numel(); i++) {
dt_ptr[i] = val;
}
......
......@@ -44,8 +44,8 @@ egr::EagerTensor hook_function(const egr::EagerTensor& t) {
paddle::memory::Alloc(place, bytes_size)),
std::move(ret_meta));
float* t_ptr = t_dense->mutable_data<float>();
float* ret_ptr = ret_dense->mutable_data<float>();
float* t_ptr = t_dense->mutable_data<float>(place);
float* ret_ptr = ret_dense->mutable_data<float>(place);
for (int i = 0; i < ret_dense->numel(); i++) {
ret_ptr[i] = t_ptr[i] + 5.0;
}
......@@ -184,7 +184,7 @@ TEST(FwdBwdJoint, BranchedNodes) {
// Examine Forward Output 2
{
auto dense_out = std::dynamic_pointer_cast<pten::DenseTensor>(out2.impl());
float* ptr = dense_out->mutable_data<float>();
float* ptr = dense_out->mutable_data<float>(paddle::platform::CPUPlace());
for (int i = 0; i < 20; i++) {
PADDLE_ENFORCE(ptr[i] == 150.0,
paddle::platform::errors::Fatal(
......
......@@ -45,8 +45,8 @@ egr::EagerTensor hook_function(const egr::EagerTensor& t) {
paddle::memory::Alloc(place, bytes_size)),
std::move(ret_meta));
float* t_ptr = t_dense->mutable_data<float>();
float* ret_ptr = ret_dense->mutable_data<float>();
float* t_ptr = t_dense->mutable_data<float>(place);
float* ret_ptr = ret_dense->mutable_data<float>(place);
for (int i = 0; i < ret_dense->numel(); i++) {
ret_ptr[i] = t_ptr[i] + 3.0;
}
......
......@@ -34,7 +34,7 @@ bool CompareGradTensorWithValue(const egr::EagerTensor& target, T value) {
egr::AutogradMeta* meta = egr::EagerUtils::unsafe_autograd_meta(target);
auto grad_dense =
std::dynamic_pointer_cast<pten::DenseTensor>(meta->Grad().impl());
T* ptr = grad_dense->mutable_data<T>();
T* ptr = grad_dense->data<T>();
std::vector<T> host_data(grad_dense->numel());
if (paddle::platform::is_gpu_place(grad_dense->place())) {
......@@ -67,7 +67,7 @@ template <typename T>
bool CompareTensorWithValue(const egr::EagerTensor& target, T value) {
// TODO(jiabin): Support Selected Rows later
auto dense_t = std::dynamic_pointer_cast<pten::DenseTensor>(target.impl());
T* ptr = dense_t->mutable_data<T>();
T* ptr = dense_t->data<T>();
std::vector<T> host_data(dense_t->numel());
if (paddle::platform::is_gpu_place(dense_t->place())) {
......
......@@ -80,6 +80,7 @@ void EmptyEagerTensorInitializer(
std::make_shared<pten::DenseTensor>(
pten::make_intrusive<paddle::experimental::SharedStorage>(place),
pten::DenseTensorMeta(pten::TransToPtenDataType(dtype), ddims));
dense_tensor->mutable_data(place);
self->eager_tensor.set_impl(dense_tensor);
} else {
PADDLE_THROW(platform::errors::InvalidArgument(
......
......@@ -272,16 +272,10 @@ namespace paddle {
NAME, ::pten::DataType::UINT8, uint8_t, __VA_ARGS__) \
PD_PRIVATE_CASE_TYPE( \
NAME, ::pten::DataType::INT16, int16_t, __VA_ARGS__) \
PD_PRIVATE_CASE_TYPE( \
NAME, ::pten::DataType::UINT16, uint16_t, __VA_ARGS__) \
PD_PRIVATE_CASE_TYPE( \
NAME, ::pten::DataType::INT32, int32_t, __VA_ARGS__) \
PD_PRIVATE_CASE_TYPE( \
NAME, ::pten::DataType::UINT32, uint32_t, __VA_ARGS__) \
PD_PRIVATE_CASE_TYPE( \
NAME, ::pten::DataType::INT64, int64_t, __VA_ARGS__) \
PD_PRIVATE_CASE_TYPE( \
NAME, ::pten::DataType::UINT64, uint64_t, __VA_ARGS__) \
PD_PRIVATE_CASE_TYPE(NAME, \
::pten::DataType::BFLOAT16, \
paddle::experimental::bfloat16, \
......
......@@ -149,8 +149,8 @@ bool Tensor::is_cuda() const {
template <typename T>
T *Tensor::mutable_data() {
if (is_dense_tensor()) {
return std::dynamic_pointer_cast<pten::DenseTensor>(impl_)
->mutable_data<T>();
return std::dynamic_pointer_cast<pten::DenseTensor>(impl_)->mutable_data<T>(
ConvertExtPlaceToInnerPlace(place()));
}
return nullptr;
}
......@@ -173,12 +173,18 @@ Tensor::mutable_data<paddle::platform::float16>();
template <typename T>
T *Tensor::mutable_data(const PlaceType &place) {
auto inner_place = ConvertExtPlaceToInnerPlace(place);
PADDLE_ENFORCE_EQ(
platform::is_same_place(inner_place, impl_->place()),
true,
platform::errors::Unimplemented("Modification of tensor place through "
"mutable_data is not supported now"));
return mutable_data<T>();
if (impl_->initialized()) {
PADDLE_ENFORCE_EQ(
platform::is_same_place(inner_place, impl_->place()),
true,
platform::errors::Unimplemented("Modification of tensor place through "
"mutable_data is not supported now"));
}
if (is_dense_tensor()) {
return std::dynamic_pointer_cast<pten::DenseTensor>(impl_)->mutable_data<T>(
inner_place);
}
return nullptr;
}
template PADDLE_API float *Tensor::mutable_data<float>(const PlaceType &place);
......@@ -205,7 +211,8 @@ Tensor::mutable_data<paddle::platform::float16>(const PlaceType &place);
template <typename T>
const T *Tensor::data() const {
if (is_dense_tensor()) {
return std::dynamic_pointer_cast<pten::DenseTensor>(impl_)->data<T>();
return std::dynamic_pointer_cast<pten::DenseTensor>(impl_)->mutable_data<T>(
ConvertExtPlaceToInnerPlace(place()));
}
return nullptr;
}
......@@ -217,7 +224,6 @@ template PADDLE_API const int32_t *Tensor::data<int32_t>() const;
template PADDLE_API const uint8_t *Tensor::data<uint8_t>() const;
template PADDLE_API const int8_t *Tensor::data<int8_t>() const;
template PADDLE_API const int16_t *Tensor::data<int16_t>() const;
template PADDLE_API const uint16_t *Tensor::data<uint16_t>() const;
template PADDLE_API const bool *Tensor::data<bool>() const;
template PADDLE_API const paddle::platform::complex<float>
*Tensor::data<paddle::platform::complex<float>>() const;
......
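One behavioral consequence of the Tensor changes above: mutable_data(place) can now perform the first allocation on an uninitialized tensor, and the same-place check only applies once the tensor is initialized. A hedged usage sketch (the header path and the PlaceType::kCPU enumerator from the custom-operator API are assumptions, not taken from this commit):

#include "paddle/pten/api/include/tensor.h"

void ZeroFill(paddle::experimental::Tensor* t) {
  // With this PR the call below may allocate on first use instead of
  // requiring the tensor to already hold memory at the same place.
  float* ptr = t->mutable_data<float>(paddle::PlaceType::kCPU);
  for (int64_t i = 0; i < t->numel(); ++i) {
    ptr[i] = 0.0f;
  }
}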
......@@ -65,6 +65,7 @@ PADDLE_API Tensor copy_to(const Tensor& x, Backend backend, bool blocking) {
pten::make_intrusive<paddle::experimental::SharedStorage>(
pten::TransToFluidPlace(backend)),
std::move(out_meta));
dense_out->mutable_data(pten::TransToFluidPlace(backend));
kernel_context.EmplaceBackOutput(dense_out.get());
Tensor out;
out.set_impl(dense_out);
......
......@@ -39,6 +39,18 @@ class ExternalStorage : public pten::Storage {
size_ = 0;
}
void set_data_shared(
const std::shared_ptr<paddle::memory::Allocation>& holder) override {
CHECK(holder);
data_ = holder;
size_ = holder->size();
}
std::shared_ptr<paddle::memory::Allocation>&& move_data_shared() override {
size_ = 0;
return std::move(data_);
}
size_t size() const noexcept override { return size_; }
const paddle::platform::Place& place() const override {
PADDLE_ENFORCE_NOT_NULL(
......@@ -92,6 +104,12 @@ class SharedStorage : public pten::Storage {
}
}
std::shared_ptr<paddle::memory::Allocation>&& move_data_shared() override {
size_ = 0;
place_ = Place();
return std::move(data_);
}
size_t size() const noexcept override {
return data_ ? data_->size() : size_;
}
......
......@@ -133,9 +133,6 @@ class ScalarBase {
case DataType::INT8:
data_.i8 = tensor.template data<int8_t>()[0];
break;
case DataType::UINT16:
data_.ui16 = tensor.template data<uint16_t>()[0];
break;
case DataType::UINT8:
data_.ui8 = tensor.template data<uint8_t>()[0];
break;
......
......@@ -15,46 +15,16 @@ limitations under the License. */
#pragma once
#include <cstdint>
#include <functional>
#include "paddle/fluid/platform/place.h"
#include "paddle/pten/core/candidate/allocator.h"
namespace pten {
namespace deprecated {
/// \brief Encapsulates strategies for access/addressing, allocation/
/// deallocation and construction/destruction of objects.
class RawAllocator {
public:
using Place = paddle::platform::Place;
/// \brief Default destructor.
virtual ~RawAllocator() = default;
/// \brief Allocates storage suitable for an array object of n bytes
/// and creates the array, but does not construct array elements.
/// May throw exceptions.
/// \param bytes_size The number of bytes to allocate.
/// \return The first address allocated.
virtual void* Allocate(size_t bytes_size) = 0;
/// \brief Deallocates storage pointed to ptr, which must be a value
/// returned by a previous call to allocate that has not been
/// invalidated by an intervening call to deallocate. The bytes_size
/// must match the value previously passed to allocate.
/// \param ptr The first address to deallocate.
/// \param bytes_size The number of bytes to deallocate.
virtual void Deallocate(void* ptr, size_t bytes_size) = 0;
/// \brief Get the place value of the allocator and the allocation.
/// \return The place value of the allocator and the allocation.
virtual const Place& place() const = 0;
};
/// \brief Fancy pointer with context. The use of this data type
/// \brief Fancy pointer with deleter. The use of this data type
/// is to be compatible with allocators from different frameworks
/// without significant performance loss. This class does not
/// support being inherited.
class Allocation final {
class Allocation {
public:
using Place = paddle::platform::Place;
using DeleterFnPtr = void (*)(Allocation*);
......@@ -62,63 +32,54 @@ class Allocation final {
Allocation() = default;
// Don't own resources, only provide access.
Allocation(void* data, const Place& place) : data_(data), place_(place) {}
Allocation(void* data, size_t size, const Place& place)
: ptr_(data), size_(size), place_(place) {}
// Own resources.
Allocation(void* data, void* ctx, DeleterFnPtr deleter, const Place& place)
: data_(data), ctx_(ctx), deleter_(deleter), place_(place) {}
Allocation(void* data, size_t size, DeleterFnPtr deleter, const Place& place)
: ptr_(data), size_(size), deleter_(deleter), place_(place) {}
Allocation(Allocation&& other) { swap(*this, other); }
Allocation& operator=(Allocation&& other) {
Allocation(Allocation&& other) noexcept { swap(*this, other); }
Allocation& operator=(Allocation&& other) noexcept {
// Exchange them explicitly to avoid moving is equivalent
// to copying.
swap(*this, other);
return *this;
}
~Allocation() { Clear(); }
void* ptr() const noexcept { return data_; }
void* operator->() const noexcept { return data_; }
operator bool() const noexcept { return data_ || ctx_; }
const Place& place() const noexcept { return place_; }
void Clear() {
virtual ~Allocation() {
if (deleter_) {
deleter_(this);
}
ctx_ = nullptr;
deleter_ = nullptr;
data_ = nullptr;
}
// Returns the holding pointer.
// NOTE: For performance consideration, it is better not to make this method
// as a virtual method. If we want to implement a `defragmentation` later,
// we might need to make `ptr_` field as a protected field, and add a virtual
// method like `defragmentation` to change `ptr_`.
void* ptr() const noexcept { return ptr_; }
// Returns the size of this memory buffer, i.e., ptr() + size() - 1 is the
// last valid element.
//
// NOTE: Some allocator might alloc more memory than request. The size
// could larger than its request. For example,
// the AlignedAllocator will always allocate memory as size + kAlignment.
// The raw pointer might not aligned, so an offset might be added to raw
// the pointer. The size of this allocation will be
// `size + kAlignemnt - offset`.
size_t size() const noexcept { return size_; }
void* operator->() const noexcept { return ptr_; }
operator bool() const noexcept { return ptr_; }
const Place& place() const noexcept { return place_; }
DeleterFnPtr deleter() const noexcept { return deleter_; }
template <typename T>
T* CastContextWithoutCheck() const noexcept {
return static_cast<T*>(ctx_);
}
/// \brief Statically cast the void pointer of the context object to
/// the primitive type. Conversion of any pointer to void* and back
/// to pointer to the original cv type preserves its original value.
/// \param T The primitive type name of the context pointer.
/// \param expected_deleter The destructor passed in to enhance type
/// safety checking.
template <typename T>
T* CastContext(DeleterFnPtr expected_deleter) const {
PADDLE_ENFORCE_EQ(
deleter_ == expected_deleter,
true,
paddle::platform::errors::InvalidArgument(
"The deleter of the allocation does not match, so the pointer "
"cannot be safely removed."));
return CastContextWithoutCheck<T>();
}
private:
protected:
friend void swap(Allocation& a, Allocation& b) noexcept;
void* data_{nullptr};
void* ctx_{nullptr};
void* ptr_{nullptr};
size_t size_{};
DeleterFnPtr deleter_{nullptr};
// TODO(Shixiaowei02): Enum needs to be used instead to reduce
// the construction overhead by more than 50%.
......@@ -126,28 +87,21 @@ class Allocation final {
};
inline void swap(Allocation& a, Allocation& b) noexcept {
::std::swap(a.data_, b.data_);
::std::swap(a.ctx_, b.ctx_);
::std::swap(a.ptr_, b.ptr_);
::std::swap(a.deleter_, b.deleter_);
::std::swap(a.place_, b.place_);
::std::swap(a.size_, b.size_);
}
/// \brief Context compatible allocator interface. This allocator is
/// mainly used for general data structures such as Tensor. The raw
/// allocator is more universal and efficient.
class Allocator {
using Place = paddle::platform::Place;
public:
using DeleterType = std::function<void(Allocation*)>;
using AllocationPtr = std::unique_ptr<Allocation, DeleterType>;
virtual ~Allocator() = default;
virtual Allocation Allocate(size_t bytes_size) = 0;
virtual const Place& place() = 0;
};
virtual AllocationPtr Allocate(size_t bytes_size) = 0;
inline Allocation Allocate(const std::shared_ptr<Allocator>& a, size_t n) {
CHECK(a);
return a->Allocate(n);
}
virtual bool IsAllocThreadSafe() const { return false; }
};
} // namespace deprecated
} // namespace pten
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <cstdint>
#include <functional>
#include "paddle/fluid/platform/place.h"
namespace pten {
/// \brief Fancy pointer with deleter. The use of this data type
/// is to be compatible with allocators from different frameworks
/// without significant performance loss. This class does not
/// support being inherited.
class Allocation {
public:
using Place = paddle::platform::Place;
using DeleterFnPtr = void (*)(Allocation*);
Allocation() = default;
// Don't own resources, only provide access.
Allocation(void* data, size_t size, const Place& place)
: ptr_(data), size_(size), place_(place) {}
// Own resources.
Allocation(void* data, size_t size, DeleterFnPtr deleter, const Place& place)
: ptr_(data), size_(size), deleter_(deleter), place_(place) {}
Allocation(Allocation&& other) noexcept { swap(*this, other); }
Allocation& operator=(Allocation&& other) noexcept {
// Exchange them explicitly to avoid moving is equivalent
// to copying.
swap(*this, other);
return *this;
}
virtual ~Allocation() {
if (deleter_) {
deleter_(this);
}
}
// Returns the holding pointer.
// NOTE: For performance consideration, it is better not to make this method
// as a virtual method. If we want to implement a `defragmentation` later,
// we might need to make `ptr_` field as a protected field, and add a virtual
// method like `defragmentation` to change `ptr_`.
void* ptr() const noexcept { return ptr_; }
// Returns the size of this memory buffer, i.e., ptr() + size() - 1 is the
// last valid element.
//
// NOTE: Some allocator might alloc more memory than request. The size
// could larger than its request. For example,
// the AlignedAllocator will always allocate memory as size + kAlignment.
// The raw pointer might not aligned, so an offset might be added to raw
// the pointer. The size of this allocation will be
// `size + kAlignemnt - offset`.
size_t size() const noexcept { return size_; }
void* operator->() const noexcept { return ptr_; }
operator bool() const noexcept { return ptr_; }
const Place& place() const noexcept { return place_; }
DeleterFnPtr deleter() const noexcept { return deleter_; }
protected:
friend void swap(Allocation& a, Allocation& b) noexcept;
void* ptr_{nullptr};
size_t size_{};
DeleterFnPtr deleter_{nullptr};
// TODO(Shixiaowei02): Enum needs to be used instead to reduce
// the construction overhead by more than 50%.
Place place_;
};
inline void swap(Allocation& a, Allocation& b) noexcept {
::std::swap(a.ptr_, b.ptr_);
::std::swap(a.deleter_, b.deleter_);
::std::swap(a.place_, b.place_);
::std::swap(a.size_, b.size_);
}
class Allocator {
public:
using DeleterType = std::function<void(Allocation*)>;
using AllocationPtr = std::unique_ptr<Allocation, DeleterType>;
virtual ~Allocator() = default;
virtual AllocationPtr Allocate(size_t bytes_size) = 0;
virtual bool IsAllocThreadSafe() const { return false; }
};
} // namespace pten
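To make the intent of this header concrete, here is a small sketch of wrapping externally owned memory in a pten::Allocation; the include path follows the #include change later in this diff, and the function is made up for illustration. Ownership lives entirely in the deleter, which the virtual destructor invokes:

#include <cstdlib>
#include "paddle/pten/core/allocator.h"
#include "paddle/fluid/platform/place.h"

void WrapAndRelease() {
  constexpr size_t kBytes = 256;
  void* data = std::malloc(kBytes);
  // A captureless lambda decays to Allocation::DeleterFnPtr.
  pten::Allocation holder(
      data, kBytes, [](pten::Allocation* a) { std::free(a->ptr()); },
      paddle::platform::CPUPlace());
  // holder.ptr(), holder.size() and holder.place() are usable here;
  // std::free runs when holder goes out of scope.
}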
......@@ -33,28 +33,17 @@ extern void TensorCopy(const pten::DenseTensor& src,
namespace pten {
DenseTensor::DenseTensor(Allocator* a, const DenseTensorMeta& meta)
: meta_(meta),
storage_(make_intrusive<TensorStorage>(a, SizeOf(dtype()) * numel())) {}
: meta_(meta), holder_(a->Allocate(SizeOf(dtype()) * numel())) {}
DenseTensor::DenseTensor(Allocator* a, DenseTensorMeta&& meta)
: meta_(std::move(meta)),
storage_(make_intrusive<TensorStorage>(a, SizeOf(dtype()) * numel())) {}
: meta_(std::move(meta)), holder_(a->Allocate(SizeOf(dtype()) * numel())) {}
DenseTensor::DenseTensor(intrusive_ptr<Storage> storage,
DenseTensor::DenseTensor(const std::shared_ptr<pten::Allocation>& holder,
const DenseTensorMeta& meta)
: meta_(meta), storage_(std::move(storage)) {}
DenseTensor::DenseTensor(intrusive_ptr<Storage> storage, DenseTensorMeta&& meta)
: meta_(std::move(meta)), storage_(std::move(storage)) {}
: meta_(meta), holder_(holder) {}
DenseTensor::DenseTensor(const DenseTensor& other) : meta_(other.meta()) {
if (storage_ == nullptr) {
storage_ = make_intrusive<paddle::experimental::SharedStorage>(
paddle::platform::CPUPlace());
}
if (other.storage_ != nullptr && other.storage_->data_shared()) {
storage_->set_data_shared(other.storage_->data_shared());
}
holder_ = other.holder_;
#ifdef PADDLE_WITH_MKLDNN
format_ = other.format_;
......@@ -63,13 +52,7 @@ DenseTensor::DenseTensor(const DenseTensor& other) : meta_(other.meta()) {
DenseTensor& DenseTensor::operator=(const DenseTensor& other) {
meta_ = other.meta();
if (storage_ == nullptr) {
storage_ = make_intrusive<paddle::experimental::SharedStorage>(
paddle::platform::CPUPlace());
}
if (other.storage_ != nullptr && other.storage_->data_shared()) {
storage_->set_data_shared(other.storage_->data_shared());
}
holder_ = other.holder_;
#ifdef PADDLE_WITH_MKLDNN
format_ = other.format_;
#endif
......@@ -78,7 +61,7 @@ DenseTensor& DenseTensor::operator=(const DenseTensor& other) {
DenseTensor& DenseTensor::operator=(DenseTensor&& other) {
meta_ = std::move(other.meta_);
storage_.swap(other.storage_);
std::swap(holder_, other.holder_);
return *this;
}
......@@ -90,59 +73,7 @@ int64_t DenseTensor::numel() const {
}
bool DenseTensor::IsSharedWith(const DenseTensor& b) const {
return storage_.get() == b.storage_.get() && storage_.get() != nullptr;
}
void* DenseTensor::mutable_data(size_t request_bytes) {
PADDLE_ENFORCE(
valid(),
paddle::platform::errors::PreconditionNotMet(
"The meta data must be valid when call the mutable data function."));
PADDLE_ENFORCE_NOT_NULL(
storage_,
paddle::platform::errors::PreconditionNotMet(
"The storage must be valid when call the mutable data function."));
size_t bytes = numel() * SizeOf(dtype());
if (request_bytes) {
PADDLE_ENFORCE_GE(request_bytes,
bytes,
paddle::platform::errors::InvalidArgument(
"The reserved size %d should be enough to meet the "
"volume required by metadata %d.",
request_bytes,
bytes));
bytes = request_bytes;
}
if (!storage_->data() || storage_->size() < bytes + meta_.offset ||
storage_->size() == 0) {
VLOG(10) << "mutbale data realloc, original size: " << storage_->size()
<< ", new size: " << bytes;
storage_->Realloc(bytes);
meta_.offset = 0;
}
return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(storage_->data()) +
meta_.offset);
}
template <typename T>
T* DenseTensor::mutable_data() {
// In order to be compatible with the original Tensor design and
// execution system, we have to reset the datatype in mutable_data<T>.
// When the compatibility phase is over in the future, we can delete it
if (meta_.dtype == DataType::UNDEFINED) {
VLOG(10) << "change data type in mutbale_data, target dtype - "
<< paddle::experimental::CppTypeToDataType<T>::Type();
const_cast<DataType&>(meta_.dtype) =
paddle::experimental::CppTypeToDataType<T>::Type();
}
PADDLE_ENFORCE(
(dtype() == paddle::experimental::CppTypeToDataType<T>::Type()),
paddle::platform::errors::InvalidArgument(
"The type of data (%d) we are trying to retrieve does not match the "
"type of data currently contained in the container (%d).",
static_cast<int>(paddle::experimental::CppTypeToDataType<T>::Type()),
static_cast<int>(dtype())));
return static_cast<T*>(mutable_data());
return holder_ && holder_ == b.Holder();
}
template <typename T>
......@@ -164,29 +95,27 @@ T* DenseTensor::data() {
paddle::platform::errors::InvalidArgument(
"The type of data we are trying to retrieve does not match the "
"type of data currently contained in the container."));
PADDLE_ENFORCE_NOT_NULL(
storage_,
paddle::platform::errors::PreconditionNotMet(
"The storage must be valid when call the mutable data function."));
return reinterpret_cast<T*>(data());
return static_cast<T*>(data());
}
void* DenseTensor::data() {
check_memory_size();
PADDLE_ENFORCE_NOT_NULL(
storage_,
holder_,
paddle::platform::errors::PreconditionNotMet(
"The storage must be valid when call the mutable data function."));
return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(storage_->data()) +
"The storage must be valid when call the data function."));
return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(holder_->ptr()) +
meta_.offset);
}
const void* DenseTensor::data() const {
check_memory_size();
PADDLE_ENFORCE_NOT_NULL(
storage_,
holder_,
paddle::platform::errors::PreconditionNotMet(
"The storage must be valid when call the mutable data function."));
"The storage must be valid when call the data function."));
return reinterpret_cast<const void*>(
reinterpret_cast<uintptr_t>(storage_->data()) + meta_.offset);
reinterpret_cast<uintptr_t>(holder_->ptr()) + meta_.offset);
}
void DenseTensor::set_meta(DenseTensorMeta&& meta) {
......@@ -209,15 +138,14 @@ void DenseTensor::set_meta(DenseTensorMeta&& meta) {
*/
void DenseTensor::ResizeAndAllocate(const DDim& dims) {
meta_.dims = dims;
if (storage_ != nullptr) {
mutable_data();
if (holder_ != nullptr && place().GetType() != AllocationType::UNDEFINED) {
mutable_data(place());
}
}
void DenseTensor::ResetLoD(const LoD& lod) { meta_.lod = lod; }
#define DATA_MEMBER_FUNC_INSTANTIATION(dtype) \
template dtype* DenseTensor::mutable_data(); \
template const dtype* DenseTensor::data() const; \
template dtype* DenseTensor::data();
......@@ -243,68 +171,47 @@ DATA_MEMBER_FUNC_INSTANTIATION(::paddle::experimental::complex128);
/* From framework::Tensor */
/* --------------------------- */
DenseTensor::DenseTensor() {
storage_ = make_intrusive<paddle::experimental::SharedStorage>(
paddle::platform::CPUPlace());
inplace_version_counter_ = std::make_shared<TensorInplaceVersion>(0);
meta_.dtype = paddle::experimental::DataType::FLOAT32;
meta_.offset = 0;
}
DenseTensor::DenseTensor(const paddle::framework::proto::VarType::Type& dtype) {
storage_ = make_intrusive<paddle::experimental::SharedStorage>(
paddle::platform::CPUPlace());
DenseTensor::DenseTensor(paddle::framework::proto::VarType::Type dtype) {
inplace_version_counter_ = std::make_shared<TensorInplaceVersion>(0);
meta_.dtype = TransToPtenDataType(dtype);
meta_.offset = 0;
}
size_t DenseTensor::memory_size() const {
if (storage_ == nullptr || storage_->data_shared() == nullptr) {
return 0UL;
}
return storage_->data_shared()->size() - meta_.offset;
return holder_ == nullptr ? 0UL : holder_->size() - meta_.offset;
}
void DenseTensor::check_memory_size() const {
PADDLE_ENFORCE_NOT_NULL(storage_,
PADDLE_ENFORCE_NOT_NULL(holder_,
paddle::platform::errors::PreconditionNotMet(
"Tensor holds no memory. "
"Call Tensor::mutable_data firstly."));
PADDLE_ENFORCE_NOT_NULL(storage_->data_shared(),
paddle::platform::errors::PreconditionNotMet(
"Tensor holds no memory. "
"Call Tensor::mutable_data firstly."));
size_t size = numel() * SizeOf(dtype());
PADDLE_ENFORCE_LE(
size,
numel() * SizeOf(dtype()),
memory_size(),
paddle::platform::errors::PreconditionNotMet(
"Tensor's dimension is out of bound."
"Tensor's dimension must be equal or less than the size of its "
"memory."
"But received Tensor's dimension is d%, memory's size is %d.",
size,
"But received Tensor's dimension is d%, memory's size is %d.",
numel() * SizeOf(dtype()),
memory_size()));
}
const paddle::platform::Place& DenseTensor::place() const {
PADDLE_ENFORCE_NOT_NULL(
storage_,
holder_,
paddle::platform::errors::PreconditionNotMet(
"Tensor not initialized yet when Tensor::place() is called."));
if (storage_->data_shared()) {
return storage_->data_shared()->place();
}
return storage_->place();
"Tensor not initialized yet when DenseTensor::place() is called."));
return holder_->place();
}
paddle::framework::proto::VarType::Type DenseTensor::type() const {
PADDLE_ENFORCE_NOT_NULL(
storage_,
paddle::platform::errors::PreconditionNotMet(
"Tensor not initialized yet when Tensor::type() is called."));
return TransToProtoVarType(meta_.dtype);
}
......@@ -316,39 +223,31 @@ void DenseTensor::set_layout(const paddle::framework::DataLayout layout) {
meta_.layout = layout;
}
void DenseTensor::ResetHolder(
const std::shared_ptr<paddle::memory::Allocation>& holder) {
void DenseTensor::ResetHolder(const std::shared_ptr<pten::Allocation>& holder) {
PADDLE_ENFORCE_EQ(
meta_.offset,
0,
paddle::platform::errors::Fatal(
"Only the offset is supported to zero when the holder is reset."));
PADDLE_ENFORCE_NOT_NULL(
storage_,
paddle::platform::errors::PreconditionNotMet(
"The storage must be valid when call the mutable data function."));
if (storage_->data_shared()) {
if (holder_) {
PADDLE_ENFORCE_LE(
numel() * SizeOf(dtype()) + meta_.offset,
holder->size(),
paddle::platform::errors::InvalidArgument(
"The size of Holder is not enough to store the Tensor."));
}
storage_->set_data_shared(holder);
holder_ = holder;
}
void DenseTensor::ResetHolderWithType(
const std::shared_ptr<paddle::memory::Allocation>& holder,
const paddle::framework::proto::VarType::Type& type) {
const std::shared_ptr<pten::Allocation>& holder,
paddle::framework::proto::VarType::Type type) {
set_type(type);
ResetHolder(holder);
}
void DenseTensor::set_type(
const paddle::framework::proto::VarType::Type& type) {
void DenseTensor::set_type(paddle::framework::proto::VarType::Type type) {
meta_.dtype = TransToPtenDataType(type);
}
......@@ -369,19 +268,14 @@ void* DenseTensor::mutable_data(const paddle::platform::Place& place,
size = requested_size;
}
if (storage_ == nullptr) {
storage_ = make_intrusive<paddle::experimental::SharedStorage>(place);
}
/* some versions of boost::variant don't have operator!= */
if (storage_->data_shared() == nullptr ||
!(storage_->data_shared()->place() == place) ||
storage_->data_shared()->size() < size + meta_.offset) {
storage_->Clear();
storage_->set_data_shared(paddle::memory::AllocShared(place, size));
if (holder_ == nullptr || !(holder_->place() == place) ||
holder_->size() < size + meta_.offset) {
holder_.reset();
holder_ = paddle::memory::AllocShared(place, size);
meta_.offset = 0;
}
return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(storage_->data()) +
return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(holder_->ptr()) +
meta_.offset);
}
......@@ -404,21 +298,16 @@ void* DenseTensor::mutable_data(const paddle::platform::Place& place,
"] now"));
size_t size = numel() * SizeOf(dtype());
if (storage_ == nullptr) {
storage_ = make_intrusive<paddle::experimental::SharedStorage>(place);
}
/* some versions of boost::variant don't have operator!= */
if (storage_->data_shared() == nullptr ||
!(storage_->data_shared()->place() == place) ||
storage_->data_shared()->size() < size + meta_.offset ||
if (holder_ == nullptr || !(holder_->place() == place) ||
holder_->size() < size + meta_.offset ||
!(paddle::platform::is_gpu_place(place) &&
paddle::memory::InSameStream(storage_->data_shared(), stream))) {
storage_->Clear();
storage_->set_data_shared(paddle::memory::AllocShared(place, size, stream));
paddle::memory::InSameStream(holder_, stream))) {
holder_.reset();
holder_ = paddle::memory::AllocShared(place, size, stream);
meta_.offset = 0;
}
return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(storage_->data()) +
return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(holder_->ptr()) +
meta_.offset);
}
......@@ -445,14 +334,9 @@ inline T* DenseTensor::mutable_data(const paddle::platform::Place& place,
}
void DenseTensor::ShareBufferWith(const DenseTensor& tensor) {
if (storage_ == nullptr) {
storage_ = make_intrusive<paddle::experimental::SharedStorage>(
paddle::platform::CPUPlace());
}
if (storage_ != nullptr && tensor.storage_ != nullptr) {
storage_->set_data_shared(tensor.storage_->data_shared());
}
holder_ = tensor.holder_;
meta_.offset = tensor.meta().offset;
meta_.dtype = tensor.dtype();
}
#define LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(dtype) \
......@@ -467,7 +351,7 @@ LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(bool)
LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(int8_t)
LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(uint8_t)
LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(int16_t)
LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(int)
LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(int32_t)
LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(int64_t)
LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(float)
LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(double)
......@@ -482,6 +366,13 @@ LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(::paddle::experimental::complex128)
/* From framework::LoDTensor */
/* ------------------------------ */
DenseTensor::DenseTensor(intrusive_ptr<Storage> storage,
const DenseTensorMeta& meta)
: meta_(meta), holder_(storage->move_data_shared()) {}
DenseTensor::DenseTensor(intrusive_ptr<Storage> storage, DenseTensorMeta&& meta)
: meta_(std::move(meta)), holder_(storage->move_data_shared()) {}
DenseTensor::DenseTensor(const LoD& lod) : DenseTensor() { meta_.lod = lod; }
void DenseTensor::set_lod(const LoD& lod) { meta_.lod = lod; }
......@@ -559,9 +450,8 @@ DenseTensor DenseTensor::Slice(int64_t begin_idx, int64_t end_idx) const {
} else {
size_t base = numel() / meta_.dims[0];
DenseTensor dst;
dst.storage_ = pten::make_intrusive<paddle::experimental::SharedStorage>(
storage_->data_shared());
dst.meta_.layout = meta_.layout;
dst.holder_ = holder_;
dst.set_layout(meta_.layout);
dst.meta_.dtype = meta_.dtype;
DDim dst_dims = meta_.dims;
dst_dims[0] = end_idx - begin_idx;
......
......@@ -70,17 +70,8 @@ class DenseTensor : public TensorBase,
/// \param meta The meta data of dense tensor.
DenseTensor(Allocator* a, DenseTensorMeta&& meta);
/// \brief Use existing storage space to create dense tensor. This interface
/// can be used to deliberately create an uninitialized dense tensor.
/// \param storage The existing storage.
/// \param meta The meta data of dense tensor.
DenseTensor(intrusive_ptr<Storage> storage, const DenseTensorMeta& meta);
/// \brief Use existing storage space to create dense tensor. This interface
/// can be used to deliberately create an uninitialized dense tensor.
/// \param storage The existing storage.
/// \param meta The meta data of dense tensor.
DenseTensor(intrusive_ptr<Storage> storage, DenseTensorMeta&& meta);
DenseTensor(const std::shared_ptr<pten::Allocation>& holder,
const DenseTensorMeta& meta);
/// \brief Because dense tensor is a kind of container, we give a default
/// constructor to use for stl container. But the dense tensor created with
......@@ -146,9 +137,7 @@ class DenseTensor : public TensorBase,
/// \brief Test whether the storage is allocated.
/// return Whether the storage is allocated.
bool initialized() const override {
return storage_ != nullptr && storage_->data() != nullptr;
}
bool initialized() const override { return holder_ && holder_->ptr(); }
/// \brief Check if storage is shared with other objects.
/// \return Whether the storage is shared with other objects.
......@@ -170,25 +159,7 @@ class DenseTensor : public TensorBase,
/// \brief Returns the actual storage size occupied by tensor, may be larger
/// than its shape dims.
/// \return The actual storage size occupied by tensor.
size_t capacity() const { return storage_->size(); }
/// \brief Get the mutable data pointer value of type T.
/// Memory allocation may occur when calling this interface:
/// 1. When the storage size is not enough to meet the current shape of the
/// data.
/// \return The mutable data pointer value of type T.
template <typename T>
T* mutable_data();
/// \brief Get the mutable data pointer value of raw type.
/// Memory allocation may occur when calling this interface:
/// 1. When the storage size is not enough to meet the current shape of the
/// data.
/// 2. When more request_bytes parameters are used to reserve the data
/// storage.
/// param request_bytes The bytes to reserve the data storage.
/// \return The mutable data pointer value of type T.
void* mutable_data(size_t request_bytes = 0);
size_t capacity() const { return holder_->size(); }
/// \brief Get the const data pointer value of type T.
/// \return The const data pointer value of type T.
......@@ -204,7 +175,7 @@ class DenseTensor : public TensorBase,
protected:
DenseTensorMeta meta_;
intrusive_ptr<Storage> storage_;
std::shared_ptr<pten::Allocation> holder_;
/* --------------------------- */
/* From framework::Tensor */
......@@ -223,11 +194,21 @@ class DenseTensor : public TensorBase,
/* @jim19930609: Remove dependency on protobuf after Tensor Unification.
*/
explicit DenseTensor(const paddle::framework::proto::VarType::Type& dtype);
explicit DenseTensor(paddle::framework::proto::VarType::Type dtype);
inline bool IsInitialized() const {
return storage_ != nullptr && storage_->data_shared() != nullptr;
}
/// \brief Use existing storage space to create dense tensor. This interface
/// can be used to deliberately create an uninitialized dense tensor.
/// \param storage The existing storage.
/// \param meta The meta data of dense tensor.
DenseTensor(intrusive_ptr<Storage> storage, const DenseTensorMeta& meta);
/// \brief Use existing storage space to create dense tensor. This interface
/// can be used to deliberately create an uninitialized dense tensor.
/// \param storage The existing storage.
/// \param meta The meta data of dense tensor.
DenseTensor(intrusive_ptr<Storage> storage, DenseTensorMeta&& meta);
inline bool IsInitialized() const { return holder_ != nullptr; }
template <typename T>
T* data();
......@@ -270,7 +251,7 @@ class DenseTensor : public TensorBase,
void set_layout(const paddle::framework::DataLayout layout);
void clear() {
storage_.reset();
holder_.reset();
meta_.offset = 0;
}
......@@ -281,31 +262,24 @@ class DenseTensor : public TensorBase,
}
bool IsSharedBufferWith(const DenseTensor& src) const {
if (storage_ == nullptr || src.storage_ == nullptr) return false;
if (storage_->data_shared() == src.storage_->data_shared()) return true;
return false;
return holder_ && holder_ == src.Holder();
}
const std::shared_ptr<paddle::memory::Allocation> Holder() const {
return storage_ == nullptr ? nullptr : std::move(storage_->data_shared());
}
const std::shared_ptr<pten::Allocation>& Holder() const { return holder_; }
void set_offset(size_t offset) { meta_.offset = offset; }
size_t offset() const { return meta_.offset; }
std::shared_ptr<paddle::memory::Allocation> MoveMemoryHolder() {
return storage_ == nullptr ? nullptr
: std::move(storage_->move_data_shared());
std::shared_ptr<pten::Allocation> MoveMemoryHolder() {
return std::move(holder_);
}
void ResetHolder(const std::shared_ptr<paddle::memory::Allocation>& holder);
void ResetHolder(const std::shared_ptr<pten::Allocation>& holder);
void ResetHolderWithType(
const std::shared_ptr<paddle::memory::Allocation>& holder,
const paddle::framework::proto::VarType::Type& type);
void ResetHolderWithType(const std::shared_ptr<pten::Allocation>& holder,
paddle::framework::proto::VarType::Type type);
void set_type(const paddle::framework::proto::VarType::Type& type);
void set_type(paddle::framework::proto::VarType::Type type);
TensorInplaceVersion& InplaceVersionCounter() {
return *inplace_version_counter_;
......
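With storage_ removed from the header above, buffer sharing reduces to sharing the std::shared_ptr&lt;pten::Allocation&gt;. A short sketch of the resulting semantics (illustrative only; it assumes both tensors already carry compatible meta):

#include "paddle/pten/core/dense_tensor.h"

bool AliasBuffer(const pten::DenseTensor& src, pten::DenseTensor* dst) {
  // ShareBufferWith copies the holder plus the dtype/offset bookkeeping,
  // so both tensors point at the same pten::Allocation afterwards.
  dst->ShareBufferWith(src);
  // IsSharedBufferWith now simply compares the shared holders.
  return dst->IsSharedBufferWith(src) && dst->Holder() == src.Holder();
}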
......@@ -19,7 +19,7 @@ limitations under the License. */
// TODO(wilber): Do we need to use place in pten kernel?
#include "paddle/pten/common/place.h"
#include "paddle/pten/core/candidate/allocator.h"
#include "paddle/pten/core/allocator.h"
namespace pten {
class TensorBase;
......
......@@ -56,18 +56,14 @@ class Storage : public intrusive_ref_counter<Storage> {
: nullptr;
}
const std::shared_ptr<paddle::memory::Allocation> data_shared() const {
const std::shared_ptr<paddle::memory::Allocation>& data_shared() const {
return data_;
}
virtual void set_data_shared(
const std::shared_ptr<paddle::memory::Allocation>& holder) {
data_ = holder;
}
const std::shared_ptr<paddle::memory::Allocation>& holder) = 0;
std::shared_ptr<paddle::memory::Allocation> move_data_shared() {
return std::move(data_);
}
virtual std::shared_ptr<paddle::memory::Allocation>&& move_data_shared() = 0;
virtual void ReallocShared(size_t n) {
PADDLE_THROW(paddle::platform::errors::Unimplemented(
......@@ -123,6 +119,18 @@ class TensorStorage : public Storage {
bool OwnsMemory() const noexcept override { return true; }
void set_data_shared(
const std::shared_ptr<paddle::memory::Allocation>& holder) override {
CHECK(holder);
data_ = holder;
size_ = holder->size();
}
std::shared_ptr<paddle::memory::Allocation>&& move_data_shared() override {
size_ = 0;
return std::move(data_);
}
private:
Allocator* alloc_;
int64_t size_{0};
......
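The set_data_shared / move_data_shared pair above is the hand-off contract the DenseTensor compat constructors rely on: a Storage adopts an existing allocation and later surrenders it, after which it reports size 0. A hedged sketch (include paths and the AllocShared call are assumptions based on how they are used elsewhere in this diff):

#include "paddle/pten/core/storage.h"
#include "paddle/fluid/memory/malloc.h"
#include "paddle/fluid/platform/place.h"

void HandOffBuffer(pten::Storage* storage) {
  auto buf = paddle::memory::AllocShared(paddle::platform::CPUPlace(), 64);
  storage->set_data_shared(buf);              // storage now reports buf->size()
  auto holder = storage->move_data_shared();  // ownership leaves the storage
  // After the move the storage reports size 0; `holder` is what the
  // DenseTensor(intrusive_ptr<Storage>, meta) compat constructors feed
  // into holder_.
}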
......@@ -36,7 +36,7 @@ void CastKernelImpl(const CPUContext& dev_ctx,
auto numel = x.numel();
auto* in_end = in_begin + numel;
auto* out_begin = out->mutable_data<OutT>();
auto* out_begin = out->mutable_data<OutT>(dev_ctx.GetPlace());
paddle::platform::Transform<CPUContext> trans;
trans(dev_ctx,
......
......@@ -32,17 +32,16 @@ void Copy(const Context& dev_ctx,
DenseTensor* dst) {
auto* src_ptr = src.data();
const auto& src_place = src.place();
const auto& dst_place = dst->place();
VLOG(3) << "TensorCopy " << src.dims() << " from " << src.place() << " to "
<< dst_place;
<< src_place;
dst->ResizeAndAllocate(src.dims());
auto* dst_ptr = dst->mutable_data();
dst->Resize(src.dims());
auto* dst_ptr = dst->mutable_data(src_place);
if (src_ptr == dst_ptr && src_place == dst_place) {
if (src_ptr == dst_ptr) {
VLOG(3) << "Skip copy the same data async from " << src_place << " to "
<< dst_place;
<< src_place;
return;
}
VLOG(4) << "src:" << src_ptr << ", dst:" << dst_ptr;
......@@ -51,9 +50,8 @@ void Copy(const Context& dev_ctx,
auto size = src.numel() *
paddle::framework::SizeOfType(TransToProtoVarType(src.dtype()));
if (paddle::platform::is_cpu_place(src_place) &&
paddle::platform::is_cpu_place(dst_place)) {
paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size);
if (paddle::platform::is_cpu_place(src_place)) {
paddle::memory::Copy(src_place, dst_ptr, src_place, src_ptr, size);
}
}
......
......@@ -29,7 +29,7 @@ void DotKernel(const Context& dev_ctx,
DenseTensor* out) {
auto const *x_ptr = x.data<T>(), *x_ptr_ = &x_ptr[0];
auto const *y_ptr = y.data<T>(), *y_ptr_ = &y_ptr[0];
auto* z = out->mutable_data<T>();
auto* z = out->mutable_data<T>(dev_ctx.GetPlace());
// Loop over the total N elements of both operands while sum-reducing every
// B pairs along the way where B is the dimension of the least ordered axis
......
......@@ -45,7 +45,10 @@ struct SameDimsAddFunctor<
const DenseTensor& y,
DenseTensor* z) {
auto blas = paddle::operators::math::GetBlas<DevCtx, T>(dev_ctx);
blas.VADD(x.numel(), x.data<T>(), y.data<T>(), z->mutable_data<T>());
blas.VADD(x.numel(),
x.data<T>(),
y.data<T>(),
z->mutable_data<T>(dev_ctx.GetPlace()));
}
};
......@@ -58,7 +61,7 @@ struct SameDimsAddFunctor<
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* z) {
z->mutable_data<T>();
z->mutable_data<T>(dev_ctx.GetPlace());
auto eigen_x = pten::EigenVector<T>::Flatten(x);
auto eigen_y = pten::EigenVector<T>::Flatten(y);
auto eigen_z = pten::EigenVector<T>::Flatten(*z);
......@@ -86,7 +89,10 @@ struct SameDimsSubtractFunctor<
const DenseTensor& y,
DenseTensor* z) {
auto blas = paddle::operators::math::GetBlas<DevCtx, T>(dev_ctx);
blas.VSUB(x.numel(), x.data<T>(), y.data<T>(), z->mutable_data<T>());
blas.VSUB(x.numel(),
x.data<T>(),
y.data<T>(),
z->mutable_data<T>(dev_ctx.GetPlace()));
}
};
......@@ -141,7 +147,10 @@ struct SameDimsDivideFunctor<
const DenseTensor& y,
DenseTensor* z) {
auto blas = paddle::operators::math::GetBlas<DevCtx, T>(dev_ctx);
blas.VDIV(x.numel(), x.data<T>(), y.data<T>(), z->mutable_data<T>());
blas.VDIV(x.numel(),
x.data<T>(),
y.data<T>(),
z->mutable_data<T>(dev_ctx.GetPlace()));
}
};
......@@ -164,7 +173,10 @@ struct SameDimsMultiplyFunctor<
const DenseTensor& y,
DenseTensor* z) {
auto blas = paddle::operators::math::GetBlas<DevCtx, T>(dev_ctx);
blas.VMUL(x.numel(), x.data<T>(), y.data<T>(), z->mutable_data<T>());
blas.VMUL(x.numel(),
x.data<T>(),
y.data<T>(),
z->mutable_data<T>(dev_ctx.GetPlace()));
}
};
......@@ -280,7 +292,7 @@ void CommonForwardBroadcastCPU(const DenseTensor& x,
PADDLE_ENFORCE_NOT_NULL(y_data,
paddle::platform::errors::InvalidArgument(
"The input Y should not be empty."));
OutType* out_data = z->mutable_data<OutType>();
OutType* out_data = z->mutable_data<OutType>(ctx.GetPlace());
const int out_size = std::accumulate(
out_dims_array, out_dims_array + max_dim, 1, std::multiplies<int>());
......@@ -361,7 +373,7 @@ void ElementwiseCompute(const CPUContext& dev_ctx,
int axis,
Functor func,
DenseTensor* z) {
z->mutable_data<OutType>();
z->mutable_data<OutType>(dev_ctx.GetPlace());
auto x_dims = x.dims();
auto y_dims = y.dims();
bool is_xsize_larger = true;
......
......@@ -37,7 +37,7 @@ namespace pten {
const DenseTensor& y, \
int axis, \
DenseTensor* out) { \
out->mutable_data<T>(); \
out->mutable_data<T>(dev_ctx.GetPlace()); \
if (x.dims() == y.dims()) { \
SameDimsElementwiseCompute<SameDims##name##Functor<CPUContext, T>>()( \
dev_ctx, x, y, out); \
......@@ -85,7 +85,7 @@ void DivideRawKernel(const Context& dev_ctx,
int axis,
DenseTensor* out) {
// allocate memory for out
out->mutable_data<T>();
out->mutable_data<T>(dev_ctx.GetPlace());
if (x.dims() == y.dims() && std::is_floating_point<T>::value) {
SameDimsElementwiseCompute<SameDimsDivideFunctor<CPUContext, T>>()(
dev_ctx, x, y, out);
......
......@@ -119,7 +119,7 @@ void GetShuffledInput(const DeviceContext& dev_ctx,
GetShuffledDim(input.dims(), &shuffled_dims, dims, &perm_axis);
shuffled_input->ResizeAndAllocate(shuffled_dims);
shuffled_input->mutable_data<OutT>();
shuffled_input->mutable_data<OutT>(dev_ctx.GetPlace());
pten::math::TransposeNormal<DeviceContext, OutT> trans;
trans(dev_ctx, input, shuffled_input, perm_axis);
......@@ -158,7 +158,7 @@ void ReduceKernelImpl(const DeviceContext& dev_ctx,
const std::vector<int64_t>& dims,
bool keep_dim,
bool reduce_all) {
output->mutable_data<OutT>();
output->mutable_data<OutT>(dev_ctx.GetPlace());
if (reduce_all) {
// Flatten and reduce 1-D tensor
......
......@@ -33,7 +33,7 @@ void ScaleKernel(const Context& dev_ctx,
bool bias_after_scale,
DenseTensor* out) {
// calc
out->mutable_data<T>();
out->mutable_data<T>(dev_ctx.GetPlace());
auto eigen_out = pten::EigenVector<T>::Flatten(*out);
auto eigen_x = pten::EigenVector<T>::Flatten(x);
auto& dev = *dev_ctx.eigen_device();
......
......@@ -29,7 +29,7 @@ void EmptyKernel(const Context& dev_ctx,
template <typename T, typename Context>
void EmptyLikeKernel(const Context& dev_ctx, DenseTensor* out) {
out->mutable_data<T>();
out->mutable_data<T>(dev_ctx.GetPlace());
}
} // namespace pten
......
......@@ -227,7 +227,7 @@ class TransformFunctor {
const bool is_xsize_larger = true)
: x_(x.data<T>()),
y_(y.data<T>()),
z_(z->mutable_data<OutType>()),
z_(z->mutable_data<OutType>(ctx.GetPlace())),
nx_(x.numel()),
ctx_(ctx),
func_(func),
......@@ -585,7 +585,7 @@ void ElementwiseCudaKernel(const KPDevice &ctx,
ins_data[i] = ins[i]->data<InT>();
}
for (int i = 0; i < NumOuts; ++i) {
outs_data[i] = (*outs)[i]->mutable_data<OutT>();
outs_data[i] = (*outs)[i]->mutable_data<OutT>(ctx.GetPlace());
}
#ifdef PADDLE_WITH_XPU2
int block_size = 64;
......
......@@ -36,7 +36,7 @@ struct TransposeNormal<CPUContext, T> {
auto in_stride = pten::framework::stride(in.dims());
auto out_stride = pten::framework::stride(out->dims());
const T* in_ptr = in.data<T>();
T* out_ptr = out->mutable_data<T>();
T* out_ptr = out->mutable_data<T>(dev_ctx.GetPlace());
auto transpose_helper = [&](int64_t beg, int64_t end) {
for (int64_t out_idx = beg; out_idx < end; ++out_idx) {
......@@ -63,11 +63,8 @@ DEFINE_CPU_TRANS_NORMAL(bool);
DEFINE_CPU_TRANS_NORMAL(int8_t);
DEFINE_CPU_TRANS_NORMAL(uint8_t);
DEFINE_CPU_TRANS_NORMAL(int16_t);
DEFINE_CPU_TRANS_NORMAL(uint16_t);
DEFINE_CPU_TRANS_NORMAL(int32_t);
DEFINE_CPU_TRANS_NORMAL(uint32_t);
DEFINE_CPU_TRANS_NORMAL(int64_t);
DEFINE_CPU_TRANS_NORMAL(uint64_t);
DEFINE_CPU_TRANS_NORMAL(float);
DEFINE_CPU_TRANS_NORMAL(double);
DEFINE_CPU_TRANS_NORMAL(paddle::platform::float16);
......
......@@ -61,7 +61,7 @@ struct TransposeNormal<GPUContext, T> {
auto in_stride = pten::framework::stride(in.dims());
auto out_stride = pten::framework::stride(out->dims());
auto* in_ptr = in.data<T>();
auto* out_ptr = out->mutable_data<T>();
auto* out_ptr = out->mutable_data<T>(dev_ctx.GetPlace());
// copy in_stride, out_stride, axis to gpu device
const paddle::platform::CUDAPlace& cuda_place = dev_ctx.GetPlace();
......@@ -110,11 +110,8 @@ DEFINE_GPU_TRANS_NORMAL(bool);
DEFINE_GPU_TRANS_NORMAL(int8_t);
DEFINE_GPU_TRANS_NORMAL(uint8_t);
DEFINE_GPU_TRANS_NORMAL(int16_t);
DEFINE_GPU_TRANS_NORMAL(uint16_t);
DEFINE_GPU_TRANS_NORMAL(int32_t);
DEFINE_GPU_TRANS_NORMAL(uint32_t);
DEFINE_GPU_TRANS_NORMAL(int64_t);
DEFINE_GPU_TRANS_NORMAL(uint64_t);
DEFINE_GPU_TRANS_NORMAL(float);
DEFINE_GPU_TRANS_NORMAL(double);
DEFINE_GPU_TRANS_NORMAL(paddle::platform::float16);
......
......@@ -43,7 +43,7 @@ void CastCUDAKernelImpl(const GPUContext& dev_ctx,
std::vector<DenseTensor*> outputs;
inputs.emplace_back(&x);
outputs.emplace_back(out);
out->mutable_data<OutT>();
out->mutable_data<OutT>(dev_ctx.GetPlace());
pten::funcs::LaunchSameDimsElementwiseCudaKernel<ElementwiseType::kUnary,
InT,
OutT>(
......
......@@ -43,7 +43,7 @@ void Copy(const Context& dev_ctx,
<< dst_place;
dst->ResizeAndAllocate(src.dims());
auto* dst_ptr = dst->mutable_data();
auto* dst_ptr = dst->mutable_data(dst_place);
if (src_ptr == dst_ptr && src_place == dst_place) {
VLOG(3) << "Skip copy the same data async from " << src_place << " to "
......
......@@ -29,7 +29,7 @@ void DotKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out) {
out->mutable_data<T>();
out->mutable_data<T>(dev_ctx.GetPlace());
if (1 == out->dims().size()) {
auto eigen_out = pten::EigenScalar<T>::From(*out);
auto eigen_x = pten::EigenVector<T>::Flatten(x);
......
......@@ -350,7 +350,7 @@ void LaunchKernel(const KPDevice &ctx,
pten::framework::Array<_ptr_ OutT *, NumOuts> outs_data;
for (int i = 0; i < NumOuts; ++i) {
outs_data[i] = (*outs)[i]->mutable_data<OutT>();
outs_data[i] = (*outs)[i]->mutable_data<OutT>(ctx.GetPlace());
}
for (int i = 0; i < Arity; i++) {
......
......@@ -47,7 +47,7 @@ namespace pten {
inputs.emplace_back(&x); \
inputs.emplace_back(&y); \
outputs.emplace_back(out); \
out->mutable_data<T>(); \
out->mutable_data<T>(dev_ctx.GetPlace()); \
LaunchElementwiseCudaKernel<ElementwiseType::kBinary, T, T>( \
dev_ctx, inputs, &outputs, axis, funcs::name##Functor<T>()); \
}
......
......@@ -328,7 +328,7 @@ struct ReduceConfig {
if (should_reduce_again) {
tmp->ResizeAndAllocate(pten::framework::make_ddim(
{static_cast<int64_t>(left_num * grid.z * grid.y * sizeof(Ty))}));
output_data = tmp->mutable_data<Ty>();
output_data = tmp->mutable_data<Ty>(place);
} else {
output_data = y_data;
}
......@@ -1032,7 +1032,7 @@ static
pten::framework::make_ddim(
{static_cast<int64_t>(temp_storage_bytes)})));
auto* temp_storage = tmp.mutable_data<uint8_t>();
auto* temp_storage = tmp.mutable_data<uint8_t>(place);
cub::DeviceReduce::Reduce(temp_storage,
temp_storage_bytes,
......@@ -1070,8 +1070,7 @@ void TensorReduceFunctorImpl(const pten::DenseTensor& x,
const TransformOp& transform,
const std::vector<int>& origin_reduce_dims,
gpuStream_t stream) {
// Allocate memory
y->mutable_data<Ty>();
y->mutable_data<Ty>(x.place());
auto x_dim = pten::framework::vectorize<int>(x.dims());
auto config = ReduceConfig<Ty>(origin_reduce_dims, x_dim);
......@@ -1088,7 +1087,7 @@ void TensorReduceFunctorImpl(const pten::DenseTensor& x,
pten::DenseTensorMeta(y->dtype(), tmp_ddim, y->layout()));
auto x_data = x.data<Tx>();
auto y_data = y->mutable_data<Ty>();
auto y_data = y->data<Ty>();
auto* dev_ctx = static_cast<paddle::platform::CUDADeviceContext*>(
paddle::platform::DeviceContextPool::Instance().Get(x.place()));
......
......@@ -54,7 +54,7 @@ void ScaleKernel(const Context& dev_ctx,
std::vector<DenseTensor*> outputs;
inputs.emplace_back(&x);
outputs.emplace_back(out);
out->mutable_data<T>();
out->mutable_data<T>(dev_ctx.GetPlace());
pten::funcs::LaunchSameDimsElementwiseCudaKernel<ElementwiseType::kUnary,
T,
T>(
......
......@@ -26,7 +26,7 @@ void ConjKernel(const Context& dev_ctx,
DenseTensor* out) {
auto numel = x.numel();
auto* x_data = x.data<T>();
auto* out_data = out->mutable_data<T>();
auto* out_data = out->mutable_data<T>(dev_ctx.GetPlace());
paddle::platform::ForRange<Context> for_range(dev_ctx, numel);
paddle::operators::math::ConjFunctor<T> functor(x_data, numel, out_data);
......
......@@ -73,7 +73,7 @@ struct DotGradFunction<DeviceContext,
auto dout = EigenMatrix<T>::From(*tensor_dout);
if (tensor_dx) {
tensor_dx->mutable_data<T>();
tensor_dx->mutable_data<T>(ctx.GetPlace());
auto y = EigenMatrix<T>::From(*tensor_y);
auto& dev = *ctx.eigen_device();
Eigen::DSizes<int, 2> size(1, tensor_dx->dims()[1]);
......@@ -85,7 +85,7 @@ struct DotGradFunction<DeviceContext,
}
if (tensor_dy) {
tensor_dy->mutable_data<T>();
tensor_dy->mutable_data<T>(ctx.GetPlace());
auto x = EigenMatrix<T>::From(*tensor_x);
auto& dev = *ctx.eigen_device();
Eigen::DSizes<int, 2> size(1, tensor_dy->dims()[1]);
......@@ -100,7 +100,7 @@ struct DotGradFunction<DeviceContext,
const auto* data_dout = tensor_dout->data<T>();
if (tensor_dx) {
auto* data_dx = tensor_dx->mutable_data<T>();
auto* data_dx = tensor_dx->mutable_data<T>(ctx.GetPlace());
const auto* data_y = tensor_y->data<T>();
const DDim& dim = tensor_x->dims();
size_t N = static_cast<size_t>(pten::framework::product(dim));
......@@ -115,7 +115,7 @@ struct DotGradFunction<DeviceContext,
}
if (tensor_dy) {
auto* data_dy = tensor_dy->mutable_data<T>();
auto* data_dy = tensor_dy->mutable_data<T>(ctx.GetPlace());
const auto* data_x = tensor_x->data<T>();
const DDim& dim = tensor_y->dims();
size_t N = static_cast<size_t>(pten::framework::product(dim));
......@@ -164,7 +164,7 @@ struct DotGradFunction<DeviceContext,
auto dout = EigenMatrix<T>::From(*tensor_dout);
if (tensor_dx) {
tensor_dx->mutable_data<T>();
tensor_dx->mutable_data<T>(ctx.GetPlace());
auto y = EigenMatrix<T>::From(*tensor_y);
auto dx = EigenMatrix<T>::From(*tensor_dx);
auto& dev = *ctx.eigen_device();
......@@ -173,7 +173,7 @@ struct DotGradFunction<DeviceContext,
}
if (tensor_dy) {
tensor_dy->mutable_data<T>();
tensor_dy->mutable_data<T>(ctx.GetPlace());
auto x = EigenMatrix<T>::From(*tensor_x);
auto dy = EigenMatrix<T>::From(*tensor_dy);
auto& dev = *ctx.eigen_device();
......@@ -189,7 +189,7 @@ struct DotGradFunction<DeviceContext,
auto const B = d[d.size() - 1];
if (tensor_dx) {
auto* dx = tensor_dx->mutable_data<T>();
auto* dx = tensor_dx->mutable_data<T>(ctx.GetPlace());
for (auto j = 0; j < N / B; ++j) {
auto const ss = dz[j];
for (auto i = 0; i < B; ++i) *dx++ = *y++ * ss;
......@@ -197,7 +197,7 @@ struct DotGradFunction<DeviceContext,
}
if (tensor_dy) {
auto* dy = tensor_dy->mutable_data<T>();
auto* dy = tensor_dy->mutable_data<T>(ctx.GetPlace());
for (auto j = 0; j < N / B; ++j) {
auto const ss = dz[j];
for (auto i = 0; i < B; i++) *dy++ = *x++ * ss;
......@@ -272,7 +272,7 @@ struct DotDoubleGradFunction<DeviceContext,
const auto* data_dout = tensor_dout->data<T>();
if (tensor_dx) {
auto* data_dx = tensor_dx->mutable_data<T>();
auto* data_dx = tensor_dx->mutable_data<T>(ctx.GetPlace());
const auto* data_ddy = tensor_ddy->data<T>();
const DDim& dim = tensor_dx->dims();
size_t N = static_cast<size_t>(product(dim));
......@@ -287,7 +287,7 @@ struct DotDoubleGradFunction<DeviceContext,
}
if (tensor_dy) {
auto* data_dy = tensor_dy->mutable_data<T>();
auto* data_dy = tensor_dy->mutable_data<T>(ctx.GetPlace());
const auto* data_ddx = tensor_ddx->data<T>();
const DDim& dim = tensor_dy->dims();
size_t N = static_cast<size_t>(product(dim));
......@@ -302,7 +302,7 @@ struct DotDoubleGradFunction<DeviceContext,
}
if (tensor_ddout) {
auto* data_ddout = tensor_ddout->mutable_data<T>();
auto* data_ddout = tensor_ddout->mutable_data<T>(ctx.GetPlace());
auto* data_x = tensor_x->data<T>();
auto* data_y = tensor_y->data<T>();
auto* data_ddx = tensor_ddx->data<T>();
......@@ -351,7 +351,7 @@ struct DotDoubleGradFunction<DeviceContext,
auto& dev = *ctx.eigen_device();
auto dout = EigenVector<T>::Flatten(*tensor_dout);
if (tensor_dx) {
tensor_dx->mutable_data<T>();
tensor_dx->mutable_data<T>(ctx.GetPlace());
auto ddy = EigenVector<T>::Flatten(*tensor_ddy);
Eigen::DSizes<int, 1> size(tensor_ddy->numel());
auto dx = EigenVector<T>::Flatten(*tensor_dx);
......@@ -359,7 +359,7 @@ struct DotDoubleGradFunction<DeviceContext,
}
if (tensor_dy) {
tensor_dy->mutable_data<T>();
tensor_dy->mutable_data<T>(ctx.GetPlace());
auto ddx = EigenVector<T>::Flatten(*tensor_ddx);
Eigen::DSizes<int, 1> size(tensor_ddx->numel());
......@@ -368,7 +368,7 @@ struct DotDoubleGradFunction<DeviceContext,
}
if (tensor_ddout) {
tensor_ddout->mutable_data<T>();
tensor_ddout->mutable_data<T>(ctx.GetPlace());
auto x = EigenVector<T>::Flatten(*tensor_x);
auto y = EigenVector<T>::Flatten(*tensor_y);
auto ddx = EigenVector<T>::Flatten(*tensor_ddx);
......@@ -381,7 +381,7 @@ struct DotDoubleGradFunction<DeviceContext,
const auto* data_dout = tensor_dout->data<T>();
if (tensor_dx) {
auto* data_dx = tensor_dx->mutable_data<T>();
auto* data_dx = tensor_dx->mutable_data<T>(ctx.GetPlace());
const auto* data_ddy = tensor_ddy->data<T>();
const DDim& dim = tensor_dx->dims();
size_t N = static_cast<size_t>(product(dim));
......@@ -396,7 +396,7 @@ struct DotDoubleGradFunction<DeviceContext,
}
if (tensor_dy) {
auto* data_dy = tensor_dy->mutable_data<T>();
auto* data_dy = tensor_dy->mutable_data<T>(ctx.GetPlace());
const auto* data_ddx = tensor_ddx->data<T>();
const DDim& dim = tensor_dy->dims();
size_t N = static_cast<size_t>(product(dim));
......@@ -411,7 +411,7 @@ struct DotDoubleGradFunction<DeviceContext,
}
if (tensor_ddout) {
auto* data_ddout = tensor_ddout->mutable_data<T>();
auto* data_ddout = tensor_ddout->mutable_data<T>(ctx.GetPlace());
auto* data_x = tensor_x->data<T>();
auto* data_y = tensor_y->data<T>();
auto* data_ddx = tensor_ddx->data<T>();
......@@ -552,7 +552,7 @@ struct DotTripleGradFunction<DeviceContext,
const auto* data_d_ddout = in_tensor_d_ddout->data<T>();
if (out_tensor_d_x) {
auto* data_d_x = out_tensor_d_x->mutable_data<T>();
auto* data_d_x = out_tensor_d_x->mutable_data<T>(ctx.GetPlace());
const auto* data_ddy = in_tensor_ddy->data<T>();
const DDim& dim = out_tensor_d_x->dims();
......@@ -567,7 +567,7 @@ struct DotTripleGradFunction<DeviceContext,
}
if (out_tensor_d_y) {
auto* data_d_y = out_tensor_d_y->mutable_data<T>();
auto* data_d_y = out_tensor_d_y->mutable_data<T>(ctx.GetPlace());
const auto* data_ddx = in_tensor_ddx->data<T>();
const DDim& dim = out_tensor_d_y->dims();
......@@ -582,7 +582,7 @@ struct DotTripleGradFunction<DeviceContext,
}
if (out_tensor_d_dout) {
auto* data_d_dout = out_tensor_d_dout->mutable_data<T>();
auto* data_d_dout = out_tensor_d_dout->mutable_data<T>(ctx.GetPlace());
auto* data_ddx = in_tensor_ddx->data<T>();
auto* data_ddy = in_tensor_ddy->data<T>();
auto* data_d_dx = in_tensor_d_dx->data<T>();
......@@ -613,7 +613,7 @@ struct DotTripleGradFunction<DeviceContext,
}
if (out_tensor_d_ddx) {
auto* data_d_ddx = out_tensor_d_ddx->mutable_data<T>();
auto* data_d_ddx = out_tensor_d_ddx->mutable_data<T>(ctx.GetPlace());
auto* data_dout = in_tensor_dout->data<T>();
auto* data_d_dy = in_tensor_d_dy->data<T>();
auto* data_y = in_tensor_y->data<T>();
......@@ -633,7 +633,7 @@ struct DotTripleGradFunction<DeviceContext,
}
if (out_tensor_d_ddy) {
auto* data_d_ddy = out_tensor_d_ddy->mutable_data<T>();
auto* data_d_ddy = out_tensor_d_ddy->mutable_data<T>(ctx.GetPlace());
auto* data_dout = in_tensor_dout->data<T>();
auto* data_d_dx = in_tensor_d_dx->data<T>();
auto* data_x = in_tensor_x->data<T>();
......@@ -678,7 +678,7 @@ struct DotTripleGradFunction<DeviceContext,
auto& dev = *ctx.eigen_device();
auto d_ddout = EigenVector<T>::Flatten(*in_tensor_d_ddout);
if (out_tensor_d_x) {
out_tensor_d_x->mutable_data<T>();
out_tensor_d_x->mutable_data<T>(ctx.GetPlace());
auto ddy = EigenVector<T>::Flatten(*in_tensor_ddy);
Eigen::DSizes<int, 1> size(in_tensor_ddy->numel());
auto d_x = EigenVector<T>::Flatten(*out_tensor_d_x);
......@@ -686,7 +686,7 @@ struct DotTripleGradFunction<DeviceContext,
}
if (out_tensor_d_y) {
out_tensor_d_y->mutable_data<T>();
out_tensor_d_y->mutable_data<T>(ctx.GetPlace());
auto ddx = EigenVector<T>::Flatten(*in_tensor_ddx);
Eigen::DSizes<int, 1> size(in_tensor_ddx->numel());
......@@ -695,7 +695,7 @@ struct DotTripleGradFunction<DeviceContext,
}
if (out_tensor_d_dout) {
out_tensor_d_dout->mutable_data<T>();
out_tensor_d_dout->mutable_data<T>(ctx.GetPlace());
auto ddx = EigenVector<T>::Flatten(*in_tensor_ddx);
auto ddy = EigenVector<T>::Flatten(*in_tensor_ddy);
auto d_dx = EigenVector<T>::Flatten(*in_tensor_d_dx);
......@@ -705,7 +705,7 @@ struct DotTripleGradFunction<DeviceContext,
}
if (out_tensor_d_ddx) {
out_tensor_d_ddx->mutable_data<T>();
out_tensor_d_ddx->mutable_data<T>(ctx.GetPlace());
auto dout = EigenVector<T>::Flatten(*in_tensor_dout);
auto y = EigenVector<T>::Flatten(*in_tensor_y);
auto d_ddout = EigenVector<T>::Flatten(*in_tensor_d_ddout);
......@@ -717,7 +717,7 @@ struct DotTripleGradFunction<DeviceContext,
}
if (out_tensor_d_ddy) {
out_tensor_d_ddy->mutable_data<T>();
out_tensor_d_ddy->mutable_data<T>(ctx.GetPlace());
auto dout = EigenVector<T>::Flatten(*in_tensor_dout);
auto x = EigenVector<T>::Flatten(*in_tensor_x);
auto d_ddout = EigenVector<T>::Flatten(*in_tensor_d_ddout);
......@@ -732,7 +732,7 @@ struct DotTripleGradFunction<DeviceContext,
const auto* data_d_ddout = in_tensor_d_ddout->data<T>();
if (out_tensor_d_x) {
auto* data_d_x = out_tensor_d_x->mutable_data<T>();
auto* data_d_x = out_tensor_d_x->mutable_data<T>(ctx.GetPlace());
const auto* data_ddy = in_tensor_ddy->data<T>();
const DDim& dim = out_tensor_d_x->dims();
......@@ -747,7 +747,7 @@ struct DotTripleGradFunction<DeviceContext,
}
if (out_tensor_d_y) {
auto* data_d_y = out_tensor_d_y->mutable_data<T>();
auto* data_d_y = out_tensor_d_y->mutable_data<T>(ctx.GetPlace());
const auto* data_ddx = in_tensor_ddx->data<T>();
const DDim& dim = out_tensor_d_y->dims();
......@@ -762,7 +762,7 @@ struct DotTripleGradFunction<DeviceContext,
}
if (out_tensor_d_dout) {
auto* data_d_dout = out_tensor_d_dout->mutable_data<T>();
auto* data_d_dout = out_tensor_d_dout->mutable_data<T>(ctx.GetPlace());
auto* data_ddx = in_tensor_ddx->data<T>();
auto* data_ddy = in_tensor_ddy->data<T>();
auto* data_d_dx = in_tensor_d_dx->data<T>();
......@@ -790,7 +790,7 @@ struct DotTripleGradFunction<DeviceContext,
}
if (out_tensor_d_ddx) {
auto* data_d_ddx = out_tensor_d_ddx->mutable_data<T>();
auto* data_d_ddx = out_tensor_d_ddx->mutable_data<T>(ctx.GetPlace());
auto* data_dout = in_tensor_dout->data<T>();
auto* data_d_dy = in_tensor_d_dy->data<T>();
auto* data_y = in_tensor_y->data<T>();
......@@ -809,7 +809,7 @@ struct DotTripleGradFunction<DeviceContext,
}
if (out_tensor_d_ddy) {
auto* data_d_ddy = out_tensor_d_ddy->mutable_data<T>();
auto* data_d_ddy = out_tensor_d_ddy->mutable_data<T>(ctx.GetPlace());
auto* data_dout = in_tensor_dout->data<T>();
auto* data_d_dx = in_tensor_d_dx->data<T>();
auto* data_x = in_tensor_x->data<T>();
......@@ -838,10 +838,10 @@ void DotGradKernel(const Context& dev_ctx,
DenseTensor* dx,
DenseTensor* dy) {
if (dx) {
dx->mutable_data<T>();
dx->mutable_data<T>(dev_ctx.GetPlace());
}
if (dy) {
dy->mutable_data<T>();
dy->mutable_data<T>(dev_ctx.GetPlace());
}
DotGradFunction<Context, T>()(dev_ctx, &x, &y, &dout, dx, dy);
}
......@@ -857,13 +857,13 @@ void DotDoubleGradKernel(const Context& dev_ctx,
DenseTensor* dy,
DenseTensor* ddout) {
if (dx) {
dx->mutable_data<T>();
dx->mutable_data<T>(dev_ctx.GetPlace());
}
if (dy) {
dy->mutable_data<T>();
dy->mutable_data<T>(dev_ctx.GetPlace());
}
if (ddout) {
ddout->mutable_data<T>();
ddout->mutable_data<T>(dev_ctx.GetPlace());
}
DotDoubleGradFunction<Context, T>()(
dev_ctx, &x, &y, &dout, ddx, ddy, dx, dy, ddout);
......@@ -885,19 +885,19 @@ void DotTripleGradKernel(const Context& dev_ctx,
DenseTensor* d_ddy,
DenseTensor* d_dout) {
if (d_x) {
d_x->mutable_data<T>();
d_x->mutable_data<T>(dev_ctx.GetPlace());
}
if (d_y) {
d_y->mutable_data<T>();
d_y->mutable_data<T>(dev_ctx.GetPlace());
}
if (d_ddx) {
d_ddx->mutable_data<T>();
d_ddx->mutable_data<T>(dev_ctx.GetPlace());
}
if (d_ddy) {
d_ddy->mutable_data<T>();
d_ddy->mutable_data<T>(dev_ctx.GetPlace());
}
if (d_dout) {
d_dout->mutable_data<T>();
d_dout->mutable_data<T>(dev_ctx.GetPlace());
}
DotTripleGradFunction<Context, T>()(dev_ctx,
......
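The dot grad, double-grad and triple-grad hunks follow one pattern: each optional output is allocated only when the caller passed a non-null tensor, and the allocation now names the kernel's place. A condensed sketch with a hypothetical function name (include path assumed):

#include "paddle/pten/core/dense_tensor.h"  // assumed include path

template <typename T, typename Context>
void MaybeAllocateGrads(const Context& dev_ctx,
                        pten::DenseTensor* dx,
                        pten::DenseTensor* dy) {
  if (dx) {
    dx->mutable_data<T>(dev_ctx.GetPlace());  // allocate only if requested
  }
  if (dy) {
    dy->mutable_data<T>(dev_ctx.GetPlace());
  }
  // The grad functor then writes through dx->data<T>() / dy->data<T>().
}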
......@@ -26,7 +26,7 @@ namespace pten {
template <typename T, typename Context, typename VType>
void FullValue(const Context& dev_ctx, DenseTensor* tensor, VType val) {
tensor->mutable_data<T>();
tensor->mutable_data<T>(dev_ctx.GetPlace());
auto t = pten::EigenVector<T>::Flatten(*tensor);
t.device(*dev_ctx.eigen_device()) = t.constant(static_cast<T>(val));
}
......
......@@ -105,7 +105,7 @@ void MatMul(const Context& dev_ctx,
bool trans_b,
DenseTensor* out,
bool flag = false) {
out->mutable_data<T>();
out->mutable_data<T>(dev_ctx.GetPlace());
auto blas = paddle::operators::math::GetBlas<Context, T>(dev_ctx);
auto mat_dim_a =
paddle::operators::math::CreateMatrixDescriptor(a.dims(), 0, trans_a);
......@@ -123,7 +123,7 @@ void MatMul(const Context& dev_ctx,
b.data<T>(),
mat_dim_b,
static_cast<T>(1),
out->mutable_data<T>(),
out->data<T>(),
static_cast<T>(flag));
}
......@@ -242,8 +242,8 @@ void MatmulGradKernel(const Context& dev_ctx,
// Case1 : x's or y's dim = 1
if (x_ndim == 1 && y_ndim == 1) {
if (dx) dx->mutable_data<T>();
if (dy) dy->mutable_data<T>();
if (dx) dx->mutable_data<T>(dev_ctx.GetPlace());
if (dy) dy->mutable_data<T>(dev_ctx.GetPlace());
if (out_grad.numel() == 1) {
DotGradFunction<Context, T>()(dev_ctx, &x, &y, &out_grad, dx, dy);
return;
......
......@@ -118,7 +118,7 @@ void MatMulFunction(const Context& dev_ctx,
N));
VLOG(3) << "MatMul's case 1";
Out->Resize({1});
Out->mutable_data<T>();
Out->mutable_data<T>(dev_ctx.GetPlace());
blas.GEMM(CblasNoTrans,
CblasTrans,
1,
......@@ -128,7 +128,7 @@ void MatMulFunction(const Context& dev_ctx,
y_data,
x_data,
static_cast<T>(flag),
Out->mutable_data<T>());
Out->data<T>());
return;
}
......@@ -165,7 +165,7 @@ void MatMulFunction(const Context& dev_ctx,
out_dims.back() = y_dims.back();
}
Out->ResizeAndAllocate(pten::framework::make_ddim(out_dims));
Out->mutable_data<T>();
Out->mutable_data<T>(dev_ctx.GetPlace());
if (trans_y) {
const int M = Y.numel() / N;
VLOG(3) << "MatMul's case 2";
......@@ -176,7 +176,7 @@ void MatMulFunction(const Context& dev_ctx,
y_data,
x_data,
static_cast<T>(flag),
Out->mutable_data<T>());
Out->data<T>());
} else {
const int M = y_dims[y_ndim - 1];
const int batch_size = Y.numel() / (M * N);
......@@ -189,7 +189,7 @@ void MatMulFunction(const Context& dev_ctx,
y_data,
x_data,
static_cast<T>(flag),
Out->mutable_data<T>());
Out->data<T>());
} else {
VLOG(3) << "MatMul's case 4";
blas.BatchedGEMM(CblasTrans,
......@@ -201,7 +201,7 @@ void MatMulFunction(const Context& dev_ctx,
y_data,
x_data,
static_cast<T>(flag),
Out->mutable_data<T>(),
Out->data<T>(),
batch_size,
M * N,
0);
......@@ -243,7 +243,7 @@ void MatMulFunction(const Context& dev_ctx,
std::copy_n(x_dims.cbegin(), x_ndim - 1, out_dims.begin());
}
Out->ResizeAndAllocate(pten::framework::make_ddim(out_dims));
Out->mutable_data<T>();
Out->mutable_data<T>(dev_ctx.GetPlace());
if (trans_x) {
const int M = x_dims[x_ndim - 1];
......@@ -257,7 +257,7 @@ void MatMulFunction(const Context& dev_ctx,
x_data,
y_data,
static_cast<T>(flag),
Out->mutable_data<T>());
Out->data<T>());
} else {
VLOG(3) << "MatMul's case 6";
blas.BatchedGEMM(CblasTrans,
......@@ -269,7 +269,7 @@ void MatMulFunction(const Context& dev_ctx,
x_data,
y_data,
static_cast<T>(flag),
Out->mutable_data<T>(),
Out->data<T>(),
batch_size,
M * N,
0);
......@@ -284,7 +284,7 @@ void MatMulFunction(const Context& dev_ctx,
x_data,
y_data,
static_cast<T>(flag),
Out->mutable_data<T>());
Out->data<T>());
}
return;
}
......@@ -331,7 +331,7 @@ void MatMulFunction(const Context& dev_ctx,
out_broadcast_dims[ndim - 1] = N;
Out->ResizeAndAllocate(pten::framework::make_ddim(out_broadcast_dims));
Out->mutable_data<T>();
Out->mutable_data<T>(dev_ctx.GetPlace());
const int batch_dim = ndim - 2;
// broadcast message
......@@ -367,7 +367,7 @@ void MatMulFunction(const Context& dev_ctx,
x_data,
y_data,
static_cast<T>(flag),
Out->mutable_data<T>());
Out->data<T>());
} else if (x_batch_size == 1) {
if (M == 1 && trans_y) {
VLOG(3) << "MatMul's case 9";
......@@ -378,7 +378,7 @@ void MatMulFunction(const Context& dev_ctx,
y_data,
x_data,
static_cast<T>(flag),
Out->mutable_data<T>());
Out->data<T>());
} else {
VLOG(3) << "MatMul's case 10";
blas.BatchedGEMM(trans_x ? CblasTrans : CblasNoTrans,
......@@ -390,7 +390,7 @@ void MatMulFunction(const Context& dev_ctx,
x_data,
y_data,
static_cast<T>(flag),
Out->mutable_data<T>(),
Out->data<T>(),
out_batch_size,
0,
K * N);
......@@ -407,7 +407,7 @@ void MatMulFunction(const Context& dev_ctx,
x_data,
y_data,
static_cast<T>(flag),
Out->mutable_data<T>());
Out->data<T>());
} else {
VLOG(3) << "MatMul's case 12";
blas.BatchedGEMM(CblasTrans,
......@@ -419,7 +419,7 @@ void MatMulFunction(const Context& dev_ctx,
x_data,
y_data,
static_cast<T>(flag),
Out->mutable_data<T>(),
Out->data<T>(),
out_batch_size,
M * K,
0);
......@@ -435,7 +435,7 @@ void MatMulFunction(const Context& dev_ctx,
x_data,
y_data,
static_cast<T>(flag),
Out->mutable_data<T>(),
Out->data<T>(),
out_batch_size,
M * K,
K * N);
......@@ -454,7 +454,7 @@ void MatMulFunction(const Context& dev_ctx,
x_ptr[i] = x_data + x_index * M * K;
y_ptr[i] = y_data + y_index * K * N;
out_ptr[i] = Out->mutable_data<T>() + i * M * N;
out_ptr[i] = Out->data<T>() + i * M * N;
IndexIncreaseFromDims(batch_dim, out_broadcast_dims.data(), index.data());
}
VLOG(3) << "MatMul's case 14";
......
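In MatMulFunction the output is resized and allocated once up front; the repeated mutable_data<T>() calls at each GEMM call site are replaced by data<T>() so the BLAS calls only fetch the already-allocated pointer. A sketch of that allocate-once pattern as a hypothetical helper (include path assumed; pten::framework::make_ddim comes from the headers already used above):

#include <cstdint>
#include <vector>

#include "paddle/pten/core/dense_tensor.h"  // assumed include path

template <typename T, typename Context>
T* PrepareMatMulOutput(const Context& dev_ctx,
                       const std::vector<std::int64_t>& out_dims,
                       pten::DenseTensor* Out) {
  Out->ResizeAndAllocate(pten::framework::make_ddim(out_dims));
  Out->mutable_data<T>(dev_ctx.GetPlace());  // single allocation
  return Out->data<T>();                     // GEMM call sites reuse this pointer
}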
......@@ -26,7 +26,7 @@ template <typename T, typename Context>
void SignKernel(const Context& dev_ctx,
const DenseTensor& x,
DenseTensor* out) {
out->mutable_data<T>();
out->mutable_data<T>(dev_ctx.GetPlace());
auto eigen_out = pten::EigenVector<T>::Flatten(*out);
auto eigen_x = pten::EigenVector<T>::Flatten(x);
......
......@@ -27,12 +27,15 @@ void ReshapeKernel(const Context& dev_ctx,
const ScalarArray& shape,
DenseTensor* out) {
auto out_meta = InferMetaFromVecValue(x.meta(), shape.GetData());
if (x.data() == out->data() && x.numel() == out->numel()) {
if (x.initialized() && x.Holder() == out->Holder()) {
out->ResizeAndAllocate(out_meta.dims);
return;
}
out->Resize(x.dims());
out->mutable_data(x.place());
pten::Copy(dev_ctx, x, false, out);
out->ResizeAndAllocate(out_meta.dims);
out->Resize(out_meta.dims);
out->ResetLoD(x.lod());
}
......
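The reshape hunk replaces the raw data-pointer comparison with a Holder comparison: x and out alias exactly when they share the same underlying allocation, in which case only the metadata is resized; otherwise out is allocated on x's place, the data is copied, and the dims and LoD are restored. A toy predicate for that aliasing check (hypothetical name; include path assumed):

#include "paddle/pten/core/dense_tensor.h"  // assumed include path

// True when 'a' already owns memory and 'b' refers to the same allocation;
// this is the condition ReshapeKernel now uses to skip the copy.
bool SharesAllocation(const pten::DenseTensor& a, const pten::DenseTensor& b) {
  return a.initialized() && a.Holder() == b.Holder();
}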
......@@ -30,7 +30,7 @@ void Copy(const Context& dev_ctx,
bool blocking,
DenseTensor* dst) {
auto* src_ptr = src.data();
auto* dst_ptr = dst->mutable_data();
auto* dst_ptr = dst->mutable_data(dev_ctx.GetPlace());
const auto& src_place = src.place();
const auto& dst_place = dst->place();
......
......@@ -37,7 +37,8 @@ TEST(API, cast) {
pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 4}),
pten::DataLayout::NCHW));
auto* dense_x_data = dense_x->mutable_data<float>();
auto* dense_x_data =
dense_x->mutable_data<float>(paddle::platform::CPUPlace());
for (int i = 0; i < dense_x->numel(); i++) {
dense_x_data[i] = i;
......
......@@ -37,7 +37,8 @@ TEST(API, conj) {
pten::DenseTensorMeta(pten::DataType::COMPLEX64,
framework::make_ddim({3, 10}),
pten::DataLayout::NCHW));
auto* dense_x_data = dense_x->mutable_data<paddle::complex64>();
auto* dense_x_data =
dense_x->mutable_data<paddle::complex64>(paddle::platform::CPUPlace());
for (size_t i = 0; i < 3; ++i) {
for (size_t j = 0; j < 10; ++j) {
......
......@@ -37,14 +37,16 @@ TEST(API, dot) {
pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 10}),
pten::DataLayout::NCHW));
auto* dense_x_data = dense_x->mutable_data<float>();
auto* dense_x_data =
dense_x->mutable_data<float>(paddle::platform::CPUPlace());
auto dense_y = std::make_shared<pten::DenseTensor>(
alloc.get(),
pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 10}),
pten::DataLayout::NCHW));
auto* dense_y_data = dense_y->mutable_data<float>();
auto* dense_y_data =
dense_y->mutable_data<float>(paddle::platform::CPUPlace());
float sum[3] = {0.0, 0.0, 0.0};
for (size_t i = 0; i < 3; ++i) {
......
......@@ -37,14 +37,16 @@ TEST(API, add) {
pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 10}),
pten::DataLayout::NCHW));
auto* dense_x_data = dense_x->mutable_data<float>();
auto* dense_x_data =
dense_x->mutable_data<float>(paddle::platform::CPUPlace());
auto dense_y = std::make_shared<pten::DenseTensor>(
alloc.get(),
pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({10}),
pten::DataLayout::NCHW));
auto* dense_y_data = dense_y->mutable_data<float>();
auto* dense_y_data =
dense_y->mutable_data<float>(paddle::platform::CPUPlace());
float sum[3][10] = {0.0};
for (size_t i = 0; i < 3; ++i) {
......@@ -91,14 +93,16 @@ TEST(API, subtract) {
pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 10}),
pten::DataLayout::NCHW));
auto* dense_x_data = dense_x->mutable_data<float>();
auto* dense_x_data =
dense_x->mutable_data<float>(paddle::platform::CPUPlace());
auto dense_y = std::make_shared<pten::DenseTensor>(
alloc.get(),
pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({10}),
pten::DataLayout::NCHW));
auto* dense_y_data = dense_y->mutable_data<float>();
auto* dense_y_data =
dense_y->mutable_data<float>(paddle::platform::CPUPlace());
float sub[3][10] = {0.0};
for (size_t i = 0; i < 3; ++i) {
......@@ -145,14 +149,16 @@ TEST(API, divide) {
pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 10}),
pten::DataLayout::NCHW));
auto* dense_x_data = dense_x->mutable_data<float>();
auto* dense_x_data =
dense_x->mutable_data<float>(paddle::platform::CPUPlace());
auto dense_y = std::make_shared<pten::DenseTensor>(
alloc.get(),
pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({10}),
pten::DataLayout::NCHW));
auto* dense_y_data = dense_y->mutable_data<float>();
auto* dense_y_data =
dense_y->mutable_data<float>(paddle::platform::CPUPlace());
float div[3][10] = {0.0};
for (size_t i = 0; i < 3; ++i) {
......@@ -199,14 +205,16 @@ TEST(API, multiply) {
pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 10}),
pten::DataLayout::NCHW));
auto* dense_x_data = dense_x->mutable_data<float>();
auto* dense_x_data =
dense_x->mutable_data<float>(paddle::platform::CPUPlace());
auto dense_y = std::make_shared<pten::DenseTensor>(
alloc.get(),
pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({10}),
pten::DataLayout::NCHW));
auto* dense_y_data = dense_y->mutable_data<float>();
auto* dense_y_data =
dense_y->mutable_data<float>(paddle::platform::CPUPlace());
float mul[3][10] = {0.0};
for (size_t i = 0; i < 3; ++i) {
......
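All of the API test updates above share one recipe: build a pten::DenseTensor from an allocator plus a DenseTensorMeta, then request the typed buffer with an explicit CPUPlace. A condensed sketch of that setup; it assumes the paddle::experimental::DefaultAllocator these tests construct for paddle::platform::CPUPlace(), and the include paths are assumed:

#include <cstdint>
#include <memory>

#include "paddle/pten/api/lib/utils/allocator.h"  // assumed include path
#include "paddle/pten/core/dense_tensor.h"        // assumed include path

void BuildCpuInput() {
  const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
      paddle::platform::CPUPlace());
  pten::DenseTensor dense_x(
      alloc.get(),
      pten::DenseTensorMeta(pten::DataType::FLOAT32,
                            pten::framework::make_ddim({3, 10}),
                            pten::DataLayout::NCHW));
  // The place is now mandatory when asking for a mutable typed buffer.
  auto* dense_x_data = dense_x.mutable_data<float>(paddle::platform::CPUPlace());
  for (int64_t i = 0; i < dense_x.numel(); ++i) {
    dense_x_data[i] = 1.0f;
  }
}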
......@@ -47,10 +47,8 @@ TEST(API, empty_like) {
ASSERT_EQ(out.dims().size(), 2);
ASSERT_EQ(out.dims()[0], 3);
ASSERT_EQ(out.numel(), 6);
ASSERT_EQ(out.is_cpu(), true);
ASSERT_EQ(out.type(), pten::DataType::FLOAT32);
ASSERT_EQ(out.layout(), pten::DataLayout::NCHW);
ASSERT_EQ(out.initialized(), true);
}
TEST(API, empty1) {
......@@ -63,7 +61,8 @@ TEST(API, empty1) {
pten::DenseTensorMeta(pten::DataType::INT64,
framework::make_ddim({2}),
pten::DataLayout::NCHW));
auto* shape_data = dense_shape->mutable_data<int64_t>();
auto* shape_data =
dense_shape->mutable_data<int64_t>(paddle::platform::CPUPlace());
shape_data[0] = 2;
shape_data[1] = 3;
......@@ -76,10 +75,8 @@ TEST(API, empty1) {
ASSERT_EQ(out.shape().size(), 2UL);
ASSERT_EQ(out.shape()[0], 2);
ASSERT_EQ(out.numel(), 6);
ASSERT_EQ(out.is_cpu(), true);
ASSERT_EQ(out.type(), pten::DataType::FLOAT32);
ASSERT_EQ(out.layout(), pten::DataLayout::NCHW);
ASSERT_EQ(out.initialized(), true);
}
TEST(API, empty2) {
......@@ -91,7 +88,7 @@ TEST(API, empty2) {
pten::DenseTensorMeta(pten::DataType::INT32,
framework::make_ddim({1}),
pten::DataLayout::NCHW));
dense_scalar->mutable_data<int32_t>()[0] = 2;
dense_scalar->mutable_data<int32_t>(paddle::platform::CPUPlace())[0] = 2;
paddle::experimental::Tensor shape_scalar1(dense_scalar);
paddle::experimental::Tensor shape_scalar2(dense_scalar);
......@@ -103,10 +100,8 @@ TEST(API, empty2) {
ASSERT_EQ(out.shape().size(), 2UL);
ASSERT_EQ(out.shape()[0], 2);
ASSERT_EQ(out.numel(), 4);
ASSERT_EQ(out.is_cpu(), true);
ASSERT_EQ(out.type(), pten::DataType::FLOAT32);
ASSERT_EQ(out.layout(), pten::DataLayout::NCHW);
ASSERT_EQ(out.initialized(), true);
}
TEST(API, empty3) {
......@@ -117,10 +112,8 @@ TEST(API, empty3) {
ASSERT_EQ(out.shape().size(), 2UL);
ASSERT_EQ(out.shape()[0], 2);
ASSERT_EQ(out.numel(), 6);
ASSERT_EQ(out.is_cpu(), true);
ASSERT_EQ(out.type(), pten::DataType::INT32);
ASSERT_EQ(out.layout(), pten::DataLayout::NCHW);
ASSERT_EQ(out.initialized(), true);
}
} // namespace tests
......
......@@ -37,7 +37,8 @@ TEST(API, full_like) {
pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 2}),
pten::DataLayout::NCHW));
auto* dense_x_data = dense_x->mutable_data<float>();
auto* dense_x_data =
dense_x->mutable_data<float>(paddle::platform::CPUPlace());
dense_x_data[0] = 0;
float val = 1.0;
......@@ -72,7 +73,8 @@ TEST(API, zeros_like) {
pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 2}),
pten::DataLayout::NCHW));
auto* dense_x_data = dense_x->mutable_data<float>();
auto* dense_x_data =
dense_x->mutable_data<float>(paddle::platform::CPUPlace());
dense_x_data[0] = 1;
paddle::experimental::Tensor x(dense_x);
......@@ -105,7 +107,8 @@ TEST(API, ones_like) {
pten::DenseTensorMeta(pten::DataType::INT32,
framework::make_ddim({3, 2}),
pten::DataLayout::NCHW));
auto* dense_x_data = dense_x->mutable_data<int32_t>();
auto* dense_x_data =
dense_x->mutable_data<int32_t>(paddle::platform::CPUPlace());
dense_x_data[0] = 0;
paddle::experimental::Tensor x(dense_x);
......@@ -139,7 +142,8 @@ TEST(API, full1) {
pten::DenseTensorMeta(pten::DataType::INT64,
framework::make_ddim({2}),
pten::DataLayout::NCHW));
auto* shape_data = dense_shape->mutable_data<int64_t>();
auto* shape_data =
dense_shape->mutable_data<int64_t>(paddle::platform::CPUPlace());
shape_data[0] = 2;
shape_data[1] = 3;
......@@ -148,7 +152,7 @@ TEST(API, full1) {
pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({1}),
pten::DataLayout::NCHW));
dense_scalar->mutable_data<float>()[0] = 1.0;
dense_scalar->mutable_data<float>(paddle::platform::CPUPlace())[0] = 1.0;
paddle::experimental::Tensor value(dense_scalar);
......@@ -185,7 +189,7 @@ TEST(API, full2) {
pten::DenseTensorMeta(pten::DataType::INT32,
framework::make_ddim({1}),
pten::DataLayout::NCHW));
dense_scalar->mutable_data<int32_t>()[0] = 2;
dense_scalar->mutable_data<int>(paddle::platform::CPUPlace())[0] = 2;
paddle::experimental::Tensor shape_scalar1(dense_scalar);
paddle::experimental::Tensor shape_scalar2(dense_scalar);
......
......@@ -37,7 +37,8 @@ TEST(API, flatten) {
pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 2, 2, 3}),
pten::DataLayout::NCHW));
auto* dense_x_data = dense_x->mutable_data<float>();
auto* dense_x_data =
dense_x->mutable_data<float>(paddle::platform::CPUPlace());
for (int i = 0; i < dense_x->numel(); i++) {
dense_x_data[i] = i;
......
......@@ -38,14 +38,16 @@ TEST(API, matmul_cpu) {
framework::make_ddim({3, 3}),
pten::DataLayout::NCHW));
auto* dense_x_data = dense_x->mutable_data<float>();
auto* dense_x_data =
dense_x->mutable_data<float>(paddle::platform::CPUPlace());
auto dense_y = std::make_shared<pten::DenseTensor>(
alloc.get(),
pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 3}),
pten::DataLayout::NCHW));
auto* dense_y_data = dense_y->mutable_data<float>();
auto* dense_y_data =
dense_y->mutable_data<float>(paddle::platform::CPUPlace());
for (size_t i = 0; i < 9; ++i) {
dense_x_data[i] = 1.0;
......@@ -87,14 +89,14 @@ TEST(API, matmul_cuda) {
framework::make_ddim({3, 3}),
pten::DataLayout::NCHW));
auto* ref_x_data = ref_x->mutable_data<float>();
auto* ref_x_data = ref_x->mutable_data<float>(paddle::platform::CPUPlace());
auto ref_y = std::make_shared<pten::DenseTensor>(
alloc_cpu.get(),
pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 3}),
pten::DataLayout::NCHW));
auto* ref_y_data = ref_y->mutable_data<float>();
auto* ref_y_data = ref_y->mutable_data<float>(paddle::platform::CPUPlace());
for (size_t i = 0; i < 9; ++i) {
ref_x_data[i] = 1.0;
......
......@@ -37,7 +37,8 @@ TEST(API, mean) {
pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 4}),
pten::DataLayout::NCHW));
auto* dense_x_data = dense_x->mutable_data<float>();
auto* dense_x_data =
dense_x->mutable_data<float>(paddle::platform::CPUPlace());
float sum = 0.0;
for (size_t i = 0; i < 12; ++i) {
......
......@@ -58,11 +58,11 @@ void TestAPIPlace() {
std::vector<int64_t> tensor_shape = {5, 5};
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
auto t1 = experimental::Tensor(paddle::PlaceType::kGPU, tensor_shape);
t1.mutable_data<float>();
t1.mutable_data<float>(paddle::PlaceType::kGPU);
CHECK((paddle::PlaceType::kGPU == t1.place()));
#endif
auto t2 = experimental::Tensor(paddle::PlaceType::kCPU, tensor_shape);
t2.mutable_data<float>();
t2.mutable_data<float>(paddle::PlaceType::kCPU);
CHECK((paddle::PlaceType::kCPU == t2.place()));
}
......@@ -80,29 +80,30 @@ void TestAPISlice() {
std::vector<int64_t> tensor_shape_sub2 = {1, 5, 5};
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
auto t1 = experimental::Tensor(paddle::PlaceType::kGPU, tensor_shape_origin1);
t1.mutable_data<float>();
t1.mutable_data<float>(paddle::PlaceType::kGPU);
CHECK(t1.slice(0, 5).shape() == tensor_shape_origin1);
CHECK(t1.slice(0, 3).shape() == tensor_shape_sub1);
auto t2 = experimental::Tensor(paddle::PlaceType::kGPU, tensor_shape_origin2);
t2.mutable_data<float>();
t2.mutable_data<float>(paddle::PlaceType::kGPU);
CHECK(t2.slice(4, 5).shape() == tensor_shape_sub2);
#endif
auto t3 = experimental::Tensor(paddle::PlaceType::kCPU, tensor_shape_origin1);
t3.mutable_data<float>();
t3.mutable_data<float>(paddle::PlaceType::kCPU);
CHECK(t3.slice(0, 5).shape() == tensor_shape_origin1);
CHECK(t3.slice(0, 3).shape() == tensor_shape_sub1);
auto t4 = experimental::Tensor(paddle::PlaceType::kCPU, tensor_shape_origin2);
t4.mutable_data<float>();
t4.mutable_data<float>(paddle::PlaceType::kCPU);
CHECK(t4.slice(4, 5).shape() == tensor_shape_sub2);
// Test writing function for sliced tensor
auto t = InitCPUTensorForTest<float>();
auto t_sliced = t.slice(0, 1);
auto* t_sliced_data_ptr = t_sliced.mutable_data<float>();
auto* t_sliced_data_ptr =
t_sliced.mutable_data<float>(paddle::PlaceType::kCPU);
for (int64_t i = 0; i < t_sliced.size(); i++) {
t_sliced_data_ptr[i] += static_cast<float>(5);
}
auto* t_data_ptr = t.mutable_data<float>();
auto* t_data_ptr = t.mutable_data<float>(paddle::PlaceType::kCPU);
for (int64_t i = 0; i < t_sliced.size(); i++) {
CHECK_EQ(t_data_ptr[i], static_cast<float>(10));
}
......@@ -112,7 +113,7 @@ template <typename T>
paddle::DataType TestDtype() {
std::vector<int64_t> tensor_shape = {5, 5};
auto t1 = experimental::Tensor(paddle::PlaceType::kCPU, tensor_shape);
t1.template mutable_data<T>();
t1.template mutable_data<T>(paddle::PlaceType::kCPU);
return t1.type();
}
......@@ -120,13 +121,13 @@ template <typename T>
void TestCast(paddle::DataType data_type) {
std::vector<int64_t> tensor_shape = {5, 5};
auto t1 = experimental::Tensor(paddle::PlaceType::kCPU, tensor_shape);
t1.template mutable_data<T>();
t1.template mutable_data<T>(paddle::PlaceType::kCPU);
auto t2 = t1.cast(data_type);
CHECK(t2.type() == data_type);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
auto tg1 = experimental::Tensor(paddle::PlaceType::kGPU);
tg1.reshape(tensor_shape);
tg1.template mutable_data<T>();
tg1.template mutable_data<T>(paddle::PlaceType::kGPU);
auto tg2 = tg1.cast(data_type);
CHECK(tg2.type() == data_type);
#endif
......@@ -194,7 +195,7 @@ void GroupTestDtype() {
void TestInitilized() {
experimental::Tensor test_tensor(paddle::PlaceType::kCPU, {1, 1});
CHECK(test_tensor.is_initialized() == false);
test_tensor.mutable_data<float>();
test_tensor.mutable_data<float>(paddle::PlaceType::kCPU);
CHECK(test_tensor.is_initialized() == true);
float* tensor_data = test_tensor.mutable_data<float>();
for (int i = 0; i < test_tensor.size(); i++) {
......
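The high-level paddle::experimental::Tensor tests change the same way: mutable_data<T>() now names the target PlaceType. A small CPU-only sketch of the updated usage, assuming the constructor used in these tests (include path assumed):

#include <cstdint>
#include <vector>

#include "paddle/pten/api/include/tensor.h"  // assumed include path

paddle::experimental::Tensor MakeFilledCpuTensor() {
  std::vector<int64_t> shape = {2, 3};
  auto t = paddle::experimental::Tensor(paddle::PlaceType::kCPU, shape);
  // Allocation now requires the place, matching the tensor's construction.
  auto* data = t.mutable_data<float>(paddle::PlaceType::kCPU);
  for (int64_t i = 0; i < t.size(); ++i) {
    data[i] = static_cast<float>(i);
  }
  return t;
}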
......@@ -37,7 +37,8 @@ TEST(API, reshape) {
pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 2, 2, 3}),
pten::DataLayout::NCHW));
auto* dense_x_data = dense_x->mutable_data<float>();
auto* dense_x_data =
dense_x->mutable_data<float>(paddle::platform::CPUPlace());
for (int i = 0; i < dense_x->numel(); i++) {
dense_x_data[i] = i;
......@@ -69,14 +70,15 @@ TEST(API, reshape) {
TEST(Tensor, old_reshape) {
paddle::experimental::Tensor x(paddle::PlaceType::kCPU);
x.reshape({3, 4});
x.mutable_data<float>(paddle::PlaceType::kCPU);
ASSERT_EQ(x.shape()[0], 3);
ASSERT_EQ(x.shape()[1], 4);
ASSERT_EQ(x.numel(), 12);
ASSERT_EQ(x.is_cpu(), true);
ASSERT_EQ(x.type(), pten::DataType::UNDEFINED);
ASSERT_EQ(x.type(), pten::DataType::FLOAT32);
ASSERT_EQ(x.layout(), pten::DataLayout::NCHW);
ASSERT_EQ(x.initialized(), false);
ASSERT_EQ(x.initialized(), true);
}
} // namespace tests
......
......@@ -37,7 +37,8 @@ TEST(API, sum) {
pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 4}),
pten::DataLayout::NCHW));
auto* dense_x_data = dense_x->mutable_data<float>();
auto* dense_x_data =
dense_x->mutable_data<float>(paddle::platform::CPUPlace());
float sum = 0.0;
for (size_t i = 0; i < 12; ++i) {
......
......@@ -35,7 +35,8 @@ paddle::experimental::Tensor CreateInputTensor() {
pten::DenseTensorMeta(pten::DataType::INT64,
framework::make_ddim({3, 4}),
pten::DataLayout::NCHW));
auto* dense_x_data = dense_x->mutable_data<int64_t>();
auto* dense_x_data =
dense_x->mutable_data<int64_t>(paddle::platform::CPUPlace());
for (int64_t i = 0; i < 12; ++i) {
dense_x_data[i] = i;
......
......@@ -112,8 +112,6 @@ TEST(dense_tensor, resize) {
CHECK_EQ(tensor_0.capacity(), 2u);
tensor_0.ResizeAndAllocate({1, 2, 3});
CHECK_EQ(tensor_0.capacity(), 6u);
tensor_0.mutable_data<int8_t>();
CHECK_EQ(tensor_0.capacity(), 6u);
}
TEST(dense_tensor, shallow_copy) {
......
......@@ -38,7 +38,8 @@ TEST(DEV_API, cast) {
pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 4}),
pten::DataLayout::NCHW));
auto* dense_x_data = dense_x.mutable_data<float>();
auto* dense_x_data =
dense_x.mutable_data<float>(paddle::platform::CPUPlace());
float sum = 0.0;
for (size_t i = 0; i < 12; ++i) {
......
......@@ -37,7 +37,8 @@ TEST(DEV_API, conj) {
framework::make_ddim({3, 4}),
pten::DataLayout::NCHW));
auto* dense_x_data = dense_x.mutable_data<paddle::complex64>();
auto* dense_x_data =
dense_x.mutable_data<paddle::complex64>(paddle::platform::CPUPlace());
for (size_t i = 0; i < 12; ++i) {
dense_x_data[i] = paddle::complex64(i * 1.0, i * 1.0);
}
......
......@@ -39,7 +39,8 @@ TEST(DEV_API, copy) {
pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({2, 3}),
pten::DataLayout::NCHW));
auto* dense_x_data = dense_src->mutable_data<float>();
auto* dense_x_data =
dense_src->mutable_data<float>(paddle::platform::CPUPlace());
auto dense_dst = std::make_shared<pten::DenseTensor>(
alloc.get(),
......
......@@ -52,7 +52,8 @@ TEST(DEV_API, empty_like) {
pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 2}),
pten::DataLayout::NCHW));
auto* dense_x_data = dense_x.mutable_data<float>();
auto* dense_x_data =
dense_x.mutable_data<float>(paddle::platform::CPUPlace());
dense_x_data[0] = 0;
// 2. test API
......@@ -96,7 +97,8 @@ TEST(DEV_API, full_like) {
pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 2}),
pten::DataLayout::NCHW));
auto* dense_x_data = dense_x.mutable_data<float>();
auto* dense_x_data =
dense_x.mutable_data<float>(paddle::platform::CPUPlace());
dense_x_data[0] = 0;
float val = 1.0;
......
......@@ -36,13 +36,15 @@ TEST(DEV_API, dot) {
pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 10}),
pten::DataLayout::NCHW));
auto* dense_x_data = dense_x.mutable_data<float>();
auto* dense_x_data =
dense_x.mutable_data<float>(paddle::platform::CPUPlace());
pten::DenseTensor dense_y(alloc.get(),
pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 10}),
pten::DataLayout::NCHW));
auto* dense_y_data = dense_y.mutable_data<float>();
auto* dense_y_data =
dense_y.mutable_data<float>(paddle::platform::CPUPlace());
float sum[3] = {0.0, 0.0, 0.0};
for (size_t i = 0; i < 3; ++i) {
......
......@@ -36,13 +36,15 @@ TEST(DEV_API, add) {
pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 10}),
pten::DataLayout::NCHW));
auto* dense_x_data = dense_x.mutable_data<float>();
auto* dense_x_data =
dense_x.mutable_data<float>(paddle::platform::CPUPlace());
pten::DenseTensor dense_y(alloc.get(),
pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({10}),
pten::DataLayout::NCHW));
auto* dense_y_data = dense_y.mutable_data<float>();
auto* dense_y_data =
dense_y.mutable_data<float>(paddle::platform::CPUPlace());
float sum[3][10] = {0.0};
for (size_t i = 0; i < 3; ++i) {
......@@ -82,13 +84,15 @@ TEST(DEV_API, subtract) {
pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 10}),
pten::DataLayout::NCHW));
auto* dense_x_data = dense_x.mutable_data<float>();
auto* dense_x_data =
dense_x.mutable_data<float>(paddle::platform::CPUPlace());
pten::DenseTensor dense_y(alloc.get(),
pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({10}),
pten::DataLayout::NCHW));
auto* dense_y_data = dense_y.mutable_data<float>();
auto* dense_y_data =
dense_y.mutable_data<float>(paddle::platform::CPUPlace());
float sub[3][10] = {0.0};
for (size_t i = 0; i < 3; ++i) {
......@@ -128,13 +132,15 @@ TEST(DEV_API, divide) {
pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 10}),
pten::DataLayout::NCHW));
auto* dense_x_data = dense_x.mutable_data<float>();
auto* dense_x_data =
dense_x.mutable_data<float>(paddle::platform::CPUPlace());
pten::DenseTensor dense_y(alloc.get(),
pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({10}),
pten::DataLayout::NCHW));
auto* dense_y_data = dense_y.mutable_data<float>();
auto* dense_y_data =
dense_y.mutable_data<float>(paddle::platform::CPUPlace());
float div[3][10] = {0.0};
for (size_t i = 0; i < 3; ++i) {
......@@ -174,13 +180,15 @@ TEST(DEV_API, multiply) {
pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 10}),
pten::DataLayout::NCHW));
auto* dense_x_data = dense_x.mutable_data<float>();
auto* dense_x_data =
dense_x.mutable_data<float>(paddle::platform::CPUPlace());
pten::DenseTensor dense_y(alloc.get(),
pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({10}),
pten::DataLayout::NCHW));
auto* dense_y_data = dense_y.mutable_data<float>();
auto* dense_y_data =
dense_y.mutable_data<float>(paddle::platform::CPUPlace());
float mul[3][10] = {0.0};
for (size_t i = 0; i < 3; ++i) {
......
......@@ -47,7 +47,8 @@ TEST(DEV_API, flatten) {
pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 2, 2, 3}),
pten::DataLayout::NCHW));
auto* dense_x_data = dense_x.mutable_data<float>();
auto* dense_x_data =
dense_x.mutable_data<float>(paddle::platform::CPUPlace());
for (int i = 0; i < dense_x.numel(); i++) {
dense_x_data[i] = i;
......
......@@ -36,13 +36,15 @@ TEST(DEV_API, dot) {
framework::make_ddim({3, 3}),
pten::DataLayout::NCHW));
auto* dense_x_data = dense_x.mutable_data<float>();
auto* dense_x_data =
dense_x.mutable_data<float>(paddle::platform::CPUPlace());
DenseTensor dense_y(alloc.get(),
pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 3}),
pten::DataLayout::NCHW));
auto* dense_y_data = dense_y.mutable_data<float>();
auto* dense_y_data =
dense_y.mutable_data<float>(paddle::platform::CPUPlace());
for (size_t i = 0; i < 9; ++i) {
dense_x_data[i] = 1.0;
......
......@@ -35,7 +35,8 @@ TEST(DEV_API, mean) {
pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 4}),
pten::DataLayout::NCHW));
auto* dense_x_data = dense_x.mutable_data<float>();
auto* dense_x_data =
dense_x.mutable_data<float>(paddle::platform::CPUPlace());
float sum = 0.0;
for (size_t i = 0; i < 12; ++i) {
......
......@@ -37,7 +37,8 @@ TEST(DEV_API, reshape) {
pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 2, 2, 3}),
pten::DataLayout::NCHW));
auto* dense_x_data = dense_x.mutable_data<float>();
auto* dense_x_data =
dense_x.mutable_data<float>(paddle::platform::CPUPlace());
for (int i = 0; i < dense_x.numel(); i++) {
dense_x_data[i] = i;
......
......@@ -36,7 +36,8 @@ TEST(DEV_API, scale) {
framework::make_ddim({3, 4}),
pten::DataLayout::NCHW));
auto* dense_x_data = dense_x.mutable_data<float>();
auto* dense_x_data =
dense_x.mutable_data<float>(paddle::platform::CPUPlace());
for (size_t i = 0; i < 12; ++i) {
dense_x_data[i] = i * 1.0;
}
......@@ -68,7 +69,8 @@ TEST(DEV_API, scale_host) {
pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 4}),
pten::DataLayout::NCHW));
auto* dense_x_data = dense_x.mutable_data<float>();
auto* dense_x_data =
dense_x.mutable_data<float>(paddle::platform::CPUPlace());
for (size_t i = 0; i < 12; ++i) {
dense_x_data[i] = i * 1.0;
}
......@@ -77,7 +79,7 @@ TEST(DEV_API, scale_host) {
pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({1}),
pten::DataLayout::NCHW));
scale.mutable_data<float>()[0] = 2;
scale.data<float>()[0] = 2;
float bias = 1;
bool bias_after_scale = true;
......
......@@ -35,7 +35,8 @@ TEST(DEV_API, sum) {
pten::DenseTensorMeta(pten::DataType::FLOAT32,
framework::make_ddim({3, 4}),
pten::DataLayout::NCHW));
auto* dense_x_data = dense_x.mutable_data<float>();
auto* dense_x_data =
dense_x.mutable_data<float>(paddle::platform::CPUPlace());
float sum = 0.0;
for (size_t i = 0; i < 12; ++i) {
......
......@@ -137,7 +137,9 @@ std::vector<paddle::Tensor> AttrTestForward(
PD_DISPATCH_FLOATING_TYPES(
x.type(), "assign_cpu_kernel", ([&] {
assign_cpu_kernel<data_t>(
x.data<data_t>(), out.mutable_data<data_t>(), x.size());
x.data<data_t>(),
out.mutable_data<data_t>(paddle::PlaceType::kCPU),
x.size());
}));
// Check attrs value
......@@ -175,12 +177,13 @@ std::vector<paddle::Tensor> AttrTestBackward(
const std::vector<std::string>& str_vec_attr) {
auto grad_x = paddle::Tensor(paddle::PlaceType::kCPU, grad_out.shape());
PD_DISPATCH_FLOATING_TYPES(grad_out.type(), "assign_cpu_kernel", ([&] {
assign_cpu_kernel<data_t>(
grad_out.data<data_t>(),
grad_x.mutable_data<data_t>(),
grad_out.size());
}));
PD_DISPATCH_FLOATING_TYPES(
grad_out.type(), "assign_cpu_kernel", ([&] {
assign_cpu_kernel<data_t>(
grad_out.data<data_t>(),
grad_x.mutable_data<data_t>(paddle::PlaceType::kCPU),
grad_out.size());
}));
CheckAllBackwardAttrs(int_attr, float_vec_attr, str_vec_attr);
......@@ -203,7 +206,9 @@ std::vector<paddle::Tensor> ConstAttrTestForward(
PD_DISPATCH_FLOATING_TYPES(
x.type(), "assign_cpu_kernel", ([&] {
assign_cpu_kernel<data_t>(
x.data<data_t>(), out.mutable_data<data_t>(), x.size());
x.data<data_t>(),
out.mutable_data<data_t>(paddle::PlaceType::kCPU),
x.size());
}));
// Check attrs value
......@@ -241,12 +246,13 @@ std::vector<paddle::Tensor> ConstAttrTestBackward(
const std::vector<std::string>& str_vec_attr) {
auto grad_x = paddle::Tensor(paddle::PlaceType::kCPU, grad_out.shape());
PD_DISPATCH_FLOATING_TYPES(grad_out.type(), "assign_cpu_kernel", ([&] {
assign_cpu_kernel<data_t>(
grad_out.data<data_t>(),
grad_x.mutable_data<data_t>(),
grad_out.size());
}));
PD_DISPATCH_FLOATING_TYPES(
grad_out.type(), "assign_cpu_kernel", ([&] {
assign_cpu_kernel<data_t>(
grad_out.data<data_t>(),
grad_x.mutable_data<data_t>(paddle::PlaceType::kCPU),
grad_out.size());
}));
CheckAllBackwardAttrs(int_attr, float_vec_attr, str_vec_attr);
......
......@@ -47,7 +47,7 @@ void ConcatCpuKernel(const std::vector<paddle::Tensor>& ins,
int64_t out_cols = 0;
auto ins_cols = GetCols(ins, out_rows, &out_cols);
auto* out_data = out->mutable_data<data_t>();
auto* out_data = out->mutable_data<data_t>(paddle::PlaceType::kCPU);
int64_t col_idx = 0;
for (size_t i = 0; i < num; ++i) {
int64_t col_len = ins_cols[i];
......@@ -76,7 +76,9 @@ void SplitCpuKernel(const paddle::Tensor& in,
int64_t col_idx = 0;
for (size_t j = 0; j < num; ++j) {
int64_t col_len = out_cols[j];
auto* out_data = outs->at(j).mutable_data<data_t>() + i * col_len;
auto* out_data =
outs->at(j).mutable_data<data_t>(paddle::PlaceType::kCPU) +
i * col_len;
std::memcpy(out_data, in_data + col_idx, sizeof(data_t) * col_len);
col_idx += col_len;
}
......
......@@ -76,7 +76,9 @@ std::vector<paddle::Tensor> ConjFunction(const paddle::Tensor& x) {
PD_DISPATCH_FLOATING_AND_COMPLEX_TYPES(
x.type(), "ConjCPUKernel", ([&] {
ConjCPUKernel<data_t>(
x.data<data_t>(), x.size(), out.mutable_data<data_t>());
x.data<data_t>(),
x.size(),
out.mutable_data<data_t>(paddle::PlaceType::kCPU));
}));
return {out};
......
......@@ -32,7 +32,9 @@ std::vector<paddle::Tensor> DispatchTestInterger(const paddle::Tensor& x) {
PD_DISPATCH_INTEGRAL_TYPES(
x.type(), "assign_cpu_kernel", ([&] {
assign_cpu_kernel<data_t>(
x.data<data_t>(), out.mutable_data<data_t>(), x.size());
x.data<data_t>(),
out.mutable_data<data_t>(paddle::PlaceType::kCPU),
x.size());
}));
return {out};
......@@ -50,7 +52,9 @@ std::vector<paddle::Tensor> DispatchTestFloatAndInteger(
PD_DISPATCH_FLOATING_AND_INTEGRAL_TYPES(
x.type(), "assign_cpu_kernel", ([&] {
assign_cpu_kernel<data_t>(
x.data<data_t>(), out.mutable_data<data_t>(), x.size());
x.data<data_t>(),
out.mutable_data<data_t>(paddle::PlaceType::kCPU),
x.size());
}));
return {out};
......@@ -67,7 +71,9 @@ std::vector<paddle::Tensor> DispatchTestComplex(const paddle::Tensor& x) {
PD_DISPATCH_COMPLEX_TYPES(
x.type(), "assign_cpu_kernel", ([&] {
assign_cpu_kernel<data_t>(
x.data<data_t>(), out.mutable_data<data_t>(), x.size());
x.data<data_t>(),
out.mutable_data<data_t>(paddle::PlaceType::kCPU),
x.size());
}));
return {out};
......@@ -85,7 +91,9 @@ std::vector<paddle::Tensor> DispatchTestFloatAndComplex(
PD_DISPATCH_FLOATING_AND_COMPLEX_TYPES(
x.type(), "assign_cpu_kernel", ([&] {
assign_cpu_kernel<data_t>(
x.data<data_t>(), out.mutable_data<data_t>(), x.size());
x.data<data_t>(),
out.mutable_data<data_t>(paddle::PlaceType::kCPU),
x.size());
}));
return {out};
......@@ -103,7 +111,9 @@ std::vector<paddle::Tensor> DispatchTestFloatAndIntegerAndComplex(
PD_DISPATCH_FLOATING_AND_INTEGRAL_AND_COMPLEX_TYPES(
x.type(), "assign_cpu_kernel", ([&] {
assign_cpu_kernel<data_t>(
x.data<data_t>(), out.mutable_data<data_t>(), x.size());
x.data<data_t>(),
out.mutable_data<data_t>(paddle::PlaceType::kCPU),
x.size());
}));
return {out};
......@@ -120,7 +130,9 @@ std::vector<paddle::Tensor> DispatchTestFloatAndHalf(const paddle::Tensor& x) {
PD_DISPATCH_FLOATING_AND_HALF_TYPES(
x.type(), "assign_cpu_kernel", ([&] {
assign_cpu_kernel<data_t>(
x.data<data_t>(), out.mutable_data<data_t>(), x.size());
x.data<data_t>(),
out.mutable_data<data_t>(paddle::PlaceType::kCPU),
x.size());
}));
return {out};
......
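In the custom-operator tests the structure inside the PD_DISPATCH_* macros stays the same; only the output allocation gains an explicit PlaceType::kCPU. A sketch of one dispatch call with a plausible, hypothetical assign_cpu_kernel body (the include path is assumed):

#include <cstdint>
#include <vector>

#include "paddle/extension.h"  // assumed include path for the custom-op API

template <typename data_t>
void assign_cpu_kernel(const data_t* x_data, data_t* out_data, int64_t numel) {
  // Plain element-wise copy; the real test kernel is equivalent in spirit.
  for (int64_t i = 0; i < numel; ++i) {
    out_data[i] = x_data[i];
  }
}

std::vector<paddle::Tensor> AssignForward(const paddle::Tensor& x) {
  auto out = paddle::Tensor(paddle::PlaceType::kCPU, x.shape());
  PD_DISPATCH_FLOATING_TYPES(
      x.type(), "assign_cpu_kernel", ([&] {
        assign_cpu_kernel<data_t>(
            x.data<data_t>(),
            out.mutable_data<data_t>(paddle::PlaceType::kCPU),  // explicit place
            x.size());
      }));
  return {out};
}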