Unverified commit 34a957e3, authored by Ruibiao Chen and committed by GitHub

Replace mutable_data with DeviceContext.Alloc in phi kernels (#48500)

* Replace mutable_data with DeviceContext.Alloc in phi kernels

* Fix CI errors

* Fix CI errors

* Fix CI errors, test=kunlun

* Fix CI errors, test=kunlun

* Handle rnn_functor

* Update approvals
Parent d1e2ba8a
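Every hunk below applies the same mechanical change: instead of allocating through DenseTensor::mutable_data<T>(place), a kernel now sets the tensor's shape with Resize() and then requests memory from the device context with Alloc<T>() (or HostAlloc<T>() for CPU-side buffers). A minimal before/after sketch, assuming the phi DenseTensor and DeviceContext APIs that appear in the hunks; the kernel name, header path, and shape are illustrative only:

#include "paddle/phi/core/dense_tensor.h"  // assumed header for phi::DenseTensor

template <typename T, typename Context>
void ExampleKernel(const Context& dev_ctx, phi::DenseTensor* out) {
  // Old style, now rejected by the CI check updated at the end of this diff:
  //   T* out_data = out->mutable_data<T>({4, 8}, dev_ctx.GetPlace());
  // New style: set the shape first, then allocate through the context.
  out->Resize({4, 8});
  T* out_data = dev_ctx.template Alloc<T>(out);
  // Host-side scratch tensors use HostAlloc instead:
  //   T* scratch_data = dev_ctx.template HostAlloc<T>(&scratch);
  for (int64_t i = 0; i < out->numel(); ++i) {
    out_data[i] = static_cast<T>(0);
  }
}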
@@ -50,7 +50,7 @@ math_library(tree2col DEPS math_function)
 cc_test(
   selected_rows_functor_test
   SRCS selected_rows_functor_test.cc
-  DEPS selected_rows_functor)
+  DEPS allocator selected_rows_functor)
 cc_test(
   im2col_test
   SRCS im2col_test.cc
...
@@ -15,11 +15,15 @@ limitations under the License. */
 #include "paddle/phi/kernels/funcs/selected_rows_functor.h"
 #include "gtest/gtest.h"
+#include "paddle/fluid/memory/allocation/allocator_facade.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
 TEST(selected_rows_functor, cpu_add) {
   paddle::platform::CPUPlace cpu_place;
   phi::CPUContext ctx(cpu_place);
+  ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
+                       .GetAllocator(cpu_place)
+                       .get());
   phi::funcs::SetConstant<phi::CPUContext, float> functor;
   int64_t height = 10;
   int64_t row_numel = 10;
@@ -109,6 +113,9 @@ TEST(selected_rows_functor, cpu_add) {
 TEST(selected_rows_functor, cpu_add_to) {
   paddle::platform::CPUPlace cpu_place;
   phi::CPUContext ctx(cpu_place);
+  ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
+                       .GetAllocator(cpu_place)
+                       .get());
   phi::funcs::SetConstant<phi::CPUContext, float> functor;
   int64_t height = 10;
   int64_t row_numel = 10;
@@ -198,6 +205,9 @@ TEST(selected_rows_functor, cpu_add_to) {
 TEST(selected_rows_functor, cpu_merge_average_float) {
   paddle::platform::CPUPlace cpu_place;
   phi::CPUContext ctx(cpu_place);
+  ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
+                       .GetAllocator(cpu_place)
+                       .get());
   phi::funcs::SetConstant<phi::CPUContext, float> functor;
   int64_t height = 10;
   int64_t row_numel = 10;
@@ -233,6 +243,9 @@ TEST(selected_rows_functor, cpu_merge_average_float) {
 TEST(selected_rows_functor, cpu_merge_add_float) {
   paddle::platform::CPUPlace cpu_place;
   phi::CPUContext ctx(cpu_place);
+  ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
+                       .GetAllocator(cpu_place)
+                       .get());
   phi::funcs::SetConstant<phi::CPUContext, float> functor;
   int64_t height = 10;
   int64_t row_numel = 10;
@@ -269,6 +282,9 @@ TEST(selected_rows_functor, cpu_merge_add_float) {
 TEST(selected_rows_functor, cpu_merge_add_int) {
   paddle::platform::CPUPlace cpu_place;
   phi::CPUContext ctx(cpu_place);
+  ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
+                       .GetAllocator(cpu_place)
+                       .get());
   phi::funcs::SetConstant<phi::CPUContext, int> functor;
   int64_t height = 10;
   int64_t row_numel = 10;
@@ -305,6 +321,9 @@ TEST(selected_rows_functor, cpu_merge_add_int) {
 TEST(selected_rows_functor, cpu_merge_add_multi) {
   paddle::platform::CPUPlace cpu_place;
   phi::CPUContext ctx(cpu_place);
+  ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
+                       .GetAllocator(cpu_place)
+                       .get());
   phi::funcs::SetConstant<phi::CPUContext, float> set_const;
   int64_t height = 10;
@@ -354,6 +373,9 @@ TEST(selected_rows_functor, cpu_merge_add_multi) {
 TEST(selected_rows_functor, cpu_merge_add_multi_noduplicated) {
   paddle::platform::CPUPlace cpu_place;
   phi::CPUContext ctx(cpu_place);
+  ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
+                       .GetAllocator(cpu_place)
+                       .get());
   phi::funcs::SetConstant<phi::CPUContext, float> set_const;
   int64_t height = 10;
@@ -409,6 +431,9 @@ TEST(selected_rows_functor, cpu_merge_add_multi_noduplicated) {
 TEST(selected_rows_functor, cpu_sum_to) {
   paddle::platform::CPUPlace cpu_place;
   phi::CPUContext ctx(cpu_place);
+  ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
+                       .GetAllocator(cpu_place)
+                       .get());
   phi::funcs::SetConstant<phi::CPUContext, float> functor;
   int64_t height = 10;
   int64_t row_numel = 10;
...
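For reference, the setup the tests above now share: the standalone phi::CPUContext built in each TEST needs an allocator attached before DeviceContext::Alloc can hand out memory, which is why every case gained a SetAllocator call and the cc_test target gained the allocator dependency. A minimal sketch of that setup, with includes abbreviated and the wrapper function name purely illustrative:

#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/phi/backends/cpu/cpu_context.h"  // assumed header for phi::CPUContext

void SetUpCPUContextForTest() {
  paddle::platform::CPUPlace cpu_place;
  phi::CPUContext ctx(cpu_place);
  // Wire the facade-provided CPU allocator into the context so that
  // ctx.Alloc<T>(...) inside the functors under test can obtain memory.
  ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
                       .GetAllocator(cpu_place)
                       .get());
}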
@@ -163,7 +163,7 @@ std::unique_ptr<DeviceContext> CreateDeviceContext(
   if (!disable_setting_default_stream_for_allocator) {
     instance.SetDefaultStream(CUDAPlace(p.GetDeviceId()), cuda_ctx->stream());
   }
-  dev_ctx->SetAllocator(instance.GetAllocator(p).get());
+  dev_ctx->SetAllocator(instance.GetAllocator(p, cuda_ctx->stream()).get());
   dev_ctx->SetPinnedAllocator(
       instance.GetAllocator(paddle::platform::CUDAPinnedPlace()).get());
...
@@ -148,7 +148,7 @@ struct DeviceContext::Impl {
     if (tensor->initialized() && tensor->place() != place) {
       ClearHolder(tensor);
     }
-    auto* allocator = tensor->numel() == 0
+    auto* allocator = tensor->numel() == 0 && requested_size == 0
                           ? zero_allocator_
                           : (pinned ? pinned_allocator_ : device_allocator_);
 #ifdef PADDLE_WITH_CUDA
...
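The condition change above means the zero-size allocator is used only when both the tensor's numel() and the caller's requested_size are zero, so a kernel that asks for an explicit byte count on an as-yet-unshaped tensor still gets real device (or pinned) memory. A hypothetical fragment illustrating the case this enables, matching the SortTopk hunk further down where a workspace is allocated by byte count alone (ctx and temp_storage_bytes are assumed to exist in the surrounding kernel):

// temp_storage has never been Resize()d, so numel() == 0, but a non-zero
// byte count is requested; with the new condition this call is served by
// the device allocator rather than zero_allocator_.
phi::DenseTensor temp_storage;
ctx.template Alloc<uint8_t>(&temp_storage, temp_storage_bytes);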
@@ -44,7 +44,7 @@ void ConcatKernel(const Context& dev_ctx,
   phi::DDim out_dims = phi::funcs::ComputeAndCheckShape(true, x_dims, axis);
   out->Resize(out_dims);
-  out->mutable_data<T>(dev_ctx.GetPlace());
+  dev_ctx.template Alloc<T>(out);
   // If axis is 0, the lod of the output is not the same as inputs.
   if (axis == 0 && x[0]->lod().size() > 0) {
...
@@ -90,13 +90,11 @@ ElementwiseAddGrad(const CPUContext& ctx,
                    int axis = -1) {
   auto blas = phi::funcs::GetBlas<CPUContext, T>(ctx);
   if (dx) {
-    blas.VCOPY(
-        dout.numel(), dout.data<T>(), dx->mutable_data<T>(ctx.GetPlace()));
+    blas.VCOPY(dout.numel(), dout.data<T>(), ctx.template Alloc<T>(dx));
   }
   if (dy) {
-    blas.VCOPY(
-        dout.numel(), dout.data<T>(), dy->mutable_data<T>(ctx.GetPlace()));
+    blas.VCOPY(dout.numel(), dout.data<T>(), ctx.template Alloc<T>(dy));
   }
 }
...
@@ -34,7 +34,7 @@ void HistogramKernel(const Context& dev_ctx,
   const T* input_data = input.data<T>();
   auto input_numel = input.numel();
-  int64_t* out_data = output->mutable_data<int64_t>(dev_ctx.GetPlace());
+  int64_t* out_data = dev_ctx.template Alloc<int64_t>(output);
   phi::funcs::SetConstant<Context, int64_t>()(
       dev_ctx, output, static_cast<int64_t>(0));
...
@@ -27,7 +27,8 @@ void MaskedSelectGradKernel(const Context& dev_ctx,
                             DenseTensor* x_grad) {
   auto* mask_data = mask.data<bool>();
   auto* input_data = out_grad.data<T>();
-  auto* out_data = x_grad->mutable_data<T>(dev_ctx.GetPlace());
+  auto* out_data = dev_ctx.template Alloc<T>(x_grad);
   int mask_size = mask.numel();
   int index = 0;
...
@@ -48,7 +48,8 @@ void MaskedSelectKernel(const Context& dev_ctx,
   DDim out_dim{out_size};
   out->Resize(out_dim);
-  auto out_data = out->mutable_data<T>(phi::CPUPlace());
+  auto out_data = dev_ctx.template HostAlloc<T>(out);
   int index = 0;
   for (int i = 0; i < mask_size; i++) {
...
@@ -58,7 +58,7 @@ void PutAlongAxisGradKernel(const Context& dev_ctx,
   if (value_grad) {
     value_grad->Resize(index.dims());
-    value_grad->mutable_data<T>(dev_ctx.GetPlace());
+    dev_ctx.template Alloc<T>(value_grad);
     if (index_type == DataType::INT32) {
       paddle::operators::cpu_gather_kernel<T, int32_t>(
           out_grad, axis, index, *value_grad, dev_ctx);
...
@@ -114,8 +114,9 @@ void TemporalShiftGradKernel(const Context& dev_ctx,
       (data_layout == DataLayout::kNCHW ? phi::make_ddim({nt, c, h, w})
                                         : phi::make_ddim({nt, h, w, c}));
   const T* output_grad_data = output_grad->data<T>();
-  T* input_grad_data =
-      input_grad->mutable_data<T>(in_grad_dims, dev_ctx.GetPlace());
+  input_grad->Resize(in_grad_dims);
+  T* input_grad_data = dev_ctx.template Alloc<T>(input_grad);
   if (data_layout == DataLayout::kNCHW) {
     TemporalShiftBwNCHW<T>(
...
@@ -114,7 +114,8 @@ void TemporalShiftKernel(const Context& dev_ctx,
       (data_layout == DataLayout::kNCHW ? phi::make_ddim({nt, c, h, w})
                                         : phi::make_ddim({nt, h, w, c}));
   const T* input_data = input->data<T>();
-  T* output_data = output->mutable_data<T>(out_dims, dev_ctx.GetPlace());
+  output->Resize(out_dims);
+  T* output_data = dev_ctx.template Alloc<T>(output);
   if (data_layout == DataLayout::kNCHW) {
     TemporalShiftFwNCHW<T>(
...
@@ -51,16 +51,19 @@ void YoloBoxKernel(const Context& dev_ctx,
   const int an_stride = (class_num + 5) * stride;
   DenseTensor anchors_;
-  auto anchors_data =
-      anchors_.mutable_data<int>({an_num * 2}, dev_ctx.GetPlace());
+  anchors_.Resize({an_num * 2});
+  auto anchors_data = dev_ctx.template Alloc<int>(&anchors_);
   std::copy(anchors.begin(), anchors.end(), anchors_data);
   const T* input_data = input->data<T>();
   const int* imgsize_data = imgsize->data<int>();
-  T* boxes_data = boxes->mutable_data<T>({n, box_num, 4}, dev_ctx.GetPlace());
+  boxes->Resize({n, box_num, 4});
+  T* boxes_data = dev_ctx.template Alloc<T>(boxes);
   memset(boxes_data, 0, boxes->numel() * sizeof(T));
-  T* scores_data =
-      scores->mutable_data<T>({n, box_num, class_num}, dev_ctx.GetPlace());
+  scores->Resize({n, box_num, class_num});
+  T* scores_data = dev_ctx.template Alloc<T>(scores);
   memset(scores_data, 0, scores->numel() * sizeof(T));
   T box[4];
...
@@ -996,7 +996,7 @@ void ElementwiseCompute(const GPUContext &dev_ctx,
                         DenseTensor *z) {
   std::vector<const DenseTensor *> ins = {&x, &y};
   std::vector<DenseTensor *> outs = {z};
-  z->mutable_data<OutType>(dev_ctx.GetPlace());
+  dev_ctx.template Alloc<OutType>(z);
   BroadcastKernel<ElementwiseType::kBinary, T, OutType, Functor, 1>(
       dev_ctx, ins, &outs, axis, func);
 }
...
@@ -104,7 +104,8 @@ DenseTensor Diagonal(const DeviceContext& context,
   DenseTensor diag;
   DDim diag_dims = phi::make_ddim(ret_dims);
   auto dig_stride = phi::stride(diag_dims);
-  auto diag_data = diag.mutable_data<T>(diag_dims, context.GetPlace());
+  diag.Resize(diag_dims);
+  auto diag_data = context.template Alloc<T>(&diag);
   int64_t pos = std::abs(offset) * offset_stride;
   int64_t dim_size = ret_strides.size();
...
@@ -474,7 +474,7 @@ static inline void GetDoubleGradSafeTensor(const DeviceContext &dev_ctx,
   } else {
     auto meta = phi::DenseTensorMeta(x.dtype(), x.dims(), x.layout());
     *ddx_safe = phi::Empty(dev_ctx, std::move(meta));
-    ddx_safe->mutable_data(dev_ctx.GetPlace());
+    dev_ctx.template Alloc<T>(ddx_safe);
     SetConstant<DeviceContext, T> set_zero;
     set_zero(dev_ctx, ddx_safe, static_cast<T>(0));
   }
...
@@ -237,7 +237,8 @@ void CommonElementwiseBroadcastBackward(const CPUContext &ctx,
   // result.
   if (dx && dx->IsSharedBufferWith(dout)) {
     dx->clear();
-    dx->mutable_data<T>(x_dims, ctx.GetPlace());
+    dx->Resize(x_dims);
+    ctx.template Alloc<T>(dx);
   }
   VLOG(3) << "CommonElementwiseBroadcastBackward xdims:"
@@ -1680,7 +1681,8 @@ void CommonElementwiseBroadcastBackward(const GPUContext &ctx,
   // result.
   if (dx && dx->IsSharedBufferWith(dout)) {
     dx->clear();
-    dx->mutable_data<T>(x_dims, ctx.GetPlace());
+    dx->Resize(x_dims);
+    ctx.template Alloc<T>(dx);
   }
   VLOG(3) << "CommonElementwiseBroadcastBackward xdims:"
...
@@ -39,8 +39,11 @@ void FCFunctor<DeviceContext, T>::operator()(const DeviceContext& context,
   const int NN = N + 4;
   const int KK = K + 4;
   phi::DenseTensor X1;
-  T* X1_data = X1.mutable_data<T>({M * KK}, paddle::platform::CPUPlace());
-  Y1_data = Y1.mutable_data<T>({M * (N + 4)}, paddle::platform::CPUPlace());
+  X1.Resize({M * KK});
+  T* X1_data = context.template HostAlloc<T>(&X1);
+  Y1.Resize({M * (N + 4)});
+  Y1_data = context.template HostAlloc<T>(&Y1);
 #ifdef PADDLE_WITH_MKLML
 #pragma omp parallel for
 #endif
...
@@ -319,7 +319,9 @@ void ColwiseSum<phi::GPUContext, double>::operator()(
                         size,
                         vector->numel()));
   phi::DenseTensor one;
-  one.mutable_data<double>({in_dims[0]}, context.GetPlace());
+  one.Resize({in_dims[0]});
+  context.template Alloc<double>(&one);
   SetConstant<phi::GPUContext, double> set;
   set(context, &one, static_cast<double>(1.0));
   phi::funcs::GetBlas<phi::GPUContext, double>(context).GEMV(
@@ -355,7 +357,9 @@ void RowwiseSum<phi::GPUContext, double>::operator()(
                         in_dims[0],
                         vector->numel()));
   phi::DenseTensor one;
-  one.mutable_data<double>({size}, context.GetPlace());
+  one.Resize({size});
+  context.template Alloc<double>(&one);
   SetConstant<phi::GPUContext, double> set;
   set(context, &one, static_cast<double>(1.0));
   phi::funcs::GetBlas<phi::GPUContext, double>(context).GEMV(
...
@@ -117,7 +117,7 @@ class ColwiseSum<phi::CPUContext, T> {
                       size,
                       out->numel()));
-    T* out_buf = out->mutable_data<T>(out->place());
+    T* out_buf = context.template Alloc<T>(out);
     const T* in_buf = input.data<T>();
     for (size_t i = 0; i < static_cast<size_t>(height); ++i) {
@@ -185,7 +185,7 @@ class RowwiseMean<phi::CPUContext, T> {
                       height,
                       out->numel()));
     auto inv_size = 1.0 / size;
-    T* out_buf = out->mutable_data<T>(out->place());
+    T* out_buf = context.template Alloc<T>(out);
     const T* in_buf = input.data<T>();
     for (size_t i = 0; i < static_cast<size_t>(height); ++i) {
@@ -251,7 +251,7 @@ class RowwiseSum<phi::CPUContext, T> {
                       height,
                      out->numel()));
-    T* out_buf = out->mutable_data<T>(out->place());
+    T* out_buf = context.template Alloc<T>(out);
     const T* in_buf = input.data<T>();
     for (size_t i = 0; i < static_cast<size_t>(height); ++i) {
...
@@ -451,7 +451,7 @@ void SelectKernel(const KPDevice &dev_ctx,
     out_dim.push_back(static_cast<int64_t>(rank));
     out->Resize(phi::make_ddim(out_dim));
   }
-  auto out_data = out->mutable_data<OutT>(cuda_place);
+  auto out_data = dev_ctx.template Alloc<OutT>(out);
   // 3.2 get true data's index according to cond_data and cumsum_data
   if (total_true_num <= 0) return;
   SelectKernel<MT, InT, CT, OutT, Functor, kVecSize, SelectData>
...
@@ -542,11 +542,10 @@ struct MergeAddImpl {
     }
     out.set_height(input_height);
-    out.mutable_value()->mutable_data<T>(
-        phi::make_ddim(
-            {static_cast<int64_t>(merged_row_set.size()), input_width}),
-        context.GetPlace());
-    auto* out_data = out.mutable_value()->data<T>();
+    DenseTensor* out_tensor = out.mutable_value();
+    out_tensor->Resize(phi::make_ddim(
+        {static_cast<int64_t>(merged_row_set.size()), input_width}));
+    auto* out_data = context.template Alloc<T>(out_tensor);
     if (merged_row_set.size() == row_num && !sorted_result) {
       // no duplicated ids, just concat the result together
@@ -659,9 +658,10 @@ struct MergeAdd<phi::XPUContext, T> {
     out.set_rows(merge_rows);
     out.set_height(input.height());
-    out.mutable_value()->mutable_data<T>(
-        phi::make_ddim({static_cast<int64_t>(merge_rows.size()), input_width}),
-        context.GetPlace());
+    DenseTensor* out_tensor = out.mutable_value();
+    out_tensor->Resize(
+        phi::make_ddim({static_cast<int64_t>(merge_rows.size()), input_width}));
+    context.template Alloc<T>(out_tensor);
     std::unordered_map<int64_t, size_t> rows_to_id;
     for (size_t i = 0; i < merge_rows.size(); ++i) {
@@ -748,12 +748,13 @@ struct MergeAdd<phi::XPUContext, T> {
     out.set_rows(merge_rows);
     out.set_height(input_height);
-    out.mutable_value()->mutable_data<T>(
-        phi::make_ddim(
-            {static_cast<int64_t>(merged_row_set.size()), input_width}),
-        context.GetPlace());
-    float* y_data = reinterpret_cast<float*>(out.mutable_value()->data<T>());
+    DenseTensor* out_tensor = out.mutable_value();
+    out_tensor->Resize(phi::make_ddim(
+        {static_cast<int64_t>(merged_row_set.size()), input_width}));
+    context.template Alloc<T>(out_tensor);
+    float* y_data = reinterpret_cast<float*>(out_tensor->data<T>());
     std::unordered_map<int64_t, size_t> rows_to_id;
     for (size_t i = 0; i < merge_rows.size(); ++i) {
@@ -856,11 +857,11 @@ struct MergeAverage<phi::CPUContext, T> {
     }
     out.set_height(input_height);
-    out.mutable_value()->mutable_data<T>(
-        phi::make_ddim(
-            {static_cast<int64_t>(merged_row_set.size()), input_width}),
-        context.GetPlace());
-    auto* out_data = out.mutable_value()->data<T>();
+    DenseTensor* out_tensor = out.mutable_value();
+    out_tensor->Resize(phi::make_ddim(
+        {static_cast<int64_t>(merged_row_set.size()), input_width}));
+    auto* out_data = context.template Alloc<T>(out_tensor);
     std::vector<int64_t> merge_rows(merged_row_set.begin(),
                                     merged_row_set.end());
...
@@ -392,9 +392,10 @@ struct MergeAddImpl {
     out.set_rows(merge_rows);
     out.set_height(input.height());
-    out.mutable_value()->mutable_data<T>(
-        phi::make_ddim({static_cast<int64_t>(merge_rows.size()), input_width}),
-        context.GetPlace());
+    DenseTensor* out_tensor = out.mutable_value();
+    out_tensor->Resize(
+        phi::make_ddim({static_cast<int64_t>(merge_rows.size()), input_width}));
+    context.template Alloc<T>(out_tensor);
     phi::funcs::SetConstant<DeviceContext, T> constant_functor;
     constant_functor(context, out.mutable_value(), static_cast<T>(0));
@@ -462,9 +463,11 @@ struct MergeAddImpl {
     out.set_rows(merge_rows);
     out.set_height(input_height);
-    out.mutable_value()->mutable_data<T>(
-        phi::make_ddim({static_cast<int64_t>(merge_rows.size()), input_width}),
-        context.GetPlace());
+    DenseTensor* out_tensor = out.mutable_value();
+    out_tensor->Resize(
+        phi::make_ddim({static_cast<int64_t>(merge_rows.size()), input_width}));
+    context.template Alloc<T>(out_tensor);
     phi::funcs::SetConstant<DeviceContext, T> constant_functor;
     constant_functor(context, out.mutable_value(), static_cast<T>(0));
...
@@ -941,8 +941,7 @@ bool SortTopk(const phi::GPUContext& ctx,
   const std::vector<int64_t> dims = {num_rows, num_cols};
   auto dim = phi::make_ddim(dims);
   input_indices.Resize(dim);
-  // input_indices.Resize(num_rows*num_cols);
-  input_indices.mutable_data<int64_t>(ctx.GetPlace());
+  ctx.template Alloc<int64_t>(&input_indices);
   size_t temp_storage_bytes = -1;
   auto ComputeBlockSize = [](int col) {
@@ -984,7 +983,7 @@ bool SortTopk(const phi::GPUContext& ctx,
   const T* input = input_tensor->data<T>();
   T* values = out_tensor->data<T>();
-  int64_t* indices = indices_tensor->mutable_data<int64_t>(ctx.GetPlace());
+  int64_t* indices = ctx.template Alloc<int64_t>(indices_tensor);
   if (k == num_cols) {
     // Doing a full sort.
@@ -993,8 +992,8 @@ bool SortTopk(const phi::GPUContext& ctx,
   } else {
     temp_values.Resize(dim);
     temp_indices.Resize(dim);
-    sorted_values_ptr = temp_values.mutable_data<T>(ctx.GetPlace());
-    sorted_indices_ptr = temp_indices.mutable_data<int64_t>(ctx.GetPlace());
+    sorted_values_ptr = ctx.template Alloc<T>(&temp_values);
+    sorted_indices_ptr = ctx.template Alloc<int64_t>(&temp_indices);
   }
   // Get temp storage buffer size, maybe can allocate a fixed buffer to save
@@ -1067,7 +1066,7 @@ bool SortTopk(const phi::GPUContext& ctx,
 #endif
   }
   Tensor temp_storage;
-  temp_storage.mutable_data<uint8_t>(ctx.GetPlace(), temp_storage_bytes);
+  ctx.template Alloc<uint8_t>(&temp_storage, temp_storage_bytes);
   if (largest) {
     auto err = cub::DeviceSegmentedRadixSort::SortPairsDescending(
...
@@ -43,7 +43,7 @@ void ConcatKernel(const Context& dev_ctx,
   phi::DDim out_dims = phi::funcs::ComputeAndCheckShape(true, x_dims, axis);
   out->Resize(out_dims);
-  out->mutable_data<T>(dev_ctx.GetPlace());
+  dev_ctx.template Alloc<T>(out);
   // If axis is 0, the lod of the output is not the same as inputs.
   if (axis == 0 && x[0]->lod().size() > 0) {
...
@@ -1231,7 +1231,7 @@ class DepthwiseConvFunctor<phi::GPUContext, T, fuse_relu_before_conv> {
     const T* input_data = input.data<T>();
     const T* filter_data = filter.data<T>();
-    T* output_data = output->mutable_data<T>(context.GetPlace());
+    T* output_data = context.template Alloc<T>(output);
     phi::DenseTensor filter_hwc;
     if (data_layout == DataLayout::kNHWC) {
@@ -1240,7 +1240,7 @@ class DepthwiseConvFunctor<phi::GPUContext, T, fuse_relu_before_conv> {
                                         filter.dims()[0],
                                         filter.dims()[1]});
       filter_hwc.Resize(filter_hwc_dims);
-      filter_hwc.mutable_data<T>(context.GetPlace());
+      context.template Alloc<T>(&filter_hwc);
       std::vector<int> perm_axis({2, 3, 0, 1});
       phi::funcs::TransposeNormal<phi::GPUContext, T> trans;
       trans(context, filter, &filter_hwc, perm_axis);
@@ -1409,7 +1409,7 @@ class DepthwiseConvInputGradFunctor<phi::GPUContext, T, fuse_relu_before_conv> {
     const T* input_data = input.data<T>();
     const T* filter_data = filter.data<T>();
     const T* output_grad_data = output_grad.data<T>();
-    T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
+    T* input_grad_data = context.template Alloc<T>(input_grad);
     phi::DenseTensor filter_hwc;
     if (data_layout == DataLayout::kNHWC) {
@@ -1418,7 +1418,7 @@ class DepthwiseConvInputGradFunctor<phi::GPUContext, T, fuse_relu_before_conv> {
                                         filter.dims()[0],
                                         filter.dims()[1]});
       filter_hwc.Resize(filter_hwc_dims);
-      filter_hwc.mutable_data<T>(context.GetPlace());
+      context.template Alloc<T>(&filter_hwc);
       std::vector<int> perm_axis({2, 3, 0, 1});
       phi::funcs::TransposeNormal<phi::GPUContext, T> trans;
       trans(context, filter, &filter_hwc, perm_axis);
@@ -1584,7 +1584,7 @@ class DepthwiseConvFilterGradFunctor<phi::GPUContext,
     const T* input_data = input.data<T>();
     const T* output_grad_data = output_grad.data<T>();
-    T* filter_grad_data = filter_grad->mutable_data<T>(context.GetPlace());
+    T* filter_grad_data = context.template Alloc<T>(filter_grad);
     int block_size = 512;
     int blocks;
@@ -1654,7 +1654,7 @@ class DepthwiseConvFilterGradFunctor<phi::GPUContext,
                          filter_grad->dims()[0],            \
                          filter_grad->dims()[1]});          \
     filter_grad_hwc.Resize(filter_grad_hwc_dims);           \
-    filter_grad_hwc.mutable_data<T>(context.GetPlace());    \
+    context.template Alloc<T>(&filter_grad_hwc);            \
     phi::funcs::SetConstant<phi::GPUContext, T> set_zero;   \
     set_zero(context, &filter_grad_hwc, static_cast<T>(0)); \
     filter_grad_data = filter_grad_hwc.data<T>();           \
...
@@ -75,7 +75,7 @@ void DepthwiseConvGradKernel(const Context& dev_ctx,
   phi::funcs::SetConstant<Context, T> set_zero;
   if (input_grad) {
-    input_grad->mutable_data<T>(dev_ctx.GetPlace());
+    dev_ctx.template Alloc<T>(input_grad);
     set_zero(dev_ctx, input_grad, static_cast<T>(0));
     if (fuse_relu) {
@@ -106,7 +106,7 @@ void DepthwiseConvGradKernel(const Context& dev_ctx,
   }
   if (filter_grad) {
-    filter_grad->mutable_data<T>(dev_ctx.GetPlace());
+    dev_ctx.template Alloc<T>(filter_grad);
     set_zero(dev_ctx, filter_grad, static_cast<T>(0));
     if (fuse_relu) {
       paddle::operators::math::DepthwiseConvFilterGradFunctor<Context, T, true>
...
@@ -32,7 +32,7 @@ void DepthwiseConvKernel(const Context& dev_ctx,
                          const std::string& data_format,
                          DenseTensor* out) {
   DenseTensor* output = out;
-  output->mutable_data<T>(dev_ctx.GetPlace());
+  dev_ctx.template Alloc<T>(output);
   const std::vector<int> strides = strides_t;
   std::vector<int> dilations = dilations_t;
...
@@ -29,7 +29,7 @@ void DropoutGradRawKernel(const Context& dev_ctx,
                           const std::string& mode,
                           DenseTensor* x_grad) {
   bool upscale_in_train = (mode == "upscale_in_train");
-  x_grad->mutable_data<T>(dev_ctx.GetPlace());
+  dev_ctx.template Alloc<T>(x_grad);
   paddle::operators::DropoutGradGPUKernelDriver<T>(dev_ctx,
                                                    is_test,
                                                    p.to<float>(),
...
@@ -153,7 +153,7 @@ void DefaultElementwiseAddGrad(const GPUContext &ctx,
   // dx
   if (dx != nullptr) {
-    auto *dx_data = dx->mutable_data<T>(ctx.GetPlace());
+    auto *dx_data = ctx.template Alloc<T>(dx);
     if (dx->dims() == dout.dims()) {
       if (dx_data != dout_data) {
         phi::Copy(ctx, dout, ctx.GetPlace(), false, dx);
@@ -163,7 +163,8 @@ void DefaultElementwiseAddGrad(const GPUContext &ctx,
       // the result of dy wrong.
       if (dx->IsSharedBufferWith(dout)) {
         dx->clear();
-        dx->mutable_data<T>(x.dims(), ctx.GetPlace());
+        dx->Resize(x.dims());
+        ctx.template Alloc<T>(dx);
       }
       std::vector<int> reduce_dims =
           funcs::GetReduceDim(x.dims(), out.dims(), axis);
@@ -173,7 +174,7 @@ void DefaultElementwiseAddGrad(const GPUContext &ctx,
   }
   // dy
   if (dy != nullptr) {
-    auto *dy_data = dy->mutable_data<T>(ctx.GetPlace());
+    auto *dy_data = ctx.template Alloc<T>(dy);
     if (dy->dims() == dout.dims()) {
       if (dy_data != dout_data) {
         phi::Copy(ctx, dout, ctx.GetPlace(), false, dy);
@@ -217,12 +218,11 @@ void ElementwiseAddGrad(const GPUContext &ctx,
                         PREDEFINED_BLOCK_SIZE,
                     1);
     SimpleElemwiseAddGradCUDAKernel<T>
-        <<<grid_size, block_size, 0, ctx.stream()>>>(
-            dout.data<T>(),
-            size,
-            vec_size,
-            dx->mutable_data<T>(ctx.GetPlace()),
-            dy->mutable_data<T>(ctx.GetPlace()));
+        <<<grid_size, block_size, 0, ctx.stream()>>>(dout.data<T>(),
+                                                     size,
+                                                     vec_size,
+                                                     ctx.template Alloc<T>(dx),
+                                                     ctx.template Alloc<T>(dy));
   } else {
     VLOG(4) << "Special case when dy_data is the same as dout_data, "
                "and dx_data is the same as dout_data, do not need "
@@ -264,7 +264,7 @@ void default_elementwise_sub_grad(const GPUContext &ctx,
   auto *dout_data = dout.data<T>();
   // dx
   if (dx != nullptr) {
-    auto *dx_data = dx->mutable_data<T>(ctx.GetPlace());
+    auto *dx_data = ctx.template Alloc<T>(dx);
     if (dx->dims() == dout.dims()) {
       if (dx_data != dout_data) {
         phi::Copy(ctx, dout, ctx.GetPlace(), false, dx);
@@ -274,7 +274,8 @@ void default_elementwise_sub_grad(const GPUContext &ctx,
       // the result of dy wrong.
       if (dx->IsSharedBufferWith(dout)) {
         dx->clear();
-        dx->mutable_data<T>(x.dims(), ctx.GetPlace());
+        dx->Resize(x.dims());
+        ctx.template Alloc<T>(dx);
       }
       std::vector<int> reduce_dims =
          funcs::GetReduceDim(x.dims(), out.dims(), axis);
@@ -284,7 +285,7 @@ void default_elementwise_sub_grad(const GPUContext &ctx,
   }
   // dy
   if (dy != nullptr) {
-    auto *dy_data = dy->mutable_data<T>(ctx.GetPlace());
+    auto *dy_data = ctx.template Alloc<T>(dy);
     if (dy->dims() == dout.dims()) {
       if (dy_data != dout_data) {
         dim3 block_size = dim3(PREDEFINED_BLOCK_SIZE, 1);
@@ -293,10 +294,7 @@ void default_elementwise_sub_grad(const GPUContext &ctx,
             dim3((size + PREDEFINED_BLOCK_SIZE - 1) / PREDEFINED_BLOCK_SIZE, 1);
         SimpleElemwiseSubGradCUDAKernel<T>
             <<<grid_size, block_size, 0, ctx.stream()>>>(
-                dout.data<T>(),
-                size,
-                nullptr,
-                dy->mutable_data<T>(ctx.GetPlace()));
+                dout.data<T>(), size, nullptr, ctx.template Alloc<T>(dy));
       }
     } else {
       std::vector<int> reduce_dims =
@@ -320,11 +318,10 @@ void elementwise_sub_grad(const GPUContext &ctx,
   dim3 grid_size =
       dim3((size + PREDEFINED_BLOCK_SIZE - 1) / PREDEFINED_BLOCK_SIZE, 1);
   SimpleElemwiseSubGradCUDAKernel<T>
-      <<<grid_size, block_size, 0, ctx.stream()>>>(
-          dout.data<T>(),
-          size,
-          dx->mutable_data<T>(ctx.GetPlace()),
-          dy->mutable_data<T>(ctx.GetPlace()));
+      <<<grid_size, block_size, 0, ctx.stream()>>>(dout.data<T>(),
+                                                   size,
+                                                   ctx.template Alloc<T>(dx),
+                                                   ctx.template Alloc<T>(dy));
 }
 /*
 ******************************
...
@@ -314,9 +314,9 @@ void GraphReindexKernel(const Context& dev_ctx,
     const auto* ph_index = hashtable_index.get_ptr();
     hashtable_index_out.ShareDataWith(*ph_index);
     int* hashtable_value_data =
-        hashtable_value_out.mutable_data<int>(dev_ctx.GetPlace());
+        dev_ctx.template Alloc<int>(&hashtable_value_out);
     int* hashtable_index_data =
-        hashtable_index_out.mutable_data<int>(dev_ctx.GetPlace());
+        dev_ctx.template Alloc<int>(&hashtable_index_out);
     BufferReindex<T, Context>(dev_ctx,
                               x_data,
                               src_outputs,
...
@@ -85,7 +85,7 @@ void HistogramKernel(const Context& dev_ctx,
   const T* input_data = input.data<T>();
   const int input_numel = input.numel();
-  int64_t* out_data = output->mutable_data<int64_t>(dev_ctx.GetPlace());
+  int64_t* out_data = dev_ctx.template Alloc<int64_t>(output);
   phi::funcs::SetConstant<Context, int64_t>()(
       dev_ctx, output, static_cast<int64_t>(0));
@@ -98,8 +98,10 @@ void HistogramKernel(const Context& dev_ctx,
   auto input_x = phi::EigenVector<T>::Flatten(input);
   DenseTensor input_min_t, input_max_t;
-  auto* input_min_data = input_min_t.mutable_data<T>({1}, dev_ctx.GetPlace());
-  auto* input_max_data = input_max_t.mutable_data<T>({1}, dev_ctx.GetPlace());
+  input_min_t.Resize({1});
+  input_max_t.Resize({1});
+  auto* input_min_data = dev_ctx.template Alloc<T>(&input_min_t);
+  auto* input_max_data = dev_ctx.template Alloc<T>(&input_max_t);
   auto input_min_scala = phi::EigenScalar<T>::From(input_min_t);
   auto input_max_scala = phi::EigenScalar<T>::From(input_max_t);
...
@@ -67,7 +67,7 @@ bool SortKthvalue(const phi::GPUContext& dev_ctx,
   DenseTensor temp_values, temp_indices;
   const T* input = input_tensor->data<T>();
   T* values = out_tensor->data<T>();
-  int64_t* indices = indices_tensor->mutable_data<int64_t>(dev_ctx.GetPlace());
+  int64_t* indices = dev_ctx.template Alloc<int64_t>(indices_tensor);
   temp_values.Resize(dim);
   temp_indices.Resize(dim);
   sorted_values_ptr = dev_ctx.template Alloc<T>(&temp_values);
@@ -208,13 +208,16 @@ void KthvalueKernel(const Context& dev_ctx,
     }
     trans_out_dims[in_dims.size() - 1] = 1;
     DenseTensor trans_input;
-    trans_input.mutable_data<T>(trans_dims, dev_ctx.GetPlace());
+    trans_input.Resize(trans_dims);
+    dev_ctx.template Alloc<T>(&trans_input);
     int ndims = trans.size();
     funcs::TransCompute<phi::GPUContext, T>(
         ndims, dev_ctx, x, &trans_input, trans);
     DenseTensor trans_ind, trans_out;
-    trans_ind.mutable_data<int64_t>(trans_out_dims, dev_ctx.GetPlace());
-    trans_out.mutable_data<T>(trans_out_dims, dev_ctx.GetPlace());
+    trans_ind.Resize(trans_out_dims);
+    trans_out.Resize(trans_out_dims);
+    dev_ctx.template Alloc<int64_t>(&trans_ind);
+    dev_ctx.template Alloc<T>(&trans_out);
     const int64_t input_height =
         phi::product(phi::slice_ddim(trans_dims, 0, trans_dims.size() - 1));
     const int64_t input_width = trans_dims[trans_dims.size() - 1];
...
@@ -50,7 +50,7 @@ void PutAlongAxisGradKernel(const Context& dev_ctx,
   }
   if (value_grad) {
     value_grad->Resize(index.dims());
-    value_grad->mutable_data<T>(dev_ctx.GetPlace());
+    dev_ctx.template Alloc<T>(value_grad);
     if (index_type == DataType::INT32) {
       paddle::operators::gpu_gather_kernel<T, int32_t>(
           out_grad,
...
@@ -58,7 +58,7 @@ class RNNDescriptors {
   template <typename T>
   void Create(const gpuDnnHandle_t &handle,
-              const Place &place,
+              const DeviceContext &dev_ctx,
               const std::vector<int> &sequence_length,
               size_t *workspace_size,
               size_t *reserve_size,
@@ -103,17 +103,15 @@ class RNNDescriptors {
 #ifdef PADDLE_WITH_HIP
       PADDLE_ENFORCE_GPU_SUCCESS(
          phi::dynload::miopenDropoutGetStatesSize(handle, &state_size));
-      dropout_state->mutable_data<uint8_t>({static_cast<int64_t>(state_size)},
-                                           place);
 #else
       PADDLE_ENFORCE_GPU_SUCCESS(
          phi::dynload::cudnnDropoutGetStatesSize(handle, &state_size));
-      dropout_state->mutable_data<uint8_t>({static_cast<int64_t>(state_size)},
-                                           place);
 #endif
+      dropout_state->Resize({static_cast<int64_t>(state_size)});
+      dev_ctx.template Alloc<uint8_t>(dropout_state);
     }
     dropout_desc_.descriptor(handle,
-                             place,
+                             dev_ctx.GetPlace(),
                              is_initialized,
                              dropout_prob_,
                              is_test_ ? nullptr : dropout_state,
...
@@ -248,7 +248,7 @@ void RnnGradKernel(const Context &dev_ctx,
                 is_test);
   rnn.Create<T>(handle,
-                dev_ctx.GetPlace(),
+                dev_ctx,
                 SequenceLength,
                 &workspace_size,
                 &reserve_size,
...
@@ -280,7 +280,7 @@ void RnnKernel(const Context &dev_ctx,
                 is_bidirec,
                 is_test);
   rnn.Create<T>(handle,
-                dev_ctx.GetPlace(),
+                dev_ctx,
                 SequenceLength,
                 &workspace_size,
                 &reserve_size,
...
@@ -82,9 +82,8 @@ void SGDDenseKernel(const Context& dev_ctx,
   const MPDType* master_in_data =
       multi_precision ? master_param->data<MPDType>() : nullptr;
   MPDType* master_out_data =
-      multi_precision
-          ? master_param_out->mutable_data<MPDType>(dev_ctx.GetPlace())
-          : nullptr;
+      multi_precision ? dev_ctx.template Alloc<MPDType>(master_param_out)
+                      : nullptr;
   int block = 512;
   int grid = (param.numel() + block - 1) / block;
@@ -94,7 +93,7 @@ void SGDDenseKernel(const Context& dev_ctx,
       grad.data<T>(),
       learning_rate.data<T>(),
       param.numel(),
-      param_out->mutable_data<T>(dev_ctx.GetPlace()),
+      dev_ctx.template Alloc<T>(param_out),
      master_in_data,
      master_out_data);
 }
@@ -119,9 +118,8 @@ void SGDDenseParamSparseGradKernel(
   const MPDType* master_in_data =
       multi_precision ? master_param->data<MPDType>() : nullptr;
   MPDType* master_out_data =
-      multi_precision
-          ? master_param_out->mutable_data<MPDType>(dev_ctx.GetPlace())
-          : nullptr;
+      multi_precision ? dev_ctx.template Alloc<MPDType>(master_param_out)
+                      : nullptr;
   PADDLE_ENFORCE_EQ(
       &param,
...
@@ -120,8 +120,8 @@ void TemporalShiftGradKernel(const Context& dev_ctx,
       (data_layout == DataLayout::kNCHW ? phi::make_ddim({nt, c, h, w})
                                         : phi::make_ddim({nt, h, w, c}));
   const T* output_grad_data = output_grad->data<T>();
-  T* input_grad_data =
-      input_grad->mutable_data<T>(in_grad_dims, dev_ctx.GetPlace());
+  input_grad->Resize(in_grad_dims);
+  T* input_grad_data = dev_ctx.template Alloc<T>(input_grad);
   int pixelNum = nt * chw;
   int threads = 1024;
...
@@ -120,7 +120,8 @@ void TemporalShiftKernel(const Context& dev_ctx,
       (data_layout == DataLayout::kNCHW ? phi::make_ddim({nt, c, h, w})
                                         : phi::make_ddim({nt, h, w, c}));
   const T* input_data = input->data<T>();
-  T* output_data = output->mutable_data<T>(out_dims, dev_ctx.GetPlace());
+  output->Resize(out_dims);
+  T* output_data = dev_ctx.template Alloc<T>(output);
   int pixelNum = nt * chw;
   int threads = 1024;
...
@@ -139,9 +139,10 @@ void YoloBoxKernel(const Context& dev_ctx,
   const T* input_data = input->data<T>();
   const int* imgsize_data = img_size.data<int>();
-  T* boxes_data = boxes->mutable_data<T>({n, box_num, 4}, dev_ctx.GetPlace());
-  T* scores_data =
-      scores->mutable_data<T>({n, box_num, class_num}, dev_ctx.GetPlace());
+  boxes->Resize({n, box_num, 4});
+  T* boxes_data = dev_ctx.template Alloc<T>(boxes);
+  scores->Resize({n, box_num, class_num});
+  T* scores_data = dev_ctx.template Alloc<T>(scores);
   phi::funcs::SetConstant<phi::GPUContext, T> set_zero;
   set_zero(dev_ctx, boxes, static_cast<T>(0));
   set_zero(dev_ctx, scores, static_cast<T>(0));
...
@@ -42,7 +42,7 @@ void DigammaGradKernel(const Context& ctx,
                        const DenseTensor& x,
                        const DenseTensor& out_grad,
                        DenseTensor* x_grad) {
-  x_grad->mutable_data<T>(ctx.GetPlace());
+  ctx.template Alloc<T>(x_grad);
   auto* dout_data = out_grad.data<T>();
   auto* x_data = x.data<T>();
...
@@ -38,7 +38,7 @@ struct DigammaFunctor {
 template <typename T, typename Context>
 void DigammaKernel(const Context& ctx, const DenseTensor& x, DenseTensor* out) {
-  out->mutable_data<T>(ctx.GetPlace());
+  ctx.template Alloc<T>(out);
   auto* x_data = x.data<T>();
   auto* out_data = out->data<T>();
   auto numel = x.numel();
...
@@ -68,7 +68,7 @@ void AddDoubleGradImpl(const Context& dev_ctx,
     funcs::GetDoubleGradSafeTensor<Context, T>(
         dev_ctx, y, ddy.get_ptr(), &ddy_safe);
-    ddout->mutable_data<T>(dev_ctx.GetPlace());
+    dev_ctx.template Alloc<T>(ddout);
     auto ddx_dims = ddx_safe.dims();
     auto ddy_dims = ddy_safe.dims();
     if (ddx_dims.size() >= ddy_dims.size()) {
@@ -102,7 +102,7 @@ void SubtractDoubleGradImpl(const Context& dev_ctx,
     funcs::GetDoubleGradSafeTensor<Context, T>(
         dev_ctx, y, ddy.get_ptr(), &ddy_safe);
-    ddout->mutable_data<T>(dev_ctx.GetPlace());
+    dev_ctx.template Alloc<T>(ddout);
     funcs::ElementwiseCompute<funcs::SubtractFunctor<T>, T>(
         dev_ctx, ddx_safe, ddy_safe, axis, funcs::SubtractFunctor<T>(), ddout);
   }
...
@@ -25,32 +25,6 @@ namespace phi {
 static constexpr size_t WAIT_THRESHOLD = 64 * 1024;
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-template <>
-void MemcpyH2DKernel(const GPUContext& dev_ctx,
-                     const DenseTensor& x,
-                     int dst_place_type,
-                     DenseTensor* out) {
-  PADDLE_ENFORCE_GE(
-      dst_place_type,
-      0,
-      errors::OutOfRange("dst_place_type only support 0-3, but got: %d",
-                         dst_place_type));
-  PADDLE_ENFORCE_LE(
-      dst_place_type,
-      3,
-      errors::OutOfRange("dst_place_type only support 0-3, but got: %d",
-                         dst_place_type));
-  auto stream = dev_ctx.stream();
-  out->mutable_data(dev_ctx.GetPlace(),
-                    x.dtype(),
-                    phi::Stream(reinterpret_cast<phi::StreamId>(stream)));
-  Copy(dev_ctx, x, dev_ctx.GetPlace(), false, out);
-}
-#endif
 template <typename Context>
 void MemcpyH2DKernel(const Context& dev_ctx,
                      const DenseTensor& x,
@@ -77,10 +51,6 @@ void MemcpyD2HKernel(const Context& dev_ctx,
                      DenseTensor* out) {
   switch (dst_place_type) {
     case 0:
-      // NOTE(lvyongkang): phi::Copy will use DeviceContext.zero_allocator to
-      // alloc and assign DeviceContext.place to out, which causes place check
-      // fails. So we specify out's place here.
-      out->mutable_data(CPUPlace());
       Copy(dev_ctx, x, CPUPlace(), false, out);
       // NOTE(copy from Aurelius84): host <-> device memory copies of a memory
       // block of 64 KB or less are asynchronous. See
@@ -91,10 +61,6 @@ void MemcpyD2HKernel(const Context& dev_ctx,
       break;
     case 1:
-      // NOTE(lvyongkang): phi::Copy will use DeviceContext.zero_allocator to
-      // alloc and assign DeviceContext.place to out, which causes place check
-      // fails. So we specify out's place here.
-      out->mutable_data(GPUPinnedPlace());
       Copy(dev_ctx, x, GPUPinnedPlace(), false, out);
       // paddle::memory::Copy use async copy for GPUPinnedPlace
       dev_ctx.Wait();
...
@@ -74,7 +74,7 @@ void AddGradKernel(const Context& dev_ctx,
   }
   if (dy != nullptr) {
-    T* dy_data = dy->mutable_data<T>(dev_ctx.GetPlace());
+    T* dy_data = dev_ctx.template Alloc<T>(dy);
     if (dy->dims() == dz_dims) {
       if (dy_data != dz_data) {
         int ret = xpu::copy(dev_ctx.x_context(),
...
@@ -45,7 +45,7 @@ void TensorSetConstantXPU(phi::DenseTensor* tensor,
 template <typename T, typename Context, typename VType>
 void FullValueXPU(const Context& dev_ctx, DenseTensor* tensor, VType val) {
-  tensor->mutable_data<T>(dev_ctx.GetPlace());
+  dev_ctx.template Alloc<T>(tensor);
   PD_VISIT_ALL_TYPES(tensor->dtype(), "FullValueXPU", ([&] {
                        TensorSetConstantXPU<VType, data_t>(
...
@@ -342,17 +342,17 @@ if [ "${PHI_INCLUDE_FLUID_FILES}" != "" ] && [ "${GIT_PR_ID}" != "" ]; then
     check_approval 1 chenwhql YuanRisheng zyfncg
 fi
-HAS_MODIFIED_PHI_KERNEL_FILES=`git diff --name-only upstream/$BRANCH | grep "paddle/phi/kernels" || true`
-PHI_USE_MUTABLE_DATA_FILES=""
-for CHANGE_FILE in ${HAS_MODIFIED_PHI_KERNEL_FILES}; do
-    PHI_DIR_ADDED_LINES=`git diff -U0 upstream/$BRANCH -- ${PADDLE_ROOT}/${CHANGE_FILE} | grep "^+" | grep -w "mutable_data" || true`
-    if [ "${PHI_DIR_ADDED_LINES}" != "" ] && [ "${GIT_PR_ID}" != "" ]; then
-        PHI_USE_MUTABLE_DATA_FILES="${PHI_USE_MUTABLE_DATA_FILES} ${CHANGE_FILE}"
+HAS_MODIFIED_PHI_OR_FLUID_FILES=`git diff --name-only upstream/$BRANCH | grep -E "paddle/phi|paddle/fluid" || true`
+USE_MUTABLE_DATA_FILES=""
+for CHANGE_FILE in ${HAS_MODIFIED_PHI_OR_FLUID_FILES}; do
+    ADDED_LINES=`git diff -U0 upstream/$BRANCH -- ${PADDLE_ROOT}/${CHANGE_FILE} | grep "^+" | grep -w "mutable_data" || true`
+    if [ "${ADDED_LINES}" != "" ] && [ "${GIT_PR_ID}" != "" ]; then
+        USE_MUTABLE_DATA_FILES="${USE_MUTABLE_DATA_FILES} ${CHANGE_FILE}"
     fi
 done
-if [ "${PHI_USE_MUTABLE_DATA_FILES}" != "" ] && [ "${GIT_PR_ID}" != "" ]; then
-    echo_line="You can not use the DenseTensor::mutable_data() method in paddle/phi/kernels files(${PHI_USE_MUTABLE_DATA_FILES}). If you want to alloc memory, use phi::DeviceContext::Alloc() or phi::DeviceContext::HostAlloc() instead and if you want to get mutable data, use DenseTensor::data(). If you have any questions, you can have one RD (chenwhql, Shixiaowei02, YuanRisheng or zyfncg) review and approve.\n"
-    check_approval 1 chenwhql Shixiaowei02 YuanRisheng zyfncg
+if [ "${USE_MUTABLE_DATA_FILES}" != "" ] && [ "${GIT_PR_ID}" != "" ]; then
+    echo_line="You can not use the DenseTensor::mutable_data() method in files(${USE_MUTABLE_DATA_FILES}). If you want to alloc memory, use phi::DeviceContext::Alloc() or phi::DeviceContext::HostAlloc() instead and if you want to get mutable data, use DenseTensor::data(). If you have any questions, you can have one RD (chenwhql, Shixiaowei02, YuanRisheng, zyfncg or From00) review and approve.\n"
+    check_approval 1 chenwhql Shixiaowei02 YuanRisheng zyfncg From00
 fi
 ALL_CHANGE_FILES=`git diff --numstat upstream/$BRANCH | awk '{print $3}' | grep ".py"`
...