Unverified commit 34a957e3, authored by Ruibiao Chen, committed by GitHub

Replace mutable_data with DeviceContext.Alloc in phi kernels (#48500)

* Replace mutable_data with DeviceContext.Alloc in phi kernels

* Fix CI errors

* Fix CI errors

* Fix CI errors, test=kunlun

* Fix CI errors, test=kunlun

* Handle rnn_functor

* Update approvals
Parent d1e2ba8a
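
A minimal sketch of the migration pattern this commit applies across the phi kernels, assuming the usual templated kernel signature; the kernel name and tensor arguments here are illustrative only and do not appear in the diff below:

#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/kernels/funcs/math_function.h"  // phi::funcs::SetConstant

// Old pattern, now rejected by the approval check at the end of this diff:
//   out->Resize(out_dims);
//   T* out_data = out->mutable_data<T>(dev_ctx.GetPlace());
//
// New pattern: set the shape first, then let the DeviceContext allocate.
template <typename T, typename Context>
void ExampleKernel(const Context& dev_ctx,
                   const phi::DenseTensor& x,
                   phi::DenseTensor* out) {
  out->Resize(x.dims());                         // shape must be set before Alloc
  T* out_data = dev_ctx.template Alloc<T>(out);  // memory comes from the context's allocator
  phi::funcs::SetConstant<Context, T> set_zero;  // same follow-up used by several kernels below
  set_zero(dev_ctx, out, static_cast<T>(0));
  (void)out_data;  // a real kernel would write results through out_data
}

Host-side staging buffers follow the same shape-then-allocate pattern with dev_ctx.template HostAlloc<T>(...) in place of mutable_data<T>(phi::CPUPlace()). Because Alloc is served by the context's allocator, tests must now wire one in via ctx.SetAllocator(...), which is why selected_rows_functor_test gains the allocator dependency in the first hunk.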
......@@ -50,7 +50,7 @@ math_library(tree2col DEPS math_function)
cc_test(
selected_rows_functor_test
SRCS selected_rows_functor_test.cc
DEPS selected_rows_functor)
DEPS allocator selected_rows_functor)
cc_test(
im2col_test
SRCS im2col_test.cc
......
......@@ -15,11 +15,15 @@ limitations under the License. */
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
#include "gtest/gtest.h"
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/phi/kernels/funcs/math_function.h"
TEST(selected_rows_functor, cpu_add) {
paddle::platform::CPUPlace cpu_place;
phi::CPUContext ctx(cpu_place);
ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(cpu_place)
.get());
phi::funcs::SetConstant<phi::CPUContext, float> functor;
int64_t height = 10;
int64_t row_numel = 10;
......@@ -109,6 +113,9 @@ TEST(selected_rows_functor, cpu_add) {
TEST(selected_rows_functor, cpu_add_to) {
paddle::platform::CPUPlace cpu_place;
phi::CPUContext ctx(cpu_place);
ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(cpu_place)
.get());
phi::funcs::SetConstant<phi::CPUContext, float> functor;
int64_t height = 10;
int64_t row_numel = 10;
......@@ -198,6 +205,9 @@ TEST(selected_rows_functor, cpu_add_to) {
TEST(selected_rows_functor, cpu_merge_average_float) {
paddle::platform::CPUPlace cpu_place;
phi::CPUContext ctx(cpu_place);
ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(cpu_place)
.get());
phi::funcs::SetConstant<phi::CPUContext, float> functor;
int64_t height = 10;
int64_t row_numel = 10;
......@@ -233,6 +243,9 @@ TEST(selected_rows_functor, cpu_merge_average_float) {
TEST(selected_rows_functor, cpu_merge_add_float) {
paddle::platform::CPUPlace cpu_place;
phi::CPUContext ctx(cpu_place);
ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(cpu_place)
.get());
phi::funcs::SetConstant<phi::CPUContext, float> functor;
int64_t height = 10;
int64_t row_numel = 10;
......@@ -269,6 +282,9 @@ TEST(selected_rows_functor, cpu_merge_add_float) {
TEST(selected_rows_functor, cpu_merge_add_int) {
paddle::platform::CPUPlace cpu_place;
phi::CPUContext ctx(cpu_place);
ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(cpu_place)
.get());
phi::funcs::SetConstant<phi::CPUContext, int> functor;
int64_t height = 10;
int64_t row_numel = 10;
......@@ -305,6 +321,9 @@ TEST(selected_rows_functor, cpu_merge_add_int) {
TEST(selected_rows_functor, cpu_merge_add_multi) {
paddle::platform::CPUPlace cpu_place;
phi::CPUContext ctx(cpu_place);
ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(cpu_place)
.get());
phi::funcs::SetConstant<phi::CPUContext, float> set_const;
int64_t height = 10;
......@@ -354,6 +373,9 @@ TEST(selected_rows_functor, cpu_merge_add_multi) {
TEST(selected_rows_functor, cpu_merge_add_multi_noduplicated) {
paddle::platform::CPUPlace cpu_place;
phi::CPUContext ctx(cpu_place);
ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(cpu_place)
.get());
phi::funcs::SetConstant<phi::CPUContext, float> set_const;
int64_t height = 10;
......@@ -409,6 +431,9 @@ TEST(selected_rows_functor, cpu_merge_add_multi_noduplicated) {
TEST(selected_rows_functor, cpu_sum_to) {
paddle::platform::CPUPlace cpu_place;
phi::CPUContext ctx(cpu_place);
ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(cpu_place)
.get());
phi::funcs::SetConstant<phi::CPUContext, float> functor;
int64_t height = 10;
int64_t row_numel = 10;
......
......@@ -163,7 +163,7 @@ std::unique_ptr<DeviceContext> CreateDeviceContext(
if (!disable_setting_default_stream_for_allocator) {
instance.SetDefaultStream(CUDAPlace(p.GetDeviceId()), cuda_ctx->stream());
}
dev_ctx->SetAllocator(instance.GetAllocator(p).get());
dev_ctx->SetAllocator(instance.GetAllocator(p, cuda_ctx->stream()).get());
dev_ctx->SetPinnedAllocator(
instance.GetAllocator(paddle::platform::CUDAPinnedPlace()).get());
......
......@@ -148,7 +148,7 @@ struct DeviceContext::Impl {
if (tensor->initialized() && tensor->place() != place) {
ClearHolder(tensor);
}
auto* allocator = tensor->numel() == 0
auto* allocator = tensor->numel() == 0 && requested_size == 0
? zero_allocator_
: (pinned ? pinned_allocator_ : device_allocator_);
#ifdef PADDLE_WITH_CUDA
......
......@@ -44,7 +44,7 @@ void ConcatKernel(const Context& dev_ctx,
phi::DDim out_dims = phi::funcs::ComputeAndCheckShape(true, x_dims, axis);
out->Resize(out_dims);
out->mutable_data<T>(dev_ctx.GetPlace());
dev_ctx.template Alloc<T>(out);
// If axis is 0, the lod of the output is not the same as inputs.
if (axis == 0 && x[0]->lod().size() > 0) {
......
......@@ -90,13 +90,11 @@ ElementwiseAddGrad(const CPUContext& ctx,
int axis = -1) {
auto blas = phi::funcs::GetBlas<CPUContext, T>(ctx);
if (dx) {
blas.VCOPY(
dout.numel(), dout.data<T>(), dx->mutable_data<T>(ctx.GetPlace()));
blas.VCOPY(dout.numel(), dout.data<T>(), ctx.template Alloc<T>(dx));
}
if (dy) {
blas.VCOPY(
dout.numel(), dout.data<T>(), dy->mutable_data<T>(ctx.GetPlace()));
blas.VCOPY(dout.numel(), dout.data<T>(), ctx.template Alloc<T>(dy));
}
}
......
......@@ -34,7 +34,7 @@ void HistogramKernel(const Context& dev_ctx,
const T* input_data = input.data<T>();
auto input_numel = input.numel();
int64_t* out_data = output->mutable_data<int64_t>(dev_ctx.GetPlace());
int64_t* out_data = dev_ctx.template Alloc<int64_t>(output);
phi::funcs::SetConstant<Context, int64_t>()(
dev_ctx, output, static_cast<int64_t>(0));
......
......@@ -27,7 +27,8 @@ void MaskedSelectGradKernel(const Context& dev_ctx,
DenseTensor* x_grad) {
auto* mask_data = mask.data<bool>();
auto* input_data = out_grad.data<T>();
auto* out_data = x_grad->mutable_data<T>(dev_ctx.GetPlace());
auto* out_data = dev_ctx.template Alloc<T>(x_grad);
int mask_size = mask.numel();
int index = 0;
......
......@@ -48,7 +48,8 @@ void MaskedSelectKernel(const Context& dev_ctx,
DDim out_dim{out_size};
out->Resize(out_dim);
auto out_data = out->mutable_data<T>(phi::CPUPlace());
auto out_data = dev_ctx.template HostAlloc<T>(out);
int index = 0;
for (int i = 0; i < mask_size; i++) {
......
......@@ -58,7 +58,7 @@ void PutAlongAxisGradKernel(const Context& dev_ctx,
if (value_grad) {
value_grad->Resize(index.dims());
value_grad->mutable_data<T>(dev_ctx.GetPlace());
dev_ctx.template Alloc<T>(value_grad);
if (index_type == DataType::INT32) {
paddle::operators::cpu_gather_kernel<T, int32_t>(
out_grad, axis, index, *value_grad, dev_ctx);
......
......@@ -114,8 +114,9 @@ void TemporalShiftGradKernel(const Context& dev_ctx,
(data_layout == DataLayout::kNCHW ? phi::make_ddim({nt, c, h, w})
: phi::make_ddim({nt, h, w, c}));
const T* output_grad_data = output_grad->data<T>();
T* input_grad_data =
input_grad->mutable_data<T>(in_grad_dims, dev_ctx.GetPlace());
input_grad->Resize(in_grad_dims);
T* input_grad_data = dev_ctx.template Alloc<T>(input_grad);
if (data_layout == DataLayout::kNCHW) {
TemporalShiftBwNCHW<T>(
......
......@@ -114,7 +114,8 @@ void TemporalShiftKernel(const Context& dev_ctx,
(data_layout == DataLayout::kNCHW ? phi::make_ddim({nt, c, h, w})
: phi::make_ddim({nt, h, w, c}));
const T* input_data = input->data<T>();
T* output_data = output->mutable_data<T>(out_dims, dev_ctx.GetPlace());
output->Resize(out_dims);
T* output_data = dev_ctx.template Alloc<T>(output);
if (data_layout == DataLayout::kNCHW) {
TemporalShiftFwNCHW<T>(
......
......@@ -51,16 +51,19 @@ void YoloBoxKernel(const Context& dev_ctx,
const int an_stride = (class_num + 5) * stride;
DenseTensor anchors_;
auto anchors_data =
anchors_.mutable_data<int>({an_num * 2}, dev_ctx.GetPlace());
anchors_.Resize({an_num * 2});
auto anchors_data = dev_ctx.template Alloc<int>(&anchors_);
std::copy(anchors.begin(), anchors.end(), anchors_data);
const T* input_data = input->data<T>();
const int* imgsize_data = imgsize->data<int>();
T* boxes_data = boxes->mutable_data<T>({n, box_num, 4}, dev_ctx.GetPlace());
boxes->Resize({n, box_num, 4});
T* boxes_data = dev_ctx.template Alloc<T>(boxes);
memset(boxes_data, 0, boxes->numel() * sizeof(T));
T* scores_data =
scores->mutable_data<T>({n, box_num, class_num}, dev_ctx.GetPlace());
scores->Resize({n, box_num, class_num});
T* scores_data = dev_ctx.template Alloc<T>(scores);
memset(scores_data, 0, scores->numel() * sizeof(T));
T box[4];
......
......@@ -996,7 +996,7 @@ void ElementwiseCompute(const GPUContext &dev_ctx,
DenseTensor *z) {
std::vector<const DenseTensor *> ins = {&x, &y};
std::vector<DenseTensor *> outs = {z};
z->mutable_data<OutType>(dev_ctx.GetPlace());
dev_ctx.template Alloc<OutType>(z);
BroadcastKernel<ElementwiseType::kBinary, T, OutType, Functor, 1>(
dev_ctx, ins, &outs, axis, func);
}
......
......@@ -104,7 +104,8 @@ DenseTensor Diagonal(const DeviceContext& context,
DenseTensor diag;
DDim diag_dims = phi::make_ddim(ret_dims);
auto dig_stride = phi::stride(diag_dims);
auto diag_data = diag.mutable_data<T>(diag_dims, context.GetPlace());
diag.Resize(diag_dims);
auto diag_data = context.template Alloc<T>(&diag);
int64_t pos = std::abs(offset) * offset_stride;
int64_t dim_size = ret_strides.size();
......
......@@ -474,7 +474,7 @@ static inline void GetDoubleGradSafeTensor(const DeviceContext &dev_ctx,
} else {
auto meta = phi::DenseTensorMeta(x.dtype(), x.dims(), x.layout());
*ddx_safe = phi::Empty(dev_ctx, std::move(meta));
ddx_safe->mutable_data(dev_ctx.GetPlace());
dev_ctx.template Alloc<T>(ddx_safe);
SetConstant<DeviceContext, T> set_zero;
set_zero(dev_ctx, ddx_safe, static_cast<T>(0));
}
......
......@@ -237,7 +237,8 @@ void CommonElementwiseBroadcastBackward(const CPUContext &ctx,
// result.
if (dx && dx->IsSharedBufferWith(dout)) {
dx->clear();
dx->mutable_data<T>(x_dims, ctx.GetPlace());
dx->Resize(x_dims);
ctx.template Alloc<T>(dx);
}
VLOG(3) << "CommonElementwiseBroadcastBackward xdims:"
......@@ -1680,7 +1681,8 @@ void CommonElementwiseBroadcastBackward(const GPUContext &ctx,
// result.
if (dx && dx->IsSharedBufferWith(dout)) {
dx->clear();
dx->mutable_data<T>(x_dims, ctx.GetPlace());
dx->Resize(x_dims);
ctx.template Alloc<T>(dx);
}
VLOG(3) << "CommonElementwiseBroadcastBackward xdims:"
......
......@@ -39,8 +39,11 @@ void FCFunctor<DeviceContext, T>::operator()(const DeviceContext& context,
const int NN = N + 4;
const int KK = K + 4;
phi::DenseTensor X1;
T* X1_data = X1.mutable_data<T>({M * KK}, paddle::platform::CPUPlace());
Y1_data = Y1.mutable_data<T>({M * (N + 4)}, paddle::platform::CPUPlace());
X1.Resize({M * KK});
T* X1_data = context.template HostAlloc<T>(&X1);
Y1.Resize({M * (N + 4)});
Y1_data = context.template HostAlloc<T>(&Y1);
#ifdef PADDLE_WITH_MKLML
#pragma omp parallel for
#endif
......
......@@ -319,7 +319,9 @@ void ColwiseSum<phi::GPUContext, double>::operator()(
size,
vector->numel()));
phi::DenseTensor one;
one.mutable_data<double>({in_dims[0]}, context.GetPlace());
one.Resize({in_dims[0]});
context.template Alloc<double>(&one);
SetConstant<phi::GPUContext, double> set;
set(context, &one, static_cast<double>(1.0));
phi::funcs::GetBlas<phi::GPUContext, double>(context).GEMV(
......@@ -355,7 +357,9 @@ void RowwiseSum<phi::GPUContext, double>::operator()(
in_dims[0],
vector->numel()));
phi::DenseTensor one;
one.mutable_data<double>({size}, context.GetPlace());
one.Resize({size});
context.template Alloc<double>(&one);
SetConstant<phi::GPUContext, double> set;
set(context, &one, static_cast<double>(1.0));
phi::funcs::GetBlas<phi::GPUContext, double>(context).GEMV(
......
......@@ -117,7 +117,7 @@ class ColwiseSum<phi::CPUContext, T> {
size,
out->numel()));
T* out_buf = out->mutable_data<T>(out->place());
T* out_buf = context.template Alloc<T>(out);
const T* in_buf = input.data<T>();
for (size_t i = 0; i < static_cast<size_t>(height); ++i) {
......@@ -185,7 +185,7 @@ class RowwiseMean<phi::CPUContext, T> {
height,
out->numel()));
auto inv_size = 1.0 / size;
T* out_buf = out->mutable_data<T>(out->place());
T* out_buf = context.template Alloc<T>(out);
const T* in_buf = input.data<T>();
for (size_t i = 0; i < static_cast<size_t>(height); ++i) {
......@@ -251,7 +251,7 @@ class RowwiseSum<phi::CPUContext, T> {
height,
out->numel()));
T* out_buf = out->mutable_data<T>(out->place());
T* out_buf = context.template Alloc<T>(out);
const T* in_buf = input.data<T>();
for (size_t i = 0; i < static_cast<size_t>(height); ++i) {
......
......@@ -451,7 +451,7 @@ void SelectKernel(const KPDevice &dev_ctx,
out_dim.push_back(static_cast<int64_t>(rank));
out->Resize(phi::make_ddim(out_dim));
}
auto out_data = out->mutable_data<OutT>(cuda_place);
auto out_data = dev_ctx.template Alloc<OutT>(out);
// 3.2 get true data's index according to cond_data and cumsum_data
if (total_true_num <= 0) return;
SelectKernel<MT, InT, CT, OutT, Functor, kVecSize, SelectData>
......
......@@ -542,11 +542,10 @@ struct MergeAddImpl {
}
out.set_height(input_height);
out.mutable_value()->mutable_data<T>(
phi::make_ddim(
{static_cast<int64_t>(merged_row_set.size()), input_width}),
context.GetPlace());
auto* out_data = out.mutable_value()->data<T>();
DenseTensor* out_tensor = out.mutable_value();
out_tensor->Resize(phi::make_ddim(
{static_cast<int64_t>(merged_row_set.size()), input_width}));
auto* out_data = context.template Alloc<T>(out_tensor);
if (merged_row_set.size() == row_num && !sorted_result) {
// no duplicated ids, just concat the result together
......@@ -659,9 +658,10 @@ struct MergeAdd<phi::XPUContext, T> {
out.set_rows(merge_rows);
out.set_height(input.height());
out.mutable_value()->mutable_data<T>(
phi::make_ddim({static_cast<int64_t>(merge_rows.size()), input_width}),
context.GetPlace());
DenseTensor* out_tensor = out.mutable_value();
out_tensor->Resize(
phi::make_ddim({static_cast<int64_t>(merge_rows.size()), input_width}));
context.template Alloc<T>(out_tensor);
std::unordered_map<int64_t, size_t> rows_to_id;
for (size_t i = 0; i < merge_rows.size(); ++i) {
......@@ -748,12 +748,13 @@ struct MergeAdd<phi::XPUContext, T> {
out.set_rows(merge_rows);
out.set_height(input_height);
out.mutable_value()->mutable_data<T>(
phi::make_ddim(
{static_cast<int64_t>(merged_row_set.size()), input_width}),
context.GetPlace());
float* y_data = reinterpret_cast<float*>(out.mutable_value()->data<T>());
DenseTensor* out_tensor = out.mutable_value();
out_tensor->Resize(phi::make_ddim(
{static_cast<int64_t>(merged_row_set.size()), input_width}));
context.template Alloc<T>(out_tensor);
float* y_data = reinterpret_cast<float*>(out_tensor->data<T>());
std::unordered_map<int64_t, size_t> rows_to_id;
for (size_t i = 0; i < merge_rows.size(); ++i) {
......@@ -856,11 +857,11 @@ struct MergeAverage<phi::CPUContext, T> {
}
out.set_height(input_height);
out.mutable_value()->mutable_data<T>(
phi::make_ddim(
{static_cast<int64_t>(merged_row_set.size()), input_width}),
context.GetPlace());
auto* out_data = out.mutable_value()->data<T>();
DenseTensor* out_tensor = out.mutable_value();
out_tensor->Resize(phi::make_ddim(
{static_cast<int64_t>(merged_row_set.size()), input_width}));
auto* out_data = context.template Alloc<T>(out_tensor);
std::vector<int64_t> merge_rows(merged_row_set.begin(),
merged_row_set.end());
......
......@@ -392,9 +392,10 @@ struct MergeAddImpl {
out.set_rows(merge_rows);
out.set_height(input.height());
out.mutable_value()->mutable_data<T>(
phi::make_ddim({static_cast<int64_t>(merge_rows.size()), input_width}),
context.GetPlace());
DenseTensor* out_tensor = out.mutable_value();
out_tensor->Resize(
phi::make_ddim({static_cast<int64_t>(merge_rows.size()), input_width}));
context.template Alloc<T>(out_tensor);
phi::funcs::SetConstant<DeviceContext, T> constant_functor;
constant_functor(context, out.mutable_value(), static_cast<T>(0));
......@@ -462,9 +463,11 @@ struct MergeAddImpl {
out.set_rows(merge_rows);
out.set_height(input_height);
out.mutable_value()->mutable_data<T>(
phi::make_ddim({static_cast<int64_t>(merge_rows.size()), input_width}),
context.GetPlace());
DenseTensor* out_tensor = out.mutable_value();
out_tensor->Resize(
phi::make_ddim({static_cast<int64_t>(merge_rows.size()), input_width}));
context.template Alloc<T>(out_tensor);
phi::funcs::SetConstant<DeviceContext, T> constant_functor;
constant_functor(context, out.mutable_value(), static_cast<T>(0));
......
......@@ -941,8 +941,7 @@ bool SortTopk(const phi::GPUContext& ctx,
const std::vector<int64_t> dims = {num_rows, num_cols};
auto dim = phi::make_ddim(dims);
input_indices.Resize(dim);
// input_indices.Resize(num_rows*num_cols);
input_indices.mutable_data<int64_t>(ctx.GetPlace());
ctx.template Alloc<int64_t>(&input_indices);
size_t temp_storage_bytes = -1;
auto ComputeBlockSize = [](int col) {
......@@ -984,7 +983,7 @@ bool SortTopk(const phi::GPUContext& ctx,
const T* input = input_tensor->data<T>();
T* values = out_tensor->data<T>();
int64_t* indices = indices_tensor->mutable_data<int64_t>(ctx.GetPlace());
int64_t* indices = ctx.template Alloc<int64_t>(indices_tensor);
if (k == num_cols) {
// Doing a full sort.
......@@ -993,8 +992,8 @@ bool SortTopk(const phi::GPUContext& ctx,
} else {
temp_values.Resize(dim);
temp_indices.Resize(dim);
sorted_values_ptr = temp_values.mutable_data<T>(ctx.GetPlace());
sorted_indices_ptr = temp_indices.mutable_data<int64_t>(ctx.GetPlace());
sorted_values_ptr = ctx.template Alloc<T>(&temp_values);
sorted_indices_ptr = ctx.template Alloc<int64_t>(&temp_indices);
}
// Get temp storage buffer size, maybe can allocate a fixed buffer to save
......@@ -1067,7 +1066,7 @@ bool SortTopk(const phi::GPUContext& ctx,
#endif
}
Tensor temp_storage;
temp_storage.mutable_data<uint8_t>(ctx.GetPlace(), temp_storage_bytes);
ctx.template Alloc<uint8_t>(&temp_storage, temp_storage_bytes);
if (largest) {
auto err = cub::DeviceSegmentedRadixSort::SortPairsDescending(
......
......@@ -43,7 +43,7 @@ void ConcatKernel(const Context& dev_ctx,
phi::DDim out_dims = phi::funcs::ComputeAndCheckShape(true, x_dims, axis);
out->Resize(out_dims);
out->mutable_data<T>(dev_ctx.GetPlace());
dev_ctx.template Alloc<T>(out);
// If axis is 0, the lod of the output is not the same as inputs.
if (axis == 0 && x[0]->lod().size() > 0) {
......
......@@ -1231,7 +1231,7 @@ class DepthwiseConvFunctor<phi::GPUContext, T, fuse_relu_before_conv> {
const T* input_data = input.data<T>();
const T* filter_data = filter.data<T>();
T* output_data = output->mutable_data<T>(context.GetPlace());
T* output_data = context.template Alloc<T>(output);
phi::DenseTensor filter_hwc;
if (data_layout == DataLayout::kNHWC) {
......@@ -1240,7 +1240,7 @@ class DepthwiseConvFunctor<phi::GPUContext, T, fuse_relu_before_conv> {
filter.dims()[0],
filter.dims()[1]});
filter_hwc.Resize(filter_hwc_dims);
filter_hwc.mutable_data<T>(context.GetPlace());
context.template Alloc<T>(&filter_hwc);
std::vector<int> perm_axis({2, 3, 0, 1});
phi::funcs::TransposeNormal<phi::GPUContext, T> trans;
trans(context, filter, &filter_hwc, perm_axis);
......@@ -1409,7 +1409,7 @@ class DepthwiseConvInputGradFunctor<phi::GPUContext, T, fuse_relu_before_conv> {
const T* input_data = input.data<T>();
const T* filter_data = filter.data<T>();
const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
T* input_grad_data = context.template Alloc<T>(input_grad);
phi::DenseTensor filter_hwc;
if (data_layout == DataLayout::kNHWC) {
......@@ -1418,7 +1418,7 @@ class DepthwiseConvInputGradFunctor<phi::GPUContext, T, fuse_relu_before_conv> {
filter.dims()[0],
filter.dims()[1]});
filter_hwc.Resize(filter_hwc_dims);
filter_hwc.mutable_data<T>(context.GetPlace());
context.template Alloc<T>(&filter_hwc);
std::vector<int> perm_axis({2, 3, 0, 1});
phi::funcs::TransposeNormal<phi::GPUContext, T> trans;
trans(context, filter, &filter_hwc, perm_axis);
......@@ -1584,7 +1584,7 @@ class DepthwiseConvFilterGradFunctor<phi::GPUContext,
const T* input_data = input.data<T>();
const T* output_grad_data = output_grad.data<T>();
T* filter_grad_data = filter_grad->mutable_data<T>(context.GetPlace());
T* filter_grad_data = context.template Alloc<T>(filter_grad);
int block_size = 512;
int blocks;
......@@ -1654,7 +1654,7 @@ class DepthwiseConvFilterGradFunctor<phi::GPUContext,
filter_grad->dims()[0], \
filter_grad->dims()[1]}); \
filter_grad_hwc.Resize(filter_grad_hwc_dims); \
filter_grad_hwc.mutable_data<T>(context.GetPlace()); \
context.template Alloc<T>(&filter_grad_hwc); \
phi::funcs::SetConstant<phi::GPUContext, T> set_zero; \
set_zero(context, &filter_grad_hwc, static_cast<T>(0)); \
filter_grad_data = filter_grad_hwc.data<T>(); \
......
......@@ -75,7 +75,7 @@ void DepthwiseConvGradKernel(const Context& dev_ctx,
phi::funcs::SetConstant<Context, T> set_zero;
if (input_grad) {
input_grad->mutable_data<T>(dev_ctx.GetPlace());
dev_ctx.template Alloc<T>(input_grad);
set_zero(dev_ctx, input_grad, static_cast<T>(0));
if (fuse_relu) {
......@@ -106,7 +106,7 @@ void DepthwiseConvGradKernel(const Context& dev_ctx,
}
if (filter_grad) {
filter_grad->mutable_data<T>(dev_ctx.GetPlace());
dev_ctx.template Alloc<T>(filter_grad);
set_zero(dev_ctx, filter_grad, static_cast<T>(0));
if (fuse_relu) {
paddle::operators::math::DepthwiseConvFilterGradFunctor<Context, T, true>
......
......@@ -32,7 +32,7 @@ void DepthwiseConvKernel(const Context& dev_ctx,
const std::string& data_format,
DenseTensor* out) {
DenseTensor* output = out;
output->mutable_data<T>(dev_ctx.GetPlace());
dev_ctx.template Alloc<T>(output);
const std::vector<int> strides = strides_t;
std::vector<int> dilations = dilations_t;
......
......@@ -29,7 +29,7 @@ void DropoutGradRawKernel(const Context& dev_ctx,
const std::string& mode,
DenseTensor* x_grad) {
bool upscale_in_train = (mode == "upscale_in_train");
x_grad->mutable_data<T>(dev_ctx.GetPlace());
dev_ctx.template Alloc<T>(x_grad);
paddle::operators::DropoutGradGPUKernelDriver<T>(dev_ctx,
is_test,
p.to<float>(),
......
......@@ -153,7 +153,7 @@ void DefaultElementwiseAddGrad(const GPUContext &ctx,
// dx
if (dx != nullptr) {
auto *dx_data = dx->mutable_data<T>(ctx.GetPlace());
auto *dx_data = ctx.template Alloc<T>(dx);
if (dx->dims() == dout.dims()) {
if (dx_data != dout_data) {
phi::Copy(ctx, dout, ctx.GetPlace(), false, dx);
......@@ -163,7 +163,8 @@ void DefaultElementwiseAddGrad(const GPUContext &ctx,
// the result of dy wrong.
if (dx->IsSharedBufferWith(dout)) {
dx->clear();
dx->mutable_data<T>(x.dims(), ctx.GetPlace());
dx->Resize(x.dims());
ctx.template Alloc<T>(dx);
}
std::vector<int> reduce_dims =
funcs::GetReduceDim(x.dims(), out.dims(), axis);
......@@ -173,7 +174,7 @@ void DefaultElementwiseAddGrad(const GPUContext &ctx,
}
// dy
if (dy != nullptr) {
auto *dy_data = dy->mutable_data<T>(ctx.GetPlace());
auto *dy_data = ctx.template Alloc<T>(dy);
if (dy->dims() == dout.dims()) {
if (dy_data != dout_data) {
phi::Copy(ctx, dout, ctx.GetPlace(), false, dy);
......@@ -217,12 +218,11 @@ void ElementwiseAddGrad(const GPUContext &ctx,
PREDEFINED_BLOCK_SIZE,
1);
SimpleElemwiseAddGradCUDAKernel<T>
<<<grid_size, block_size, 0, ctx.stream()>>>(
dout.data<T>(),
size,
vec_size,
dx->mutable_data<T>(ctx.GetPlace()),
dy->mutable_data<T>(ctx.GetPlace()));
<<<grid_size, block_size, 0, ctx.stream()>>>(dout.data<T>(),
size,
vec_size,
ctx.template Alloc<T>(dx),
ctx.template Alloc<T>(dy));
} else {
VLOG(4) << "Special case when dy_data is the same as dout_data, "
"and dx_data is the same as dout_data, do not need "
......@@ -264,7 +264,7 @@ void default_elementwise_sub_grad(const GPUContext &ctx,
auto *dout_data = dout.data<T>();
// dx
if (dx != nullptr) {
auto *dx_data = dx->mutable_data<T>(ctx.GetPlace());
auto *dx_data = ctx.template Alloc<T>(dx);
if (dx->dims() == dout.dims()) {
if (dx_data != dout_data) {
phi::Copy(ctx, dout, ctx.GetPlace(), false, dx);
......@@ -274,7 +274,8 @@ void default_elementwise_sub_grad(const GPUContext &ctx,
// the result of dy wrong.
if (dx->IsSharedBufferWith(dout)) {
dx->clear();
dx->mutable_data<T>(x.dims(), ctx.GetPlace());
dx->Resize(x.dims());
ctx.template Alloc<T>(dx);
}
std::vector<int> reduce_dims =
funcs::GetReduceDim(x.dims(), out.dims(), axis);
......@@ -284,7 +285,7 @@ void default_elementwise_sub_grad(const GPUContext &ctx,
}
// dy
if (dy != nullptr) {
auto *dy_data = dy->mutable_data<T>(ctx.GetPlace());
auto *dy_data = ctx.template Alloc<T>(dy);
if (dy->dims() == dout.dims()) {
if (dy_data != dout_data) {
dim3 block_size = dim3(PREDEFINED_BLOCK_SIZE, 1);
......@@ -293,10 +294,7 @@ void default_elementwise_sub_grad(const GPUContext &ctx,
dim3((size + PREDEFINED_BLOCK_SIZE - 1) / PREDEFINED_BLOCK_SIZE, 1);
SimpleElemwiseSubGradCUDAKernel<T>
<<<grid_size, block_size, 0, ctx.stream()>>>(
dout.data<T>(),
size,
nullptr,
dy->mutable_data<T>(ctx.GetPlace()));
dout.data<T>(), size, nullptr, ctx.template Alloc<T>(dy));
}
} else {
std::vector<int> reduce_dims =
......@@ -320,11 +318,10 @@ void elementwise_sub_grad(const GPUContext &ctx,
dim3 grid_size =
dim3((size + PREDEFINED_BLOCK_SIZE - 1) / PREDEFINED_BLOCK_SIZE, 1);
SimpleElemwiseSubGradCUDAKernel<T>
<<<grid_size, block_size, 0, ctx.stream()>>>(
dout.data<T>(),
size,
dx->mutable_data<T>(ctx.GetPlace()),
dy->mutable_data<T>(ctx.GetPlace()));
<<<grid_size, block_size, 0, ctx.stream()>>>(dout.data<T>(),
size,
ctx.template Alloc<T>(dx),
ctx.template Alloc<T>(dy));
}
/*
******************************
......
......@@ -314,9 +314,9 @@ void GraphReindexKernel(const Context& dev_ctx,
const auto* ph_index = hashtable_index.get_ptr();
hashtable_index_out.ShareDataWith(*ph_index);
int* hashtable_value_data =
hashtable_value_out.mutable_data<int>(dev_ctx.GetPlace());
dev_ctx.template Alloc<int>(&hashtable_value_out);
int* hashtable_index_data =
hashtable_index_out.mutable_data<int>(dev_ctx.GetPlace());
dev_ctx.template Alloc<int>(&hashtable_index_out);
BufferReindex<T, Context>(dev_ctx,
x_data,
src_outputs,
......
......@@ -85,7 +85,7 @@ void HistogramKernel(const Context& dev_ctx,
const T* input_data = input.data<T>();
const int input_numel = input.numel();
int64_t* out_data = output->mutable_data<int64_t>(dev_ctx.GetPlace());
int64_t* out_data = dev_ctx.template Alloc<int64_t>(output);
phi::funcs::SetConstant<Context, int64_t>()(
dev_ctx, output, static_cast<int64_t>(0));
......@@ -98,8 +98,10 @@ void HistogramKernel(const Context& dev_ctx,
auto input_x = phi::EigenVector<T>::Flatten(input);
DenseTensor input_min_t, input_max_t;
auto* input_min_data = input_min_t.mutable_data<T>({1}, dev_ctx.GetPlace());
auto* input_max_data = input_max_t.mutable_data<T>({1}, dev_ctx.GetPlace());
input_min_t.Resize({1});
input_max_t.Resize({1});
auto* input_min_data = dev_ctx.template Alloc<T>(&input_min_t);
auto* input_max_data = dev_ctx.template Alloc<T>(&input_max_t);
auto input_min_scala = phi::EigenScalar<T>::From(input_min_t);
auto input_max_scala = phi::EigenScalar<T>::From(input_max_t);
......
......@@ -67,7 +67,7 @@ bool SortKthvalue(const phi::GPUContext& dev_ctx,
DenseTensor temp_values, temp_indices;
const T* input = input_tensor->data<T>();
T* values = out_tensor->data<T>();
int64_t* indices = indices_tensor->mutable_data<int64_t>(dev_ctx.GetPlace());
int64_t* indices = dev_ctx.template Alloc<int64_t>(indices_tensor);
temp_values.Resize(dim);
temp_indices.Resize(dim);
sorted_values_ptr = dev_ctx.template Alloc<T>(&temp_values);
......@@ -208,13 +208,16 @@ void KthvalueKernel(const Context& dev_ctx,
}
trans_out_dims[in_dims.size() - 1] = 1;
DenseTensor trans_input;
trans_input.mutable_data<T>(trans_dims, dev_ctx.GetPlace());
trans_input.Resize(trans_dims);
dev_ctx.template Alloc<T>(&trans_input);
int ndims = trans.size();
funcs::TransCompute<phi::GPUContext, T>(
ndims, dev_ctx, x, &trans_input, trans);
DenseTensor trans_ind, trans_out;
trans_ind.mutable_data<int64_t>(trans_out_dims, dev_ctx.GetPlace());
trans_out.mutable_data<T>(trans_out_dims, dev_ctx.GetPlace());
trans_ind.Resize(trans_out_dims);
trans_out.Resize(trans_out_dims);
dev_ctx.template Alloc<int64_t>(&trans_ind);
dev_ctx.template Alloc<T>(&trans_out);
const int64_t input_height =
phi::product(phi::slice_ddim(trans_dims, 0, trans_dims.size() - 1));
const int64_t input_width = trans_dims[trans_dims.size() - 1];
......
......@@ -50,7 +50,7 @@ void PutAlongAxisGradKernel(const Context& dev_ctx,
}
if (value_grad) {
value_grad->Resize(index.dims());
value_grad->mutable_data<T>(dev_ctx.GetPlace());
dev_ctx.template Alloc<T>(value_grad);
if (index_type == DataType::INT32) {
paddle::operators::gpu_gather_kernel<T, int32_t>(
out_grad,
......
......@@ -58,7 +58,7 @@ class RNNDescriptors {
template <typename T>
void Create(const gpuDnnHandle_t &handle,
const Place &place,
const DeviceContext &dev_ctx,
const std::vector<int> &sequence_length,
size_t *workspace_size,
size_t *reserve_size,
......@@ -103,17 +103,15 @@ class RNNDescriptors {
#ifdef PADDLE_WITH_HIP
PADDLE_ENFORCE_GPU_SUCCESS(
phi::dynload::miopenDropoutGetStatesSize(handle, &state_size));
dropout_state->mutable_data<uint8_t>({static_cast<int64_t>(state_size)},
place);
#else
PADDLE_ENFORCE_GPU_SUCCESS(
phi::dynload::cudnnDropoutGetStatesSize(handle, &state_size));
dropout_state->mutable_data<uint8_t>({static_cast<int64_t>(state_size)},
place);
#endif
dropout_state->Resize({static_cast<int64_t>(state_size)});
dev_ctx.template Alloc<uint8_t>(dropout_state);
}
dropout_desc_.descriptor(handle,
place,
dev_ctx.GetPlace(),
is_initialized,
dropout_prob_,
is_test_ ? nullptr : dropout_state,
......
......@@ -248,7 +248,7 @@ void RnnGradKernel(const Context &dev_ctx,
is_test);
rnn.Create<T>(handle,
dev_ctx.GetPlace(),
dev_ctx,
SequenceLength,
&workspace_size,
&reserve_size,
......
......@@ -280,7 +280,7 @@ void RnnKernel(const Context &dev_ctx,
is_bidirec,
is_test);
rnn.Create<T>(handle,
dev_ctx.GetPlace(),
dev_ctx,
SequenceLength,
&workspace_size,
&reserve_size,
......
......@@ -82,9 +82,8 @@ void SGDDenseKernel(const Context& dev_ctx,
const MPDType* master_in_data =
multi_precision ? master_param->data<MPDType>() : nullptr;
MPDType* master_out_data =
multi_precision
? master_param_out->mutable_data<MPDType>(dev_ctx.GetPlace())
: nullptr;
multi_precision ? dev_ctx.template Alloc<MPDType>(master_param_out)
: nullptr;
int block = 512;
int grid = (param.numel() + block - 1) / block;
......@@ -94,7 +93,7 @@ void SGDDenseKernel(const Context& dev_ctx,
grad.data<T>(),
learning_rate.data<T>(),
param.numel(),
param_out->mutable_data<T>(dev_ctx.GetPlace()),
dev_ctx.template Alloc<T>(param_out),
master_in_data,
master_out_data);
}
......@@ -119,9 +118,8 @@ void SGDDenseParamSparseGradKernel(
const MPDType* master_in_data =
multi_precision ? master_param->data<MPDType>() : nullptr;
MPDType* master_out_data =
multi_precision
? master_param_out->mutable_data<MPDType>(dev_ctx.GetPlace())
: nullptr;
multi_precision ? dev_ctx.template Alloc<MPDType>(master_param_out)
: nullptr;
PADDLE_ENFORCE_EQ(
&param,
......
......@@ -120,8 +120,8 @@ void TemporalShiftGradKernel(const Context& dev_ctx,
(data_layout == DataLayout::kNCHW ? phi::make_ddim({nt, c, h, w})
: phi::make_ddim({nt, h, w, c}));
const T* output_grad_data = output_grad->data<T>();
T* input_grad_data =
input_grad->mutable_data<T>(in_grad_dims, dev_ctx.GetPlace());
input_grad->Resize(in_grad_dims);
T* input_grad_data = dev_ctx.template Alloc<T>(input_grad);
int pixelNum = nt * chw;
int threads = 1024;
......
......@@ -120,7 +120,8 @@ void TemporalShiftKernel(const Context& dev_ctx,
(data_layout == DataLayout::kNCHW ? phi::make_ddim({nt, c, h, w})
: phi::make_ddim({nt, h, w, c}));
const T* input_data = input->data<T>();
T* output_data = output->mutable_data<T>(out_dims, dev_ctx.GetPlace());
output->Resize(out_dims);
T* output_data = dev_ctx.template Alloc<T>(output);
int pixelNum = nt * chw;
int threads = 1024;
......
......@@ -139,9 +139,10 @@ void YoloBoxKernel(const Context& dev_ctx,
const T* input_data = input->data<T>();
const int* imgsize_data = img_size.data<int>();
T* boxes_data = boxes->mutable_data<T>({n, box_num, 4}, dev_ctx.GetPlace());
T* scores_data =
scores->mutable_data<T>({n, box_num, class_num}, dev_ctx.GetPlace());
boxes->Resize({n, box_num, 4});
T* boxes_data = dev_ctx.template Alloc<T>(boxes);
scores->Resize({n, box_num, class_num});
T* scores_data = dev_ctx.template Alloc<T>(scores);
phi::funcs::SetConstant<phi::GPUContext, T> set_zero;
set_zero(dev_ctx, boxes, static_cast<T>(0));
set_zero(dev_ctx, scores, static_cast<T>(0));
......
......@@ -42,7 +42,7 @@ void DigammaGradKernel(const Context& ctx,
const DenseTensor& x,
const DenseTensor& out_grad,
DenseTensor* x_grad) {
x_grad->mutable_data<T>(ctx.GetPlace());
ctx.template Alloc<T>(x_grad);
auto* dout_data = out_grad.data<T>();
auto* x_data = x.data<T>();
......
......@@ -38,7 +38,7 @@ struct DigammaFunctor {
template <typename T, typename Context>
void DigammaKernel(const Context& ctx, const DenseTensor& x, DenseTensor* out) {
out->mutable_data<T>(ctx.GetPlace());
ctx.template Alloc<T>(out);
auto* x_data = x.data<T>();
auto* out_data = out->data<T>();
auto numel = x.numel();
......
......@@ -68,7 +68,7 @@ void AddDoubleGradImpl(const Context& dev_ctx,
funcs::GetDoubleGradSafeTensor<Context, T>(
dev_ctx, y, ddy.get_ptr(), &ddy_safe);
ddout->mutable_data<T>(dev_ctx.GetPlace());
dev_ctx.template Alloc<T>(ddout);
auto ddx_dims = ddx_safe.dims();
auto ddy_dims = ddy_safe.dims();
if (ddx_dims.size() >= ddy_dims.size()) {
......@@ -102,7 +102,7 @@ void SubtractDoubleGradImpl(const Context& dev_ctx,
funcs::GetDoubleGradSafeTensor<Context, T>(
dev_ctx, y, ddy.get_ptr(), &ddy_safe);
ddout->mutable_data<T>(dev_ctx.GetPlace());
dev_ctx.template Alloc<T>(ddout);
funcs::ElementwiseCompute<funcs::SubtractFunctor<T>, T>(
dev_ctx, ddx_safe, ddy_safe, axis, funcs::SubtractFunctor<T>(), ddout);
}
......
......@@ -25,32 +25,6 @@ namespace phi {
static constexpr size_t WAIT_THRESHOLD = 64 * 1024;
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
template <>
void MemcpyH2DKernel(const GPUContext& dev_ctx,
const DenseTensor& x,
int dst_place_type,
DenseTensor* out) {
PADDLE_ENFORCE_GE(
dst_place_type,
0,
errors::OutOfRange("dst_place_type only support 0-3, but got: %d",
dst_place_type));
PADDLE_ENFORCE_LE(
dst_place_type,
3,
errors::OutOfRange("dst_place_type only support 0-3, but got: %d",
dst_place_type));
auto stream = dev_ctx.stream();
out->mutable_data(dev_ctx.GetPlace(),
x.dtype(),
phi::Stream(reinterpret_cast<phi::StreamId>(stream)));
Copy(dev_ctx, x, dev_ctx.GetPlace(), false, out);
}
#endif
template <typename Context>
void MemcpyH2DKernel(const Context& dev_ctx,
const DenseTensor& x,
......@@ -77,10 +51,6 @@ void MemcpyD2HKernel(const Context& dev_ctx,
DenseTensor* out) {
switch (dst_place_type) {
case 0:
// NOTE(lvyongkang): phi::Copy will use DeviceContext.zero_allocator to
// alloc and assign DeviceContext.place to out, which causes place check
// fails. So we specify out's place here.
out->mutable_data(CPUPlace());
Copy(dev_ctx, x, CPUPlace(), false, out);
// NOTE(copy from Aurelius84): host <-> device memory copies of a memory
// block of 64 KB or less are asynchronous. See
......@@ -91,10 +61,6 @@ void MemcpyD2HKernel(const Context& dev_ctx,
break;
case 1:
// NOTE(lvyongkang): phi::Copy will use DeviceContext.zero_allocator to
// alloc and assign DeviceContext.place to out, which causes place check
// fails. So we specify out's place here.
out->mutable_data(GPUPinnedPlace());
Copy(dev_ctx, x, GPUPinnedPlace(), false, out);
// paddle::memory::Copy use async copy for GPUPinnedPlace
dev_ctx.Wait();
......
......@@ -74,7 +74,7 @@ void AddGradKernel(const Context& dev_ctx,
}
if (dy != nullptr) {
T* dy_data = dy->mutable_data<T>(dev_ctx.GetPlace());
T* dy_data = dev_ctx.template Alloc<T>(dy);
if (dy->dims() == dz_dims) {
if (dy_data != dz_data) {
int ret = xpu::copy(dev_ctx.x_context(),
......
......@@ -45,7 +45,7 @@ void TensorSetConstantXPU(phi::DenseTensor* tensor,
template <typename T, typename Context, typename VType>
void FullValueXPU(const Context& dev_ctx, DenseTensor* tensor, VType val) {
tensor->mutable_data<T>(dev_ctx.GetPlace());
dev_ctx.template Alloc<T>(tensor);
PD_VISIT_ALL_TYPES(tensor->dtype(), "FullValueXPU", ([&] {
TensorSetConstantXPU<VType, data_t>(
......
......@@ -342,17 +342,17 @@ if [ "${PHI_INCLUDE_FLUID_FILES}" != "" ] && [ "${GIT_PR_ID}" != "" ]; then
check_approval 1 chenwhql YuanRisheng zyfncg
fi
HAS_MODIFIED_PHI_KERNEL_FILES=`git diff --name-only upstream/$BRANCH | grep "paddle/phi/kernels" || true`
PHI_USE_MUTABLE_DATA_FILES=""
for CHANGE_FILE in ${HAS_MODIFIED_PHI_KERNEL_FILES}; do
PHI_DIR_ADDED_LINES=`git diff -U0 upstream/$BRANCH -- ${PADDLE_ROOT}/${CHANGE_FILE} | grep "^+" | grep -w "mutable_data" || true`
if [ "${PHI_DIR_ADDED_LINES}" != "" ] && [ "${GIT_PR_ID}" != "" ]; then
PHI_USE_MUTABLE_DATA_FILES="${PHI_USE_MUTABLE_DATA_FILES} ${CHANGE_FILE}"
HAS_MODIFIED_PHI_OR_FLUID_FILES=`git diff --name-only upstream/$BRANCH | grep -E "paddle/phi|paddle/fluid" || true`
USE_MUTABLE_DATA_FILES=""
for CHANGE_FILE in ${HAS_MODIFIED_PHI_OR_FLUID_FILES}; do
ADDED_LINES=`git diff -U0 upstream/$BRANCH -- ${PADDLE_ROOT}/${CHANGE_FILE} | grep "^+" | grep -w "mutable_data" || true`
if [ "${ADDED_LINES}" != "" ] && [ "${GIT_PR_ID}" != "" ]; then
USE_MUTABLE_DATA_FILES="${USE_MUTABLE_DATA_FILES} ${CHANGE_FILE}"
fi
done
if [ "${PHI_USE_MUTABLE_DATA_FILES}" != "" ] && [ "${GIT_PR_ID}" != "" ]; then
echo_line="You can not use the DenseTensor::mutable_data() method in paddle/phi/kernels files(${PHI_USE_MUTABLE_DATA_FILES}). If you want to alloc memory, use phi::DeviceContext::Alloc() or phi::DeviceContext::HostAlloc() instead and if you want to get mutable data, use DenseTensor::data(). If you have any questions, you can have one RD (chenwhql, Shixiaowei02, YuanRisheng or zyfncg) review and approve.\n"
check_approval 1 chenwhql Shixiaowei02 YuanRisheng zyfncg
if [ "${USE_MUTABLE_DATA_FILES}" != "" ] && [ "${GIT_PR_ID}" != "" ]; then
echo_line="You can not use the DenseTensor::mutable_data() method in files(${USE_MUTABLE_DATA_FILES}). If you want to alloc memory, use phi::DeviceContext::Alloc() or phi::DeviceContext::HostAlloc() instead and if you want to get mutable data, use DenseTensor::data(). If you have any questions, you can have one RD (chenwhql, Shixiaowei02, YuanRisheng, zyfncg or From00) review and approve.\n"
check_approval 1 chenwhql Shixiaowei02 YuanRisheng zyfncg From00
fi
ALL_CHANGE_FILES=`git diff --numstat upstream/$BRANCH | awk '{print $3}' | grep ".py"`
......