diff --git a/paddle/phi/api/lib/data_transform.cc b/paddle/phi/api/lib/data_transform.cc index 048a24ff5e312f44686785e9cd0489c3fdeda222..c6a773ebe5fc77600dea614129c1163a69c504a7 100644 --- a/paddle/phi/api/lib/data_transform.cc +++ b/paddle/phi/api/lib/data_transform.cc @@ -169,10 +169,6 @@ inline phi::DenseTensor TransDataPlace(const phi::DenseTensor& tensor, VLOG(3) << "DeviceTransform in, src_place " << tensor.place() << " dst_place: " << dst_place; - DefaultAllocator alloc(dst_place); - phi::DenseTensor out(&alloc, - {tensor.dtype(), tensor.dims(), tensor.layout()}); - #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) auto& pool = paddle::platform::DeviceContextPool::Instance(); // NOTE(yy): TransDataPlace should wait for computation of input. @@ -191,6 +187,7 @@ inline phi::DenseTensor TransDataPlace(const phi::DenseTensor& tensor, // the transforming is from CPU to GPU and the number of elements is little. // But the embarrassment is that this solution this solution makes training // slower. 
+ phi::DenseTensor out; paddle::framework::TensorCopySync(tensor, dst_place, &out); return out; } @@ -305,6 +302,47 @@ paddle::optional> PrepareData( return paddle::none; } +/* Place-only data transform for a SelectedRows input. Returns the input's own impl when no transform is needed (and, for a GPUPINNED source, after sharing the transferred buffer into it); otherwise returns a new SelectedRows holding the transferred value. Throws InvalidArgument when input.impl() is null. */ std::shared_ptr PrepareDataForSelectedRows( + const Tensor& input, + const phi::TensorArgDef& target_args_def, + const TransformFlag& transform_flag) { + const auto& tensor_in = input.impl(); + if (tensor_in) { + phi::SelectedRows& selected_rows = + *static_cast(tensor_in.get()); + /* Fast path: transform disabled, selected_rows not initialized, or NeedTransformPlace() is false -> return the input impl unchanged. */ if (!transform_flag.NeedTransform() || !selected_rows.initialized() || + (!NeedTransformPlace( + selected_rows.place(), target_args_def.backend, transform_flag))) { + return std::static_pointer_cast(tensor_in); + } + + /* Transfer the value tensor to the place derived from target_args_def.backend. */ auto dense_out = TransDataPlace( + selected_rows.value(), phi::TransToPhiPlace(target_args_def.backend)); + /* GPUPINNED source: the transferred buffer is shared into the input's own value (the input SelectedRows is modified in place) and the input impl is returned. */ if (selected_rows.place().GetType() == AllocationType::GPUPINNED) { + selected_rows.mutable_value()->ShareBufferWith(dense_out); + return std::static_pointer_cast(tensor_in); + } + + /* Any other source place: build a new SelectedRows from the input's rows and height, then assign the transferred value to it. */ auto out_new = std::make_shared(selected_rows.rows(), + selected_rows.height()); + *out_new->mutable_value() = dense_out; + return out_new; + } + /* Only reached when input.impl() is null. */ PADDLE_THROW(phi::errors::InvalidArgument( + "The impl() of input tensor is nullptr, it doesn't support for " + "selected_rows data transform now.")); +} + +/* Optional overload: when input holds a value, forwards to the overload above and returns the dereferenced result; otherwise returns paddle::none. */ paddle::optional PrepareDataForSelectedRows( + const paddle::optional& input, + const phi::TensorArgDef& target_args_def, + const TransformFlag& transform_flag) { + if (input) { + return *PrepareDataForSelectedRows(*input, target_args_def, transform_flag); + } + return paddle::none; +} + void TransDataBackend(const phi::DenseTensor* tensor, Backend target_backend, phi::DenseTensor* out) { diff --git a/paddle/phi/api/lib/data_transform.h b/paddle/phi/api/lib/data_transform.h index 7695855e30b56013c03e4283d667934b11e04238..7a97bb01f61fa3a649c959d1a9c7e9378ceae40b 100644 --- a/paddle/phi/api/lib/data_transform.h +++ b/paddle/phi/api/lib/data_transform.h @@ -82,6 +82,17 @@ paddle::optional> PrepareData( const 
phi::TensorArgDef& target_args_def, const TransformFlag& transform_flag); +// Only supports transferring the place for SelectedRows +std::shared_ptr PrepareDataForSelectedRows( + const Tensor& input, + const phi::TensorArgDef& target_args_def, + const TransformFlag& transform_flag); + +paddle::optional PrepareDataForSelectedRows( + const paddle::optional& input, + const phi::TensorArgDef& target_args_def, + const TransformFlag& transform_flag); + void TransDataBackend(const phi::DenseTensor* tensor, Backend target_backend, phi::DenseTensor* out); diff --git a/paddle/phi/api/yaml/generator/api_base.py b/paddle/phi/api/yaml/generator/api_base.py index e67023d2faf71526eb08d72cb8c0b8c5c863c970..53b950b63f05231b0e8d7067b79407aea6158ca1 100644 --- a/paddle/phi/api/yaml/generator/api_base.py +++ b/paddle/phi/api/yaml/generator/api_base.py @@ -715,7 +715,7 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d input_tensor_code = ( input_tensor_code + f""" -{code_indent} auto {PREFIX_TENSOR_NAME}{input_name} = TensorToSelectedRows({input_name}); +{code_indent} auto {PREFIX_TENSOR_NAME}{input_name} = PrepareDataForSelectedRows({input_name}, kernel.InputAt({kernel_param.index(input_name)}), {trans_flag}); """ ) return input_tensor_code