From 4b3f2af1cb17473061790ca5888ea030a82fc429 Mon Sep 17 00:00:00 2001 From: YuanRisheng Date: Mon, 19 Sep 2022 11:25:45 +0800 Subject: [PATCH] [PHI]Move sum op to PHI (#45860) * move sum * fix ci bugs * fix ci bugs * fix set_lod bugs * fix infershape bugs * fix ci bugs * fix ci unittest bug * fix ci bugs * perfect code * update code according comment * add unittest * fix ci bugs --- paddle/fluid/framework/infershape_utils.cc | 50 +++- paddle/fluid/framework/infershape_utils.h | 4 + .../new_executor/standalone_executor_test.cc | 3 +- paddle/fluid/framework/operator.h | 7 + .../tensorrt/plugin_arg_mapping_context.cc | 4 + .../tensorrt/plugin_arg_mapping_context.h | 2 + .../fluid/operators/mkldnn/sum_mkldnn_op.cc | 6 +- paddle/fluid/operators/sum_op.cc | 110 +------ paddle/fluid/operators/sum_op.cu | 280 ------------------ paddle/fluid/operators/sum_op.h | 222 -------------- paddle/fluid/operators/sum_op_mlu.cc | 5 +- paddle/fluid/operators/sum_op_npu.cc | 5 +- paddle/fluid/operators/sum_op_xpu.cc | 6 +- .../dialect/phi/pass/proto_arg_map_context.cc | 6 + .../dialect/phi/pass/proto_arg_map_context.h | 1 + paddle/phi/api/lib/api_custom_impl.cc | 89 ++++++ paddle/phi/api/lib/api_custom_impl.h | 2 + paddle/phi/api/lib/api_gen_utils.cc | 20 ++ paddle/phi/api/lib/api_gen_utils.h | 6 + paddle/phi/api/yaml/legacy_ops.yaml | 5 +- paddle/phi/core/compat/arg_map_context.h | 1 + paddle/phi/core/kernel_registry.h | 18 ++ paddle/phi/core/kernel_utils.h | 2 + paddle/phi/core/meta_tensor.cc | 10 +- paddle/phi/core/meta_tensor.h | 6 + paddle/phi/infermeta/multiary.cc | 29 ++ paddle/phi/infermeta/multiary.h | 4 + paddle/phi/kernels/add_n_kernel.h | 10 +- paddle/phi/kernels/cpu/add_n_kernel.cc | 62 ++-- paddle/phi/kernels/gpu/add_n_kernel.cu | 164 ++++++++-- paddle/phi/kernels/impl/add_n_kernel_impl.h | 80 +++++ .../phi/kernels/selected_rows/add_n_kernel.h | 27 ++ .../kernels/selected_rows/cpu/add_n_kernel.cc | 25 ++ .../kernels/selected_rows/gpu/add_n_kernel.cu | 26 ++ .../selected_rows/impl/add_n_kernel_impl.h | 90 ++++++ .../{memcpy_d2h_sig.cc => memcpy_sig.cc} | 9 + paddle/phi/ops/compat/sum_sig.cc | 7 +- paddle/phi/tests/api/CMakeLists.txt | 4 + paddle/phi/tests/api/test_add_n_api.cc | 84 ++++++ paddle/phi/tests/ops/test_op_signature.h | 4 + python/paddle/optimizer/optimizer.py | 4 +- python/paddle/tensor/math.py | 3 - 42 files changed, 826 insertions(+), 676 deletions(-) delete mode 100644 paddle/fluid/operators/sum_op.cu delete mode 100644 paddle/fluid/operators/sum_op.h create mode 100644 paddle/phi/kernels/impl/add_n_kernel_impl.h create mode 100644 paddle/phi/kernels/selected_rows/add_n_kernel.h create mode 100644 paddle/phi/kernels/selected_rows/cpu/add_n_kernel.cc create mode 100644 paddle/phi/kernels/selected_rows/gpu/add_n_kernel.cu create mode 100644 paddle/phi/kernels/selected_rows/impl/add_n_kernel_impl.h rename paddle/phi/ops/compat/{memcpy_d2h_sig.cc => memcpy_sig.cc} (77%) create mode 100644 paddle/phi/tests/api/test_add_n_api.cc diff --git a/paddle/fluid/framework/infershape_utils.cc b/paddle/fluid/framework/infershape_utils.cc index f59bb2503a5..22d9eb43c59 100644 --- a/paddle/fluid/framework/infershape_utils.cc +++ b/paddle/fluid/framework/infershape_utils.cc @@ -87,6 +87,15 @@ class InferShapeArgumentMappingContext : public phi::ArgumentMappingContext { }); } + bool IsSelectedRowsInputs(const std::string& name) const override { + auto var_types = ctx_.GetInputsVarType(name); + return std::all_of(var_types.begin(), + var_types.end(), + [](const proto::VarType::Type& type) { + return 
type == proto::VarType::SELECTED_ROWS; + }); + } + bool IsSelectedRowsInput(const std::string& name) const override { auto var_type = ctx_.GetInputVarType(name); return var_type == proto::VarType::SELECTED_ROWS; @@ -145,6 +154,36 @@ int64_t CompatMetaTensor::numel() const { } } +bool CompatMetaTensor::is_selected_rows() const { + if (is_runtime_) { + auto* var = PADDLE_GET_CONST(Variable*, var_); + return var->IsType(); + } else { + auto* var = PADDLE_GET_CONST(VarDesc*, var_); + return var->GetType() == proto::VarType::SELECTED_ROWS; + } +} + +bool CompatMetaTensor::is_dense() const { + if (is_runtime_) { + auto* var = PADDLE_GET_CONST(Variable*, var_); + return var->IsType(); + } else { + auto* var = PADDLE_GET_CONST(VarDesc*, var_); + return var->GetType() == proto::VarType::LOD_TENSOR; + } +} + +bool CompatMetaTensor::is_tensor_array() const { + if (is_runtime_) { + auto* var = PADDLE_GET_CONST(Variable*, var_); + return var->IsType(); + } else { + auto* var = PADDLE_GET_CONST(VarDesc*, var_); + return var->GetType() == proto::VarType::LOD_TENSOR_ARRAY; + } +} + DDim CompatMetaTensor::dims() const { ValidCheck(*this); if (is_runtime_) { @@ -152,7 +191,7 @@ DDim CompatMetaTensor::dims() const { if (var->IsType()) { return var->Get().dims(); } else if (var->IsType()) { - return var->Get().dims(); + return var->Get().GetCompleteDims(); } else if (var->IsType()) { // use tensor array size as dims auto& tensor_array = var->Get(); @@ -224,8 +263,7 @@ void CompatMetaTensor::set_dims(const DDim& dims) { auto* tensor = var->GetMutable(); phi::DenseTensorUtils::GetMutableMeta(tensor)->dims = dims; } else if (var->IsType()) { - auto* tensor = var->GetMutable()->mutable_value(); - phi::DenseTensorUtils::GetMutableMeta(tensor)->dims = dims; + var->GetMutable()->set_height(dims[0]); } else if (var->IsType()) { auto* tensor_array = var->GetMutable(); // Note: Here I want enforce `tensor_array->size() == 0UL`, because @@ -299,7 +337,7 @@ void CompatMetaTensor::share_lod(const MetaTensor& meta_tensor) { ValidCheck(meta_tensor); if (is_runtime_) { auto* var = PADDLE_GET(Variable*, var_); - if (var->IsType()) { + if (var->IsType() && meta_tensor.is_dense()) { auto* tensor = var->GetMutable(); phi::DenseTensorUtils::GetMutableMeta(tensor)->lod = static_cast(meta_tensor).GetRuntimeLoD(); @@ -309,6 +347,10 @@ void CompatMetaTensor::share_lod(const MetaTensor& meta_tensor) { } } else { auto* var = PADDLE_GET(VarDesc*, var_); + if (!meta_tensor.is_dense() && !meta_tensor.is_tensor_array()) { + VLOG(3) << "input metatensor is not LoDTensor or LoDTensorArray."; + return; + } var->SetLoDLevel( static_cast(meta_tensor).GetCompileTimeLoD()); } diff --git a/paddle/fluid/framework/infershape_utils.h b/paddle/fluid/framework/infershape_utils.h index 1f745e5bf9b..3b7744515b2 100644 --- a/paddle/fluid/framework/infershape_utils.h +++ b/paddle/fluid/framework/infershape_utils.h @@ -59,6 +59,10 @@ class CompatMetaTensor : public phi::MetaTensor { bool initialized() const override { return initialized_; }; + bool is_selected_rows() const; + bool is_tensor_array() const; + bool is_dense() const; + operator unspecified_bool_type() const override { return initialized_ ? 
unspecified_bool_true : 0; } diff --git a/paddle/fluid/framework/new_executor/standalone_executor_test.cc b/paddle/fluid/framework/new_executor/standalone_executor_test.cc index 2531a8e7cd3..67e72938778 100644 --- a/paddle/fluid/framework/new_executor/standalone_executor_test.cc +++ b/paddle/fluid/framework/new_executor/standalone_executor_test.cc @@ -50,7 +50,7 @@ USE_OP_ITSELF(concat_grad); USE_OP_ITSELF(elementwise_mul_grad); USE_OP_ITSELF(sigmoid_grad); USE_OP_ITSELF(tanh_grad); -USE_OP(sum); +USE_OP_ITSELF(sum); USE_OP_ITSELF(slice_grad); USE_OP_ITSELF(lookup_table_grad); USE_OP_ITSELF(sqrt); @@ -101,6 +101,7 @@ PD_DECLARE_KERNEL(slice_grad, GPU, ALL_LAYOUT); PD_DECLARE_KERNEL(cross_entropy_with_softmax, GPU, ALL_LAYOUT); PD_DECLARE_KERNEL(cross_entropy_with_softmax_grad, GPU, ALL_LAYOUT); PD_DECLARE_KERNEL(sqrt, GPU, ALL_LAYOUT); +PD_DECLARE_KERNEL(add_n, GPU, ALL_LAYOUT); namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/operator.h b/paddle/fluid/framework/operator.h index c82976d197c..16707ca63ab 100644 --- a/paddle/fluid/framework/operator.h +++ b/paddle/fluid/framework/operator.h @@ -506,6 +506,13 @@ class ExecutionArgumentMappingContext : public phi::ArgumentMappingContext { }); } + bool IsSelectedRowsInputs(const std::string& name) const override { + auto vars = ctx_.MultiInputVar(name); + return std::all_of(vars.begin(), vars.end(), [](const Variable* var) { + return var->IsType(); + }); + } + bool IsSelectedRowsInput(const std::string& name) const override { const auto* var = ctx_.InputVar(name); return var->IsType(); diff --git a/paddle/fluid/inference/tensorrt/plugin_arg_mapping_context.cc b/paddle/fluid/inference/tensorrt/plugin_arg_mapping_context.cc index 5d9998d2556..8c4dd20ee06 100644 --- a/paddle/fluid/inference/tensorrt/plugin_arg_mapping_context.cc +++ b/paddle/fluid/inference/tensorrt/plugin_arg_mapping_context.cc @@ -104,6 +104,10 @@ bool PluginArgumentMappingContext::IsSelectedRowsInput( const std::string& name) const { return false; } +bool PluginArgumentMappingContext::IsSelectedRowsInputs( + const std::string& name) const { + return false; +} bool PluginArgumentMappingContext::IsDenseTensorVectorInput( const std::string& name) const { return false; diff --git a/paddle/fluid/inference/tensorrt/plugin_arg_mapping_context.h b/paddle/fluid/inference/tensorrt/plugin_arg_mapping_context.h index 35229a5ab79..1a483626dce 100644 --- a/paddle/fluid/inference/tensorrt/plugin_arg_mapping_context.h +++ b/paddle/fluid/inference/tensorrt/plugin_arg_mapping_context.h @@ -46,6 +46,8 @@ class PluginArgumentMappingContext : public ::phi::ArgumentMappingContext { bool IsSelectedRowsInput(const std::string& name) const override; + bool IsSelectedRowsInputs(const std::string& name) const override; + bool IsDenseTensorVectorInput(const std::string& name) const override; bool IsDenseTensorOutput(const std::string& name) const override; diff --git a/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc index f71785e72cd..072016d729c 100644 --- a/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc @@ -24,7 +24,8 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "paddle/fluid/operators/sum_op.h" +#include "paddle/fluid/framework/lod_tensor_array.h" +#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/mkldnn_reuse.h" namespace phi { @@ -37,6 +38,9 @@ namespace operators { using paddle::platform::MKLDNNDeviceContext; using phi::CPUContext; using platform::to_void_cast; +using Tensor = framework::Tensor; +using SelectedRows = phi::SelectedRows; +using LoDTensor = framework::LoDTensor; template class SumMKLDNNHandler diff --git a/paddle/fluid/operators/sum_op.cc b/paddle/fluid/operators/sum_op.cc index fcedf2d24bc..ec570f709c3 100644 --- a/paddle/fluid/operators/sum_op.cc +++ b/paddle/fluid/operators/sum_op.cc @@ -9,15 +9,17 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/operators/sum_op.h" - #include #include #include #include #include +#include "paddle/fluid/framework/infershape_utils.h" +#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/var_type_inference.h" +#include "paddle/phi/core/infermeta_utils.h" +#include "paddle/phi/infermeta/multiary.h" #ifdef PADDLE_WITH_MKLDNN #include "paddle/fluid/platform/mkldnn_helper.h" @@ -32,94 +34,6 @@ class SumOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK(ctx->HasInputs("X"), "Input", "X", "sum"); - OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "sum"); - - if (ctx->IsRuntime() && ctx->GetOutputsVarType("Out")[0] == - framework::proto::VarType::LOD_TENSOR_ARRAY) { - return; // skip runtime infershape when is tensor array; - } - - auto x_var_types = ctx->GetInputsVarType("X"); - auto x_dims = ctx->GetInputsDim("X"); - - auto N = x_dims.size(); - PADDLE_ENFORCE_GT( - N, - 0, - platform::errors::InvalidArgument( - "The input tensor X's dimensions of SumOp " - "should be larger than 0. But received X's dimensions %d, " - "X's shape = [%s].", - N, - &x_dims)); - if (N == 1) { - VLOG(3) << "Warning: SumOp have only one input, may waste memory"; - } - - framework::DDim in_dim({0}); - for (size_t i = 0; i < x_dims.size(); ++i) { - auto& x_dim = x_dims[i]; - // x_dim.size() == 1 means the real dim of selected rows is [0] - if (x_var_types[i] == framework::proto::VarType::SELECTED_ROWS && - x_dim.size() == 1) { - continue; - } - if (phi::product(x_dim) == 0) { - continue; - } - if (phi::product(in_dim) == 0) { - in_dim = x_dim; - } else { - if (ctx->IsRuntime()) { - PADDLE_ENFORCE_EQ(in_dim, - x_dim, - platform::errors::InvalidArgument( - "The input tensor X of SumOp must" - " have same shape. But received X[0]'s shape = " - "[%s], X[%d]'s shape = [%s].", - in_dim, - i, - x_dim)); - } else { - PADDLE_ENFORCE_EQ( - in_dim.size(), - x_dim.size(), - platform::errors::InvalidArgument( - "The input tensor X of SumOp must have same " - "dimensions. But received X[0]'s dimensions = %d, X[0]'s " - "shape = " - "[%s], X[%d]'s dimensions = %d, X[%d]'s shape = [%s].", - in_dim.size(), - in_dim, - i, - x_dim.size(), - i, - x_dim)); - // if in_dim or x_dim has -1, not check equal - for (int j = 0; j < x_dim.size(); ++j) { - if (x_dim[j] == -1 || in_dim[j] == -1) { - continue; - } - PADDLE_ENFORCE_EQ( - in_dim[j], - x_dim[j], - platform::errors::InvalidArgument( - "The input tensor X of SumOp must have same shape " - "if not -1." 
- "But received X[0]'s shape = [%s], X[%d]'s shape = [%s].", - in_dim, - i, - x_dim)); - } - } - } - } - ctx->SetOutputDim("Out", in_dim); - ctx->ShareLoD("X", /*->*/ "Out"); - } - protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { @@ -350,18 +264,16 @@ DECLARE_INPLACE_OP_INFERER(SumInplaceInferer, {"X", "Out"}); namespace ops = paddle::operators; +namespace ops = paddle::operators; +DECLARE_INFER_SHAPE_FUNCTOR(sum, + AddNInferShapeFunctor, + PD_INFER_META(phi::AddNTensorArrayInferMeta)); + REGISTER_OPERATOR(sum, ops::SumOp, ops::SumOpMaker, ops::SumGradDescMaker, ops::SumGradOpBaseMaker, ops::SumOpVarTypeInference, - ops::SumInplaceInferer); - -REGISTER_OP_CPU_KERNEL( - sum, - ops::SumKernel, - ops::SumKernel, - ops::SumKernel, - ops::SumKernel, - ops::SumKernel); + ops::SumInplaceInferer, + AddNInferShapeFunctor); diff --git a/paddle/fluid/operators/sum_op.cu b/paddle/fluid/operators/sum_op.cu deleted file mode 100644 index 35a1680d84d..00000000000 --- a/paddle/fluid/operators/sum_op.cu +++ /dev/null @@ -1,280 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at -http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/memory/malloc.h" -#include "paddle/fluid/operators/sum_op.h" -#include "paddle/fluid/platform/float16.h" - -namespace plat = paddle::platform; - -namespace paddle { -namespace operators { - -#define CEIL_DIV(x, y) (((x) + (y)-1) / (y)) - -using LoDTensor = framework::LoDTensor; - -template -__global__ void Sum2CUDAKernel(const T *in_0, - const T *in_1, - T *out, - int64_t N) { - int id = blockIdx.x * blockDim.x + threadIdx.x; - while (id < N) { - out[id] = in_0[id] + in_1[id]; - id += blockDim.x * gridDim.x; - } -} - -template -__global__ void SumArrayCUDAKernel( - T **in, T *out, int64_t N, size_t in_size, bool read_dst) { - int id = blockIdx.x * blockDim.x + threadIdx.x; - while (id < N) { - T total(read_dst ? 
out[id] : static_cast(0)); - for (int i = 0; i < in_size; ++i) { - const T *tmp = in[i]; - if (tmp) { - total += tmp[id]; - } - } - out[id] = total; - id += blockDim.x * gridDim.x; - } -} - -template -__global__ void SumSelectedRowsCUDAKernel(T **sr_in_out, - int64_t N, - size_t rows) { - int id = blockIdx.x * blockDim.x + threadIdx.x; - while (id < N) { - for (int i = 0; i < 2 * rows; i += 2) { - const T *tmp = sr_in_out[i]; - T *tmp_out = sr_in_out[i + 1]; - if (tmp && tmp_out) { - tmp_out[id] += tmp[id]; - } - } - id += blockDim.x * gridDim.x; - } -} - -template -void SumToLoDTensor(const framework::ExecutionContext &context) { - auto in_vars = context.MultiInputVar("X"); - const size_t in_num = in_vars.size(); - - constexpr size_t theory_sm_threads = 1024; - auto &dev_ctx = context.template device_context(); - auto stream = dev_ctx.stream(); - - auto max_threads = dev_ctx.GetMaxPhysicalThreadCount(); - auto sm_count = max_threads / theory_sm_threads; - size_t tile_size = 0; - dim3 grids; - dim3 blocks; - - auto ComputeKernelParameter = [&](size_t length) { - if (length >= max_threads) - tile_size = 1024; - else if (length < max_threads && length > sm_count * 128) - tile_size = 512; - else if (length <= sm_count * 128) - tile_size = 256; - grids = dim3(CEIL_DIV(length, tile_size), 1, 1); - blocks = dim3(tile_size, 1, 1); - }; - - auto *out = context.Output("Out"); - bool in_place = in_vars[0] == context.OutputVar("Out"); - - if (!in_place) { - auto *out_ptr = out->mutable_data(context.GetPlace()); - if (in_num >= 1 && in_vars[0]->IsType()) { - auto &in_0_tensor = in_vars[0]->Get(); - if (in_0_tensor.numel() > 0) { - in_place = (in_0_tensor.data() == out_ptr); - } - } - } - - // Sum of two tensors - if (in_num == 2 && in_vars[0]->IsType() && - in_vars[1]->IsType()) { - auto &in_0 = in_vars[0]->Get(); - auto &in_1 = in_vars[1]->Get(); - int64_t length_0 = in_0.numel(); - int64_t length_1 = in_1.numel(); - if (length_0 && length_1 && in_0.IsInitialized() && in_1.IsInitialized()) { - auto result = EigenVector::Flatten(*out); - auto &place = *dev_ctx.eigen_device(); - auto in_0_e = EigenVector::Flatten(in_0); - auto in_1_e = EigenVector::Flatten(in_1); - result.device(place) = in_0_e + in_1_e; - } else if (length_0 && in_0.IsInitialized()) { - auto result = EigenVector::Flatten(*out); - auto &place = *dev_ctx.eigen_device(); - result.device(place) = EigenVector::Flatten(in_0); - } else if (length_1 && in_1.IsInitialized()) { - auto result = EigenVector::Flatten(*out); - auto &place = *dev_ctx.eigen_device(); - result.device(place) = EigenVector::Flatten(in_1); - } - return; - } - - int start = in_place ? 1 : 0; - if (!in_place) { - phi::funcs::SetConstant constant_functor; - constant_functor(context.template device_context(), - out, - static_cast(0)); - } - - std::vector in_data; - std::vector selectrow_index; - int64_t lod_length = 0; - bool dst_write = false; - for (int i = start; i < in_num; ++i) { - if (in_vars[i]->IsType()) { - auto &in_i = in_vars[i]->Get(); - lod_length = in_i.numel(); - if (lod_length && in_i.IsInitialized()) { - in_data.emplace_back(in_i.data()); - } - } else if (in_vars[i]->IsType()) { - selectrow_index.push_back(i); - } - } - - // compute select rows separately. 
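-  // Layout note (illustrative sketch, not part of the original file): the
-  // pointer table built below interleaves input and output addresses as
-  // {in_0, out_r0, in_1, out_r1, ...}, so the SumSelectedRowsCUDAKernel
-  // launch walks the pairs with i += 2 and does tmp_out[id] += tmp[id]
-  // over each chunk of row_numel elements. For example, a SelectedRows
-  // input whose rows() == {3, 7} contributes:
-  //   sr_in_out_data[0] = &sr_data[0 * row_numel];      // input row 0
-  //   sr_in_out_data[1] = &sr_out_data[3 * row_numel];  // dense output row 3
-  //   sr_in_out_data[2] = &sr_data[1 * row_numel];      // input row 1
-  //   sr_in_out_data[3] = &sr_out_data[7 * row_numel];  // dense output row 7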
- if (!selectrow_index.empty()) { - std::vector sr_in_out_data; - size_t rows = 0; - int64_t length = 0; - for (auto index : selectrow_index) { - auto &sr = in_vars[index]->Get(); - auto &sr_value = sr.value(); - auto &sr_rows = sr.rows(); - - auto row_numel = sr_value.numel() / sr_rows.size(); - auto out_dims = out->dims(); - - PADDLE_ENFORCE_EQ(sr.height(), - out_dims[0], - platform::errors::InvalidArgument( - "The table height of input must be same as output, " - "but received input height is %d" - ", output height is %d", - sr.height(), - out_dims[0])); - PADDLE_ENFORCE_EQ(row_numel, - out->numel() / sr.height(), - platform::errors::InvalidArgument( - "The table width of input must be same as output, " - "but received input width is %d" - ", output width is %d", - row_numel, - out->numel() / sr.height())); - - auto *sr_data = sr_value.data(); - auto *sr_out_data = out->data(); - rows += sr_rows.size(); - length = row_numel; - - for (size_t i = 0; i < sr_rows.size(); ++i) { - sr_in_out_data.emplace_back(&sr_data[i * row_numel]); - sr_in_out_data.emplace_back(&sr_out_data[sr_rows[i] * row_numel]); - } - } - if (!sr_in_out_data.empty()) { - auto tmp_sr_in_out_array = memory::Alloc( - dev_ctx.GetPlace(), - sr_in_out_data.size() * sizeof(T *), - phi::Stream(reinterpret_cast(dev_ctx.stream()))); - - memory::Copy(dev_ctx.GetPlace(), - tmp_sr_in_out_array->ptr(), - platform::CPUPlace(), - reinterpret_cast(sr_in_out_data.data()), - sr_in_out_data.size() * sizeof(T *), - dev_ctx.stream()); - - T **sr_in_out_array_data = - reinterpret_cast(tmp_sr_in_out_array->ptr()); - - ComputeKernelParameter(length); - SumSelectedRowsCUDAKernel - <<>>(sr_in_out_array_data, length, rows); - dst_write = true; - } - } - // if indata not null, merge into one kernel call. - if (!in_data.empty()) { - auto tmp_in_array = memory::Alloc( - dev_ctx.GetPlace(), - in_data.size() * sizeof(T *), - phi::Stream(reinterpret_cast(dev_ctx.stream()))); - - memory::Copy(dev_ctx.GetPlace(), - tmp_in_array->ptr(), - platform::CPUPlace(), - reinterpret_cast(in_data.data()), - in_data.size() * sizeof(T *), - dev_ctx.stream()); - - T **in_array_data = reinterpret_cast(tmp_in_array->ptr()); - ComputeKernelParameter(lod_length); - SumArrayCUDAKernel<<>>(in_array_data, - out->data(), - lod_length, - in_data.size(), - dst_write | in_place); - } -} - -template -class SumKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext &context) const override { - auto out_var = context.OutputVar("Out"); - - if (out_var->IsType()) { - SumToLoDTensor(context); - } else if (out_var->IsType()) { - SelectedRowsCompute(context); - } else if (out_var->IsType()) { - LodTensorArrayCompute(context); - } else { - PADDLE_THROW(platform::errors::InvalidArgument( - "Expected type of Output(out) must be Tensor, SelectedRows or " - "LodTensorArray. But got " - "unsupport type: %s.", - framework::ToTypeName(out_var->Type()))); - } - } -}; -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -namespace plat = paddle::platform; -REGISTER_OP_CUDA_KERNEL(sum, - ops::SumKernel, - ops::SumKernel, - ops::SumKernel, - ops::SumKernel, - ops::SumKernel, - ops::SumKernel); diff --git a/paddle/fluid/operators/sum_op.h b/paddle/fluid/operators/sum_op.h deleted file mode 100644 index 804bfbb2099..00000000000 --- a/paddle/fluid/operators/sum_op.h +++ /dev/null @@ -1,222 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. 
-Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at -http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include - -#include "paddle/fluid/framework/eigen.h" -#include "paddle/fluid/framework/lod_tensor_array.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/selected_rows_functor.h" -#include "paddle/phi/kernels/funcs/math_function.h" - -namespace paddle { -namespace operators { - -using Tensor = framework::Tensor; -using SelectedRows = phi::SelectedRows; -using LoDTensor = framework::LoDTensor; -template -using EigenVector = framework::EigenVector; - -template -void SelectedRowsCompute(const framework::ExecutionContext &context) { - auto in_vars = context.MultiInputVar("X"); - auto out_var = context.OutputVar("Out"); - bool in_place = out_var == in_vars[0]; - - if (in_place && in_vars.size() < 2) { - return; - } - - std::vector inputs; - SelectedRows temp_in0; - - if (in_place) { - auto &in0 = in_vars[0]->Get(); - temp_in0.set_height(in0.height()); - temp_in0.set_rows(in0.rows()); - framework::TensorCopy(in0.value(), - in0.place(), - context.device_context(), - temp_in0.mutable_value()); - inputs.push_back(&temp_in0); - for (size_t i = 1; i < in_vars.size(); ++i) { - auto &in = in_vars[i]->Get(); - if (in.rows().size() > 0) { - inputs.push_back(&in); - } - } - } else { - for (auto &in_var : in_vars) { - auto &in = in_var->Get(); - if (in.rows().size() > 0) { - inputs.push_back(&in_var->Get()); - } - } - } - - auto *out = context.Output("Out"); - out->mutable_rows()->clear(); - - bool has_data = false; - for (auto &in : inputs) { - if (in->rows().size() > 0) { - has_data = true; - break; - } - } - if (has_data) { - math::scatter::MergeAdd merge_add; - merge_add(context.template device_context(), inputs, out); - - out->SyncIndex(); - - } else { - // no data, just set a empty out tensor. - out->mutable_value()->mutable_data(phi::make_ddim({0}), - context.GetPlace()); - } -} - -template -void LodTensorArrayCompute(const framework::ExecutionContext &context) { - auto in_vars = context.MultiInputVar("X"); - auto out_var = context.OutputVar("Out"); - bool in_place = out_var == in_vars[0]; - auto &out_array = *out_var->GetMutable(); - for (size_t i = in_place ? 
1 : 0; i < in_vars.size(); ++i) { - PADDLE_ENFORCE_EQ(in_vars[i]->IsType(), - true, - platform::errors::InvalidArgument( - "Only support all inputs are TensorArray, " - "but inputs[%d] is not TensorArray.", - i)); - auto &in_array = in_vars[i]->Get(); - - for (size_t i = 0; i < in_array.size(); ++i) { - if (in_array[i].IsInitialized() && (in_array[i].numel() != 0)) { - if (i >= out_array.size()) { - out_array.resize(i + 1); - } - if (!out_array[i].IsInitialized() || (out_array[i].numel() == 0)) { - framework::TensorCopy(in_array[i], - in_array[i].place(), - context.device_context(), - &out_array[i]); - out_array[i].set_lod(in_array[i].lod()); - } else { - PADDLE_ENFORCE_EQ( - out_array[i].lod(), - in_array[i].lod(), - platform::errors::InvalidArgument( - "The lod message between inputs[%d] and" - " outputs[%d] must be same, but now is not same.", - i, - i)); - auto in = EigenVector::Flatten(in_array[i]); - auto result = EigenVector::Flatten(out_array[i]); - result.device(*context.template device_context() - .eigen_device()) = result + in; - } - } - } - } -} - -template -class SumKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext &context) const override { - VLOG(10) << "start sum kernel"; - auto in_vars = context.MultiInputVar("X"); - size_t in_num = in_vars.size(); - auto out_var = context.OutputVar("Out"); - - bool in_place = out_var == in_vars[0]; - - if (out_var->IsType()) { - auto *out = out_var->GetMutable(); - auto *out_ptr = out->mutable_data(context.GetPlace()); - if (in_num >= 1 && in_vars[0]->IsType() && - in_vars[0]->Get().IsInitialized()) { - auto &in_0_tensor = in_vars[0]->Get(); - if (in_0_tensor.numel() > 0) { - in_place = (in_0_tensor.data() == out_ptr); - } - } - - auto result = EigenVector::Flatten(*out); - auto &place = - *context.template device_context().eigen_device(); - int start = in_place ? 1 : 0; - if (!in_place) { - if ((in_num >= 2) && in_vars[0]->IsType() && - in_vars[1]->IsType() && - in_vars[0]->Get().IsInitialized() && - in_vars[1]->Get().IsInitialized()) { - auto &in_0 = in_vars[0]->Get(); - auto &in_1 = in_vars[1]->Get(); - if (in_0.numel() && in_1.numel()) { - auto in_0_e = EigenVector::Flatten(in_0); - auto in_1_e = EigenVector::Flatten(in_1); - result.device(place) = in_0_e + in_1_e; - start = 2; - } - } - if (start != 2) { - VLOG(10) << "Fill with constant = 0 in sum kernel."; - phi::funcs::SetConstant constant_functor; - constant_functor(context.template device_context(), - out, - static_cast(0)); - } - } - - math::SelectedRowsAddToTensor functor; - // If in_place, just skip the first tensor - for (size_t i = start; i < in_num; i++) { - if (in_vars[i]->IsType()) { - auto &in_t = in_vars[i]->Get(); - if (!in_t.IsInitialized() || in_t.numel() == 0) { - continue; - } - auto in = EigenVector::Flatten(in_t); - result.device(place) = result + in; - } else if (in_vars[i]->IsType()) { - auto &in_t = in_vars[i]->Get(); - functor(context.template device_context(), in_t, out); - } else { - PADDLE_THROW(platform::errors::InvalidArgument( - "Expected type of Input(X) of %d-th must be Tensor, " - "SelectedRows. But got " - "unsupport type: %s.", - framework::ToTypeName(in_vars[i]->Type()))); - } - } - } else if (out_var->IsType()) { - SelectedRowsCompute(context); - } else if (out_var->IsType()) { - LodTensorArrayCompute(context); - } else { - PADDLE_THROW(platform::errors::InvalidArgument( - "Expected type of Output(out) must be Tensor, SelectedRows, " - "LoDTensorArray. 
But got " - "unsupport type: %s.", - framework::ToTypeName(out_var->Type()))); - } - VLOG(10) << "end sum kernel"; - } -}; -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/sum_op_mlu.cc b/paddle/fluid/operators/sum_op_mlu.cc index 6195a6c9b7a..0bb51581e93 100644 --- a/paddle/fluid/operators/sum_op_mlu.cc +++ b/paddle/fluid/operators/sum_op_mlu.cc @@ -12,13 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/fluid/framework/lod_tensor_array.h" +#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/mlu/mlu_baseop.h" -#include "paddle/fluid/operators/sum_op.h" namespace paddle { namespace operators { using Tensor = framework::Tensor; +using SelectedRows = phi::SelectedRows; +using LoDTensor = framework::LoDTensor; template class SumMLUKernel : public framework::OpKernel { diff --git a/paddle/fluid/operators/sum_op_npu.cc b/paddle/fluid/operators/sum_op_npu.cc index 8beecb70e7f..a7bb442fa65 100644 --- a/paddle/fluid/operators/sum_op_npu.cc +++ b/paddle/fluid/operators/sum_op_npu.cc @@ -16,13 +16,16 @@ limitations under the License. */ #include #include -#include "paddle/fluid/operators/sum_op.h" +#include "paddle/fluid/framework/lod_tensor_array.h" +#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/device/npu/npu_op_runner.h" namespace paddle { namespace operators { using Tensor = framework::Tensor; +using SelectedRows = phi::SelectedRows; +using LoDTensor = framework::LoDTensor; template class SumNPUKernel : public framework::OpKernel { diff --git a/paddle/fluid/operators/sum_op_xpu.cc b/paddle/fluid/operators/sum_op_xpu.cc index b73677b59ce..a4458681534 100644 --- a/paddle/fluid/operators/sum_op_xpu.cc +++ b/paddle/fluid/operators/sum_op_xpu.cc @@ -13,14 +13,16 @@ limitations under the License. 
*/ #include -#include "paddle/fluid/operators/sum_op.h" +#include "paddle/fluid/framework/lod_tensor_array.h" +#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/device/device_wrapper.h" #include "paddle/fluid/platform/device/xpu/xpu_header.h" namespace paddle { namespace operators { using framework::Tensor; - +using SelectedRows = phi::SelectedRows; +using LoDTensor = framework::LoDTensor; template class SumXPUKernel : public framework::OpKernel { using XPUType = typename XPUTypeTrait::Type; diff --git a/paddle/infrt/dialect/phi/pass/proto_arg_map_context.cc b/paddle/infrt/dialect/phi/pass/proto_arg_map_context.cc index 49fe069217e..6f5db28abf6 100644 --- a/paddle/infrt/dialect/phi/pass/proto_arg_map_context.cc +++ b/paddle/infrt/dialect/phi/pass/proto_arg_map_context.cc @@ -69,10 +69,16 @@ bool ProtoArgumentMappingContext::IsDenseTensorInputs( return true; } +bool ProtoArgumentMappingContext::IsSelectedRowsInputs( + const std::string& name) const { + return false; +} + bool ProtoArgumentMappingContext::IsSelectedRowsInput( const std::string& name) const { return false; } + bool ProtoArgumentMappingContext::IsDenseTensorVectorInput( const std::string& name) const { return false; diff --git a/paddle/infrt/dialect/phi/pass/proto_arg_map_context.h b/paddle/infrt/dialect/phi/pass/proto_arg_map_context.h index a1c730fd849..7c0bd0ff399 100644 --- a/paddle/infrt/dialect/phi/pass/proto_arg_map_context.h +++ b/paddle/infrt/dialect/phi/pass/proto_arg_map_context.h @@ -45,6 +45,7 @@ class ProtoArgumentMappingContext : public ::phi::ArgumentMappingContext { bool IsDenseTensorInput(const std::string& name) const override; bool IsDenseTensorInputs(const std::string& name) const override; bool IsSelectedRowsInput(const std::string& name) const override; + bool IsSelectedRowsInputs(const std::string& name) const override; bool IsDenseTensorVectorInput(const std::string& name) const override; bool IsDenseTensorOutput(const std::string& name) const override; diff --git a/paddle/phi/api/lib/api_custom_impl.cc b/paddle/phi/api/lib/api_custom_impl.cc index 2313f4c1296..37fd84e8b1b 100644 --- a/paddle/phi/api/lib/api_custom_impl.cc +++ b/paddle/phi/api/lib/api_custom_impl.cc @@ -34,6 +34,95 @@ namespace experimental { ////////////////// Forward api impls ////////////////////// +Tensor add_n_impl(const std::vector& x) { + Backend kernel_backend = Backend::UNDEFINED; + DataLayout kernel_layout = DataLayout::UNDEFINED; + DataType kernel_data_type = DataType::UNDEFINED; + + if (kernel_backend == Backend::UNDEFINED || + kernel_layout == DataLayout::UNDEFINED || + kernel_data_type == DataType::UNDEFINED) { + auto kernel_key_set = ParseKernelKeyByInputArgs(x); + auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey(); + if (kernel_backend == Backend::UNDEFINED) { + kernel_backend = kernel_key.backend(); + } + if (kernel_layout == DataLayout::UNDEFINED) { + kernel_layout = kernel_key.layout(); + } + if (kernel_data_type == DataType::UNDEFINED) { + kernel_data_type = kernel_key.dtype(); + } + } + + bool is_sr_kernel = true; + for (auto& input : x) { + if (phi::DenseTensor::classof(input.impl().get())) { + is_sr_kernel = false; + break; + } + } + + const std::string kernel_name = (is_sr_kernel ? 
"add_n_sr" : "add_n"); + + VLOG(6) << "add_n API kernel key: [" << kernel_backend << ", " + << kernel_layout << ", " << kernel_data_type << "]"; + auto kernel_result = phi::KernelFactory::Instance().SelectKernelOrThrowError( + kernel_name, {kernel_backend, kernel_layout, kernel_data_type}); + const auto& kernel = kernel_result.kernel; + VLOG(6) << kernel_name << " kernel: " << kernel; + auto* dev_ctx = GetDeviceContextByBackend( + kernel_result.has_fallback_cpu ? Backend::CPU : kernel_backend); + + Tensor api_output; + + if (is_sr_kernel) { + std::vector input_x(x.size()); + for (size_t i = 0; i < input_x.size(); ++i) { + input_x[i] = static_cast(x[i].impl().get()); + } + auto x_meta_vec = MakeMetaTensor(input_x); + std::vector x_metas(x_meta_vec.size()); + for (size_t i = 0; i < x_meta_vec.size(); ++i) { + x_metas[i] = &x_meta_vec[i]; + } + auto kernel_out = SetSelectedRowsKernelOutput(&api_output); + phi::MetaTensor meta_out(kernel_out); + phi::AddNInferMeta(x_metas, &meta_out); + + using kernel_signature = + void (*)(const platform::DeviceContext&, + const std::vector&, + phi::SelectedRows*); + auto* kernel_fn = kernel.GetVariadicKernelFn(); + + (*kernel_fn)(*dev_ctx, input_x, kernel_out); + } else { + std::vector input_x(x.size()); + for (size_t i = 0; i < input_x.size(); ++i) { + input_x[i] = x[i].impl().get(); + } + auto x_meta_vec = MakeMetaTensor(input_x); + std::vector x_metas(x_meta_vec.size()); + for (size_t i = 0; i < x_meta_vec.size(); ++i) { + x_metas[i] = &x_meta_vec[i]; + } + auto kernel_out = SetKernelOutput(&api_output); + phi::MetaTensor meta_out(kernel_out); + phi::AddNInferMeta(x_metas, &meta_out); + + using kernel_signature = + void (*)(const platform::DeviceContext&, + const std::vector&, + phi::DenseTensor*); + auto* kernel_fn = kernel.GetVariadicKernelFn(); + + (*kernel_fn)(*dev_ctx, input_x, kernel_out); + } + + return api_output; +} + Tensor copy_to_impl(const Tensor& x, Place place, bool blocking) { Tensor out; copy(x, place, blocking, &out); diff --git a/paddle/phi/api/lib/api_custom_impl.h b/paddle/phi/api/lib/api_custom_impl.h index e7fca7bfbc8..ab1d1705149 100644 --- a/paddle/phi/api/lib/api_custom_impl.h +++ b/paddle/phi/api/lib/api_custom_impl.h @@ -31,6 +31,8 @@ namespace experimental { ////////////////// Forward api impls ////////////////////// +Tensor add_n_impl(const std::vector& x); + std::tuple batch_norm_impl( const Tensor& x, const Tensor& scale, diff --git a/paddle/phi/api/lib/api_gen_utils.cc b/paddle/phi/api/lib/api_gen_utils.cc index cbcc475a0df..c760d787582 100644 --- a/paddle/phi/api/lib/api_gen_utils.cc +++ b/paddle/phi/api/lib/api_gen_utils.cc @@ -94,6 +94,16 @@ phi::MetaTensor MakeMetaTensor(const phi::TensorBase& tensor) { return phi::MetaTensor(tensor); } +std::vector MakeMetaTensor( + const std::vector& tensors) { + std::vector meta_tensors; + meta_tensors.reserve(tensors.size()); + for (const auto* t : tensors) { + meta_tensors.emplace_back(*t); + } + return meta_tensors; +} + phi::MetaTensor MakeMetaTensor( const paddle::optional& tensor) { if (tensor) { @@ -112,6 +122,16 @@ std::vector MakeMetaTensor( return meta_tensors; } +std::vector MakeMetaTensor( + const std::vector& tensors) { + std::vector meta_tensors; + meta_tensors.reserve(tensors.size()); + for (const auto* t : tensors) { + meta_tensors.emplace_back(*t); + } + return meta_tensors; +} + std::vector MakeMetaTensor( const std::vector& tensors) { std::vector meta_tensors; diff --git a/paddle/phi/api/lib/api_gen_utils.h b/paddle/phi/api/lib/api_gen_utils.h index 
2a7f283dabb..6677c2cb085 100644 --- a/paddle/phi/api/lib/api_gen_utils.h +++ b/paddle/phi/api/lib/api_gen_utils.h @@ -65,12 +65,18 @@ std::vector MakeMetaTensor( std::vector MakeMetaTensor( const std::vector& tensors); +std::vector MakeMetaTensor( + const std::vector& tensors); + phi::MetaTensor MakeMetaTensor( const paddle::optional& tensor); std::vector MakeMetaTensor( const paddle::optional>& tensors); +std::vector MakeMetaTensor( + const std::vector& tensors); + /* ------------------ for output ----------------------- */ phi::DenseTensor* SetKernelOutput(Tensor* out); diff --git a/paddle/phi/api/yaml/legacy_ops.yaml b/paddle/phi/api/yaml/legacy_ops.yaml index a8e2cda3896..ee251b8d999 100755 --- a/paddle/phi/api/yaml/legacy_ops.yaml +++ b/paddle/phi/api/yaml/legacy_ops.yaml @@ -102,10 +102,7 @@ - op : add_n args : (Tensor[] x) output : Tensor - infer_meta : - func : AddNInferMeta - kernel : - func : add_n + invoke : add_n_impl(x) backward : add_n_grad - op : addmm diff --git a/paddle/phi/core/compat/arg_map_context.h b/paddle/phi/core/compat/arg_map_context.h index 6cfd18369c9..206ad5d96f6 100644 --- a/paddle/phi/core/compat/arg_map_context.h +++ b/paddle/phi/core/compat/arg_map_context.h @@ -108,6 +108,7 @@ class ArgumentMappingContext { virtual bool IsDenseTensorInput(const std::string& name) const = 0; virtual bool IsDenseTensorInputs(const std::string& name) const = 0; virtual bool IsSelectedRowsInput(const std::string& name) const = 0; + virtual bool IsSelectedRowsInputs(const std::string& name) const = 0; // For compatibility with LoDTensorArray virtual bool IsDenseTensorVectorInput(const std::string& name) const = 0; diff --git a/paddle/phi/core/kernel_registry.h b/paddle/phi/core/kernel_registry.h index 28c750dd9d9..7ae01b7c725 100644 --- a/paddle/phi/core/kernel_registry.h +++ b/paddle/phi/core/kernel_registry.h @@ -100,6 +100,24 @@ struct KernelArgsParseFunctor { default_tensor_layout, default_key.dtype(), arg_type); + } else if (arg_type == std::type_index(typeid( + const std::vector&))) { + args_def->AppendInput(default_key.backend(), + default_tensor_layout, + default_key.dtype(), + arg_type); + } else if (arg_type == std::type_index(typeid( + const std::vector&))) { + args_def->AppendInput(default_key.backend(), + default_tensor_layout, + default_key.dtype(), + arg_type); + } else if (arg_type == std::type_index(typeid( + const std::vector&))) { + args_def->AppendInput(default_key.backend(), + default_tensor_layout, + default_key.dtype(), + arg_type); } else if (arg_type == std::type_index(typeid(const SelectedRows&))) { args_def->AppendInput(default_key.backend(), default_tensor_layout, diff --git a/paddle/phi/core/kernel_utils.h b/paddle/phi/core/kernel_utils.h index 9237b8eb31a..cdfdefa059c 100644 --- a/paddle/phi/core/kernel_utils.h +++ b/paddle/phi/core/kernel_utils.h @@ -270,6 +270,8 @@ struct KernelImpl { PD_SPECIALIZE_KernelCallHelper_FOR_OPTIONAL_INPUT(DenseTensor); PD_SPECIALIZE_KernelCallHelper_FOR_OPTIONAL_INPUT(SelectedRows); PD_SPECIALIZE_KernelCallHelper_FOR_MULTI_INPUT(DenseTensor); + PD_SPECIALIZE_KernelCallHelper_FOR_MULTI_INPUT(TensorBase); + PD_SPECIALIZE_KernelCallHelper_FOR_MULTI_INPUT(SelectedRows); PD_SPECIALIZE_KernelCallHelper_FOR_INPUT(SelectedRows); PD_SPECIALIZE_KernelCallHelper_FOR_OPTIONAL_MULTI_INPUT(DenseTensor); diff --git a/paddle/phi/core/meta_tensor.cc b/paddle/phi/core/meta_tensor.cc index 9a008e429da..9a80f5eac3e 100644 --- a/paddle/phi/core/meta_tensor.cc +++ b/paddle/phi/core/meta_tensor.cc @@ -139,6 +139,14 @@ void 
MetaTensor::share_meta(const MetaTensor& meta_tensor) { } } +TensorBase* MetaTensor::tensor() const { return tensor_; } + +bool MetaTensor::is_dense() const { return DenseTensor::classof(tensor_); } +bool MetaTensor::is_selected_rows() const { + return SelectedRows::classof(tensor_); +} +bool MetaTensor::is_tensor_array() const { return false; } + void MetaTensor::share_dims(const MetaTensor& meta_tensor) { ValidCheck(*this); bool is_dense_tensor = phi::DenseTensor::classof(tensor_); @@ -178,6 +186,4 @@ const LoD& MetaTensor::lod() const { } } -TensorBase* MetaTensor::tensor() const { return tensor_; } - } // namespace phi diff --git a/paddle/phi/core/meta_tensor.h b/paddle/phi/core/meta_tensor.h index 377d0e9bc4d..1a9dfb0d3c1 100644 --- a/paddle/phi/core/meta_tensor.h +++ b/paddle/phi/core/meta_tensor.h @@ -68,6 +68,12 @@ class MetaTensor { virtual bool initialized() const; + virtual bool is_selected_rows() const; + virtual bool is_dense() const; + // TODO(YuanRisheng) This API is for compatible with Fluid + // and it will be deleted in the future. + virtual bool is_tensor_array() const; + virtual operator unspecified_bool_type() const { return tensor_ == nullptr ? 0 : unspecified_bool_true; } diff --git a/paddle/phi/infermeta/multiary.cc b/paddle/phi/infermeta/multiary.cc index 7bab7747798..e2a7291f74b 100644 --- a/paddle/phi/infermeta/multiary.cc +++ b/paddle/phi/infermeta/multiary.cc @@ -301,6 +301,10 @@ void AddNInferMeta(const std::vector& x, phi::DDim in_dim({0}); for (size_t i = 0; i < x.size(); ++i) { auto x_dim = x[i]->dims(); + // x_dim.size() == 1 means the real dim of selected rows is [0] + if (x[i]->is_selected_rows() && x_dim.size() == 1) { + continue; + } if (phi::product(x_dim) == 0) { continue; } @@ -355,6 +359,31 @@ void AddNInferMeta(const std::vector& x, out->share_lod(*x[0]); } +// TODO(YuanRisheng) This InferMeta is used in Fluid +// and will be deleted in the future. +void AddNTensorArrayInferMeta(const std::vector& x, + MetaTensor* out, + MetaConfig config) { + int64_t max_length = 0; + bool has_tensor_array = false; + for (auto input : x) { + if (input->is_tensor_array()) { + has_tensor_array = true; + // if input is lod_tensor_array, dims() will return its size (one element) + max_length = + input->dims()[0] > max_length ? input->dims()[0] : max_length; + } + } + + if (has_tensor_array) { + if (out->is_tensor_array()) { + out->set_dims(make_ddim({max_length})); + } + } else { + AddNInferMeta(x, out, config); + } +} + void AucInferMeta(const MetaTensor& input, const MetaTensor& label, const MetaTensor& stat_pos, diff --git a/paddle/phi/infermeta/multiary.h b/paddle/phi/infermeta/multiary.h index b9d8aedfb2e..6c790ca3b36 100644 --- a/paddle/phi/infermeta/multiary.h +++ b/paddle/phi/infermeta/multiary.h @@ -123,6 +123,10 @@ void AddNInferMeta(const std::vector& x, MetaTensor* out, MetaConfig config = MetaConfig()); +void AddNTensorArrayInferMeta(const std::vector& x, + MetaTensor* out, + MetaConfig config); + void AucInferMeta(const MetaTensor& input, const MetaTensor& label, const MetaTensor& stat_pos, diff --git a/paddle/phi/kernels/add_n_kernel.h b/paddle/phi/kernels/add_n_kernel.h index c35dc2270aa..13d974a5877 100644 --- a/paddle/phi/kernels/add_n_kernel.h +++ b/paddle/phi/kernels/add_n_kernel.h @@ -15,12 +15,20 @@ #pragma once #include "paddle/phi/core/dense_tensor.h" +#include "paddle/phi/core/tensor_array.h" namespace phi { +// Note(YuanRisheng): std::vector shouldn't be widely used in +// PHI. Here, we use it to be compatible with Fluid. 
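+//
+// Taking const TensorBase* elements (rather than const DenseTensor*) lets a
+// single add_n kernel accept the mixed DenseTensor / SelectedRows input list
+// that Fluid's sum op permits; the CPU and GPU kernels in this patch then
+// dispatch per input via DenseTensor::classof(x[i]) and
+// SelectedRows::classof(x[i]).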
template void AddNKernel(const Context& dev_ctx, - const std::vector& x, + const std::vector& x, DenseTensor* out); +template +void AddNArrayKernel(const Context& dev_ctx, + const std::vector& x, + TensorArray* out); + } // namespace phi diff --git a/paddle/phi/kernels/cpu/add_n_kernel.cc b/paddle/phi/kernels/cpu/add_n_kernel.cc index d658b55758e..42532161053 100644 --- a/paddle/phi/kernels/cpu/add_n_kernel.cc +++ b/paddle/phi/kernels/cpu/add_n_kernel.cc @@ -12,24 +12,21 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/phi/kernels/add_n_kernel.h" - -#include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/funcs/eigen/common.h" -#include "paddle/phi/kernels/funcs/math_function.h" +#include "paddle/phi/kernels/impl/add_n_kernel_impl.h" namespace phi { template void AddNKernel(const Context& dev_ctx, - const std::vector& x, + const std::vector& x, DenseTensor* out) { size_t in_num = x.size(); - bool in_place = out == x[0]; - auto* out_ptr = dev_ctx.template Alloc(out); - if (in_num >= 1 && x[0]->initialized()) { - if (x[0]->numel() > 0) { - in_place = (x[0]->data() == out_ptr); + dev_ctx.template Alloc(out); + + bool in_place = false; + if (x.size() > 0 && x[0]->initialized() && DenseTensor::classof(x[0])) { + if ((static_cast(x[0]))->Holder() == out->Holder()) { + in_place = true; } } @@ -37,9 +34,11 @@ void AddNKernel(const Context& dev_ctx, auto& place = *dev_ctx.eigen_device(); int start = in_place ? 1 : 0; if (!in_place) { - if ((in_num >= 2) && x[0]->initialized() && x[1]->initialized()) { - auto& in_0 = *x[0]; - auto& in_1 = *x[1]; + if ((in_num >= 2) && DenseTensor::classof(x[0]) && + DenseTensor::classof(x[1]) && x[0]->initialized() && + x[1]->initialized()) { + auto& in_0 = *(static_cast(x[0])); + auto& in_1 = *(static_cast(x[1])); if (in_0.numel() && in_1.numel()) { auto in_0_e = EigenVector::Flatten(in_0); auto in_1_e = EigenVector::Flatten(in_1); @@ -49,20 +48,33 @@ void AddNKernel(const Context& dev_ctx, } if (start != 2) { VLOG(10) << "Fill with constant = 0 in sum kernel."; - funcs::SetConstant constant_functor; + phi::funcs::SetConstant constant_functor; constant_functor(dev_ctx, out, static_cast(0)); } } + paddle::operators::math::SelectedRowsAddToTensor functor; // If in_place, just skip the first tensor for (size_t i = start; i < in_num; i++) { - auto& in_t = *x[i]; - if (!in_t.initialized() || in_t.numel() == 0) { - continue; + if (DenseTensor::classof(x[i])) { + auto& in_t = *(static_cast(x[i])); + if (!in_t.initialized() || in_t.numel() == 0) { + continue; + } + auto in = EigenVector::Flatten(in_t); + result.device(place) = result + in; + } else if (SelectedRows::classof(x[i])) { + auto& in_t = *(static_cast(x[i])); + functor(dev_ctx, in_t, out); + } else { + PADDLE_THROW(phi::errors::InvalidArgument( + "Expected type of Input(X) of %d-th must be Tensor, " + "SelectedRows. 
But got " + "unsupport type: %s.", + x[i]->type_info().name())); } - auto in = EigenVector::Flatten(in_t); - result.device(place) = result + in; } + VLOG(10) << "end add_n kernel"; } } // namespace phi @@ -76,3 +88,13 @@ PD_REGISTER_KERNEL(add_n, int, phi::dtype::bfloat16, int64_t) {} + +PD_REGISTER_KERNEL(add_n_array, + CPU, + ALL_LAYOUT, + phi::AddNArrayKernel, + float, + double, + int, + phi::dtype::bfloat16, + int64_t) {} diff --git a/paddle/phi/kernels/gpu/add_n_kernel.cu b/paddle/phi/kernels/gpu/add_n_kernel.cu index 981ec24dac6..f32ba597f5b 100644 --- a/paddle/phi/kernels/gpu/add_n_kernel.cu +++ b/paddle/phi/kernels/gpu/add_n_kernel.cu @@ -14,16 +14,27 @@ #include "paddle/phi/kernels/add_n_kernel.h" +#include "paddle/phi/kernels/impl/add_n_kernel_impl.h" + #include "paddle/fluid/memory/malloc.h" #include "paddle/fluid/memory/memcpy.h" -#include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/funcs/eigen/common.h" -#include "paddle/phi/kernels/funcs/math_function.h" namespace phi { #define CEIL_DIV(x, y) (((x) + (y)-1) / (y)) +template +__global__ void Sum2CUDAKernel(const T *in_0, + const T *in_1, + T *out, + int64_t N) { + int id = blockIdx.x * blockDim.x + threadIdx.x; + while (id < N) { + out[id] = in_0[id] + in_1[id]; + id += blockDim.x * gridDim.x; + } +} + template __global__ void SumArrayCUDAKernel( T **in, T *out, int64_t N, size_t in_size, bool read_dst) { @@ -41,9 +52,26 @@ __global__ void SumArrayCUDAKernel( } } +template +__global__ void SumSelectedRowsCUDAKernel(T **sr_in_out, + int64_t N, + size_t rows) { + int id = blockIdx.x * blockDim.x + threadIdx.x; + while (id < N) { + for (int i = 0; i < 2 * rows; i += 2) { + const T *tmp = sr_in_out[i]; + T *tmp_out = sr_in_out[i + 1]; + if (tmp && tmp_out) { + tmp_out[id] += tmp[id]; + } + } + id += blockDim.x * gridDim.x; + } +} + template void AddNKernel(const Context &dev_ctx, - const std::vector &x, + const std::vector &x, DenseTensor *out) { const size_t in_num = x.size(); @@ -66,36 +94,38 @@ void AddNKernel(const Context &dev_ctx, grids = dim3(CEIL_DIV(length, tile_size), 1, 1); blocks = dim3(tile_size, 1, 1); }; + auto *out_ptr = dev_ctx.template Alloc(out); + bool in_place = false; + if (x.size() > 0 && x[0]->initialized() && DenseTensor::classof(x[0])) { + if ((static_cast(x[0]))->data() == out->data()) { + in_place = true; + } + } - bool in_place = x[0] == out; - - if (!in_place) { - auto *out_ptr = dev_ctx.template Alloc(out); - if (in_num >= 1) { - auto &in_0_tensor = *x[0]; - if (in_0_tensor.numel() > 0) { - in_place = (in_0_tensor.data() == out_ptr); - } + if (!in_place && in_num >= 1 && DenseTensor::classof(x[0])) { + auto &in_0_tensor = *(static_cast(x[0])); + if (in_0_tensor.numel() > 0) { + in_place = (in_0_tensor.data() == out_ptr); } } // Sum of two tensors - if (in_num == 2) { - auto &in_0 = *x[0]; - auto &in_1 = *x[1]; + if (in_num == 2 && DenseTensor::classof(x[0]) && DenseTensor::classof(x[1])) { + auto &in_0 = *(static_cast(x[0])); + auto &in_1 = *(static_cast(x[1])); int64_t length_0 = in_0.numel(); int64_t length_1 = in_1.numel(); - if (length_0 && length_1 && in_0.initialized() && in_1.initialized()) { + if (length_0 && length_1 && in_0.IsInitialized() && in_1.IsInitialized()) { auto result = EigenVector::Flatten(*out); auto &place = *dev_ctx.eigen_device(); auto in_0_e = EigenVector::Flatten(in_0); auto in_1_e = EigenVector::Flatten(in_1); result.device(place) = in_0_e + in_1_e; - } else if (length_0 && in_0.initialized()) { + } else if (length_0 && in_0.IsInitialized()) { auto 
result = EigenVector::Flatten(*out); auto &place = *dev_ctx.eigen_device(); result.device(place) = EigenVector::Flatten(in_0); - } else if (length_1 && in_1.initialized()) { + } else if (length_1 && in_1.IsInitialized()) { auto result = EigenVector::Flatten(*out); auto &place = *dev_ctx.eigen_device(); result.device(place) = EigenVector::Flatten(in_1); @@ -105,27 +135,90 @@ void AddNKernel(const Context &dev_ctx, int start = in_place ? 1 : 0; if (!in_place) { - funcs::SetConstant constant_functor; + phi::funcs::SetConstant constant_functor; constant_functor(dev_ctx, out, static_cast(0)); } std::vector in_data; + std::vector selectrow_index; int64_t lod_length = 0; bool dst_write = false; for (int i = start; i < in_num; ++i) { - auto &in_i = *x[i]; - lod_length = in_i.numel(); - if (lod_length && in_i.initialized()) { - in_data.emplace_back(in_i.data()); + if (DenseTensor::classof(x[i])) { + auto &in_i = *(static_cast(x[i])); + lod_length = in_i.numel(); + if (lod_length && in_i.IsInitialized()) { + in_data.emplace_back(in_i.data()); + } + } else if (SelectedRows::classof(x[i])) { + selectrow_index.push_back(i); } } + // compute select rows separately. + if (!selectrow_index.empty()) { + std::vector sr_in_out_data; + size_t rows = 0; + int64_t length = 0; + for (auto index : selectrow_index) { + auto &sr = *(static_cast(x[index])); + auto &sr_value = sr.value(); + auto &sr_rows = sr.rows(); + + auto row_numel = sr_value.numel() / sr_rows.size(); + auto out_dims = out->dims(); + + PADDLE_ENFORCE_EQ(sr.height(), + out_dims[0], + errors::InvalidArgument( + "The table height of input must be same as output, " + "but received input height is %d" + ", output height is %d", + sr.height(), + out_dims[0])); + PADDLE_ENFORCE_EQ(row_numel, + out->numel() / sr.height(), + errors::InvalidArgument( + "The table width of input must be same as output, " + "but received input width is %d" + ", output width is %d", + row_numel, + out->numel() / sr.height())); + + auto *sr_data = sr_value.data(); + auto *sr_out_data = out->data(); + rows += sr_rows.size(); + length = row_numel; + + for (size_t i = 0; i < sr_rows.size(); ++i) { + sr_in_out_data.emplace_back(&sr_data[i * row_numel]); + sr_in_out_data.emplace_back(&sr_out_data[sr_rows[i] * row_numel]); + } + } + if (!sr_in_out_data.empty()) { + auto tmp_sr_in_out_array = paddle::memory::Alloc( + dev_ctx.GetPlace(), sr_in_out_data.size() * sizeof(T *)); + + paddle::memory::Copy(dev_ctx.GetPlace(), + tmp_sr_in_out_array->ptr(), + phi::CPUPlace(), + reinterpret_cast(sr_in_out_data.data()), + sr_in_out_data.size() * sizeof(T *), + dev_ctx.stream()); + + T **sr_in_out_array_data = + reinterpret_cast(tmp_sr_in_out_array->ptr()); + + ComputeKernelParameter(length); + SumSelectedRowsCUDAKernel + <<>>(sr_in_out_array_data, length, rows); + dst_write = true; + } + } // if indata not null, merge into one kernel call. 
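  // Illustrative note: the dense inputs are reduced in a single grid-stride
  // launch. The host-side pointer table in_data is copied to the device once,
  // and SumArrayCUDAKernel then accumulates every input per element. read_dst
  // is passed as (dst_write | in_place), so the kernel folds the existing
  // out[id] back in when the selected-rows pass above has already written
  // into out, or when the sum runs in place.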
if (!in_data.empty()) { - auto tmp_in_array = paddle::memory::Alloc( - dev_ctx.GetPlace(), - in_data.size() * sizeof(T *), - phi::Stream(reinterpret_cast(dev_ctx.stream()))); + auto tmp_in_array = + paddle::memory::Alloc(dev_ctx.GetPlace(), in_data.size() * sizeof(T *)); paddle::memory::Copy(dev_ctx.GetPlace(), tmp_in_array->ptr(), @@ -153,6 +246,17 @@ PD_REGISTER_KERNEL(add_n, float, double, int, - int64_t, phi::dtype::bfloat16, - phi::dtype::float16) {} + phi::dtype::float16, + int64_t) {} + +PD_REGISTER_KERNEL(add_n_array, + GPU, + ALL_LAYOUT, + phi::AddNArrayKernel, + float, + double, + int, + phi::dtype::bfloat16, + phi::dtype::float16, + int64_t) {} diff --git a/paddle/phi/kernels/impl/add_n_kernel_impl.h b/paddle/phi/kernels/impl/add_n_kernel_impl.h new file mode 100644 index 00000000000..654ef5efbd9 --- /dev/null +++ b/paddle/phi/kernels/impl/add_n_kernel_impl.h @@ -0,0 +1,80 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/phi/kernels/add_n_kernel.h" + +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/core/tensor_utils.h" +#include "paddle/phi/kernels/funcs/eigen/common.h" +#include "paddle/phi/kernels/funcs/math_function.h" + +#include "paddle/fluid/operators/math/selected_rows_functor.h" + +namespace phi { + +template +void AddNArrayKernel(const Context& dev_ctx, + const std::vector& x, + TensorArray* out) { + for (auto& ele : *out) { + dev_ctx.template Alloc(&ele); + } + bool in_place = true; + if (x.size() > 0 && x[0]->size() == out->size()) { + for (size_t i = 0; i < out->size(); i++) { + if (x[0]->at(i).IsInitialized() && + out->at(i).data() != x[0]->at(i).data()) { + in_place = false; + break; + } + } + } else { + in_place = false; + } + for (size_t i = in_place ? 1 : 0; i < x.size(); ++i) { + auto* in_array = x.at(i); + + for (size_t j = 0; j < in_array->size(); ++j) { + if (in_array->at(j).IsInitialized() && (in_array->at(j).numel() != 0)) { + if (j >= out->size()) { + out->resize(j + 1); + } + if (!out->at(j).IsInitialized() || (out->at(j).numel() == 0)) { + Copy(dev_ctx, + in_array->at(j), + in_array->at(j).place(), + false, + &out->at(j)); + out->at(j).set_lod(in_array->at(j).lod()); + } else { + PADDLE_ENFORCE_EQ( + out->at(j).lod(), + in_array->at(j).lod(), + phi::errors::InvalidArgument( + "The lod message between inputs[%d] and" + " outputs[%d] must be same, but now is not same.", + j, + j)); + auto in = EigenVector::Flatten(in_array->at(j)); + auto result = EigenVector::Flatten(out->at(j)); + result.device(*dev_ctx.eigen_device()) = result + in; + } + } + } + } +} + +} // namespace phi diff --git a/paddle/phi/kernels/selected_rows/add_n_kernel.h b/paddle/phi/kernels/selected_rows/add_n_kernel.h new file mode 100644 index 00000000000..c56985fb072 --- /dev/null +++ b/paddle/phi/kernels/selected_rows/add_n_kernel.h @@ -0,0 +1,27 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/phi/core/selected_rows.h"
+
+namespace phi {
+namespace sr {
+
+template <typename T, typename Context>
+void AddNKernel(const Context& dev_ctx,
+                const std::vector<const SelectedRows*>& x,
+                SelectedRows* out);
+}  // namespace sr
+}  // namespace phi
diff --git a/paddle/phi/kernels/selected_rows/cpu/add_n_kernel.cc b/paddle/phi/kernels/selected_rows/cpu/add_n_kernel.cc
new file mode 100644
index 00000000000..1cd6529014e
--- /dev/null
+++ b/paddle/phi/kernels/selected_rows/cpu/add_n_kernel.cc
@@ -0,0 +1,25 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/selected_rows/impl/add_n_kernel_impl.h"
+
+PD_REGISTER_KERNEL(add_n_sr,
+                   CPU,
+                   ALL_LAYOUT,
+                   phi::sr::AddNKernel,
+                   float,
+                   double,
+                   int,
+                   phi::dtype::bfloat16,
+                   int64_t) {}
diff --git a/paddle/phi/kernels/selected_rows/gpu/add_n_kernel.cu b/paddle/phi/kernels/selected_rows/gpu/add_n_kernel.cu
new file mode 100644
index 00000000000..43442348d20
--- /dev/null
+++ b/paddle/phi/kernels/selected_rows/gpu/add_n_kernel.cu
@@ -0,0 +1,26 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/selected_rows/impl/add_n_kernel_impl.h"
+
+PD_REGISTER_KERNEL(add_n_sr,
+                   GPU,
+                   ALL_LAYOUT,
+                   phi::sr::AddNKernel,
+                   float,
+                   double,
+                   int,
+                   phi::dtype::bfloat16,
+                   phi::dtype::float16,
+                   int64_t) {}
diff --git a/paddle/phi/kernels/selected_rows/impl/add_n_kernel_impl.h b/paddle/phi/kernels/selected_rows/impl/add_n_kernel_impl.h
new file mode 100644
index 00000000000..d5bd9f2b2c7
--- /dev/null
+++ b/paddle/phi/kernels/selected_rows/impl/add_n_kernel_impl.h
@@ -0,0 +1,90 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/phi/kernels/selected_rows/add_n_kernel.h"
+
+#include "paddle/fluid/operators/math/selected_rows_functor.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/core/tensor_utils.h"
+#include "paddle/phi/kernels/funcs/math_function.h"
+
+namespace phi {
+namespace sr {
+template <typename T, typename Context>
+void AddNKernel(const Context &dev_ctx,
+                const std::vector<const SelectedRows *> &x,
+                SelectedRows *out) {
+  dev_ctx.template Alloc<T>(out->mutable_value());
+
+  bool in_place = false;
+  if (x.size() > 0 && x[0]->value().Holder() == out->value().Holder()) {
+    in_place = true;
+  }
+
+  if (in_place && x.size() < 2) {
+    return;
+  }
+
+  std::vector<const SelectedRows *> inputs;
+  SelectedRows temp_in0;
+
+  if (in_place) {
+    auto &in0 = *x[0];
+    temp_in0.set_height(in0.height());
+    temp_in0.set_rows(in0.rows());
+    Copy(
+        dev_ctx, in0.value(), in0.place(), false, temp_in0.mutable_value());
+    inputs.push_back(&temp_in0);
+    for (size_t i = 1; i < x.size(); ++i) {
+      auto &in = *x[i];
+      if (in.rows().size() > 0) {
+        inputs.push_back(&in);
+      }
+    }
+  } else {
+    for (auto in_var : x) {
+      auto &in = *in_var;
+      if (in.rows().size() > 0) {
+        inputs.push_back(in_var);
+      }
+    }
+  }
+
+  out->mutable_rows()->clear();
+
+  bool has_data = false;
+  for (auto &in : inputs) {
+    if (in->rows().size() > 0) {
+      has_data = true;
+      break;
+    }
+  }
+  if (has_data) {
+    paddle::operators::math::scatter::MergeAdd<Context, T> merge_add;
+    merge_add(dev_ctx, inputs, out);
+
+    out->SyncIndex();
+
+  } else {
+    // No input data: just set an empty out tensor.
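+    // (A {0}-shaped value with cleared rows keeps the output well formed
+    // for downstream readers even though nothing was merged.)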
+    auto *out_dense = out->mutable_value();
+    out_dense->clear();
+    out_dense->Resize(phi::make_ddim({0}));
+    dev_ctx.template Alloc<T>(out_dense);
+  }
+}
+}  // namespace sr
+}  // namespace phi
diff --git a/paddle/phi/ops/compat/memcpy_d2h_sig.cc b/paddle/phi/ops/compat/memcpy_sig.cc
similarity index 77%
rename from paddle/phi/ops/compat/memcpy_d2h_sig.cc
rename to paddle/phi/ops/compat/memcpy_sig.cc
index 38b0f01082e..96da0abbd4f 100644
--- a/paddle/phi/ops/compat/memcpy_d2h_sig.cc
+++ b/paddle/phi/ops/compat/memcpy_sig.cc
@@ -27,6 +27,15 @@ KernelSignature MemcpyD2HOpArgumentMapping(const ArgumentMappingContext& ctx) {
   return KernelSignature("memcpy_d2h", {"X"}, {"dst_place_type"}, {"Out"});
 }
 
+KernelSignature MemcpyOpArgumentMapping(const ArgumentMappingContext& ctx) {
+  if (ctx.IsDenseTensorInput("X")) {
+    return KernelSignature("memcpy", {"X"}, {"dst_place_type"}, {"Out"});
+  }
+
+  return KernelSignature("unregistered", {}, {}, {});
+}
+
 }  // namespace phi
 
 PD_REGISTER_ARG_MAPPING_FN(memcpy_d2h, phi::MemcpyD2HOpArgumentMapping);
+PD_REGISTER_ARG_MAPPING_FN(memcpy, phi::MemcpyOpArgumentMapping);
diff --git a/paddle/phi/ops/compat/sum_sig.cc b/paddle/phi/ops/compat/sum_sig.cc
index d71111408f8..ff68d43ed7b 100644
--- a/paddle/phi/ops/compat/sum_sig.cc
+++ b/paddle/phi/ops/compat/sum_sig.cc
@@ -18,10 +18,13 @@ namespace phi {
 
 KernelSignature SumOpArgumentMapping(const ArgumentMappingContext& ctx) {
-  if (ctx.IsDenseTensorInputs("X")) {
+  if (ctx.IsSelectedRowsInputs("X")) {
+    return KernelSignature("add_n_sr", {"X"}, {}, {"Out"});
+  } else if (ctx.IsDenseTensorVectorInput("X")) {
+    return KernelSignature("add_n_array", {"X"}, {}, {"Out"});
+  } else {
     return KernelSignature("add_n", {"X"}, {}, {"Out"});
   }
-  return KernelSignature("unregistered", {}, {}, {});
 }
 
 }  // namespace phi
diff --git a/paddle/phi/tests/api/CMakeLists.txt b/paddle/phi/tests/api/CMakeLists.txt
index a337e4ee4bd..e8fb6359004 100644
--- a/paddle/phi/tests/api/CMakeLists.txt
+++ b/paddle/phi/tests/api/CMakeLists.txt
@@ -109,3 +109,7 @@ cc_test(
   test_strings_lower_upper_api
   SRCS test_strings_lower_upper_api.cc
   DEPS ${COMMON_API_TEST_DEPS})
+cc_test(
+  test_add_n_api
+  SRCS test_add_n_api.cc
+  DEPS ${COMMON_API_TEST_DEPS})
diff --git a/paddle/phi/tests/api/test_add_n_api.cc b/paddle/phi/tests/api/test_add_n_api.cc
new file mode 100644
index 00000000000..bfa957667f6
--- /dev/null
+++ b/paddle/phi/tests/api/test_add_n_api.cc
@@ -0,0 +1,84 @@
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <gtest/gtest.h>
+
+#include <memory>
+
+#include "paddle/fluid/memory/allocation/allocator_facade.h"
+#include "paddle/phi/api/include/tensor.h"
+#include "paddle/phi/api/lib/api_custom_impl.h"
+#include "paddle/phi/common/place.h"
+#include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/core/selected_rows.h"
+
+PD_DECLARE_KERNEL(add_n_sr, CPU, ALL_LAYOUT);
+
+namespace paddle {
+namespace tests {
+
+TEST(API, add_n) {
+  // 1. create tensor
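+  // Two SelectedRows inputs sharing the row index set {0, ..., 6},
+  // each with 12 columns, so the merged result has 7 rows of 12 floats.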
+  std::vector<int64_t> rows = {0, 1, 2, 3, 4, 5, 6};
+  int64_t row_numel = 12;
+  auto x_sr = std::make_shared<phi::SelectedRows>(rows, 10);
+  auto x_meta = phi::DenseTensorMeta(
+      phi::DataType::FLOAT32,
+      phi::make_ddim({static_cast<int64_t>(rows.size()), row_numel}),
+      phi::DataLayout::NCHW);
+  x_sr->mutable_value()->set_meta(x_meta);
+  x_sr->AllocateFrom(paddle::memory::allocation::AllocatorFacade::Instance()
+                         .GetAllocator(paddle::platform::CPUPlace())
+                         .get(),
+                     phi::DataType::FLOAT32);
+  auto* dense_x_data = x_sr->mutable_value()->data<float>();
+
+  auto y_sr = std::make_shared<phi::SelectedRows>(rows, 10);
+  y_sr->mutable_value()->set_meta(x_meta);
+  y_sr->AllocateFrom(paddle::memory::allocation::AllocatorFacade::Instance()
+                         .GetAllocator(paddle::platform::CPUPlace())
+                         .get(),
+                     phi::DataType::FLOAT32);
+  auto* dense_y_data = y_sr->mutable_value()->data<float>();
+
+  float sum[84] = {0.0};
+  for (size_t i = 0; i < 7; ++i) {
+    for (size_t j = 0; j < 12; ++j) {
+      dense_x_data[i * 12 + j] = (i * 4 + j);
+      dense_y_data[i * 12 + j] = (i * 4 + j);
+      sum[i * 12 + j] += (i * 4 + j) * 2;
+    }
+  }
+
+  paddle::experimental::Tensor x(x_sr);
+  paddle::experimental::Tensor y(y_sr);
+  auto out = paddle::experimental::add_n_impl({x, y});
+
+  // check the add_n result
+  ASSERT_EQ(
+      static_cast<int>(std::dynamic_pointer_cast<phi::SelectedRows>(out.impl())
+                           ->rows()
+                           .size()),
+      7);
+  for (int64_t i = 0; i < 84; ++i) {
+    ASSERT_EQ(sum[i],
+              std::dynamic_pointer_cast<phi::SelectedRows>(out.impl())
+                  ->value()
+                  .data<float>()[i]);
+  }
+}
+
+}  // namespace tests
+}  // namespace paddle
diff --git a/paddle/phi/tests/ops/test_op_signature.h b/paddle/phi/tests/ops/test_op_signature.h
index 745f263208f..426bb6b1243 100644
--- a/paddle/phi/tests/ops/test_op_signature.h
+++ b/paddle/phi/tests/ops/test_op_signature.h
@@ -77,6 +77,10 @@ class TestArgumentMappingContext : public phi::ArgumentMappingContext {
     return selected_rows_inputs.count(name) > 0;
   }
 
+  bool IsSelectedRowsInputs(const std::string& name) const override {
+    return selected_rows_inputs.count(name) > 0;
+  }
+
   // add member if needed
   bool IsDenseTensorVectorInput(const std::string& name) const override {
     return false;
diff --git a/python/paddle/optimizer/optimizer.py b/python/paddle/optimizer/optimizer.py
index 18af11609c3..980d46942cf 100644
--- a/python/paddle/optimizer/optimizer.py
+++ b/python/paddle/optimizer/optimizer.py
@@ -1040,9 +1040,7 @@ class Optimizer(object):
         assert regularization_term is not None
 
         if framework.in_dygraph_mode():
-            if grad.is_dense() and regularization_term.is_dense():
-                return _C_ops.add_n([grad, regularization_term])
-            return _legacy_C_ops.sum([grad, regularization_term])
+            return _C_ops.add_n([grad, regularization_term])
         elif framework._in_legacy_dygraph():
             return _legacy_C_ops.sum([grad, regularization_term])
diff --git a/python/paddle/tensor/math.py b/python/paddle/tensor/math.py
index e9369b5da38..2b114e2f4ad 100644
--- a/python/paddle/tensor/math.py
+++ b/python/paddle/tensor/math.py
@@ -1501,9 +1501,6 @@ def add_n(inputs, name=None):
     if in_dygraph_mode():
         if isinstance(inputs, Variable):
             inputs = [inputs]
-        for x in inputs:
-            if not x.is_dense():
-                return _legacy_C_ops.sum(inputs, 'use_mkldnn', False)
         return _C_ops.add_n(inputs)
     if _in_legacy_dygraph():
         if isinstance(inputs, Variable):
-- 
GitLab
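Note on usage: after this patch, dygraph `paddle.add_n` always routes through the single PHI `add_n` entry point, and `SumOpArgumentMapping` selects `add_n`, `add_n_sr`, or `add_n_array` from the input variable types instead of ever returning "unregistered". A minimal sketch of the dense path, assuming a Paddle build that includes this change:

    import paddle

    x = paddle.to_tensor([[1.0, 2.0], [3.0, 4.0]])
    y = paddle.to_tensor([[10.0, 20.0], [30.0, 40.0]])

    # In dygraph mode this now dispatches straight to the PHI add_n
    # kernel via _C_ops.add_n for dense inputs.
    out = paddle.add_n([x, y])
    print(out.numpy())  # [[11. 22.] [33. 44.]]

SelectedRows and TensorArray inputs take the `add_n_sr` and `add_n_array` branches of the same mapping, which is what the new `test_add_n_api.cc` exercises on CPU.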