diff --git a/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cpu.cc b/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cpu.cc
index 93cd0d1338f684a2d18803f0038c9fb8f53d7dc9..d71d78b5d9d0c13955c53ed5f1b7a8b73052cf4b 100644
--- a/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cpu.cc
+++ b/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cpu.cc
@@ -175,7 +175,7 @@ TEST(Benchmark, EagerIntermediateMLPCPU) {
   }
 }
 
-USE_OP(scale);
+USE_OP_ITSELF(scale);
 USE_OP(elementwise_add);
 USE_OP(matmul_v2);
 USE_OP(reduce_sum);
diff --git a/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cuda.cc b/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cuda.cc
index 2df44bfcab57fcda293841af8e3a89fa6290499d..640ee0152efc4fa74ba59dd2e8803e26bdb91fa5 100644
--- a/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cuda.cc
+++ b/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cuda.cc
@@ -185,7 +185,7 @@ TEST(Benchmark, EagerIntermediateMLPCUDA) {
   }
 }
 
-USE_OP(scale);
+USE_OP_ITSELF(scale);
 USE_OP(matmul_v2);
 USE_OP(reduce_sum);
 USE_OP(reduce_sum_grad);
diff --git a/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cpu.cc b/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cpu.cc
index b2a96468ece22c979cbe022531bff9e7739e5153..c2f0479460064e05fc917ec432a7384e43e73cf3 100644
--- a/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cpu.cc
+++ b/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cpu.cc
@@ -211,7 +211,7 @@ TEST(Benchmark, FluidMLPCPU) {
 }  // namespace imperative
 }  // namespace paddle
 
-USE_OP(scale);
+USE_OP_ITSELF(scale);
 USE_OP(elementwise_add);
 USE_OP(matmul_v2);
 USE_OP(reduce_sum);
diff --git a/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cuda.cc b/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cuda.cc
index 7f8b845b0703b70523c8732737d182357a64cf83..250005e31150c3c9d83d3d094ccb4e00b2de7429 100644
--- a/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cuda.cc
+++ b/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cuda.cc
@@ -245,7 +245,7 @@ TEST(Benchmark, FluidMLPCUDA) {
 }  // namespace imperative
 }  // namespace paddle
 
-USE_OP(scale);
+USE_OP_ITSELF(scale);
 USE_OP(matmul_v2);
 USE_OP(reduce_sum);
 USE_OP(reduce_sum_grad);
diff --git a/paddle/fluid/framework/heter_pipeline_trainer_test.cc b/paddle/fluid/framework/heter_pipeline_trainer_test.cc
index 417c7685bcbeb44b2db2b3d849d3915f351cf002..a605d5d6811eb08721a1f220ccb81cafb3babdb6 100644
--- a/paddle/fluid/framework/heter_pipeline_trainer_test.cc
+++ b/paddle/fluid/framework/heter_pipeline_trainer_test.cc
@@ -26,7 +26,7 @@
 #define _LINUX
 #endif
 
-USE_OP(scale);
+USE_OP_ITSELF(scale);
 USE_NO_KERNEL_OP(heter_listen_and_serv);
 namespace paddle {
 namespace framework {
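A note on the repeated `USE_OP(scale)` to `USE_OP_ITSELF(scale)` change above and below: scale's fluid kernels are deleted later in this patch (the `REGISTER_OP_CPU_KERNEL`/`REGISTER_OP_CUDA_KERNEL` calls in scale_op.cc are removed), so translation units that only need the operator definition must stop referencing the kernel-registration symbol or they fail to link. A minimal, self-contained sketch of the "touch symbol" idiom behind these macros; the symbol names here are hypothetical stand-ins, not Paddle's actual registrar names:

```cpp
#include <iostream>

// Emitted by REGISTER_OPERATOR: the op registrar "touch" function.
int TouchScaleOpRegistrar() { return 0; }

// A kernel registrar touch function would be emitted by
// REGISTER_OP_*_KERNEL; after the pten migration that symbol no longer
// exists for scale, so a USE_OP-style reference would not link.

// USE_OP_ITSELF-style reference: pull in the operator registration only.
static int use_scale_itself = TouchScaleOpRegistrar();

int main() {
  std::cout << "linked against op registrar only: " << use_scale_itself
            << "\n";
}
```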
diff --git a/paddle/fluid/framework/infershape_utils.cc b/paddle/fluid/framework/infershape_utils.cc
index e1d7190a9e418ab5ae049707e2d2d92d78a896be..652286ab2666e6253173f6b7d5c3751a22ee788c 100644
--- a/paddle/fluid/framework/infershape_utils.cc
+++ b/paddle/fluid/framework/infershape_utils.cc
@@ -78,7 +78,6 @@ class InferShapeArgumentMappingContext : public pten::ArgumentMappingContext {
   const InferShapeContext& ctx_;
 };
 
-// TODO(chenweihang): Support SelectedRows later
 // TODO(chenweihang): Support TensorArray later
 class CompatMetaTensor : public pten::MetaTensor {
  public:
@@ -104,7 +103,14 @@ class CompatMetaTensor : public pten::MetaTensor {
   DDim dims() const override {
     if (is_runtime_) {
       auto* var = BOOST_GET_CONST(Variable*, var_);
-      return var->Get<LoDTensor>().dims();
+      if (var->IsType<LoDTensor>()) {
+        return var->Get<LoDTensor>().dims();
+      } else if (var->IsType<pten::SelectedRows>()) {
+        return var->Get<pten::SelectedRows>().dims();
+      } else {
+        PADDLE_THROW(platform::errors::Unimplemented(
+            "Currently, dims can only be obtained from DenseTensor or "
+            "SelectedRows."));
+      }
     } else {
       auto* var = BOOST_GET_CONST(VarDesc*, var_);
       return make_ddim(var->GetShape());
@@ -114,7 +120,14 @@ class CompatMetaTensor : public pten::MetaTensor {
   pten::DataType dtype() const override {
     if (is_runtime_) {
       auto* var = BOOST_GET_CONST(Variable*, var_);
-      return var->Get<LoDTensor>().dtype();
+      if (var->IsType<LoDTensor>()) {
+        return var->Get<LoDTensor>().dtype();
+      } else if (var->IsType<pten::SelectedRows>()) {
+        return var->Get<pten::SelectedRows>().dtype();
+      } else {
+        PADDLE_THROW(platform::errors::Unimplemented(
+            "Currently, dtype can only be obtained from DenseTensor or "
+            "SelectedRows."));
+      }
     } else {
       auto* var = BOOST_GET_CONST(VarDesc*, var_);
       return pten::TransToPtenDataType(var->GetDataType());
@@ -135,10 +148,16 @@ class CompatMetaTensor : public pten::MetaTensor {
   void set_dims(const DDim& dims) override {
     if (is_runtime_) {
       auto* var = BOOST_GET(Variable*, var_);
-      LoDTensor* tensor = var->GetMutable<LoDTensor>();
-      pten::DenseTensorUtils::GetMutableMeta(
-          static_cast<pten::DenseTensor*>(tensor))
-          ->dims = dims;
+      if (var->IsType<LoDTensor>()) {
+        auto* tensor = var->GetMutable<LoDTensor>();
+        pten::DenseTensorUtils::GetMutableMeta(tensor)->dims = dims;
+      } else if (var->IsType<pten::SelectedRows>()) {
+        auto* tensor = var->GetMutable<pten::SelectedRows>()->mutable_value();
+        pten::DenseTensorUtils::GetMutableMeta(tensor)->dims = dims;
+      } else {
+        PADDLE_THROW(platform::errors::Unimplemented(
+            "Currently, dims can only be set for DenseTensor or "
+            "SelectedRows."));
+      }
     } else {
       auto* var = BOOST_GET(VarDesc*, var_);
       var->SetShape(vectorize(dims));
@@ -148,10 +167,16 @@ class CompatMetaTensor : public pten::MetaTensor {
   void set_dtype(pten::DataType dtype) override {
     if (is_runtime_) {
       auto* var = BOOST_GET(Variable*, var_);
-      LoDTensor* tensor = var->GetMutable<LoDTensor>();
-      pten::DenseTensorUtils::GetMutableMeta(
-          static_cast<pten::DenseTensor*>(tensor))
-          ->dtype = dtype;
+      if (var->IsType<LoDTensor>()) {
+        auto* tensor = var->GetMutable<LoDTensor>();
+        pten::DenseTensorUtils::GetMutableMeta(tensor)->dtype = dtype;
+      } else if (var->IsType<pten::SelectedRows>()) {
+        auto* tensor = var->GetMutable<pten::SelectedRows>()->mutable_value();
+        pten::DenseTensorUtils::GetMutableMeta(tensor)->dtype = dtype;
+      } else {
+        PADDLE_THROW(platform::errors::Unimplemented(
+            "Currently, dtype can only be set for DenseTensor or "
+            "SelectedRows."));
+      }
     } else {
       auto* var = BOOST_GET(VarDesc*, var_);
       var->SetDataType(pten::TransToProtoVarType(dtype));
@@ -174,11 +199,14 @@ class CompatMetaTensor : public pten::MetaTensor {
   void share_lod(const MetaTensor& meta_tensor) override {
     if (is_runtime_) {
       auto* var = BOOST_GET(Variable*, var_);
-      LoDTensor* tensor = var->GetMutable<LoDTensor>();
-      pten::DenseTensorUtils::GetMutableMeta(
-          static_cast<pten::DenseTensor*>(tensor))
-          ->lod =
-          static_cast<const CompatMetaTensor&>(meta_tensor).GetRuntimeLoD();
+      if (var->IsType<LoDTensor>()) {
+        auto* tensor = var->GetMutable<LoDTensor>();
+        pten::DenseTensorUtils::GetMutableMeta(tensor)->lod =
+            static_cast<const CompatMetaTensor&>(meta_tensor).GetRuntimeLoD();
+      } else {
+        // NOTE(chenweihang): do nothing
+        // only LoDTensor needs to share lod
+      }
     } else {
       auto* var = BOOST_GET(VarDesc*, var_);
       var->SetLoDLevel(static_cast<const CompatMetaTensor&>(meta_tensor)
@@ -191,7 +219,21 @@ class CompatMetaTensor : public pten::MetaTensor {
     set_dtype(meta_tensor.dtype());
     // VarDesc doesn't contain layout, so we cannot share layout
     // set_layout(meta_tensor.layout());
+
+    // special case 1: share lod of LoDTensor
     share_lod(meta_tensor);
+
+    // special case 2: share height and rows of SelectedRows in runtime
+    if (is_runtime_) {
+      auto* var = BOOST_GET(Variable*, var_);
+      if (var->IsType<pten::SelectedRows>()) {
+        auto* selected_rows = var->GetMutable<pten::SelectedRows>();
+        auto& input_selected_rows =
+            static_cast<const CompatMetaTensor&>(meta_tensor).GetSelectedRows();
+        selected_rows->set_rows(input_selected_rows.rows());
+        selected_rows->set_height(input_selected_rows.height());
+      }
+    }
   }
 
  private:
@@ -199,11 +241,23 @@
     auto* var = BOOST_GET_CONST(Variable*, var_);
     return var->Get<LoDTensor>().lod();
   }
+
   int32_t GetCompileTimeLoD() const {
     auto* var = BOOST_GET_CONST(VarDesc*, var_);
     return var->GetLoDLevel();
   }
+
+  const pten::SelectedRows& GetSelectedRows() const {
+    PADDLE_ENFORCE_EQ(is_runtime_, true,
+                      platform::errors::Unavailable(
+                          "SelectedRows can only be obtained from MetaTensor "
+                          "at runtime."));
+    auto* var = BOOST_GET_CONST(Variable*, var_);
+    PADDLE_ENFORCE_EQ(var->IsType<pten::SelectedRows>(), true,
+                      platform::errors::Unavailable(
+                          "The Tensor in MetaTensor is not SelectedRows."));
+    return var->Get<pten::SelectedRows>();
+  }
+
   InferShapeVarPtr var_;
   bool is_runtime_;
 };
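All of the CompatMetaTensor changes above follow a single pattern: a SelectedRows variable exposes the metadata of its underlying value DenseTensor, while its row list and height are shared separately at runtime. A minimal standalone sketch of that dispatch idiom, using hypothetical stand-in types rather than Paddle's `Variable`/`BOOST_GET` machinery:

```cpp
#include <cassert>
#include <stdexcept>
#include <variant>
#include <vector>

struct DenseTensor { std::vector<int64_t> dims; };
// SelectedRows wraps a value DenseTensor plus row/height bookkeeping.
struct SelectedRows {
  std::vector<int64_t> rows;
  int64_t height = 0;
  DenseTensor value;
};

using Variable = std::variant<DenseTensor, SelectedRows>;

// Mirrors CompatMetaTensor::dims(): dispatch on the held type, fall through
// to the value tensor for SelectedRows, and reject anything else.
std::vector<int64_t> dims(const Variable& var) {
  if (auto* t = std::get_if<DenseTensor>(&var)) return t->dims;
  if (auto* sr = std::get_if<SelectedRows>(&var)) return sr->value.dims;
  throw std::runtime_error("only DenseTensor or SelectedRows supported");
}

int main() {
  Variable v = SelectedRows{{0, 2}, 4, DenseTensor{{2, 8}}};
  assert(dims(v) == (std::vector<int64_t>{2, 8}));
}
```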
diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/test_reference_count_pass_last_lived_ops.cc b/paddle/fluid/framework/ir/memory_optimize_pass/test_reference_count_pass_last_lived_ops.cc
index f410171f9989654065fdc78281b94075d6c2c94e..746d90cef917cdb8c4740adf7dff3438c2ca1249 100644
--- a/paddle/fluid/framework/ir/memory_optimize_pass/test_reference_count_pass_last_lived_ops.cc
+++ b/paddle/fluid/framework/ir/memory_optimize_pass/test_reference_count_pass_last_lived_ops.cc
@@ -21,7 +21,7 @@
 #include "paddle/fluid/framework/parallel_executor.h"
 #include "paddle/fluid/framework/program_desc.h"
 
-USE_OP(scale);
+USE_OP_ITSELF(scale);
 USE_OP(elementwise_mul);
 USE_OP(elementwise_add);
 USE_OP(elementwise_add_grad);
diff --git a/paddle/fluid/framework/new_executor/interpretercore.cc b/paddle/fluid/framework/new_executor/interpretercore.cc
index ef9c5b921349266a56247dcced4066e71ebd15d6..53cc741d25664b175c12ee13ab2dc0c8330e28bc 100644
--- a/paddle/fluid/framework/new_executor/interpretercore.cc
+++ b/paddle/fluid/framework/new_executor/interpretercore.cc
@@ -393,7 +393,8 @@ void InterpreterCore::RunInstruction(const Instruction& instr_node) {
     platform::RecordEvent infershape_event("InferShape");
     // If it is OperatorBase, InferShape does nothing.
     if (op_with_kernel != nullptr)
-      op_with_kernel->InferShape(instr_node.InnerInferShapeContext().get());
+      op_with_kernel->Info().infer_shape_(
+          instr_node.InnerInferShapeContext().get());
   }
 
   if (op_with_kernel != nullptr &&
diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc
index 50c315bf03848966d196dd86d2122b346c9e88d4..5ab14a1daba226f02e92db4d0d172bf2ac549646 100644
--- a/paddle/fluid/framework/operator.cc
+++ b/paddle/fluid/framework/operator.cc
@@ -1998,16 +1998,17 @@ void OperatorWithKernel::BuildPtenKernelContext(
     size_t end_idx = start_idx + ins_vector.size();
 
     for (size_t offset = 0; offset < ins_vector.size(); ++offset) {
-      const framework::Tensor* tensor_in = nullptr;
+      const pten::TensorBase* tensor_in = nullptr;
       auto* var = ins_vector[offset];
-      if (var->IsType<framework::LoDTensor>()) {
-        tensor_in = &(var->Get<framework::LoDTensor>());
+      if (var->IsType<pten::DenseTensor>()) {
+        tensor_in = &(var->Get<pten::DenseTensor>());
+      } else if (var->IsType<pten::SelectedRows>()) {
+        tensor_in = &(var->Get<pten::SelectedRows>());
       } else {
         PADDLE_THROW(platform::errors::Unimplemented(
             "Unsupported input `%s` type when calling pt kernel.",
             framework::ToTypeName(var->Type())));
-      }  // TODO(zyfncg): Add support for SelectedRows
-
+      }
       pt_kernel_context->EmplaceBackInputWithoutSetRange(tensor_in);
     }
     pt_kernel_context->AssignInputRange(std::make_pair(start_idx, end_idx), i);
@@ -2021,17 +2022,20 @@ void OperatorWithKernel::BuildPtenKernelContext(
     size_t end_idx = start_idx + outs_vector.size();
 
     for (size_t offset = 0; offset < outs_vector.size(); ++offset) {
-      framework::Tensor* tensor_out = nullptr;
+      pten::TensorBase* tensor_out = nullptr;
       auto* var = outs_vector[offset];
-      if (var->template IsType<framework::LoDTensor>()) {
-        tensor_out = var->template GetMutable<framework::LoDTensor>();
+      if (var->template IsType<pten::DenseTensor>()) {
+        tensor_out = var->template GetMutable<pten::DenseTensor>();
+      } else if (var->template IsType<pten::SelectedRows>()) {
+        tensor_out = var->template GetMutable<pten::SelectedRows>();
       } else {
         PADDLE_THROW(platform::errors::Unimplemented(
             "Unsupported output `%s` type when calling pt kernel.",
             framework::ToTypeName(var->Type())));
-      }  // TODO(zyfncg): Add support for SelectedRows
+      }
 
-      experimental::ResetTensorByArgDef(tensor_out, output_defs.at(i));
+      experimental::ResetTensorDtypeAndLayoutByArgDef(tensor_out,
+                                                      output_defs.at(i));
       SetAllocationForOutputTenosr(
           tensor_out, pten::TransToFluidPlace(output_defs.at(i).backend));
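Both kernel-context builders now traffic in `pten::TensorBase*`, and the helpers further down (`SetAllocationForOutputTenosr`, `ResetTensorDtypeAndLayoutByArgDef`) recover the concrete type with LLVM-style RTTI (`classof`) rather than `dynamic_cast`. A minimal sketch of that idiom with hypothetical stand-in types; Paddle's real hierarchy keys off its own type-info mechanism:

```cpp
#include <cstdio>

// LLVM-style RTTI: each concrete tensor kind answers classof() by
// inspecting a cheap tag stored in the base, avoiding dynamic_cast.
struct TensorBase {
  enum class Kind { kDense, kSelectedRows } kind;
  explicit TensorBase(Kind k) : kind(k) {}
  virtual ~TensorBase() = default;
};

struct DenseTensor : TensorBase {
  DenseTensor() : TensorBase(Kind::kDense) {}
  static bool classof(const TensorBase* t) { return t->kind == Kind::kDense; }
};

struct SelectedRows : TensorBase {
  SelectedRows() : TensorBase(Kind::kSelectedRows) {}
  static bool classof(const TensorBase* t) {
    return t->kind == Kind::kSelectedRows;
  }
};

void Describe(const TensorBase* t) {
  if (DenseTensor::classof(t)) {
    std::puts("dense output");
  } else if (SelectedRows::classof(t)) {
    std::puts("selected-rows output");  // handled through its value tensor
  }
}

int main() {
  DenseTensor d;
  SelectedRows s;
  Describe(&d);
  Describe(&s);
}
```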
-                         ? 0
-                         : experimental::SizeOf(tensor->dtype());
-    int64_t numels = product(tensor->dims());
-    numels = numels < 0 ? 0 : numels;
-    auto tmp_allocation_ptr = memory::Alloc(place, numels * dtype_size);
-    auto& deleter = tmp_allocation_ptr.get_deleter();
-    auto* allocation_ptr = tmp_allocation_ptr.release();
-    auto shared_allocation =
-        std::shared_ptr<pten::Allocation>(allocation_ptr, deleter);
-
-    tensor->ResetHolder(shared_allocation);
+  if (pten::DenseTensor::classof(tensor)) {
+    auto* dense_tensor = static_cast<pten::DenseTensor*>(tensor);
+    if (!dense_tensor->IsInitialized() || !(dense_tensor->place() == place)) {
+      SetAllocationForUninitializedDenseTensor(dense_tensor, place);
+    }
+  } else if (pten::SelectedRows::classof(tensor)) {
+    auto* selected_rows = static_cast<pten::SelectedRows*>(tensor);
+    if (!selected_rows->value().IsInitialized() ||
+        !(selected_rows->place() == place)) {
+      SetAllocationForUninitializedDenseTensor(selected_rows->mutable_value(),
+                                               place);
+    }
+  } else {
+    PADDLE_THROW(platform::errors::Unimplemented(
+        "Unsupported tensor type is received when setting allocation for "
+        "output tensor."));
   }
 }
diff --git a/paddle/fluid/framework/pten_utils.h b/paddle/fluid/framework/pten_utils.h
index ae0388079d280a3c3ffa2637f6114a58141387ed..44f5ee9f9d8c0c63dcc09947c6e23b786fb4932b 100644
--- a/paddle/fluid/framework/pten_utils.h
+++ b/paddle/fluid/framework/pten_utils.h
@@ -63,7 +63,7 @@ class KernelArgsNameMaker {
 
 void InitDefaultKernelSignatureMap();
 
-void SetAllocationForOutputTenosr(pten::DenseTensor* tensor,
+void SetAllocationForOutputTenosr(pten::TensorBase* tensor,
                                   const platform::Place& place);
 
 // TODO(Wilber): support others device context.
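The helper factored out above sizes the buffer as `numel * SizeOf(dtype)`, treating an `UNDEFINED` dtype as zero bytes and clamping a negative element count (which a `-1` placeholder dim would produce) to zero. A compilable sketch of just that sizing rule, with a plain `malloc` standing in for `memory::Alloc` (an assumption made purely for illustration):

```cpp
#include <cstdint>
#include <cstdlib>
#include <functional>
#include <memory>
#include <numeric>
#include <vector>

// Sketch of the sizing rule in SetAllocationForUninitializedDenseTensor:
// bytes = max(numel, 0) * dtype_size; UNDEFINED dtype contributes 0 bytes.
std::shared_ptr<void> AllocateForDims(const std::vector<int64_t>& dims,
                                      int dtype_size /* 0 if UNDEFINED */) {
  int64_t numel = std::accumulate(dims.begin(), dims.end(), int64_t{1},
                                  std::multiplies<int64_t>());
  if (numel < 0) numel = 0;  // a -1 placeholder dim would poison the product
  return std::shared_ptr<void>(std::malloc(numel * dtype_size), std::free);
}

int main() {
  auto holder = AllocateForDims({2, 3, 4}, /*dtype_size=*/4);  // 96 bytes
  return holder ? 0 : 1;
}
```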
            framework::ToTypeName(var->Type())));
-      }  // TODO(zyfncg): Add support for SelectedRows
+      }
 
-      experimental::ResetTensorByArgDef(tensor_out, output_defs.at(i));
+      experimental::ResetTensorDtypeAndLayoutByArgDef(tensor_out,
+                                                      output_defs.at(i));
       framework::SetAllocationForOutputTenosr(
           tensor_out, pten::TransToFluidPlace(output_defs.at(i).backend));
diff --git a/paddle/fluid/operators/pscore/heter_listen_and_server_test.cc b/paddle/fluid/operators/pscore/heter_listen_and_server_test.cc
index a195b8dee3c2f5580be5f7c094194576b9eccb88..ddc6287011bcff9f12065b005faa315ffeec948a 100644
--- a/paddle/fluid/operators/pscore/heter_listen_and_server_test.cc
+++ b/paddle/fluid/operators/pscore/heter_listen_and_server_test.cc
@@ -33,7 +33,7 @@ using MultiVarMsg = ::paddle::distributed::MultiVariableMessage;
 using VarMsg = ::paddle::distributed::VariableMessage;
 DECLARE_double(eager_delete_tensor_gb);
 
-USE_OP(scale);
+USE_OP_ITSELF(scale);
 USE_NO_KERNEL_OP(heter_listen_and_serv);
 
 framework::BlockDesc* AppendSendAndRecvBlock(framework::ProgramDesc* program) {
diff --git a/paddle/fluid/operators/pscore/heter_server_test.cc b/paddle/fluid/operators/pscore/heter_server_test.cc
index 7914e9d9a1058ab15a08e3b0dee8725e7a74bb38..f7e8ae1c09d031d761d43481aa2a955f683cf956 100644
--- a/paddle/fluid/operators/pscore/heter_server_test.cc
+++ b/paddle/fluid/operators/pscore/heter_server_test.cc
@@ -29,7 +29,7 @@ namespace distributed = paddle::distributed;
 using MultiVarMsg = ::paddle::distributed::MultiVariableMessage;
 using VarMsg = ::paddle::distributed::VariableMessage;
 
-USE_OP(scale);
+USE_OP_ITSELF(scale);
 
 std::shared_ptr<distributed::HeterServer> b_rpc_service;
diff --git a/paddle/fluid/operators/pscore/send_and_recv_op_cpu_test.cc b/paddle/fluid/operators/pscore/send_and_recv_op_cpu_test.cc
index 07fe44601ca08831a9e4372d04c097a8e56644f2..077eecb72a96427c2f99c5e66739820c8a519d60 100644
--- a/paddle/fluid/operators/pscore/send_and_recv_op_cpu_test.cc
+++ b/paddle/fluid/operators/pscore/send_and_recv_op_cpu_test.cc
@@ -31,7 +31,7 @@ namespace distributed = paddle::distributed;
 using MultiVarMsg = ::paddle::distributed::MultiVariableMessage;
 using VarMsg = ::paddle::distributed::VariableMessage;
 
-USE_OP(scale);
+USE_OP_ITSELF(scale);
 USE_OP(send_and_recv);
 
 std::shared_ptr<distributed::HeterServer> b_rpc_service;
diff --git a/paddle/fluid/operators/pscore/send_and_recv_op_gpu_test.cc b/paddle/fluid/operators/pscore/send_and_recv_op_gpu_test.cc
index 2c701bdae76010cc1b1e1eb341f30753269269bc..b7049019bc4bef6f8e5c392c4e36735421108d1a 100644
--- a/paddle/fluid/operators/pscore/send_and_recv_op_gpu_test.cc
+++ b/paddle/fluid/operators/pscore/send_and_recv_op_gpu_test.cc
@@ -35,7 +35,7 @@ namespace memory = paddle::memory;
 using MultiVarMsg = ::paddle::distributed::MultiVariableMessage;
 using VarMsg = ::paddle::distributed::VariableMessage;
 
-USE_OP(scale);
+USE_OP_ITSELF(scale);
 USE_OP(send_and_recv);
 
 std::shared_ptr<distributed::HeterServer> b_rpc_service2;
diff --git a/paddle/fluid/operators/scale_op.cc b/paddle/fluid/operators/scale_op.cc
index 912af2c85b2cfa8fd4372101285241699633503b..ccf3afe29c73e182bfb6f2b8ab5d642888102158 100644
--- a/paddle/fluid/operators/scale_op.cc
+++ b/paddle/fluid/operators/scale_op.cc
@@ -12,49 +12,19 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "paddle/fluid/operators/scale_op.h"
 #include <string>
+
+#include "paddle/fluid/framework/infershape_utils.h"
+#include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/platform/float16.h"
-
-namespace paddle {
-namespace framework {
-class InferShapeContext;
-class OpDesc;
-}  // namespace framework
-namespace imperative {
-class OpBase;
-}  // namespace imperative
-namespace platform {
-class CPUDeviceContext;
-}  // namespace platform
-}  // namespace paddle
+#include "paddle/pten/core/infermeta_utils.h"
+#include "paddle/pten/infermeta/unary.h"
 
 namespace paddle {
 namespace operators {
 
 class ScaleOp : public framework::OperatorWithKernel {
  public:
-  ScaleOp(const std::string &type, const framework::VariableNameMap &inputs,
-          const framework::VariableNameMap &outputs,
-          const framework::AttributeMap &attrs)
-      : OperatorWithKernel(type, inputs, outputs, attrs) {}
-
-  void InferShape(framework::InferShapeContext *ctx) const override {
-    OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "scale");
-    OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "scale");
-
-    if (ctx->IsRuntime() && ctx->HasInput("ScaleTensor")) {
-      auto scale = ctx->Inputs("ScaleTensor");
-      PADDLE_ENFORCE_EQ(scale.size(), 1,
-                        platform::errors::InvalidArgument(
-                            "Input(ScaleTensor) size must be 1, "
-                            "but received size is %d.",
-                            scale.size()));
-    }
-
-    ctx->SetOutputDim("Out", ctx->GetInputDim("X"));
-    ctx->ShareLoD("X", /*->*/ "Out");
-  }
+  using framework::OperatorWithKernel::OperatorWithKernel;
 
   framework::OpKernelType GetExpectedKernelType(
       const framework::ExecutionContext &ctx) const override {
@@ -150,32 +120,10 @@ DECLARE_INPLACE_OP_INFERER(ScaleOpInplaceInferer, {"X", "Out"});
 
 namespace ops = paddle::operators;
 
+DELCARE_INFER_SHAPE_FUNCTOR(scale, ScaleInferShapeFunctor,
+                            PT_INFER_META(pten::UnchangedInferMeta));
 REGISTER_OPERATOR(scale, ops::ScaleOp, ops::ScaleOpMaker,
                   ops::ScaleGradMaker<paddle::framework::OpDesc>,
                   ops::ScaleGradMaker<paddle::imperative::OpBase>,
-                  ops::ScaleOpVarTypeInference, ops::ScaleOpInplaceInferer);
-REGISTER_OP_CPU_KERNEL(
-    scale, ops::ScaleKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::ScaleKernel<paddle::platform::CPUDeviceContext, double>,
-    ops::ScaleKernel<paddle::platform::CPUDeviceContext,
-                     paddle::platform::bfloat16>,
-    ops::ScaleKernel<paddle::platform::CPUDeviceContext, uint8_t>,
-    ops::ScaleKernel<paddle::platform::CPUDeviceContext, int8_t>,
-    ops::ScaleKernel<paddle::platform::CPUDeviceContext, int16_t>,
-    ops::ScaleKernel<paddle::platform::CPUDeviceContext, int>,
-    ops::ScaleKernel<paddle::platform::CPUDeviceContext, int64_t>);
-
-REGISTER_OP_CUDA_KERNEL(
-    scale,
-    paddle::operators::ScaleKernel<paddle::platform::CUDADeviceContext, float>,
-    paddle::operators::ScaleKernel<paddle::platform::CUDADeviceContext,
-                                   double>,
-    paddle::operators::ScaleKernel<paddle::platform::CUDADeviceContext,
-                                   paddle::platform::float16>,
-    paddle::operators::ScaleKernel<paddle::platform::CUDADeviceContext,
-                                   uint8_t>,
-    paddle::operators::ScaleKernel<paddle::platform::CUDADeviceContext,
-                                   int8_t>,
-    paddle::operators::ScaleKernel<paddle::platform::CUDADeviceContext,
-                                   int16_t>,
-    paddle::operators::ScaleKernel<paddle::platform::CUDADeviceContext, int>,
-    paddle::operators::ScaleKernel<paddle::platform::CUDADeviceContext,
-                                   int64_t>);
+                  ScaleInferShapeFunctor, ops::ScaleOpVarTypeInference,
+                  ops::ScaleOpInplaceInferer);
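With the hand-written `InferShape` gone, shape inference for scale is bound through an infer-shape functor (`DELCARE_INFER_SHAPE_FUNCTOR` is the macro's spelling at this point in the codebase) that forwards to `pten::UnchangedInferMeta`, so the logic lives once in pten for both fluid and pten callers. A minimal sketch of what an "unchanged" infer-meta does, using simplified stand-in types rather than pten's real signatures:

```cpp
#include <vector>

// Simplified stand-in for pten::MetaTensor: just dims plus a lod level.
struct MetaTensor {
  std::vector<int64_t> dims;
  int lod_level = 0;
};

// Sketch of pten::UnchangedInferMeta: the output mirrors the input's
// metadata, which is exactly what scale's old InferShape did with
// SetOutputDim("Out", GetInputDim("X")) and ShareLoD("X", "Out").
void UnchangedInferMeta(const MetaTensor& x, MetaTensor* out) {
  out->dims = x.dims;
  out->lod_level = x.lod_level;
}

int main() {
  MetaTensor x{{16, 128}, 1}, out;
  UnchangedInferMeta(x, &out);
  return out.dims == x.dims ? 0 : 1;
}
```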
diff --git a/paddle/fluid/operators/scale_op.h b/paddle/fluid/operators/scale_op.h
deleted file mode 100644
index 8ce0b7984cc0512b630d03d4ec2205d096c0c826..0000000000000000000000000000000000000000
--- a/paddle/fluid/operators/scale_op.h
+++ /dev/null
@@ -1,80 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-
-#include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/framework/pten_utils.h"
-
-// only can include the headers in paddle/top/api dirs
-#include "paddle/pten/api/lib/utils/tensor_utils.h"
-#include "paddle/pten/kernels/scale_kernel.h"
-
-namespace paddle {
-namespace operators {
-
-template <typename T>
-static inline T GetAttrFromTensor(const framework::Tensor* tensor) {
-  const auto* tensor_data = tensor->data<T>();
-  framework::Tensor cpu_tensor;
-  if (platform::is_gpu_place(tensor->place()) ||
-      platform::is_npu_place(tensor->place())) {
-    paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(),
-                                      &cpu_tensor);
-    tensor_data = cpu_tensor.data<T>();
-  }
-  return tensor_data[0];
-}
-
-// See Note [ Why still keep the original kernel implementation? ]
-template <typename DeviceContext, typename T>
-class ScaleKernel : public framework::OpKernel<T> {
- public:
-  virtual void Compute(const framework::ExecutionContext& ctx) const {
-    auto* in_var = ctx.InputVar("X");
-
-    auto bias = ctx.Attr<float>("bias");
-    auto bias_after_scale = ctx.Attr<bool>("bias_after_scale");
-    auto scale = ctx.Attr<float>("scale");
-    auto* out_var = ctx.OutputVar("Out");
-
-    if (ctx.HasInput("ScaleTensor")) {
-      auto* scale_tensor = ctx.Input<framework::Tensor>("ScaleTensor");
-      scale = static_cast<float>(GetAttrFromTensor<T>(scale_tensor));
-    }
-
-    auto* in = framework::GetLoDTensorOrSelectedRowsValueFromVar(*in_var);
-    auto* out =
-        framework::GetMutableLoDTensorOrSelectedRowsValueFromVar(out_var);
-    out->mutable_data<T>(in->place());
-    auto& dev_ctx = ctx.device_context<DeviceContext>();
-
-    // call new kernel
-    if (in_var->IsType<pten::SelectedRows>()) {
-      pten::ScaleSR<T>(
-          static_cast<const typename paddle::framework::ConvertToPtenContext<
-              DeviceContext>::TYPE&>(dev_ctx),
-          in_var->Get<pten::SelectedRows>(), scale, bias, bias_after_scale,
-          out_var->GetMutable<pten::SelectedRows>());
-    } else {
-      pten::ScaleKernel<T>(
-          static_cast<const typename paddle::framework::ConvertToPtenContext<
-              DeviceContext>::TYPE&>(dev_ctx),
-          *in, scale, bias, bias_after_scale, out);
-    }
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
diff --git a/paddle/fluid/operators/scale_op_mlu.cc b/paddle/fluid/operators/scale_op_mlu.cc
index 1e1187845ce477f939e8cf21650076c875861f3d..d027ac0d3317f0495462e3ec167b94ab89608382 100644
--- a/paddle/fluid/operators/scale_op_mlu.cc
+++ b/paddle/fluid/operators/scale_op_mlu.cc
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "paddle/fluid/operators/scale_op.h"
+#include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/mlu/mlu_baseop.h"
 
 namespace paddle {
diff --git a/paddle/fluid/operators/scale_op_npu.cc b/paddle/fluid/operators/scale_op_npu.cc
index 7d84d56c2b3b870977215a90799062957a36d535..807ad7509e57389bfd47a25ae48c3d72f6a47d28 100644
--- a/paddle/fluid/operators/scale_op_npu.cc
+++ b/paddle/fluid/operators/scale_op_npu.cc
@@ -12,12 +12,25 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "paddle/fluid/operators/scale_op.h"
+#include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/platform/device/npu/npu_op_runner.h"
 
 namespace paddle {
 namespace operators {
 
+template <typename T>
+static inline T GetAttrFromTensor(const framework::Tensor* tensor) {
+  const auto* tensor_data = tensor->data<T>();
+  framework::Tensor cpu_tensor;
+  if (platform::is_gpu_place(tensor->place()) ||
+      platform::is_npu_place(tensor->place())) {
+    paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(),
+                                      &cpu_tensor);
+    tensor_data = cpu_tensor.data<T>();
+  }
+  return tensor_data[0];
+}
+
 template <typename T>
 class ScaleNPUKernel : public framework::OpKernel<T> {
  public:
diff --git a/paddle/fluid/operators/scale_op_xpu.cc b/paddle/fluid/operators/scale_op_xpu.cc
index 026a5dda89b5f07423090cb83bfb73e706cba7b7..2430007de0a5c25d17247ecc176366d20c3bad80 100644
--- a/paddle/fluid/operators/scale_op_xpu.cc
+++ b/paddle/fluid/operators/scale_op_xpu.cc
@@ -14,8 +14,8 @@ limitations under the License. */
 
 #ifdef PADDLE_WITH_XPU
 
-#include "paddle/fluid/operators/scale_op.h"
 #include <string>
+#include "paddle/fluid/framework/op_registry.h"
 #include "paddle/pten/kernels/scale_kernel.h"
 
 namespace paddle {
diff --git a/paddle/pten/api/lib/utils/tensor_utils.cc b/paddle/pten/api/lib/utils/tensor_utils.cc
index 8fdfc29540bfada65c1b32137eca018a293459b1..e4c20aa971b952ad3cdd0bb54c7ea446fc9998f2 100644
--- a/paddle/pten/api/lib/utils/tensor_utils.cc
+++ b/paddle/pten/api/lib/utils/tensor_utils.cc
@@ -198,12 +198,25 @@ pten::ScalarArray MakePtenScalarArrayFromVarList(
   return {vector_data};
 }
 
-void ResetTensorByArgDef(pten::DenseTensor* dst,
-                         const pten::TensorArgDef& arg_def) {
+void ResetTensorDtypeAndLayoutByArgDef(pten::TensorBase* dst,
+                                       const pten::TensorArgDef& arg_def) {
   VLOG(5) << "ResetTensor by TensorArgDef.";
-  auto* meta = pten::DenseTensorUtils::GetMutableMeta(dst);
-  meta->dtype = arg_def.dtype;
-  meta->layout = arg_def.layout;
+  if (pten::DenseTensor::classof(dst)) {
+    auto* dense_t = static_cast<pten::DenseTensor*>(dst);
+    auto* meta = pten::DenseTensorUtils::GetMutableMeta(dense_t);
+    meta->dtype = arg_def.dtype;
+    meta->layout = arg_def.layout;
+  } else if (pten::SelectedRows::classof(dst)) {
+    auto* selected_rows = static_cast<pten::SelectedRows*>(dst);
+    auto* meta =
+        pten::DenseTensorUtils::GetMutableMeta(selected_rows->mutable_value());
+    meta->dtype = arg_def.dtype;
+    meta->layout = arg_def.layout;
+  } else {
+    PADDLE_THROW(pten::errors::Unimplemented(
+        "Unsupported tensor type is received when resetting tensor dtype and "
+        "layout by argument definition."));
+  }
 }
 
 }  // namespace experimental
diff --git a/paddle/pten/api/lib/utils/tensor_utils.h b/paddle/pten/api/lib/utils/tensor_utils.h
index 1ffcc7d4d5b70a11f288efbd8a8c46a716fb42dc..1e2d8b74db84941f970c0613fad4fa488f813053 100644
--- a/paddle/pten/api/lib/utils/tensor_utils.h
+++ b/paddle/pten/api/lib/utils/tensor_utils.h
@@ -45,8 +45,8 @@ pten::ScalarArray MakePtenScalarArrayFromVar(
 pten::ScalarArray MakePtenScalarArrayFromVarList(
     const std::vector<framework::Variable*>& variable_list);
 
-void ResetTensorByArgDef(pten::DenseTensor* dst,
-                         const pten::TensorArgDef& arg_def);
+void ResetTensorDtypeAndLayoutByArgDef(pten::TensorBase* dst,
+                                       const pten::TensorArgDef& arg_def);
 
 }  // namespace experimental
 }  // namespace paddle
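Two related cleanups end here: `GetAttrFromTensor` moves into scale_op_npu.cc because the deleted scale_op.h was its only home and the NPU kernel its last user, and `ResetTensorByArgDef` is renamed to say what it actually touches (dtype and layout only). The pattern `GetAttrFromTensor` implements, reading a one-element attribute tensor that may live in device memory by staging it through the host, is sketched below with hypothetical stand-in types; in Paddle the staging step is `TensorCopySync`:

```cpp
#include <vector>

// Stand-in for a tensor that may live on device memory (hypothetical type).
struct Tensor {
  bool on_device = false;
  std::vector<float> data;  // host mirror used by this sketch
};

// Pretend device-to-host copy; TensorCopySync plays this role in Paddle.
Tensor CopyToHost(const Tensor& t) { return Tensor{false, t.data}; }

// Mirrors GetAttrFromTensor: a scalar attr such as ScaleTensor holds exactly
// one element, but it must be read from host-accessible memory.
float GetAttrFromTensor(const Tensor& t) {
  if (t.on_device) {
    Tensor cpu = CopyToHost(t);
    return cpu.data[0];
  }
  return t.data[0];
}

int main() {
  Tensor scale{true, {2.5f}};
  return GetAttrFromTensor(scale) == 2.5f ? 0 : 1;
}
```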
KernelContext::EmplaceBackOutputWithoutSetRange(
   outputs_.emplace_back(output);
 }
 
-void KernelContext::SetOutputWithoutSetRange(int index, TensorBase* output) {
-  outputs_.at(index) = output;
-}
-
 void KernelContext::EmplaceBackOutputs(
     paddle::SmallVector<TensorBase*> outputs) {
   int index = outputs_.size();
@@ -103,15 +99,4 @@ const std::pair<int, int>& KernelContext::OutputRangeAt(size_t idx) const {
   return output_range_.at(idx);
 }
 
-std::pair<int, int>& KernelContext::MutableInputRangeAt(size_t idx) {
-  return input_range_[idx];
-}
-
-std::pair<int, int>& KernelContext::MutableOutputRangeAt(size_t idx) {
-  return output_range_[idx];
-}
-
-// Temporary method: For compatible with fluid Tensor and improve performance
-// Only deal with DenseTensor now
-void KernelContext::ClearData() { attrs_.clear(); }
 }  // namespace pten
diff --git a/paddle/pten/core/kernel_context.h b/paddle/pten/core/kernel_context.h
index 876c98e3bcf6b4dea82dbb9d86b1aa28348a5d1c..25a1f2ed9bb16b82c48184d565954a812639598a 100644
--- a/paddle/pten/core/kernel_context.h
+++ b/paddle/pten/core/kernel_context.h
@@ -26,10 +26,8 @@
 
 namespace pten {
 
-using DeviceContext = pten::DeviceContext;
-
 /**
- * Note: KernelContext doesn't manage the life if DeviceContext and Tensor
+ * Note: KernelContext doesn't manage the life of DeviceContext and Tensor
  *
  * Note: KernelContext does not couple the concept of framework,
  * its constructor can only take the members it needs as parameters,
@@ -59,17 +57,15 @@ class KernelContext {
 
   void EmplaceBackOutputs(paddle::SmallVector<TensorBase*> outputs);
 
-  void SetOutputWithoutSetRange(int index, TensorBase* output);
-
   void EmplaceBackAttr(paddle::any attr);
 
   const std::pair<int, int>& InputRangeAt(size_t idx) const;
 
   const std::pair<int, int>& OutputRangeAt(size_t idx) const;
 
-  std::pair<int, int>& MutableInputRangeAt(size_t idx);
+  void AssignInputRange(std::pair<int, int>&& range, size_t idx);
 
-  std::pair<int, int>& MutableOutputRangeAt(size_t idx);
+  void AssignOutputRange(std::pair<int, int>&& range, size_t idx);
 
   template <typename TensorType>
   const TensorType& InputAt(size_t idx) const {
@@ -90,15 +86,11 @@ class KernelContext {
     for (size_t i = start; i < end; ++i) {
       auto t = static_cast<const TensorType*>(inputs_.at(i));
       v.emplace_back(*t);
-      inputs_.at(i) = nullptr;
+      inputs_[i] = nullptr;
     }
     return v;
   }
 
-  void AssignInputRange(std::pair<int, int>&& range, size_t idx);
-
-  void AssignOutputRange(std::pair<int, int>&& range, size_t idx);
-
   template <typename TensorType>
   TensorType* MutableOutputAt(size_t idx) {
     return static_cast<TensorType*>(outputs_.at(idx));
@@ -110,7 +102,6 @@ class KernelContext {
     for (size_t i = start; i < end; ++i) {
       v.emplace_back(static_cast<TensorType*>(outputs_.at(i)));
     }
-
     return v;
   }
 
@@ -124,25 +115,17 @@ class KernelContext {
     }
   }
 
-  // Temporary method: For compatible with fluid Tensor and improve performance
-  // Only deal with DenseTensor now
-  void ClearData();
-
   size_t InputsSize() const { return inputs_.size(); }
   size_t OutputsSize() const { return outputs_.size(); }
   size_t AttrsSize() const { return attrs_.size(); }
 
  private:
-  // DeviceContext base class
   DeviceContext* dev_ctx_;
 
-  // TODO(chenweihang): Tensor -> Tensor*, Tensor should by managed `scope`
-  // Note: can't use API Tensor here, the inference don't use this API Tensor
   paddle::SmallVector<const TensorBase*> inputs_;
   paddle::SmallVector<TensorBase*> outputs_;
   paddle::SmallVector<paddle::any> attrs_;
 
-  // Only contains input like list[Tensor] need `range`
   paddle::SmallVector<std::pair<int, int>> input_range_;
   paddle::SmallVector<std::pair<int, int>> output_range_;
 };
diff --git a/paddle/pten/ops/compat/scale_sig.cc b/paddle/pten/ops/compat/scale_sig.cc
index 5ce159a5d84c9faba760cd7b8605f2bd0734c53f..279be3df54a36b0707fe43478dc94721a5f18c1f 100644
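The KernelContext cleanup above removes dead mutators in favor of `AssignInputRange`/`AssignOutputRange`: tensors for one op argument are stored flat, and each argument owns a contiguous `[start, end)` slice of the flat list. A compressed sketch of how the builders in operator.cc and prepared_operator.h drive this API; the class below is a simplified stand-in, not pten's real `KernelContext`:

```cpp
#include <cassert>
#include <utility>
#include <vector>

// Simplified stand-in: inputs live in one flat vector, and input_range[i]
// records which slice of that vector belongs to op argument i.
struct KernelContext {
  std::vector<const void*> inputs;
  std::vector<std::pair<int, int>> input_range;

  void EmplaceBackInputWithoutSetRange(const void* t) { inputs.push_back(t); }
  void AssignInputRange(std::pair<int, int>&& range, size_t i) {
    if (input_range.size() <= i) input_range.resize(i + 1);
    input_range[i] = range;
  }
};

int main() {
  // One duplicable argument made of two tensors: it owns the range [0, 2).
  int a = 0, b = 1;
  KernelContext ctx;
  size_t start = ctx.inputs.size();
  ctx.EmplaceBackInputWithoutSetRange(&a);
  ctx.EmplaceBackInputWithoutSetRange(&b);
  ctx.AssignInputRange(std::make_pair(int(start), int(ctx.inputs.size())), 0);
  assert(ctx.input_range[0] == std::make_pair(0, 2));
}
```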
--- a/paddle/pten/ops/compat/scale_sig.cc
+++ b/paddle/pten/ops/compat/scale_sig.cc
@@ -16,9 +16,37 @@ limitations under the License. */
 
 namespace pten {
 
+/**
+ * Note [ Why does the ArgumentMapping function need to be so complicated? ]
+ *
+ * In order to meet the requirements of infrt, the function used to match Op
+ * and Kernel parameters needs to be placed in pten as a compatible
+ * component, and must not depend on fluid.
+ *
+ * Because infrt not only needs to dynamically call this argument mapping
+ * function at runtime, but also needs to statically declare all possible
+ * results of the function before running, without any runtime information.
+ *
+ * The infrt declaration looks like:
+ *
+ *   def PDKEL_Reshape_to_CPU : Pat<
+ *       (PD_ReshapeOp $x, $shape_tensor, $shape_attr),  // OpMaker arguments
+ *       (PDKEL_ReshapeKernelAttr $x, fn($shape_attr)>;  // Kernel arguments
+ *   def PDKEL_Reshape_to_CPU : Pat<
+ *       (PD_ReshapeOp $x, $shape_tensor, $shape_attr),
+ *       (PDKEL_ReshapeKernelAttr $x, fn($shape_tensor)>;
+ *
+ * Therefore, we need to write out each result of the argument mapping
+ * function explicitly, like `KernelSignature("full", {}, {"ShapeTensor",
+ * "value"}, {"Out"})`; a result cannot contain variables, only const char*
+ * strings.
+ *
+ * Infrt will parse all results before running to generate the static
+ * declarations above, which forces some of these functions to be written in
+ * a long-winded way; the complicated ones may run to hundreds of lines,
+ * which has certain side effects on the programming experience.
+ */
 KernelSignature ScaleOpArgumentMapping(const ArgumentMappingContext& ctx) {
   if (ctx.IsDenseTensorInput("X")) {
-    std::string scale_attr;
     if (ctx.HasInput("ScaleTensor")) {
       return KernelSignature(
           "scale", {"X"}, {"ScaleTensor", "bias", "bias_after_scale"}, {"Out"});
@@ -26,9 +54,19 @@ KernelSignature ScaleOpArgumentMapping(const ArgumentMappingContext& ctx) {
       return KernelSignature(
           "scale", {"X"}, {"scale", "bias", "bias_after_scale"}, {"Out"});
     }
+  } else if (ctx.IsSelectedRowsInput("X")) {
+    if (ctx.HasInput("ScaleTensor")) {
+      return KernelSignature("scale_sr",
+                             {"X"},
+                             {"ScaleTensor", "bias", "bias_after_scale"},
+                             {"Out"});
+    } else {
+      return KernelSignature(
+          "scale_sr", {"X"}, {"scale", "bias", "bias_after_scale"}, {"Out"});
+    }
+  } else {
+    return KernelSignature("unregistered", {}, {}, {});
   }
-  // TODO(chenweihang): support other cases after selected rows added
-  return KernelSignature("scale.unregistered", {}, {}, {});
 }
 
 }  // namespace pten
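To make the payoff of the new branch concrete: a SelectedRows input now maps to the `scale_sr` pten kernel instead of falling through to `unregistered`, and the attribute slot switches between the `ScaleTensor` input and the `scale` attribute. A minimal sketch of that mapping logic in isolation, with a stand-in context type rather than pten's `ArgumentMappingContext`:

```cpp
#include <cassert>
#include <string>

// Stand-in for the three predicates ScaleOpArgumentMapping consults.
struct Ctx {
  bool dense_input;
  bool selected_rows_input;
  bool has_scale_tensor;
};

// Mirrors the branch structure added above: DenseTensor -> "scale",
// SelectedRows -> "scale_sr", anything else -> "unregistered".
std::string MapScaleKernel(const Ctx& ctx) {
  if (ctx.dense_input) return "scale";
  if (ctx.selected_rows_input) return "scale_sr";
  return "unregistered";
}

// The attribute slot mirrors the second KernelSignature argument list.
std::string AttrSlot(const Ctx& ctx) {
  return ctx.has_scale_tensor ? "ScaleTensor" : "scale";
}

int main() {
  assert(MapScaleKernel({false, true, false}) == "scale_sr");
  assert(MapScaleKernel({true, false, true}) == "scale");
  assert(MapScaleKernel({false, false, false}) == "unregistered");
  assert(AttrSlot({true, false, true}) == "ScaleTensor");
}
```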