Unverified commit 41eb2595, authored by Chen Weihang, committed by GitHub

[PTen] Support SelectedRows in execution and remove scale OpKernel and InferShape (#39351)

* adapt selectedrows in execution

* impl selected rows branch

* support selectedrow in infershape utils

* fix device compile failed

* fix new exe test failed

* revert some changes
Parent commit: 42910361
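[Editor's note] The whole commit revolves around pten::SelectedRows, so a minimal sketch of that format helps when reading the hunks below. This is an illustrative model with assumed field names, not pten's actual class definition:

    #include <cstdint>
    #include <vector>

    // Illustrative model only. SelectedRows stores a row-sparse tensor: a dense
    // value block holding just the touched rows, plus the index of each stored
    // row and the logical height of the full dense-equivalent tensor.
    struct SelectedRowsModel {
      std::vector<int64_t> rows;   // e.g. {3, 7}: which logical rows have data
      int64_t height = 0;          // logical row count, e.g. 10
      std::vector<float> value;    // rows.size() x width payload, flattened
    };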
......@@ -175,7 +175,7 @@ TEST(Benchmark, EagerIntermediateMLPCPU) {
}
}
-USE_OP(scale);
+USE_OP_ITSELF(scale);
USE_OP(elementwise_add);
USE_OP(matmul_v2);
USE_OP(reduce_sum);
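[Editor's note] The repeated USE_OP(scale) → USE_OP_ITSELF(scale) edits in the test files follow from the kernel removal later in this commit: in Paddle's op_registry, USE_OP references both the operator and its CPU OpKernel registration, so it no longer links once the fluid scale kernel is gone. Roughly (simplified from op_registry.h):

    // #define USE_OP(op_type)    \
    //   USE_OP_ITSELF(op_type);  \
    //   USE_OP_DEVICE_KERNEL(op_type, CPU)
    //
    // After this commit the fluid scale OpKernel symbol no longer exists, so
    // tests reference only the operator registration itself:
    USE_OP_ITSELF(scale);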
......@@ -185,7 +185,7 @@ TEST(Benchmark, EagerIntermediateMLPCUDA) {
}
}
-USE_OP(scale);
+USE_OP_ITSELF(scale);
USE_OP(matmul_v2);
USE_OP(reduce_sum);
USE_OP(reduce_sum_grad);
......
......@@ -211,7 +211,7 @@ TEST(Benchmark, FluidMLPCPU) {
} // namespace imperative
} // namespace paddle
-USE_OP(scale);
+USE_OP_ITSELF(scale);
USE_OP(elementwise_add);
USE_OP(matmul_v2);
USE_OP(reduce_sum);
......@@ -245,7 +245,7 @@ TEST(Benchmark, FluidMLPCUDA) {
} // namespace imperative
} // namespace paddle
-USE_OP(scale);
+USE_OP_ITSELF(scale);
USE_OP(matmul_v2);
USE_OP(reduce_sum);
USE_OP(reduce_sum_grad);
......
......@@ -26,7 +26,7 @@
#define _LINUX
#endif
-USE_OP(scale);
+USE_OP_ITSELF(scale);
USE_NO_KERNEL_OP(heter_listen_and_serv);
namespace paddle {
namespace framework {
......
......@@ -78,7 +78,6 @@ class InferShapeArgumentMappingContext : public pten::ArgumentMappingContext {
const InferShapeContext& ctx_;
};
-// TODO(chenweihang): Support SelectedRows later
// TODO(chenweihang): Support TensorArray later
class CompatMetaTensor : public pten::MetaTensor {
public:
......@@ -104,7 +103,14 @@ class CompatMetaTensor : public pten::MetaTensor {
DDim dims() const override {
if (is_runtime_) {
auto* var = BOOST_GET_CONST(Variable*, var_);
-      return var->Get<LoDTensor>().dims();
+      if (var->IsType<pten::DenseTensor>()) {
+        return var->Get<pten::DenseTensor>().dims();
+      } else if (var->IsType<pten::SelectedRows>()) {
+        return var->Get<pten::SelectedRows>().dims();
+      } else {
+        PADDLE_THROW(platform::errors::Unimplemented(
+            "Currently, dims can only be obtained from DenseTensor or "
+            "SelectedRows."));
+      }
} else {
auto* var = BOOST_GET_CONST(VarDesc*, var_);
return make_ddim(var->GetShape());
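[Editor's note] Each CompatMetaTensor accessor now branches on the variable's concrete tensor type instead of assuming LoDTensor. A self-contained sketch of the same dispatch shape, using toy types rather than Paddle's:

    #include <cstdint>
    #include <stdexcept>
    #include <variant>
    #include <vector>

    struct DenseT { std::vector<int64_t> dims; };
    struct SelRowsT { std::vector<int64_t> dims; };  // logical {height, width}
    using Var = std::variant<std::monostate, DenseT, SelRowsT>;

    std::vector<int64_t> Dims(const Var& v) {
      if (auto* d = std::get_if<DenseT>(&v)) return d->dims;    // DenseTensor path
      if (auto* s = std::get_if<SelRowsT>(&v)) return s->dims;  // SelectedRows path
      throw std::runtime_error("only DenseTensor/SelectedRows supported");
    }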
......@@ -114,7 +120,14 @@ class CompatMetaTensor : public pten::MetaTensor {
pten::DataType dtype() const override {
if (is_runtime_) {
auto* var = BOOST_GET_CONST(Variable*, var_);
-      return var->Get<LoDTensor>().dtype();
+      if (var->IsType<pten::DenseTensor>()) {
+        return var->Get<pten::DenseTensor>().dtype();
+      } else if (var->IsType<pten::SelectedRows>()) {
+        return var->Get<pten::SelectedRows>().dtype();
+      } else {
+        PADDLE_THROW(platform::errors::Unimplemented(
+            "Currently, dtype can only be obtained from DenseTensor or "
+            "SelectedRows."));
+      }
} else {
auto* var = BOOST_GET_CONST(VarDesc*, var_);
return pten::TransToPtenDataType(var->GetDataType());
......@@ -135,10 +148,16 @@ class CompatMetaTensor : public pten::MetaTensor {
void set_dims(const DDim& dims) override {
if (is_runtime_) {
auto* var = BOOST_GET(Variable*, var_);
-      LoDTensor* tensor = var->GetMutable<LoDTensor>();
-      pten::DenseTensorUtils::GetMutableMeta(
-          static_cast<pten::DenseTensor*>(tensor))
-          ->dims = dims;
+      if (var->IsType<pten::DenseTensor>()) {
+        auto* tensor = var->GetMutable<pten::DenseTensor>();
+        pten::DenseTensorUtils::GetMutableMeta(tensor)->dims = dims;
+      } else if (var->IsType<pten::SelectedRows>()) {
+        auto* tensor = var->GetMutable<pten::SelectedRows>()->mutable_value();
+        pten::DenseTensorUtils::GetMutableMeta(tensor)->dims = dims;
+      } else {
+        PADDLE_THROW(platform::errors::Unimplemented(
+            "Currently, dims can only be set for DenseTensor or "
+            "SelectedRows."));
+      }
} else {
auto* var = BOOST_GET(VarDesc*, var_);
var->SetShape(vectorize(dims));
......@@ -148,10 +167,16 @@ class CompatMetaTensor : public pten::MetaTensor {
void set_dtype(pten::DataType dtype) override {
if (is_runtime_) {
auto* var = BOOST_GET(Variable*, var_);
-      LoDTensor* tensor = var->GetMutable<LoDTensor>();
-      pten::DenseTensorUtils::GetMutableMeta(
-          static_cast<pten::DenseTensor*>(tensor))
-          ->dtype = dtype;
+      if (var->IsType<pten::DenseTensor>()) {
+        auto* tensor = var->GetMutable<pten::DenseTensor>();
+        pten::DenseTensorUtils::GetMutableMeta(tensor)->dtype = dtype;
+      } else if (var->IsType<pten::SelectedRows>()) {
+        auto* tensor = var->GetMutable<pten::SelectedRows>()->mutable_value();
+        pten::DenseTensorUtils::GetMutableMeta(tensor)->dtype = dtype;
+      } else {
+        PADDLE_THROW(platform::errors::Unimplemented(
+            "Currently, dtype can only be set for DenseTensor or "
+            "SelectedRows."));
+      }
} else {
auto* var = BOOST_GET(VarDesc*, var_);
var->SetDataType(pten::TransToProtoVarType(dtype));
......@@ -174,11 +199,14 @@ class CompatMetaTensor : public pten::MetaTensor {
void share_lod(const MetaTensor& meta_tensor) override {
if (is_runtime_) {
auto* var = BOOST_GET(Variable*, var_);
-      LoDTensor* tensor = var->GetMutable<LoDTensor>();
-      pten::DenseTensorUtils::GetMutableMeta(
-          static_cast<pten::DenseTensor*>(tensor))
-          ->lod =
+      if (var->IsType<pten::DenseTensor>()) {
+        auto* tensor = var->GetMutable<pten::DenseTensor>();
+        pten::DenseTensorUtils::GetMutableMeta(tensor)->lod =
            static_cast<const CompatMetaTensor&>(meta_tensor).GetRuntimeLoD();
+      } else {
+        // NOTE(chenweihang): do nothing
+        // only LoDTensor need to share lod
+      }
} else {
auto* var = BOOST_GET(VarDesc*, var_);
var->SetLoDLevel(static_cast<const CompatMetaTensor&>(meta_tensor)
......@@ -191,7 +219,21 @@ class CompatMetaTensor : public pten::MetaTensor {
set_dtype(meta_tensor.dtype());
// VarDesc doesn't contains layout, so we cannot share layout
// set_layout(meta_tensor.layout());
+    // special case 1: share lod of LoDTensor
    share_lod(meta_tensor);
+    // special case 2: share height and rows of SelectedRows in runtime
+    if (is_runtime_) {
+      auto* var = BOOST_GET(Variable*, var_);
+      if (var->IsType<pten::SelectedRows>()) {
+        auto* selected_rows = var->GetMutable<pten::SelectedRows>();
+        auto& input_selected_rows =
+            static_cast<const CompatMetaTensor&>(meta_tensor).GetSelectedRows();
+        selected_rows->set_rows(input_selected_rows.rows());
+        selected_rows->set_height(input_selected_rows.height());
+      }
+    }
}
private:
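[Editor's note] For SelectedRows, shape and dtype are not enough: the output must also know which rows it stores. A minimal sketch of what "special case 2" in share_meta propagates, with toy types:

    #include <cstdint>
    #include <vector>

    // dims/dtype were already copied by set_dims/set_dtype; a SelectedRows
    // output must additionally receive the input's row index set and logical
    // height, since neither is derivable from dims/dtype alone.
    struct SelRowsMeta {
      std::vector<int64_t> rows;
      int64_t height = 0;
    };

    void ShareSelectedRowsMeta(const SelRowsMeta& in, SelRowsMeta* out) {
      out->rows = in.rows;
      out->height = in.height;
    }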
......@@ -199,11 +241,23 @@ class CompatMetaTensor : public pten::MetaTensor {
auto* var = BOOST_GET_CONST(Variable*, var_);
return var->Get<LoDTensor>().lod();
}
int32_t GetCompileTimeLoD() const {
auto* var = BOOST_GET_CONST(VarDesc*, var_);
return var->GetLoDLevel();
}
+  const pten::SelectedRows& GetSelectedRows() const {
+    PADDLE_ENFORCE_EQ(is_runtime_, true,
+                      platform::errors::Unavailable(
+                          "Can only get Tensor from MetaTensor at runtime."));
+    auto* var = BOOST_GET_CONST(Variable*, var_);
+    PADDLE_ENFORCE_EQ(var->IsType<pten::SelectedRows>(), true,
+                      platform::errors::Unavailable(
+                          "The Tensor in MetaTensor is not SelectedRows."));
+    return var->Get<pten::SelectedRows>();
+  }
InferShapeVarPtr var_;
bool is_runtime_;
};
......
......@@ -21,7 +21,7 @@
#include "paddle/fluid/framework/parallel_executor.h"
#include "paddle/fluid/framework/program_desc.h"
-USE_OP(scale);
+USE_OP_ITSELF(scale);
USE_OP(elementwise_mul);
USE_OP(elementwise_add);
USE_OP(elementwise_add_grad);
......
......@@ -393,7 +393,8 @@ void InterpreterCore::RunInstruction(const Instruction& instr_node) {
platform::RecordEvent infershape_event("InferShape");
// If it is OperatorBase, InferShape do nothing.
if (op_with_kernel != nullptr)
-      op_with_kernel->InferShape(instr_node.InnerInferShapeContext().get());
+      op_with_kernel->Info().infer_shape_(
+          instr_node.InnerInferShapeContext().get());
}
if (op_with_kernel != nullptr &&
......
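[Editor's note] The interpreter now dispatches through Info().infer_shape_ rather than the virtual InferShape, so ops whose shape inference was replaced by a registered functor (as done for scale below via DELCARE_INFER_SHAPE_FUNCTOR) take the same path. A toy model of that indirection, with assumed names:

    #include <functional>
    #include <map>
    #include <string>

    struct ShapeCtx {};  // stand-in for framework::InferShapeContext

    struct OpInfoModel {
      // bound either to the op's classic InferShape or to a pten InferMeta adapter
      std::function<void(ShapeCtx*)> infer_shape_;
    };

    std::map<std::string, OpInfoModel>& Registry() {
      static std::map<std::string, OpInfoModel> r;
      return r;
    }

    void RunInferShape(const std::string& op, ShapeCtx* ctx) {
      Registry().at(op).infer_shape_(ctx);  // uniform dispatch, as in the hunk
    }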
......@@ -1998,16 +1998,17 @@ void OperatorWithKernel::BuildPtenKernelContext(
size_t end_idx = start_idx + ins_vector.size();
for (size_t offset = 0; offset < ins_vector.size(); ++offset) {
-      const framework::Tensor* tensor_in = nullptr;
+      const pten::TensorBase* tensor_in = nullptr;
      auto* var = ins_vector[offset];
-      if (var->IsType<framework::LoDTensor>()) {
-        tensor_in = &(var->Get<framework::LoDTensor>());
+      if (var->IsType<pten::DenseTensor>()) {
+        tensor_in = &(var->Get<pten::DenseTensor>());
+      } else if (var->IsType<pten::SelectedRows>()) {
+        tensor_in = &(var->Get<pten::SelectedRows>());
      } else {
        PADDLE_THROW(platform::errors::Unimplemented(
            "Unsupported input `%s` type when calling pt kernel.",
            framework::ToTypeName(var->Type())));
-      }  // TODO(zyfncg): Add support for SelectedRows
+      }
pt_kernel_context->EmplaceBackInputWithoutSetRange(tensor_in);
}
pt_kernel_context->AssignInputRange(std::make_pair(start_idx, end_idx), i);
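[Editor's note] Inputs are now stored as const pten::TensorBase*, the common base of DenseTensor and SelectedRows, and kernels recover the concrete type when they read an argument. A self-contained sketch of the retrieval side (the real accessor is KernelContext::InputAt<TensorType>; the model types here are assumptions):

    #include <cstddef>
    #include <vector>

    struct TensorBaseModel { virtual ~TensorBaseModel() = default; };
    struct DenseModel : TensorBaseModel { /* payload omitted */ };

    template <typename TensorType>
    const TensorType& InputAt(const std::vector<const TensorBaseModel*>& inputs,
                              std::size_t idx) {
      // The static_cast is safe because the kernel signature chosen by the
      // ArgumentMapping function fixed the concrete type up front.
      return *static_cast<const TensorType*>(inputs.at(idx));
    }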
......@@ -2021,17 +2022,20 @@ void OperatorWithKernel::BuildPtenKernelContext(
size_t end_idx = start_idx + outs_vector.size();
for (size_t offset = 0; offset < outs_vector.size(); ++offset) {
-      framework::Tensor* tensor_out = nullptr;
+      pten::TensorBase* tensor_out = nullptr;
      auto* var = outs_vector[offset];
-      if (var->template IsType<framework::LoDTensor>()) {
-        tensor_out = var->template GetMutable<framework::LoDTensor>();
+      if (var->template IsType<pten::DenseTensor>()) {
+        tensor_out = var->template GetMutable<pten::DenseTensor>();
+      } else if (var->template IsType<pten::SelectedRows>()) {
+        tensor_out = var->template GetMutable<pten::SelectedRows>();
      } else {
        PADDLE_THROW(platform::errors::Unimplemented(
            "Unsupported output `%s` type when calling pt kernel.",
            framework::ToTypeName(var->Type())));
-      }  // TODO(zyfncg): Add support for SelectedRows
+      }
-      experimental::ResetTensorByArgDef(tensor_out, output_defs.at(i));
+      experimental::ResetTensorDtypeAndLayoutByArgDef(tensor_out,
+                                                      output_defs.at(i));
SetAllocationForOutputTenosr(
tensor_out, pten::TransToFluidPlace(output_defs.at(i).backend));
......
......@@ -207,13 +207,12 @@ void InitDefaultKernelSignatureMap() {
});
}
-void SetAllocationForOutputTenosr(pten::DenseTensor* tensor,
-                                  const platform::Place& place) {
-  if (!tensor->IsInitialized() || !(tensor->place() == place)) {
-    int dtype_size = tensor->dtype() == DataType::UNDEFINED
+static void SetAllocationForUninitializedDenseTensor(
+    pten::DenseTensor* dense_tensor, const platform::Place& place) {
+  int dtype_size = dense_tensor->dtype() == DataType::UNDEFINED
                       ? 0
-                         : experimental::SizeOf(tensor->dtype());
-    int64_t numels = product(tensor->dims());
+                       : experimental::SizeOf(dense_tensor->dtype());
+  int64_t numels = product(dense_tensor->dims());
numels = numels < 0 ? 0 : numels;
auto tmp_allocation_ptr = memory::Alloc(place, numels * dtype_size);
auto& deleter = tmp_allocation_ptr.get_deleter();
......@@ -221,7 +220,27 @@ void SetAllocationForOutputTenosr(pten::DenseTensor* tensor,
auto shared_allocation =
std::shared_ptr<pten::Allocation>(allocation_ptr, deleter);
-  tensor->ResetHolder(shared_allocation);
+  dense_tensor->ResetHolder(shared_allocation);
}
+void SetAllocationForOutputTenosr(pten::TensorBase* tensor,
+                                  const platform::Place& place) {
+  if (pten::DenseTensor::classof(tensor)) {
+    auto* dense_tensor = static_cast<pten::DenseTensor*>(tensor);
+    if (!dense_tensor->IsInitialized() || !(dense_tensor->place() == place)) {
+      SetAllocationForUninitializedDenseTensor(dense_tensor, place);
+    }
+  } else if (pten::SelectedRows::classof(tensor)) {
+    auto* selected_rows = static_cast<pten::SelectedRows*>(tensor);
+    if (!selected_rows->value().IsInitialized() ||
+        !(selected_rows->place() == place)) {
+      SetAllocationForUninitializedDenseTensor(selected_rows->mutable_value(),
+                                               place);
+    }
+  } else {
+    PADDLE_THROW(platform::errors::Unimplemented(
+        "Unsupported tensor type received when setting allocation for "
+        "the output tensor."));
+  }
+}
......
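[Editor's note] SetAllocationForOutputTenosr (the misspelling is the function's actual name) dispatches with classof, pten's LLVM-style RTTI on TensorBase. A compact, self-contained model of that pattern, using toy kinds rather than pten's actual TypeInfo machinery:

    struct TBase {
      enum Kind { kDense, kSelectedRows };
      explicit TBase(Kind k) : kind(k) {}
      Kind kind;
    };
    struct TDense : TBase {
      TDense() : TBase(kDense) {}
      static bool classof(const TBase* t) { return t->kind == kDense; }
    };
    struct TSelRows : TBase {
      TSelRows() : TBase(kSelectedRows) {}
      static bool classof(const TBase* t) { return t->kind == kSelectedRows; }
    };

    // Usage mirrors the code above: check classof, then static_cast.
    bool IsDense(const TBase* t) { return TDense::classof(t); }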
......@@ -63,7 +63,7 @@ class KernelArgsNameMaker {
void InitDefaultKernelSignatureMap();
-void SetAllocationForOutputTenosr(pten::DenseTensor* tensor,
+void SetAllocationForOutputTenosr(pten::TensorBase* tensor,
const platform::Place& place);
// TODO(Wilber): support others device context.
......
......@@ -29,6 +29,9 @@
#include "paddle/fluid/imperative/type_defs.h"
#include "paddle/fluid/imperative/var_helper.h"
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/core/selected_rows.h"
DECLARE_bool(use_mkldnn);
namespace paddle {
......@@ -262,7 +265,17 @@ void BuildDygraphPtenKernelContext(
size_t end_idx = start_idx + ins_vector.size();
for (size_t offset = 0; offset < ins_vector.size(); ++offset) {
-      const auto* tensor_in = GetTensorFromVar(ins_vector[offset]->Var());
+      const pten::TensorBase* tensor_in = nullptr;
+      auto& var = ins_vector[offset]->Var();
+      if (var.template IsType<pten::DenseTensor>()) {
+        tensor_in = &(var.template Get<pten::DenseTensor>());
+      } else if (var.template IsType<pten::SelectedRows>()) {
+        tensor_in = &(var.template Get<pten::SelectedRows>());
+      } else {
+        PADDLE_THROW(platform::errors::Unimplemented(
+            "Unsupported input `%s` type when calling pt kernel.",
+            framework::ToTypeName(var.Type())));
+      }
kernel_ctx->EmplaceBackInputWithoutSetRange(tensor_in);
}
kernel_ctx->AssignInputRange(std::make_pair(start_idx, end_idx), i);
......@@ -287,17 +300,21 @@ void BuildDygraphPtenKernelContext(
kernel_ctx->EmplaceBackOutputWithoutSetRange({nullptr});
continue;
}
+      pten::TensorBase* tensor_out = nullptr;
      auto* var = outs_vector[offset]->MutableVar();
-      framework::Tensor* tensor_out = nullptr;
-      if (var->template IsType<framework::LoDTensor>()) {
-        tensor_out = var->template GetMutable<framework::LoDTensor>();
+      if (var->template IsType<pten::DenseTensor>()) {
+        tensor_out = var->template GetMutable<pten::DenseTensor>();
+      } else if (var->template IsType<pten::SelectedRows>()) {
+        tensor_out = var->template GetMutable<pten::SelectedRows>();
      } else {
        PADDLE_THROW(platform::errors::Unimplemented(
            "Unsupported output `%s` type when calling pt kernel.",
            framework::ToTypeName(var->Type())));
-      }  // TODO(zyfncg): Add support for SelectedRows
+      }
-      experimental::ResetTensorByArgDef(tensor_out, output_defs.at(i));
+      experimental::ResetTensorDtypeAndLayoutByArgDef(tensor_out,
+                                                      output_defs.at(i));
framework::SetAllocationForOutputTenosr(
tensor_out, pten::TransToFluidPlace(output_defs.at(i).backend));
......
......@@ -33,7 +33,7 @@ using MultiVarMsg = ::paddle::distributed::MultiVariableMessage;
using VarMsg = ::paddle::distributed::VariableMessage;
DECLARE_double(eager_delete_tensor_gb);
-USE_OP(scale);
+USE_OP_ITSELF(scale);
USE_NO_KERNEL_OP(heter_listen_and_serv);
framework::BlockDesc* AppendSendAndRecvBlock(framework::ProgramDesc* program) {
......
......@@ -29,7 +29,7 @@ namespace distributed = paddle::distributed;
using MultiVarMsg = ::paddle::distributed::MultiVariableMessage;
using VarMsg = ::paddle::distributed::VariableMessage;
-USE_OP(scale);
+USE_OP_ITSELF(scale);
std::shared_ptr<distributed::HeterServer> b_rpc_service;
......
......@@ -31,7 +31,7 @@ namespace distributed = paddle::distributed;
using MultiVarMsg = ::paddle::distributed::MultiVariableMessage;
using VarMsg = ::paddle::distributed::VariableMessage;
-USE_OP(scale);
+USE_OP_ITSELF(scale);
USE_OP(send_and_recv);
std::shared_ptr<distributed::HeterServer> b_rpc_service;
......
......@@ -35,7 +35,7 @@ namespace memory = paddle::memory;
using MultiVarMsg = ::paddle::distributed::MultiVariableMessage;
using VarMsg = ::paddle::distributed::VariableMessage;
-USE_OP(scale);
+USE_OP_ITSELF(scale);
USE_OP(send_and_recv);
std::shared_ptr<distributed::HeterServer> b_rpc_service2;
......
......@@ -12,49 +12,19 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/scale_op.h"
#include <string>
#include "paddle/fluid/framework/infershape_utils.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/float16.h"
namespace paddle {
namespace framework {
class InferShapeContext;
class OpDesc;
} // namespace framework
namespace imperative {
class OpBase;
} // namespace imperative
namespace platform {
class CPUDeviceContext;
} // namespace platform
} // namespace paddle
#include "paddle/pten/core/infermeta_utils.h"
#include "paddle/pten/infermeta/unary.h"
namespace paddle {
namespace operators {
class ScaleOp : public framework::OperatorWithKernel {
public:
ScaleOp(const std::string &type, const framework::VariableNameMap &inputs,
const framework::VariableNameMap &outputs,
const framework::AttributeMap &attrs)
: OperatorWithKernel(type, inputs, outputs, attrs) {}
void InferShape(framework::InferShapeContext *ctx) const override {
OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "scale");
OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "scale");
if (ctx->IsRuntime() && ctx->HasInput("ScaleTensor")) {
auto scale = ctx->Inputs("ScaleTensor");
PADDLE_ENFORCE_EQ(scale.size(), 1,
platform::errors::InvalidArgument(
"Input(ScaleTensor) size must be 1, "
"but received size is %d.",
scale.size()));
}
ctx->SetOutputDim("Out", ctx->GetInputDim("X"));
ctx->ShareLoD("X", /*->*/ "Out");
}
using framework::OperatorWithKernel::OperatorWithKernel;
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext &ctx) const override {
......@@ -150,32 +120,10 @@ DECLARE_INPLACE_OP_INFERER(ScaleOpInplaceInferer, {"X", "Out"});
namespace ops = paddle::operators;
+DELCARE_INFER_SHAPE_FUNCTOR(scale, ScaleInferShapeFunctor,
+                            PT_INFER_META(pten::UnchangedInferMeta));
REGISTER_OPERATOR(scale, ops::ScaleOp, ops::ScaleOpMaker,
                  ops::ScaleGradMaker<paddle::framework::OpDesc>,
                  ops::ScaleGradMaker<paddle::imperative::OpBase>,
-                  ops::ScaleOpVarTypeInference, ops::ScaleOpInplaceInferer);
-REGISTER_OP_CPU_KERNEL(
-    scale, ops::ScaleKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::ScaleKernel<paddle::platform::CPUDeviceContext, double>,
-    ops::ScaleKernel<paddle::platform::CPUDeviceContext,
-                     paddle::platform::bfloat16>,
-    ops::ScaleKernel<paddle::platform::CPUDeviceContext, uint8_t>,
-    ops::ScaleKernel<paddle::platform::CPUDeviceContext, int8_t>,
-    ops::ScaleKernel<paddle::platform::CPUDeviceContext, int16_t>,
-    ops::ScaleKernel<paddle::platform::CPUDeviceContext, int>,
-    ops::ScaleKernel<paddle::platform::CPUDeviceContext, int64_t>);
-REGISTER_OP_CUDA_KERNEL(
-    scale,
-    paddle::operators::ScaleKernel<paddle::platform::CUDADeviceContext, float>,
-    paddle::operators::ScaleKernel<paddle::platform::CUDADeviceContext, double>,
-    paddle::operators::ScaleKernel<paddle::platform::CUDADeviceContext,
-                                   uint8_t>,
-    paddle::operators::ScaleKernel<paddle::platform::CUDADeviceContext, int8_t>,
-    paddle::operators::ScaleKernel<paddle::platform::CUDADeviceContext,
-                                   int16_t>,
-    paddle::operators::ScaleKernel<paddle::platform::CUDADeviceContext, int>,
-    paddle::operators::ScaleKernel<paddle::platform::CUDADeviceContext,
-                                   int64_t>,
-    paddle::operators::ScaleKernel<paddle::platform::CUDADeviceContext,
-                                   paddle::platform::float16>);
+                  ScaleInferShapeFunctor, ops::ScaleOpVarTypeInference,
+                  ops::ScaleOpInplaceInferer);
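[Editor's note] The roughly twenty-line hand-written ScaleOp::InferShape is replaced by registering pten's UnchangedInferMeta through DELCARE_INFER_SHAPE_FUNCTOR (the "DELCARE" misspelling is the macro's actual name in the source). For an elementwise op like scale, shared infer-meta reduces to mirroring the input's meta; a sketch with stand-in types:

    #include <cstdint>
    #include <vector>

    struct MetaT {
      std::vector<int64_t> dims;
      int dtype = 0;  // stand-in for pten::DataType
    };

    // Approximation of pten::UnchangedInferMeta: the output mirrors the input,
    // which is all scale needs since Out has X's shape, dtype, and shared lod.
    void UnchangedInferMeta(const MetaT& x, MetaT* out) {
      out->dims = x.dims;
      out->dtype = x.dtype;
    }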
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-    http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-#pragma once
-#include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/framework/pten_utils.h"
-// only can include the headers in paddle/top/api dirs
-#include "paddle/pten/api/lib/utils/tensor_utils.h"
-#include "paddle/pten/kernels/scale_kernel.h"
-namespace paddle {
-namespace operators {
-template <typename T>
-static inline T GetAttrFromTensor(const framework::Tensor* tensor) {
-  const auto* tensor_data = tensor->data<T>();
-  framework::Tensor cpu_tensor;
-  if (platform::is_gpu_place(tensor->place()) ||
-      platform::is_npu_place(tensor->place())) {
-    paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(),
-                                      &cpu_tensor);
-    tensor_data = cpu_tensor.data<T>();
-  }
-  return tensor_data[0];
-}
-// See Note [ Why still keep the original kernel implementation? ]
-template <typename DeviceContext, typename T>
-class ScaleKernel : public framework::OpKernel<T> {
- public:
-  virtual void Compute(const framework::ExecutionContext& ctx) const {
-    auto* in_var = ctx.InputVar("X");
-    auto bias = ctx.Attr<float>("bias");
-    auto bias_after_scale = ctx.Attr<bool>("bias_after_scale");
-    auto scale = ctx.Attr<float>("scale");
-    auto* out_var = ctx.OutputVar("Out");
-    if (ctx.HasInput("ScaleTensor")) {
-      auto* scale_tensor = ctx.Input<framework::Tensor>("ScaleTensor");
-      scale = static_cast<float>(GetAttrFromTensor<T>(scale_tensor));
-    }
-    auto* in = framework::GetLoDTensorOrSelectedRowsValueFromVar(*in_var);
-    auto* out =
-        framework::GetMutableLoDTensorOrSelectedRowsValueFromVar(out_var);
-    out->mutable_data<T>(in->place());
-    auto& dev_ctx = ctx.device_context<DeviceContext>();
-    // call new kernel
-    if (in_var->IsType<pten::SelectedRows>()) {
-      pten::ScaleSR<T>(
-          static_cast<const typename framework::ConvertToPtenContext<
-              DeviceContext>::TYPE&>(dev_ctx),
-          in_var->Get<pten::SelectedRows>(), scale, bias, bias_after_scale,
-          out_var->GetMutable<pten::SelectedRows>());
-    } else {
-      pten::ScaleKernel<T>(
-          static_cast<const typename framework::ConvertToPtenContext<
-              DeviceContext>::TYPE&>(dev_ctx),
-          *in, scale, bias, bias_after_scale, out);
-    }
-  }
-};
-}  // namespace operators
-}  // namespace paddle
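[Editor's note] The block above is the deleted compatibility header (apparently paddle/fluid/operators/scale_op.h): its ScaleKernel already forwarded to pten::ScaleKernel or pten::ScaleSR, so removing it leaves pten as the single implementation. The semantics the SelectedRows path preserves, as a runnable sketch with assumed model types:

    #include <cstdint>
    #include <vector>

    struct SelRowsModel {
      std::vector<int64_t> rows;   // untouched by scaling
      int64_t height = 0;          // untouched by scaling
      std::vector<float> value;    // only the stored rows are transformed
    };

    // Toy of scale_sr: y = scale * x + bias (or scale * (x + bias) when
    // bias_after_scale is false), applied to the value block only; the
    // sparsity structure passes through unchanged.
    SelRowsModel ScaleSR(SelRowsModel x, float scale, float bias,
                         bool bias_after_scale) {
      for (auto& v : x.value)
        v = bias_after_scale ? scale * v + bias : scale * (v + bias);
      return x;
    }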
......@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/scale_op.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/mlu/mlu_baseop.h"
namespace paddle {
......
......@@ -12,12 +12,25 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/scale_op.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
namespace operators {
+template <typename T>
+static inline T GetAttrFromTensor(const framework::Tensor* tensor) {
+  const auto* tensor_data = tensor->data<T>();
+  framework::Tensor cpu_tensor;
+  if (platform::is_gpu_place(tensor->place()) ||
+      platform::is_npu_place(tensor->place())) {
+    paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(),
+                                      &cpu_tensor);
+    tensor_data = cpu_tensor.data<T>();
+  }
+  return tensor_data[0];
+}
template <typename T>
class ScaleNPUKernel : public framework::OpKernel<T> {
public:
......
......@@ -14,8 +14,8 @@ limitations under the License. */
#ifdef PADDLE_WITH_XPU
#include "paddle/fluid/operators/scale_op.h"
#include <string>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/pten/kernels/scale_kernel.h"
namespace paddle {
......
......@@ -198,12 +198,25 @@ pten::ScalarArray MakePtenScalarArrayFromVarList(
return {vector_data};
}
-void ResetTensorByArgDef(pten::DenseTensor* dst,
+void ResetTensorDtypeAndLayoutByArgDef(pten::TensorBase* dst,
const pten::TensorArgDef& arg_def) {
VLOG(5) << "ResetTensor by TensorArgDef.";
-  auto* meta = pten::DenseTensorUtils::GetMutableMeta(dst);
-  meta->dtype = arg_def.dtype;
-  meta->layout = arg_def.layout;
+  if (pten::DenseTensor::classof(dst)) {
+    auto* dense_t = static_cast<pten::DenseTensor*>(dst);
+    auto* meta = pten::DenseTensorUtils::GetMutableMeta(dense_t);
+    meta->dtype = arg_def.dtype;
+    meta->layout = arg_def.layout;
+  } else if (pten::SelectedRows::classof(dst)) {
+    auto* selected_rows = static_cast<pten::SelectedRows*>(dst);
+    auto* meta =
+        pten::DenseTensorUtils::GetMutableMeta(selected_rows->mutable_value());
+    meta->dtype = arg_def.dtype;
+    meta->layout = arg_def.layout;
+  } else {
+    PADDLE_THROW(pten::errors::Unimplemented(
+        "Unsupported tensor type received when resetting tensor dtype and "
+        "layout by argument definition."));
+  }
}
} // namespace experimental
......
......@@ -45,7 +45,7 @@ pten::ScalarArray MakePtenScalarArrayFromVar(
pten::ScalarArray MakePtenScalarArrayFromVarList(
const std::vector<framework::Variable*>& variable_list);
-void ResetTensorByArgDef(pten::DenseTensor* dst,
+void ResetTensorDtypeAndLayoutByArgDef(pten::TensorBase* dst,
const pten::TensorArgDef& arg_def);
} // namespace experimental
......
......@@ -48,10 +48,6 @@ void KernelContext::EmplaceBackOutputWithoutSetRange(TensorBase* output) {
outputs_.emplace_back(output);
}
-void KernelContext::SetOutputWithoutSetRange(int index, TensorBase* output) {
-  outputs_.at(index) = output;
-}
void KernelContext::EmplaceBackOutputs(
paddle::SmallVector<TensorBase*> outputs) {
int index = outputs_.size();
......@@ -103,15 +99,4 @@ const std::pair<int, int>& KernelContext::OutputRangeAt(size_t idx) const {
return output_range_.at(idx);
}
-std::pair<int, int>& KernelContext::MutableInputRangeAt(size_t idx) {
-  return input_range_[idx];
-}
-std::pair<int, int>& KernelContext::MutableOutputRangeAt(size_t idx) {
-  return output_range_[idx];
-}
-// Temporary method: For compatible with fluid Tensor and improve performance
-// Only deal with DenseTensor now
-void KernelContext::ClearData() { attrs_.clear(); }
} // namespace pten
......@@ -26,10 +26,8 @@
namespace pten {
using DeviceContext = pten::DeviceContext;
/**
- * Note: KernelContext doesn't manage the life if DeviceContext and Tensor
+ * Note: KernelContext doesn't manage the life of DeviceContext and Tensor
*
* Note: KernelContext does not couple the concept of framework,
* its constructor can only take the members it needs as parameters,
......@@ -59,17 +57,15 @@ class KernelContext {
void EmplaceBackOutputs(paddle::SmallVector<TensorBase*> outputs);
-  void SetOutputWithoutSetRange(int index, TensorBase* output);
void EmplaceBackAttr(paddle::any attr);
const std::pair<int, int>& InputRangeAt(size_t idx) const;
const std::pair<int, int>& OutputRangeAt(size_t idx) const;
-  std::pair<int, int>& MutableInputRangeAt(size_t idx);
+  void AssignInputRange(std::pair<int, int>&& range, size_t idx);
-  std::pair<int, int>& MutableOutputRangeAt(size_t idx);
+  void AssignOutputRange(std::pair<int, int>&& range, size_t idx);
template <typename TensorType>
const TensorType& InputAt(size_t idx) const {
......@@ -90,15 +86,11 @@ class KernelContext {
for (size_t i = start; i < end; ++i) {
auto t = static_cast<const TensorType*>(inputs_.at(i));
v.emplace_back(*t);
-      inputs_.at(i) = nullptr;
+      inputs_[i] = nullptr;
}
return v;
}
-  void AssignInputRange(std::pair<int, int>&& range, size_t idx);
-  void AssignOutputRange(std::pair<int, int>&& range, size_t idx);
template <typename TensorType>
TensorType* MutableOutputAt(size_t idx) {
return static_cast<TensorType*>(outputs_.at(idx));
......@@ -110,7 +102,6 @@ class KernelContext {
for (size_t i = start; i < end; ++i) {
v.emplace_back(static_cast<TensorType*>(outputs_.at(i)));
}
return v;
}
......@@ -124,25 +115,17 @@ class KernelContext {
}
}
-  // Temporary method: For compatible with fluid Tensor and improve performance
-  // Only deal with DenseTensor now
-  void ClearData();
size_t InputsSize() const { return inputs_.size(); }
size_t OutputsSize() const { return outputs_.size(); }
size_t AttrsSize() const { return attrs_.size(); }
private:
// DeviceContext base class
DeviceContext* dev_ctx_;
-  // TODO(chenweihang): Tensor -> Tensor*, Tensor should by managed `scope`
-  // Note: can't use API Tensor here, the inference don't use this API Tensor
paddle::SmallVector<const TensorBase*> inputs_;
paddle::SmallVector<TensorBase*> outputs_;
paddle::SmallVector<paddle::any> attrs_;
// Only contains input like list[Tensor] need `range`
paddle::SmallVector<std::pair<int, int>> input_range_;
paddle::SmallVector<std::pair<int, int>> output_range_;
};
......
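[Editor's note] The surviving AssignInputRange/AssignOutputRange API reflects how KernelContext flattens arguments: one kernel argument may expand to several tensors (for example a vector<Tensor> input), so each argument keeps a [start, end) pair into the flat inputs_/outputs_ vectors. Illustration:

    #include <utility>
    #include <vector>

    // Flat inputs {A, B0, B1, B2, C} for a kernel taking
    // (Tensor, vector<Tensor>, Tensor) are recorded as three half-open ranges:
    std::vector<std::pair<int, int>> input_range = {
        {0, 1},  // argument 0: A
        {1, 4},  // argument 1: B0..B2
        {4, 5},  // argument 2: C
    };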
......@@ -16,9 +16,37 @@ limitations under the License. */
namespace pten {
+/**
+ * Note [ Why does the ArgumentMapping function need to be so complicated? ]
+ *
+ * In order to meet the requirements of infrt, the function used to match Op
+ * and Kernel parameters needs to be placed in pten as a compatible component,
+ * and must not depend on fluid.
+ *
+ * Because infrt not only needs to dynamically call this argument mapping
+ * function at runtime, but also needs to statically declare all possible
+ * results of the function before running, without any information.
+ *
+ * The infrt declaration looks like:
+ *
+ *   def PDKEL_Reshape_to_CPU : Pat<
+ *     (PD_ReshapeOp $x, $shape_tensor, $shape_attr),  // OpMaker arguments
+ *     (PDKEL_ReshapeKernelAttr $x, fn($shape_attr)>;  // Kernel arguments
+ *   def PDKEL_Reshape_to_CPU : Pat<
+ *     (PD_ReshapeOp $x, $shape_tensor, $shape_attr),
+ *     (PDKEL_ReshapeKernelAttr $x, fn($shape_tensor)>;
+ *
+ * Therefore, we need to write out each result of the argument mapping
+ * function, like `KernelSignature("full", {}, {"ShapeTensor", "value"},
+ * {"Out"})`; it cannot contain variables, only const char* strings.
+ *
+ * Infrt will parse all results before running to generate the above static
+ * declarations, which leads to some functions being written in a long-winded
+ * way; the complicated ones may run to hundreds of lines, which has certain
+ * side effects on the programming experience.
+ */
KernelSignature ScaleOpArgumentMapping(const ArgumentMappingContext& ctx) {
if (ctx.IsDenseTensorInput("X")) {
std::string scale_attr;
if (ctx.HasInput("ScaleTensor")) {
return KernelSignature(
"scale", {"X"}, {"ScaleTensor", "bias", "bias_after_scale"}, {"Out"});
......@@ -26,9 +54,19 @@ KernelSignature ScaleOpArgumentMapping(const ArgumentMappingContext& ctx) {
return KernelSignature(
"scale", {"X"}, {"scale", "bias", "bias_after_scale"}, {"Out"});
}
} else if (ctx.IsSelectedRowsInput("X")) {
if (ctx.HasInput("ScaleTensor")) {
return KernelSignature("scale_sr",
{"X"},
{"ScaleTensor", "bias", "bias_after_scale"},
{"Out"});
} else {
return KernelSignature(
"scale_sr", {"X"}, {"scale", "bias", "bias_after_scale"}, {"Out"});
}
} else {
return KernelSignature("unregistered", {}, {}, {});
}
// TODO(chenweihang): support other cases after selected rows added
return KernelSignature("scale.unregistered", {}, {}, {});
}
} // namespace pten
......
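[Editor's note] ScaleOpArgumentMapping now dispatches along two axes: the tensor type of X selects the kernel name (scale vs scale_sr), and the presence of the ScaleTensor input decides whether the scale comes from an input tensor or an attribute. A condensed, self-contained model of that decision, using an assumed context struct:

    #include <string>

    struct MappingCtxModel {              // stand-in for ArgumentMappingContext
      bool x_is_dense = true;             // IsDenseTensorInput("X")
      bool x_is_selected_rows = false;    // IsSelectedRowsInput("X")
      bool has_scale_tensor = false;      // HasInput("ScaleTensor")
    };

    std::string PickKernel(const MappingCtxModel& ctx) {
      if (!ctx.x_is_dense && !ctx.x_is_selected_rows) return "unregistered";
      std::string name = ctx.x_is_dense ? "scale" : "scale_sr";
      // First attribute slot differs: "ScaleTensor" (input) vs "scale" (attr).
      return name + (ctx.has_scale_tensor
                         ? " {ScaleTensor, bias, bias_after_scale}"
                         : " {scale, bias, bias_after_scale}");
    }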