Unverified commit 65420271, authored by 张春乔, committed by GitHub

[phi::DenseTensor] Replace Tensor with phi::DenseTensor (#48682)

Parent 693de9f0
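The edit repeated through the files below is mechanical: the per-file alias `using Tensor = phi::DenseTensor;` is deleted, and every remaining use of `Tensor` in kernel code, op makers, and comments is spelled out as `phi::DenseTensor`. A minimal before/after sketch of the pattern (it mirrors the float-status kernels further down; `ctx` is the kernel's framework::ExecutionContext, and the two snippets are alternatives, not sequential code):

// Before this commit: a file-local alias hides the real type.
using Tensor = phi::DenseTensor;
Tensor tmp;
tmp.mutable_data<float>({8}, ctx.GetPlace());

// After this commit: the alias is removed and the type is written out.
phi::DenseTensor tmp;
tmp.mutable_data<float>({8}, ctx.GetPlace());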
......@@ -644,11 +644,11 @@ void GradientAccumulator::CallGradientHooks() {
true,
platform::errors::PreconditionNotMet(
"Only can call gradient hooks after sum gradient completed."));
PADDLE_ENFORCE_EQ(
HasInnerVar(),
true,
platform::errors::PreconditionNotMet(
"Leaf Tensor's inner var is nullptr when call gradient hook."));
PADDLE_ENFORCE_EQ(HasInnerVar(),
true,
platform::errors::PreconditionNotMet(
"Leaf Tensor's inner var is nullptr when "
"call gradient hook."));
PADDLE_ENFORCE_EQ(
inner_var_->Var().IsInitialized(),
true,
......
......@@ -18,8 +18,6 @@ limitations under the Licnse. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
template <typename T>
class AbsMLUKernel : public framework::OpKernel<T> {
public:
......@@ -54,7 +52,7 @@ class AbsGradMLUKernel : public framework::OpKernel<T> {
MLUCnnlOpTensorDesc mul_op_desc(
CNNL_OP_TENSOR_MUL, ToCnnlDataType<T>(), CNNL_NOT_PROPAGATE_NAN);
Tensor sign_x;
phi::DenseTensor sign_x;
sign_x.mutable_data<T>(x->dims(), ctx.GetPlace());
MLUCnnl::Sign(ctx,
......
......@@ -18,8 +18,6 @@ limitations under the Licnse. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
template <typename DeviceContext, typename T>
class AbsNPUKernel : public framework::OpKernel<T> {
public:
......
......@@ -21,8 +21,6 @@ limitations under the Licnse. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
template <cnnlActivationMode_t act_mode, typename T>
class ActivationMLUKernel : public framework::OpKernel<T> {
public:
......@@ -442,7 +440,7 @@ class ReciprocalGradMLUKernel : public framework::OpKernel<T> {
auto* dx = ctx.Output<phi::DenseTensor>(framework::GradVarName("X"));
auto place = ctx.GetPlace();
dx->mutable_data<T>(place);
Tensor square_out;
phi::DenseTensor square_out;
square_out.Resize(out->dims());
square_out.mutable_data<T>(place);
MLUCnnlTensorDesc out_desc(*out);
......
......@@ -28,8 +28,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
class AffineGridOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
......
......@@ -21,8 +21,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
template <typename DeviceContext, typename T>
class AllocFloatStatusKernel : public framework::OpKernel<T> {
public:
......
......@@ -19,8 +19,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
template <typename T>
class CheckFiniteAndUnscaleMLUKernel : public framework::OpKernel<T> {
using MPDType = typename details::MPTypeTrait<T>::Type;
......@@ -45,7 +43,7 @@ class CheckFiniteAndUnscaleMLUKernel : public framework::OpKernel<T> {
out->mutable_data<T>(ctx.GetPlace());
// check is_finite or is_nan
Tensor is_finite(found_inf->type());
phi::DenseTensor is_finite(found_inf->type());
if (i != 0) {
is_finite.Resize(phi::make_ddim({1}));
is_finite.mutable_data<bool>(ctx.GetPlace());
......@@ -78,8 +76,8 @@ class CheckFiniteAndUnscaleMLUKernel : public framework::OpKernel<T> {
// out = in/scale, if found_inf = false
// But when found_inf is true, the data of Out should not be used.
// So, on MLU, we always compute out with in/scale.
Tensor float_x;
Tensor float_out;
phi::DenseTensor float_x;
phi::DenseTensor float_out;
if (std::is_same<T, paddle::platform::float16>::value) {
float_x.Resize(x->dims());
float_out.Resize(out->dims());
......
......@@ -22,8 +22,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
// NOTE(zhiqiu): The CheckFiniteAndUnscaleNPUKernel is different from CUDA.
// On NPU, we do not really check the data of input tensors,
// but use NPUGetFloatStatus to check whether the nan/inf occurs on device,
......@@ -47,13 +45,13 @@ class CheckFiniteAndUnscaleNPUKernel : public framework::OpKernel<T> {
.stream();
// step1: inverse scale
Tensor const_tensor;
phi::DenseTensor const_tensor;
const_tensor.mutable_data<T>({1}, ctx.GetPlace());
FillNpuTensorWithConstant<T>(&const_tensor, static_cast<T>(1.0));
// Inverse(1.0/scale)
phi::DenseTensor* tmp_inverse_out = const_cast<phi::DenseTensor*>(scale);
Tensor inverse_out(scale->type());
phi::DenseTensor inverse_out(scale->type());
inverse_out.Resize(scale->dims());
inverse_out.mutable_data<T>(ctx.GetPlace());
const auto& runner_inverse =
......@@ -62,7 +60,7 @@ class CheckFiniteAndUnscaleNPUKernel : public framework::OpKernel<T> {
tmp_inverse_out = &inverse_out;
// NOTE(zhiqiu):
Tensor tmp;
phi::DenseTensor tmp;
tmp.mutable_data<float>({8}, ctx.GetPlace());
// NOTE(zhiqiu): NPUGetFloatStatus updates data on input in-place.
// tmp is only placeholder.
......@@ -73,7 +71,7 @@ class CheckFiniteAndUnscaleNPUKernel : public framework::OpKernel<T> {
{{"message", std::string("check_nan_and_inf")}});
runner_float_status.Run(stream);
Tensor sum;
phi::DenseTensor sum;
sum.mutable_data<float>({1}, ctx.GetPlace());
const auto& runner_reduce_sum =
NpuOpRunner("ReduceSumD",
......
......@@ -31,8 +31,6 @@ limitations under the License. */
namespace f = paddle::framework;
namespace p = paddle::platform;
using Tensor = phi::DenseTensor;
USE_OP_ITSELF(check_finite_and_unscale);
USE_OP_DEVICE_KERNEL(check_finite_and_unscale, NPU);
......@@ -110,7 +108,7 @@ void Compare(f::Scope *scope, const p::DeviceContext &ctx) {
ctx.Wait();
// out found_inf
Tensor found_inf_tensor;
phi::DenseTensor found_inf_tensor;
found_inf_tensor.Resize({1});
bool *found_inf_data =
found_inf_tensor.mutable_data<bool>(paddle::platform::CPUPlace());
......
......@@ -21,8 +21,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
template <typename DeviceContext, typename T>
class ClearFloatStatusKernel : public framework::OpKernel<T> {
public:
......@@ -35,7 +33,7 @@ class ClearFloatStatusKernel : public framework::OpKernel<T> {
platform::errors::PreconditionNotMet(
"The input(FloatStatus) and Output(FloatStatusOut) "
"should be the same."));
Tensor tmp;
phi::DenseTensor tmp;
tmp.mutable_data<float>({8}, ctx.GetPlace());
const auto& runner =
NpuOpRunner("NPUClearFloatStatus", {tmp}, {*float_status_out});
......
......@@ -21,8 +21,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
template <typename DeviceContext, typename T>
class GetFloatStatusKernel : public framework::OpKernel<T> {
public:
......@@ -35,7 +33,7 @@ class GetFloatStatusKernel : public framework::OpKernel<T> {
platform::errors::PreconditionNotMet(
"The input(FloatStatus) and Output(FloatStatusOut) "
"should be the same."));
Tensor tmp;
phi::DenseTensor tmp;
tmp.mutable_data<float>({8}, ctx.GetPlace());
auto stream =
ctx.template device_context<paddle::platform::NPUDeviceContext>()
......
......@@ -25,8 +25,6 @@ DECLARE_int32(min_loss_scaling);
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
template <typename T>
void Update(const platform::NPUDeviceContext& ctx,
const std::vector<bool> found_inf_vec,
......@@ -50,7 +48,7 @@ void Update(const platform::NPUDeviceContext& ctx,
good_out_tensor->numel() * sizeof(int),
stream);
// bad_out_data = bad_in_data + 1
Tensor factor_tensor(bad_out_tensor->dtype());
phi::DenseTensor factor_tensor(bad_out_tensor->dtype());
factor_tensor.mutable_data<int>({1}, place);
FillNpuTensorWithConstant<int>(&factor_tensor, static_cast<int>(1));
const auto& runner_p2 = NpuOpRunner(
......@@ -106,7 +104,7 @@ void Update(const platform::NPUDeviceContext& ctx,
stream);
// good_out_data = good_in_data + 1
Tensor factor_tensor(good_out_tensor->dtype());
phi::DenseTensor factor_tensor(good_out_tensor->dtype());
factor_tensor.mutable_data<int>({1}, place);
FillNpuTensorWithConstant<int>(&factor_tensor, static_cast<int>(1));
const auto& runner_p2 = NpuOpRunner(
......
......@@ -18,7 +18,6 @@ limitations under the Licnse. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
using NPUDeviceContext = platform::NPUDeviceContext;
template <typename T>
......@@ -36,7 +35,7 @@ struct VisitDataArgNPUMaxFunctor {
auto dtype = ctx.Attr<int>("dtype");
const bool& flatten = ctx.Attr<bool>("flatten");
Tensor transformed_x(x.type());
phi::DenseTensor transformed_x(x.type());
transformed_x.ShareDataWith(x);
if (flatten) {
transformed_x.Resize(phi::make_ddim({x.numel()}));
......
......@@ -17,7 +17,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
template <typename DeviceContext, typename T>
class ArgMinNPUKernel : public framework::OpKernel<T> {
......
......@@ -18,7 +18,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
using NPUDeviceContext = platform::NPUDeviceContext;
template <typename T>
......@@ -79,16 +78,16 @@ class ArgsortNPUKernel : public framework::OpKernel<T> {
framework::NPUAttributeMap attr = {{"axis", -1},
{"descending", descending}};
Tensor indices_tmp(experimental::DataType::INT32);
phi::DenseTensor indices_tmp(experimental::DataType::INT32);
indices_tmp.Resize(indices->dims());
if (framework::TransToProtoVarType(input->dtype()) ==
framework::proto::VarType::INT64) {
Tensor input_fp32(experimental::DataType::FLOAT32);
phi::DenseTensor input_fp32(experimental::DataType::FLOAT32);
input_fp32.Resize(input->dims());
CastToFP32(ctx, stream, *input, &input_fp32);
Tensor output_fp32(experimental::DataType::FLOAT32);
phi::DenseTensor output_fp32(experimental::DataType::FLOAT32);
output_fp32.Resize(output->dims());
if (axis == -1 || axis + 1 == in_dims.size()) {
......@@ -112,12 +111,12 @@ class ArgsortNPUKernel : public framework::OpKernel<T> {
}
auto trans_dims = phi::make_ddim(shape);
Tensor trans_input(input_fp32.type());
phi::DenseTensor trans_input(input_fp32.type());
trans_input.Resize(trans_dims);
TranposeNPU<float>(ctx, stream, &perm, input_fp32, &trans_input);
Tensor trans_output(input_fp32.type());
Tensor trans_indices(experimental::DataType::INT32);
phi::DenseTensor trans_output(input_fp32.type());
phi::DenseTensor trans_indices(experimental::DataType::INT32);
trans_output.mutable_data<float>(trans_dims, ctx.GetPlace());
trans_indices.mutable_data<int32_t>(trans_dims, ctx.GetPlace());
......@@ -150,12 +149,12 @@ class ArgsortNPUKernel : public framework::OpKernel<T> {
}
auto trans_dims = phi::make_ddim(shape);
Tensor trans_input(input->type());
phi::DenseTensor trans_input(input->type());
trans_input.Resize(trans_dims);
TranposeNPU<T>(ctx, stream, &perm, *input, &trans_input);
Tensor trans_output(input->type());
Tensor trans_indices(experimental::DataType::INT32);
phi::DenseTensor trans_output(input->type());
phi::DenseTensor trans_indices(experimental::DataType::INT32);
trans_output.mutable_data<T>(trans_dims, ctx.GetPlace());
trans_indices.mutable_data<int32_t>(trans_dims, ctx.GetPlace());
......@@ -183,12 +182,12 @@ static void FullAssignNPU(const framework::ExecutionContext& ctx,
phi::product(phi::slice_ddim(in_dims, 0, in_dims.size() - 1));
const int64_t input_width = in_dims[in_dims.size() - 1];
Tensor input_tmp;
phi::DenseTensor input_tmp;
input_tmp.ShareDataWith(input);
input_tmp.Resize(
phi::make_ddim(std::vector<int64_t>{input_height * input_width}));
Tensor indices_tmp;
phi::DenseTensor indices_tmp;
indices_tmp.ShareDataWith(indices);
indices_tmp.Resize(
phi::make_ddim(std::vector<int64_t>{input_height, input_width}));
......@@ -197,12 +196,12 @@ static void FullAssignNPU(const framework::ExecutionContext& ctx,
for (Type i = 0; i < input_height; i++) {
indexs_value.push_back(i * input_width);
}
Tensor indexs_tmp(indices.type());
phi::DenseTensor indexs_tmp(indices.type());
framework::TensorFromVector<int64_t>(
indexs_value, ctx.device_context(), &indexs_tmp);
indexs_tmp.Resize(phi::make_ddim(std::vector<int64_t>{input_height, 1}));
Tensor indices_index(indices.type());
phi::DenseTensor indices_index(indices.type());
indices_index.mutable_data<int64_t>(indices_tmp.dims(), ctx.GetPlace());
const auto& runner_add =
NpuOpRunner("Add", {indices_tmp, indexs_tmp}, {indices_index}, {});
......@@ -212,7 +211,7 @@ static void FullAssignNPU(const framework::ExecutionContext& ctx,
phi::make_ddim(std::vector<int64_t>{input_height * input_width}));
t_out->mutable_data<T>(ctx.GetPlace());
Tensor out_tmp(t_out->type());
phi::DenseTensor out_tmp(t_out->type());
out_tmp.ShareDataWith(*t_out);
const auto& runner = NpuOpRunner("TensorScatterUpdate",
......@@ -252,15 +251,15 @@ class ArgsortGradNPUKernel : public framework::OpKernel<T> {
}
auto trans_dims = phi::make_ddim(shape);
Tensor trans_dout(dO->type());
Tensor trans_ids(indices->type());
phi::DenseTensor trans_dout(dO->type());
phi::DenseTensor trans_ids(indices->type());
trans_dout.Resize(trans_dims);
trans_ids.Resize(trans_dims);
TranposeNPU<T>(ctx, stream, &perm, *dO, &trans_dout);
TranposeNPU<int64_t>(ctx, stream, &perm, *indices, &trans_ids);
Tensor trans_dx(dO->type());
phi::DenseTensor trans_dx(dO->type());
trans_dx.Resize(trans_dims);
FullAssignNPU<T, int64_t>(
ctx, stream, trans_dims, trans_dout, trans_ids, &trans_dx);
......
......@@ -212,39 +212,41 @@ void AttentionLSTMOpMaker::Make() {
"this phi::DenseTensor is a matrix with shape (T X M), where T is the "
"total time steps in this mini-batch, M is the dim size of x.");
AddInput("C0",
"(Tensor) LSTM C0"
"(phi::DenseTensor) LSTM C0"
"This is a tensor with shape (N x D), where N is the batch size, D "
"is the gate size."
"C0 is necessary because of attention.");
AddInput("H0",
"(Tensor, optional) LSTM H0"
"(phi::DenseTensor, optional) LSTM H0"
"This is a tensor with shape (N x D), where N is the "
"batch size and D is the gate size.")
.AsDispensable();
AddInput("AttentionWeight",
"(Tensor) the weights of attention fc. Always relu the fc result."
"(phi::DenseTensor) the weights of attention fc. Always relu the fc "
"result."
"The shape is ((M+D) x 1), where M is the dim size of x, D is the "
"gate size of LSTM.");
AddInput("AttentionBias",
"(Tensor, optional) the bias of attention fc."
"(phi::DenseTensor, optional) the bias of attention fc."
"The shape is (1 x 1)")
.AsDispensable();
AddInput("AttentionScalar",
"(Tensor, optional) the scalar on the result of attentioned fc. "
"(phi::DenseTensor, optional) the scalar on the result of "
"attentioned fc. "
"Always relu the Scalar."
"The shape is (1 x 1)")
.AsDispensable();
AddInput("AttentionScalarBias",
"(Tensor, optional) the scalar bias of attention fc."
"(phi::DenseTensor, optional) the scalar bias of attention fc."
"The shape is (1 x 1)")
.AsDispensable();
AddInput("LSTMWeight",
"(Tensor) the combined weight of LSTM"
"(phi::DenseTensor) the combined weight of LSTM"
" - The shape is ((D+M) x 4D), where D is the hidden gate size, M "
"is the dim size of x"
" - Weight = {W_forget, W_input, W_output, W_cell}");
AddInput("LSTMBias",
"(Tensor) the combined bias of LSTM, shape (1x4D)."
"(phi::DenseTensor) the combined bias of LSTM, shape (1x4D)."
"Note: we should add the bias of hidden and context accorindg to "
"the same gate: "
"{B_forget, B_input, B_output, B_cell}");
......@@ -257,21 +259,22 @@ void AttentionLSTMOpMaker::Make() {
"(phi::DenseTensor) (same as LSTMOp) the cell state of LSTM operator. "
"The shape is (T x D), and lod is the same with the `Input`.");
AddOutput("AttentionedX",
"(Tensor) shape is (T x 1), the result after X * AttentionWeight,"
"(phi::DenseTensor) shape is (T x 1), the result after X * "
"AttentionWeight,"
" where T is the total time steps in this mini-batch,"
" D is the hidden size.")
.AsIntermediate();
AddOutput("AttentionFCOut",
"(Tensor) (max_seq_len, 1), compute at each step.")
"(phi::DenseTensor) (max_seq_len, 1), compute at each step.")
.AsIntermediate();
AddOutput("LSTMX",
"(Tensor) the input X of LSTM for each step."
"(phi::DenseTensor) the input X of LSTM for each step."
"Shape is (1 x M), where M is the x frame size")
.AsIntermediate();
AddOutput(
"LSTMOUT",
"(Tensor) the output of LSTM X(1*(D+M))* weight((D+M)*4D) for each step."
"Shape is (1 x 4D), where M is the x frame size")
AddOutput("LSTMOUT",
"(phi::DenseTensor) the output of LSTM X(1*(D+M))* "
"weight((D+M)*4D) for each step."
"Shape is (1 x 4D), where M is the x frame size")
.AsIntermediate();
AddAttr<std::string>("gate_activation",
"(string, default: sigmoid)"
......
......@@ -18,8 +18,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
class AttentionLSTMOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
......
......@@ -207,7 +207,7 @@ framework::OpKernelType BatchNormOp::GetExpectedKernelType(
framework::OpKernelType BatchNormOp::GetKernelTypeForVar(
const std::string &var_name,
const Tensor &tensor,
const phi::DenseTensor &tensor,
const framework::OpKernelType &expected_kernel_type) const {
#ifdef PADDLE_WITH_MKLDNN
// Only input require reshaping, weights and
......@@ -265,7 +265,7 @@ void BatchNormOpMaker::Make() {
"The global variance (for training) "
"or estimated Variance (for testing)");
AddInput("MomentumTensor",
"(Tensor<float32>, optional) If provided, batch_norm will "
"(phi::DenseTensor<float32>, optional) If provided, batch_norm will "
"use this as momentum, this has a higher priority than "
"attr(momentum), the shape of this tensor MUST BE [1].")
.AsDispensable();
......@@ -380,9 +380,9 @@ framework::OpKernelType BatchNormGradOp::GetExpectedKernelType(
PADDLE_THROW(
platform::errors::InvalidArgument("can't find gradient variable of Y"));
}
const Tensor *t = nullptr;
if (var->IsType<Tensor>()) {
t = &var->Get<Tensor>();
const phi::DenseTensor *t = nullptr;
if (var->IsType<phi::DenseTensor>()) {
t = &var->Get<phi::DenseTensor>();
} else if (var->IsType<phi::DenseTensor>()) {
t = &var->Get<phi::DenseTensor>();
}
......@@ -397,7 +397,7 @@ framework::OpKernelType BatchNormGradOp::GetExpectedKernelType(
framework::OpKernelType BatchNormGradOp::GetKernelTypeForVar(
const std::string &var_name,
const Tensor &tensor,
const phi::DenseTensor &tensor,
const framework::OpKernelType &expected_kernel_type) const {
#ifdef PADDLE_WITH_MKLDNN
// Only input require reshaping, weights and
......@@ -522,9 +522,9 @@ framework::OpKernelType BatchNormDoubleGradOp::GetExpectedKernelType(
PADDLE_THROW(
platform::errors::NotFound("cannot find gradient variable of Y"));
}
const Tensor *t = nullptr;
if (var->IsType<Tensor>()) {
t = &var->Get<Tensor>();
const phi::DenseTensor *t = nullptr;
if (var->IsType<phi::DenseTensor>()) {
t = &var->Get<phi::DenseTensor>();
} else if (var->IsType<phi::DenseTensor>()) {
t = &var->Get<phi::DenseTensor>();
}
......
......@@ -34,7 +34,6 @@ DECLARE_bool(cudnn_batchnorm_spatial_persistent);
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
using DataLayout = phi::DataLayout;
template <typename T>
using CudnnDataType = platform::CudnnDataType<T>;
......
......@@ -27,7 +27,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
using DataLayout = phi::DataLayout;
template <typename T>
......
......@@ -78,8 +78,8 @@ class MLUBatchNormOpKernel : public framework::OpKernel<T> {
saved_mean->mutable_data<MPDType>(place);
saved_variance->mutable_data<MPDType>(place);
Tensor transformed_x;
Tensor transformed_y;
phi::DenseTensor transformed_x;
phi::DenseTensor transformed_y;
const int transformed_dim_size = 4;
const int transformed_shape[transformed_dim_size] = {N, sample_size, 1, C};
MLUCnnlTensorDesc transformed_desc(transformed_dim_size,
......@@ -116,7 +116,7 @@ class MLUBatchNormOpKernel : public framework::OpKernel<T> {
if (ctx.HasInput("MomentumTensor")) {
const auto *mom_tensor = ctx.Input<phi::DenseTensor>("MomentumTensor");
Tensor mom_cpu;
phi::DenseTensor mom_cpu;
framework::TensorCopySync(*mom_tensor, platform::CPUPlace(), &mom_cpu);
momentum = mom_cpu.data<float>()[0];
}
......@@ -226,9 +226,9 @@ class MLUBatchNormGradOpKernel : public framework::OpKernel<T> {
: x_dims[x_dims.size() - 1]);
const int sample_size = x->numel() / N / C;
Tensor transformed_d_y;
Tensor transformed_x;
Tensor transformed_d_x;
phi::DenseTensor transformed_d_y;
phi::DenseTensor transformed_x;
phi::DenseTensor transformed_d_x;
const int transformed_dim_size = 4;
const int transformed_shape[transformed_dim_size] = {N, sample_size, 1, C};
......
......@@ -89,7 +89,7 @@ class NPUBatchNormOpKernel : public framework::OpKernel<T> {
// is only used in this training branch
if (ctx.HasInput("MomentumTensor")) {
const auto *mom_tensor = ctx.Input<phi::DenseTensor>("MomentumTensor");
Tensor mom_cpu;
phi::DenseTensor mom_cpu;
paddle::framework::TensorCopySync(
*mom_tensor, platform::CPUPlace(), &mom_cpu);
momentum = mom_cpu.data<float>()[0];
......
......@@ -18,8 +18,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
template <typename T>
class BCELossMLUKernel : public framework::OpKernel<T> {
public:
......
......@@ -18,8 +18,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
template <typename DeviceContext, typename T>
class BCELossNPUKernel : public framework::OpKernel<T> {
public:
......
......@@ -40,7 +40,7 @@ class CastOpProtoMaker : public framework::OpProtoAndCheckerMaker {
Cast Operator.
This Operator casts the input tensor to another data type and
returns the Output Tensor. It's meaningless if the output dtype equals
returns the Output phi::DenseTensor. It's meaningless if the output dtype equals
the input dtype, but it's fine if you do so.
)DOC");
......
......@@ -19,8 +19,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
template <typename T>
class CastMLUKernel : public framework::OpKernel<T> {
public:
......
......@@ -32,8 +32,6 @@ static std::map<framework::proto::VarType::Type, aclDataType>
{framework::proto::VarType::FP64, ACL_DOUBLE},
};
using Tensor = phi::DenseTensor;
template <typename DeviceContext, typename T>
class CastNPUKernel : public framework::OpKernel<T> {
public:
......
......@@ -26,7 +26,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
template <typename T,
int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
......@@ -81,7 +80,7 @@ class CenterLossKernel : public framework::OpKernel<T> {
auto loss_data = out_loss->mutable_data<T>(ctx.GetPlace());
Tensor centers_diffacc; // used to accumulate all diff
phi::DenseTensor centers_diffacc; // used to accumulate all diff
auto centers_diffacc_data =
centers_diffacc.mutable_data<T>(centers_dim, ctx.GetPlace());
int numel = centers_diffacc.numel();
......
......@@ -23,7 +23,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
// using SelectedRows = phi::SelectedRows;
template <typename T,
int MajorType = Eigen::RowMajor,
......
......@@ -18,8 +18,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
template <typename DeviceContext, typename T>
class NPUClipByNormKernel : public framework::OpKernel<T> {
public:
......@@ -48,7 +46,7 @@ class NPUClipByNormKernel : public framework::OpKernel<T> {
"Input(X) of ClipByNormOp should not be null. "
"Please check if it is created correctly."));
Tensor square_sum(input->type());
phi::DenseTensor square_sum(input->type());
square_sum.mutable_data<T>(framework::DDim({1}), place);
const auto& x_dims = input->dims();
std::vector<int> axis;
......@@ -62,12 +60,12 @@ class NPUClipByNormKernel : public framework::OpKernel<T> {
{{"axis", axis}, {"keep_dims", false}});
square_sum_runner.Run(stream);
Tensor x_norm(input->type());
phi::DenseTensor x_norm(input->type());
x_norm.mutable_data<T>(framework::DDim({1}), place);
const auto& x_norm_runner = NpuOpRunner("Sqrt", {square_sum}, {x_norm}, {});
x_norm_runner.Run(stream);
Tensor x_norm_t;
phi::DenseTensor x_norm_t;
framework::TensorCopySync(x_norm, platform::CPUPlace(), &x_norm_t);
auto x_norm_v = static_cast<float>(*x_norm_t.data<T>());
if (x_norm_v <= max_norm) {
......
......@@ -29,7 +29,7 @@ class ClipMLUKernel : public framework::OpKernel<T> {
auto max = static_cast<T>(ctx.Attr<float>("max"));
if (ctx.HasInput("Min")) {
Tensor min_cpu;
phi::DenseTensor min_cpu;
auto* min_tensor = ctx.Input<phi::DenseTensor>("Min");
auto* min_data = min_tensor->data<T>();
if (platform::is_mlu_place(min_tensor->place())) {
......@@ -41,7 +41,7 @@ class ClipMLUKernel : public framework::OpKernel<T> {
}
if (ctx.HasInput("Max")) {
Tensor max_cpu;
phi::DenseTensor max_cpu;
auto* max_tensor = ctx.Input<phi::DenseTensor>("Max");
auto* max_data = max_tensor->data<T>();
if (platform::is_mlu_place(max_tensor->place())) {
......@@ -80,7 +80,7 @@ class ClipGradMLUKernel : public framework::OpKernel<T> {
auto min_val = ctx.Attr<float>("min");
if (min_tensor) {
Tensor min_data;
phi::DenseTensor min_data;
framework::TensorCopy(
*min_tensor,
platform::CPUPlace(),
......@@ -91,7 +91,7 @@ class ClipGradMLUKernel : public framework::OpKernel<T> {
}
auto max_val = ctx.Attr<float>("max");
if (max_tensor) {
Tensor max_data;
phi::DenseTensor max_data;
framework::TensorCopy(
*max_tensor,
platform::CPUPlace(),
......
......@@ -18,8 +18,6 @@
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
template <typename DeviceContext, typename T>
class ClipNPUKernel : public framework::OpKernel<T> {
public:
......@@ -33,8 +31,8 @@ class ClipNPUKernel : public framework::OpKernel<T> {
auto max_tensor =
ctx.HasInput("Max") ? ctx.Input<phi::DenseTensor>("Max") : nullptr;
Tensor min_tensor_temp(x->type());
Tensor max_tensor_temp(x->type());
phi::DenseTensor min_tensor_temp(x->type());
phi::DenseTensor max_tensor_temp(x->type());
if (min_tensor == nullptr) {
auto min_value = static_cast<T>(ctx.Attr<float>("min"));
min_tensor_temp.mutable_data<T>({1}, ctx.GetPlace());
......@@ -74,7 +72,7 @@ class ClipGradNPUKernel : public framework::OpKernel<T> {
auto min_val = ctx.Attr<float>("min");
if (min_tensor) {
Tensor min_data;
phi::DenseTensor min_data;
framework::TensorCopy(
*min_tensor,
platform::CPUPlace(),
......@@ -86,7 +84,7 @@ class ClipGradNPUKernel : public framework::OpKernel<T> {
auto max_val = ctx.Attr<float>("max");
if (max_tensor) {
Tensor max_data;
phi::DenseTensor max_data;
framework::TensorCopy(
*max_tensor,
platform::CPUPlace(),
......
......@@ -61,7 +61,7 @@ struct FillConstantVisitor {
* = nullptr) const {
#ifdef PADDLE_WITH_ASCEND_CL
if (platform::is_npu_place(dev_ctx_.GetPlace())) {
Tensor tensor_tmp(framework::TransToPhiDataType(dtype_));
phi::DenseTensor tensor_tmp(framework::TransToPhiDataType(dtype_));
tensor_tmp.mutable_data<T>({1}, context_.GetPlace());
FillNpuTensorWithConstant<T>(&tensor_tmp, static_cast<T>(value_));
......
......@@ -151,10 +151,9 @@ class CAllReduceOpCPUKernel : public framework::OpKernel<T> {
inline bool ContainsNan(const paddle::platform::NPUDeviceContext& dev_ctx,
aclrtStream stream,
const phi::DenseTensor* in) {
using Tensor = phi::DenseTensor;
Tensor out(in->type());
phi::DenseTensor out(in->type());
Tensor mean(in->type());
phi::DenseTensor mean(in->type());
mean.Resize({1});
mean.mutable_data<float>(dev_ctx.GetPlace());
std::vector<int> axes;
......
......@@ -24,8 +24,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
static constexpr int kNumCUDAThreads = 512;
static constexpr int kNumMaxinumNumBlocks = 4096;
......@@ -126,7 +124,7 @@ struct CSoftmaxWithCrossEntropyFunctor<phi::GPUContext, T> {
const int N = phi::funcs::SizeToAxis(axis, logits_dims);
const int D = phi::funcs::SizeFromAxis(axis, logits_dims);
Tensor logits_2d, softmax_2d, loss_2d;
phi::DenseTensor logits_2d, softmax_2d, loss_2d;
logits_2d.ShareDataWith(*logits).Resize({N, D});
softmax_2d.ShareDataWith(*softmax).Resize({N, D});
loss_2d.ShareDataWith(*loss).Resize({N, 1});
......@@ -135,7 +133,7 @@ struct CSoftmaxWithCrossEntropyFunctor<phi::GPUContext, T> {
auto eigen_softmax = math::EigenMatrix<T>::From(softmax_2d);
// step 1, obtain logit_max
Tensor logits_max;
phi::DenseTensor logits_max;
logits_max = ctx.AllocateTmpTensor<T, phi::GPUContext>({N, 1}, dev_ctx);
void* logits_max_buff = logits_max.mutable_data<T>(place);
......@@ -163,7 +161,7 @@ struct CSoftmaxWithCrossEntropyFunctor<phi::GPUContext, T> {
.unaryExpr(math::ValueClip<T>());
// step 3, obtain predict target
Tensor predicted_logits;
phi::DenseTensor predicted_logits;
predicted_logits =
ctx.AllocateTmpTensor<T, phi::GPUContext>({N, 1}, dev_ctx);
predicted_logits.mutable_data<T>(place);
......@@ -215,7 +213,7 @@ struct CSoftmaxWithCrossEntropyFunctor<phi::GPUContext, T> {
eigen_softmax.device(*dev_ctx.eigen_device()) = eigen_softmax.exp();
// step 5, obtain sum_exp_logits
Tensor sum_exp_logits;
phi::DenseTensor sum_exp_logits;
sum_exp_logits = ctx.AllocateTmpTensor<T, phi::GPUContext>({N, 1}, dev_ctx);
void* sum_exp_logits_buff = sum_exp_logits.mutable_data<T>(place);
......@@ -278,7 +276,7 @@ struct CSoftmaxWithCrossEntropyProcessGroupFunctor<phi::GPUContext, T> {
const int N = phi::funcs::SizeToAxis(axis, logits_dims);
const int D = phi::funcs::SizeFromAxis(axis, logits_dims);
Tensor logits_2d, softmax_2d, loss_2d;
phi::DenseTensor logits_2d, softmax_2d, loss_2d;
logits_2d.ShareDataWith(*logits).Resize({N, D});
softmax_2d.ShareDataWith(*softmax).Resize({N, D});
loss_2d.ShareDataWith(*loss).Resize({N, 1});
......@@ -287,7 +285,7 @@ struct CSoftmaxWithCrossEntropyProcessGroupFunctor<phi::GPUContext, T> {
auto eigen_softmax = math::EigenMatrix<T>::From(softmax_2d);
// step 1, obtain logit_max
Tensor logits_max;
phi::DenseTensor logits_max;
logits_max = ctx.AllocateTmpTensor<T, phi::GPUContext>({N, 1}, dev_ctx);
auto eigen_logits_max = math::EigenMatrix<T>::From(logits_max);
......@@ -309,7 +307,7 @@ struct CSoftmaxWithCrossEntropyProcessGroupFunctor<phi::GPUContext, T> {
.unaryExpr(math::ValueClip<T>());
// step 3, obtain predict target
Tensor predicted_logits;
phi::DenseTensor predicted_logits;
predicted_logits =
ctx.AllocateTmpTensor<T, phi::GPUContext>({N, 1}, dev_ctx);
predicted_logits.mutable_data<T>(place);
......@@ -355,7 +353,7 @@ struct CSoftmaxWithCrossEntropyProcessGroupFunctor<phi::GPUContext, T> {
eigen_softmax.device(*dev_ctx.eigen_device()) = eigen_softmax.exp();
// step 5, obtain sum_exp_logits
Tensor sum_exp_logits;
phi::DenseTensor sum_exp_logits;
sum_exp_logits = ctx.AllocateTmpTensor<T, phi::GPUContext>({N, 1}, dev_ctx);
void* sum_exp_logits_buff = sum_exp_logits.mutable_data<T>(place);
......@@ -405,7 +403,7 @@ class CSoftmaxWithCrossEntropyGradCUDAKernel : public framework::OpKernel<T> {
const int N = phi::funcs::SizeToAxis(axis, sofrmax_dims);
const int D = phi::funcs::SizeFromAxis(axis, sofrmax_dims);
Tensor logit_grad_2d;
phi::DenseTensor logit_grad_2d;
logit_grad_2d.ShareDataWith(*logit_grad).Resize({N, D});
int blocks = NumBlocks(N * D);
......
......@@ -26,7 +26,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
class ConcatOp : public framework::OperatorWithKernel {
public:
......
......@@ -119,7 +119,7 @@ class ConcatGradMLUKernel : public framework::OpKernel<T> {
out_grad->dims().size()));
// get output tensor that the name is not kEmptyVarName
std::vector<void*> outputs_vec;
std::vector<Tensor> tmp_outputs_vec;
std::vector<phi::DenseTensor> tmp_outputs_vec;
std::vector<MLUCnnlTensorDesc> output_descs;
std::vector<cnnlTensorDescriptor_t> descs_vec;
for (size_t j = 0; j < outs.size(); ++j) {
......@@ -129,7 +129,7 @@ class ConcatGradMLUKernel : public framework::OpKernel<T> {
output_descs.emplace_back(MLUCnnlTensorDesc(*outs[j]));
outputs_vec.push_back(GetBasePtr(outs[j]));
} else {
Tensor tmp_tensor;
phi::DenseTensor tmp_tensor;
tmp_tensor.mutable_data<T>(ins[j]->dims(), ctx.GetPlace());
tmp_outputs_vec.push_back(tmp_tensor);
output_descs.emplace_back(MLUCnnlTensorDesc(*ins[j]));
......
......@@ -18,8 +18,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
template <typename T, cnnlLogicOp_t log_method>
class LogicalMLUKernel : public framework::OpKernel<T> {
public:
......
......@@ -15,8 +15,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
template <typename DeviceContext, typename T>
class LogicalNotNPUKernel : public framework::OpKernel<T> {
public:
......
......@@ -29,8 +29,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
// Base convolution operator definations for other conv
// like operators to reuse the implementation.
inline int ConvOutputSize(
......
......@@ -18,7 +18,6 @@
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
using DataLayout = phi::DataLayout;
template <typename T>
......@@ -56,8 +55,8 @@ class MLUConvOpKernel : public framework::OpKernel<T> {
UpdatePaddingAndDilation(
&paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize);
Tensor input_tensor(input->type());
Tensor output_tensor(output->type());
phi::DenseTensor input_tensor(input->type());
phi::DenseTensor output_tensor(output->type());
const std::vector<int> perm_to_nhwc = {0, 2, 3, 1};
if (channel_last) {
input_tensor.ShareDataWith(*input);
......@@ -78,7 +77,7 @@ class MLUConvOpKernel : public framework::OpKernel<T> {
output_tensor.set_layout(DataLayout::kNHWC);
// transpose filter from MCHW to MHWC
Tensor trans_filter(filter->type());
phi::DenseTensor trans_filter(filter->type());
TransposeFromMLUTensor<T>(ctx,
perm_to_nhwc,
filter,
......@@ -166,8 +165,8 @@ class MLUConvGradOpKernel : public framework::OpKernel<T> {
UpdatePaddingAndDilation(
&paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize);
Tensor input_tensor(input->type());
Tensor output_grad_tensor(output_grad->type());
phi::DenseTensor input_tensor(input->type());
phi::DenseTensor output_grad_tensor(output_grad->type());
const std::vector<int> perm_to_nhwc = {0, 2, 3, 1};
const std::vector<int> perm_to_nchw = {0, 3, 1, 2};
if (channel_last) {
......@@ -193,7 +192,7 @@ class MLUConvGradOpKernel : public framework::OpKernel<T> {
filter_grad->mutable_data<T>(ctx.GetPlace());
auto filter_grad_dims = filter_grad->dims();
Tensor temp_filter_grad(filter_grad->type());
phi::DenseTensor temp_filter_grad(filter_grad->type());
temp_filter_grad.mutable_data<T>({filter_grad_dims[0],
filter_grad_dims[2],
filter_grad_dims[3],
......@@ -234,7 +233,7 @@ class MLUConvGradOpKernel : public framework::OpKernel<T> {
if (input_grad) {
input_grad->mutable_data<T>(ctx.GetPlace());
Tensor input_grad_tensor(input_grad->type());
phi::DenseTensor input_grad_tensor(input_grad->type());
if (channel_last) {
input_grad_tensor.ShareDataWith(*input_grad);
} else {
......@@ -248,7 +247,7 @@ class MLUConvGradOpKernel : public framework::OpKernel<T> {
input_grad_tensor.set_layout(DataLayout::kNHWC);
// transpose filter from MCHW to MHWC
Tensor trans_filter(filter->type());
phi::DenseTensor trans_filter(filter->type());
TransposeFromMLUTensor<T>(ctx,
perm_to_nhwc,
filter,
......@@ -326,8 +325,8 @@ class MLUDepthwiseConvOpKernel : public framework::OpKernel<T> {
UpdatePaddingAndDilation(
&paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize);
Tensor input_tensor(input->type());
Tensor output_tensor(output->type());
phi::DenseTensor input_tensor(input->type());
phi::DenseTensor output_tensor(output->type());
const std::vector<int> perm_to_nhwc = {0, 2, 3, 1};
if (channel_last) {
groups = in_dims[3];
......@@ -350,7 +349,7 @@ class MLUDepthwiseConvOpKernel : public framework::OpKernel<T> {
output_tensor.set_layout(DataLayout::kNHWC);
// transpose filter from MCHW to MHWC
Tensor trans_filter(filter->type());
phi::DenseTensor trans_filter(filter->type());
TransposeFromMLUTensor<T>(ctx,
perm_to_nhwc,
filter,
......@@ -438,8 +437,8 @@ class MLUDepthwiseConvGradOpKernel : public framework::OpKernel<T> {
UpdatePaddingAndDilation(
&paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize);
Tensor input_tensor(input->type());
Tensor output_grad_tensor(output_grad->type());
phi::DenseTensor input_tensor(input->type());
phi::DenseTensor output_grad_tensor(output_grad->type());
const std::vector<int> perm_to_nhwc = {0, 2, 3, 1};
const std::vector<int> perm_to_nchw = {0, 3, 1, 2};
const std::vector<int> perm_hwcm_to_mchw = {3, 2, 0, 1};
......@@ -469,7 +468,7 @@ class MLUDepthwiseConvGradOpKernel : public framework::OpKernel<T> {
filter_grad->mutable_data<T>(ctx.GetPlace());
auto filter_grad_dims = filter_grad->dims();
Tensor temp_filter_grad(filter_grad->type());
phi::DenseTensor temp_filter_grad(filter_grad->type());
// Details about setting diff_w hwcn for better performance, see the CNNL
// documentation.
temp_filter_grad.mutable_data<T>({filter_grad_dims[perm_mchw_to_hwcm[0]],
......@@ -512,7 +511,7 @@ class MLUDepthwiseConvGradOpKernel : public framework::OpKernel<T> {
if (input_grad) {
input_grad->mutable_data<T>(ctx.GetPlace());
Tensor input_grad_tensor(input_grad->type());
phi::DenseTensor input_grad_tensor(input_grad->type());
if (channel_last) {
input_grad_tensor.ShareDataWith(*input_grad);
} else {
......@@ -526,7 +525,7 @@ class MLUDepthwiseConvGradOpKernel : public framework::OpKernel<T> {
input_grad_tensor.set_layout(DataLayout::kNHWC);
// transpose filter from MCHW to MHWC
Tensor trans_filter(filter->type());
phi::DenseTensor trans_filter(filter->type());
TransposeFromMLUTensor<T>(ctx,
perm_to_nhwc,
filter,
......
......@@ -18,7 +18,6 @@
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
using NPUDeviceContext = platform::NPUDeviceContext;
static void CastToFP16(const framework::ExecutionContext& ctx,
const aclrtStream& stream,
......@@ -104,7 +103,7 @@ class DepthwiseConvNPUKernel : public framework::OpKernel<T> {
std::vector<int> strides(4, 1);
std::vector<int> dilations(4, 1);
Tensor input_tensor, output_tensor;
phi::DenseTensor input_tensor, output_tensor;
input_tensor.ShareDataWith(*input);
output_tensor.ShareDataWith(*output);
......@@ -125,7 +124,7 @@ class DepthwiseConvNPUKernel : public framework::OpKernel<T> {
auto stream = ctx.template device_context<NPUDeviceContext>().stream();
// Transform filter (n, 1, h, w) --> (1, n, h, w)
Tensor transformed_filter(filter->type());
phi::DenseTensor transformed_filter(filter->type());
transformed_filter.mutable_data<T>({filter->dims()[1],
filter->dims()[0],
filter->dims()[2],
......@@ -189,7 +188,7 @@ class DepthwiseConvGradNPUKernel : public framework::OpKernel<T> {
auto stream = ctx.template device_context<NPUDeviceContext>().stream();
// Transform filter (n, 1, h, w) --> (1, n, h, w)
Tensor transformed_filter(filter->type());
phi::DenseTensor transformed_filter(filter->type());
transformed_filter.mutable_data<T>({filter->dims()[1],
filter->dims()[0],
filter->dims()[2],
......@@ -204,7 +203,7 @@ class DepthwiseConvGradNPUKernel : public framework::OpKernel<T> {
std::vector<int> strides(4, 1);
std::vector<int> dilations(4, 1);
Tensor input_tensor, output_grad_tensor;
phi::DenseTensor input_tensor, output_grad_tensor;
input_tensor.ShareDataWith(*input);
output_grad_tensor.ShareDataWith(*output_grad);
if (channel_last) {
......@@ -247,7 +246,7 @@ class DepthwiseConvGradNPUKernel : public framework::OpKernel<T> {
}
if (input_grad) {
input_grad->mutable_data<T>(ctx.GetPlace());
Tensor input_grad_tensor;
phi::DenseTensor input_grad_tensor;
input_grad_tensor.ShareDataWith(*input_grad);
if (channel_last) {
input_grad_tensor.set_layout(DataLayout::kNHWC);
......@@ -305,7 +304,7 @@ class NPUConvOpKernel : public framework::OpKernel<T> {
std::vector<int> strides_vec(4, 1);
std::vector<int> dilations_vec(4, 1);
Tensor input_tensor, output_tensor;
phi::DenseTensor input_tensor, output_tensor;
input_tensor.ShareDataWith(*input);
output_tensor.ShareDataWith(*output);
if (channel_last) {
......@@ -378,7 +377,7 @@ class NPUConvGradOpKernel : public framework::OpKernel<T> {
std::vector<int> strides_vec(4, 1);
std::vector<int> dilations_vec(4, 1);
Tensor input_tensor, output_grad_tensor;
phi::DenseTensor input_tensor, output_grad_tensor;
input_tensor.ShareDataWith(*input);
output_grad_tensor.ShareDataWith(*output_grad);
if (channel_last) {
......@@ -400,7 +399,7 @@ class NPUConvGradOpKernel : public framework::OpKernel<T> {
filter_grad->mutable_data<T>(ctx.GetPlace());
std::vector<int> filter_shape_vec = phi::vectorize<int>(filter->dims());
Tensor filter_grad_fp32(experimental::DataType::FLOAT32);
phi::DenseTensor filter_grad_fp32(experimental::DataType::FLOAT32);
filter_grad_fp32.Resize(filter_grad->dims());
if (framework::TransToProtoVarType(input->dtype()) ==
......@@ -430,7 +429,7 @@ class NPUConvGradOpKernel : public framework::OpKernel<T> {
input_grad->mutable_data<T>(ctx.GetPlace());
std::vector<int> input_shape_vec = phi::vectorize<int>(input->dims());
Tensor input_grad_tensor;
phi::DenseTensor input_grad_tensor;
input_grad_tensor.ShareDataWith(*input_grad);
if (channel_last) {
input_grad_tensor.set_layout(DataLayout::kNHWC);
......@@ -617,8 +616,9 @@ class NPUConv3dGradKernel : public framework::OpKernel<T> {
filter_grad->mutable_data<T>(ctx.GetPlace());
std::vector<int> filter_shape_vec = phi::vectorize<int>(filter->dims());
Tensor filter_grad_tensor = ctx.AllocateTmpTensor<T, NPUDeviceContext>(
filter_grad->dims(), dev_ctx);
phi::DenseTensor filter_grad_tensor =
ctx.AllocateTmpTensor<T, NPUDeviceContext>(filter_grad->dims(),
dev_ctx);
filter_grad_tensor.ShareDataWith(*filter_grad);
filter_grad_tensor.set_layout(DataLayout::kNCDHW);
......@@ -638,8 +638,9 @@ class NPUConv3dGradKernel : public framework::OpKernel<T> {
input_grad->mutable_data<T>(ctx.GetPlace());
std::vector<int> input_shape_vec = phi::vectorize<int>(input->dims());
Tensor input_grad_tensor = ctx.AllocateTmpTensor<T, NPUDeviceContext>(
input_grad->dims(), dev_ctx);
phi::DenseTensor input_grad_tensor =
ctx.AllocateTmpTensor<T, NPUDeviceContext>(input_grad->dims(),
dev_ctx);
input_grad_tensor.ShareDataWith(*input_grad);
input_grad_tensor.set_layout(DataLayout::kNCDHW);
......
......@@ -20,7 +20,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
using DataLayout = phi::DataLayout;
template <typename T>
......@@ -61,8 +60,8 @@ class Conv2DTransposeMLUKernel : public framework::OpKernel<T> {
phi::UpdatePaddingAndDilation(
&paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize);
Tensor input_tensor(input->type());
Tensor output_tensor(output->type());
phi::DenseTensor input_tensor(input->type());
phi::DenseTensor output_tensor(output->type());
input_tensor.set_layout(DataLayout::kNHWC);
output_tensor.set_layout(DataLayout::kNHWC);
const std::vector<int> perm_to_nhwc = {0, 2, 3, 1};
......@@ -84,7 +83,7 @@ class Conv2DTransposeMLUKernel : public framework::OpKernel<T> {
}
// transpose filter from MCHW to MHWC
Tensor trans_filter(filter->type());
phi::DenseTensor trans_filter(filter->type());
TransposeFromMLUTensor<T>(ctx,
perm_to_nhwc,
filter,
......@@ -168,8 +167,8 @@ class Conv2DTransposeGradMLUKernel : public framework::OpKernel<T> {
phi::UpdatePaddingAndDilation(
&paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize);
Tensor input_tensor(input->type());
Tensor output_grad_tensor(output_grad->type());
phi::DenseTensor input_tensor(input->type());
phi::DenseTensor output_grad_tensor(output_grad->type());
output_grad_tensor.set_layout(DataLayout::kNHWC);
const std::vector<int> perm_to_nhwc = {0, 2, 3, 1};
......@@ -191,7 +190,7 @@ class Conv2DTransposeGradMLUKernel : public framework::OpKernel<T> {
}
// transpose filter from MCHW to MHWC
Tensor trans_filter(filter->type());
phi::DenseTensor trans_filter(filter->type());
TransposeFromMLUTensor<T>(ctx,
perm_to_nhwc,
filter,
......@@ -217,7 +216,7 @@ class Conv2DTransposeGradMLUKernel : public framework::OpKernel<T> {
if (filter_grad) {
filter_grad->mutable_data<T>(ctx.GetPlace());
Tensor filter_grad_tensor(filter_grad->type());
phi::DenseTensor filter_grad_tensor(filter_grad->type());
// filter_grad always MCHW
// filter_grad_tensor always MHWC
auto filter_grad_dims = filter_grad->dims();
......@@ -253,7 +252,7 @@ class Conv2DTransposeGradMLUKernel : public framework::OpKernel<T> {
if (input_grad) {
input_grad->mutable_data<T>(ctx.GetPlace());
Tensor input_grad_tensor(input_grad->type());
phi::DenseTensor input_grad_tensor(input_grad->type());
input_tensor.set_layout(DataLayout::kNHWC);
if (channel_last) {
......
......@@ -20,7 +20,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
using NPUDeviceContext = platform::NPUDeviceContext;
template <typename T>
......@@ -65,7 +64,7 @@ class Conv2DTransposeNPUKernel : public framework::OpKernel<T> {
std::vector<int> strides(4, 1);
std::vector<int> dilations(4, 1);
Tensor input_tensor, output_tensor;
phi::DenseTensor input_tensor, output_tensor;
input_tensor.ShareDataWith(*input);
output_tensor.ShareDataWith(*output);
......@@ -148,7 +147,7 @@ class Conv2DTransposeGradNPUKernel : public framework::OpKernel<T> {
std::vector<int> strides_vec(4, 1);
std::vector<int> dilations_vec(4, 1);
Tensor input_tensor, output_grad_tensor;
phi::DenseTensor input_tensor, output_grad_tensor;
input_tensor.ShareDataWith(*input);
output_grad_tensor.ShareDataWith(*output_grad);
if (channel_last) {
......@@ -182,7 +181,7 @@ class Conv2DTransposeGradNPUKernel : public framework::OpKernel<T> {
}
if (input_grad) {
input_grad->mutable_data<T>(ctx.GetPlace());
Tensor input_grad_tensor;
phi::DenseTensor input_grad_tensor;
input_grad_tensor.ShareDataWith(*input_grad);
if (channel_last) {
input_grad_tensor.set_layout(DataLayout::kNHWC);
......@@ -248,7 +247,7 @@ class Conv3DTransposeNPUKernel : public framework::OpKernel<T> {
std::vector<int> strides(5, 1);
std::vector<int> dilations(5, 1);
Tensor input_tensor, output_tensor, filter_tensor;
phi::DenseTensor input_tensor, output_tensor, filter_tensor;
input_tensor.Resize(input->dims());
input_tensor.ShareDataWith(*input);
output_tensor.Resize(output->dims());
......
......@@ -30,8 +30,6 @@ class OpBase;
} // namespace imperative
} // namespace paddle
using Tensor = phi::DenseTensor;
namespace paddle {
namespace operators {
......
......@@ -22,8 +22,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
inline std::vector<int64_t> CorrelationOutputSize(int batch,
int input_height,
int input_width,
......
......@@ -21,13 +21,11 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
template <typename DeviceContext, typename T>
class CosSimKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
// get Tensor
// get phi::DenseTensor
auto* in_x = context.Input<phi::DenseTensor>("X");
auto* in_y = context.Input<phi::DenseTensor>("Y");
auto* out_z = context.Output<phi::DenseTensor>("Out");
......@@ -74,7 +72,7 @@ template <typename DeviceContext, typename T>
class CosSimGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
// get Tensor
// get phi::DenseTensor
auto* in_x = context.Input<phi::DenseTensor>("X");
auto* in_y = context.Input<phi::DenseTensor>("Y");
auto* in_z = context.Input<phi::DenseTensor>("Out");
......
......@@ -18,8 +18,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
template <typename DeviceContext, typename T>
class CropNPUKernel : public framework::OpKernel<T> {
public:
......@@ -71,7 +69,7 @@ class CropNPUKernel : public framework::OpKernel<T> {
x->dims().size()));
// shape memory maybe have gc.
Tensor tmp_shape(*shape);
phi::DenseTensor tmp_shape(*shape);
tmp_shape.mutable_data<T>(ctx.GetPlace());
const auto& runner =
......@@ -90,7 +88,7 @@ class CropNPUKernel : public framework::OpKernel<T> {
"(%d) of the Input(X).",
shape_size.size(),
x->dims().size()));
Tensor tmp_shape(x->dtype());
phi::DenseTensor tmp_shape(x->dtype());
tmp_shape.Resize(phi::make_ddim(shape_size));
tmp_shape.mutable_data<T>(ctx.GetPlace());
const auto& runner =
......
......@@ -23,8 +23,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
template <typename DeviceContext, typename T>
class CrossEntropyOpKernel : public framework::OpKernel<T> {
public:
......@@ -36,8 +34,8 @@ class CrossEntropyOpKernel : public framework::OpKernel<T> {
int rank = x->dims().size();
auto label_dims = labels->dims();
Tensor x_2d = framework::ReshapeToMatrix(*x, rank - 1);
Tensor labels_2d, y_2d;
phi::DenseTensor x_2d = framework::ReshapeToMatrix(*x, rank - 1);
phi::DenseTensor labels_2d, y_2d;
if (label_dims.size() < rank) {
labels_2d.ShareDataWith(*labels);
labels_2d.Resize({phi::product(label_dims), 1});
......
......@@ -24,8 +24,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
template <typename DeviceContext, typename T>
class CTCAlignKernel : public framework::OpKernel<T> {
public:
......
......@@ -26,8 +26,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
template <typename T, typename Type>
bool is_continuous(const Type &weight_list) {
bool continuous = true;
......@@ -41,7 +39,7 @@ bool is_continuous(const Type &weight_list) {
return continuous;
}
int size_sum(const std::vector<const Tensor *> &weight_list) {
int size_sum(const std::vector<const phi::DenseTensor *> &weight_list) {
int size = 0;
for (size_t i = 0; i < weight_list.size(); ++i) {
auto in_size = weight_list[i]->numel();
......@@ -53,8 +51,8 @@ int size_sum(const std::vector<const Tensor *> &weight_list) {
template <typename T>
void weight_to_tensor(const platform::Place &place,
gpuStream_t stream,
const std::vector<const Tensor *> &weight_list,
Tensor *weight) {
const std::vector<const phi::DenseTensor *> &weight_list,
phi::DenseTensor *weight) {
auto weight_data = weight->data<T>();
int weight_offset = 0;
for (size_t i = 0; i < weight_list.size(); ++i) {
......@@ -72,11 +70,12 @@ void weight_to_tensor(const platform::Place &place,
}
template <typename T>
void weight_to_tensor_list(const platform::Place &place,
gpuStream_t stream,
std::vector<Tensor *> *weight_grad,
const std::vector<const Tensor *> &weight_input,
const Tensor *weight) {
void weight_to_tensor_list(
const platform::Place &place,
gpuStream_t stream,
std::vector<phi::DenseTensor *> *weight_grad,
const std::vector<const phi::DenseTensor *> &weight_input,
const phi::DenseTensor *weight) {
int weight_offset = 0;
auto *weight_data = weight->data<T>();
for (size_t i = 0; i < weight_input.size(); ++i) {
......@@ -204,15 +203,15 @@ template <typename T>
class CudnnLSTMGPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
const Tensor *x = ctx.Input<phi::DenseTensor>("Input");
const Tensor *init_h = ctx.Input<phi::DenseTensor>("InitH");
const Tensor *init_c = ctx.Input<phi::DenseTensor>("InitC");
const phi::DenseTensor *x = ctx.Input<phi::DenseTensor>("Input");
const phi::DenseTensor *init_h = ctx.Input<phi::DenseTensor>("InitH");
const phi::DenseTensor *init_c = ctx.Input<phi::DenseTensor>("InitC");
Tensor *out = ctx.Output<phi::DenseTensor>("Out");
Tensor *last_h = ctx.Output<phi::DenseTensor>("LastH");
Tensor *last_c = ctx.Output<phi::DenseTensor>("LastC");
Tensor *reserve = ctx.Output<phi::DenseTensor>("Reserve");
Tensor *state_out = ctx.Output<phi::DenseTensor>("StateOut");
phi::DenseTensor *out = ctx.Output<phi::DenseTensor>("Out");
phi::DenseTensor *last_h = ctx.Output<phi::DenseTensor>("LastH");
phi::DenseTensor *last_c = ctx.Output<phi::DenseTensor>("LastC");
phi::DenseTensor *reserve = ctx.Output<phi::DenseTensor>("Reserve");
phi::DenseTensor *state_out = ctx.Output<phi::DenseTensor>("StateOut");
const T *x_data = x->data<T>();
const T *init_h_data = init_h->data<T>();
......@@ -256,7 +255,7 @@ class CudnnLSTMGPUKernel : public framework::OpKernel<T> {
size_t workspace_size;
size_t reserve_size;
Tensor weight_whole;
phi::DenseTensor weight_whole;
T *w_data = nullptr;
int weight_numel;
bool w_initialized = false;
......@@ -272,7 +271,7 @@ class CudnnLSTMGPUKernel : public framework::OpKernel<T> {
if (!w_initialized) {
auto weight_list = ctx.MultiInput<phi::DenseTensor>("WeightList");
bool continuous =
is_continuous<T, std::vector<const Tensor *>>(weight_list);
is_continuous<T, std::vector<const phi::DenseTensor *>>(weight_list);
weight_numel = size_sum(weight_list);
if (!continuous) {
......@@ -288,7 +287,7 @@ class CudnnLSTMGPUKernel : public framework::OpKernel<T> {
for (size_t i = 0; i < weight_list.size(); ++i) {
size_t len = weight_list[i]->numel();
auto dim = weight_list[i]->dims();
const_cast<Tensor *>(weight_list[i])
const_cast<phi::DenseTensor *>(weight_list[i])
->ShareDataWith(
weight_whole.Slice(static_cast<int64_t>(offset),
static_cast<int64_t>(offset + len)))
......@@ -481,12 +480,12 @@ class CudnnLSTMGPUGradKernel : public framework::OpKernel<T> {
auto place = ctx.GetPlace();
int weight_numel = size_sum(weight_list);
bool continuous =
is_continuous<T, std::vector<const Tensor *>>(weight_list);
is_continuous<T, std::vector<const phi::DenseTensor *>>(weight_list);
auto stream =
reinterpret_cast<const phi::GPUContext &>(ctx.device_context())
.stream();
Tensor weight_whole;
phi::DenseTensor weight_whole;
T *weight_data = nullptr;
if (!continuous) {
......@@ -497,7 +496,7 @@ class CudnnLSTMGPUGradKernel : public framework::OpKernel<T> {
weight_data = const_cast<T *>(weight_list[0]->data<T>());
}
Tensor weight_grad;
phi::DenseTensor weight_grad;
phi::funcs::SetConstant<phi::GPUContext, T> zero;
weight_grad.mutable_data<T>({weight_numel}, ctx.GetPlace());
zero(dev_ctx, &weight_grad, static_cast<T>(0.0));
......@@ -559,7 +558,7 @@ class CudnnLSTMGPUGradKernel : public framework::OpKernel<T> {
SequenceLength,
&workspace_size,
&reserve_size,
const_cast<Tensor *>(state_out));
const_cast<phi::DenseTensor *>(state_out));
phi::DenseTensor workspace_data_;
workspace_data_.mutable_data<uint8_t>(
......
......@@ -18,8 +18,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
template <typename T>
class CumSumMLUKernel : public framework::OpKernel<T> {
public:
......@@ -34,7 +32,7 @@ class CumSumMLUKernel : public framework::OpKernel<T> {
out->mutable_data<T>(ctx.GetPlace());
phi::DenseTensor* input_ptr = const_cast<phi::DenseTensor*>(x);
Tensor flat_x(x->type());
phi::DenseTensor flat_x(x->type());
if (flatten) {
PADDLE_ENFORCE_EQ(
axis,
......
......@@ -19,8 +19,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
static void CumsumImp(const phi::DenseTensor& input,
phi::DenseTensor* output,
const framework::NPUAttributeMap& attr_input,
......@@ -30,7 +28,7 @@ static void CumsumImp(const phi::DenseTensor& input,
.stream();
if (framework::TransToProtoVarType(input.dtype()) ==
framework::proto::VarType::INT64) {
Tensor tmp_input;
phi::DenseTensor tmp_input;
tmp_input.mutable_data<float>(input.dims(), ctx.GetPlace());
auto dst_acl_dtype =
ConvertToNpuDtype(framework::TransToProtoVarType(tmp_input.type()));
......@@ -41,7 +39,7 @@ static void CumsumImp(const phi::DenseTensor& input,
{{"dst_type", static_cast<int>(dst_acl_dtype)}});
cast_runner_1.Run(stream);
Tensor tmp_output;
phi::DenseTensor tmp_output;
tmp_output.mutable_data<float>(output->dims(), ctx.GetPlace());
const auto& runner =
NpuOpRunner("CumsumD", {tmp_input}, {tmp_output}, attr_input);
......@@ -86,7 +84,7 @@ class CumSumNPUKernel : public framework::OpKernel<T> {
-1,
axis));
Tensor new_x(x->type());
phi::DenseTensor new_x(x->type());
new_x.ShareDataWith(*x);
new_x.Resize(phi::make_ddim({x->numel()}));
......
......@@ -21,8 +21,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
class CVMOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
......
......@@ -22,7 +22,6 @@ namespace paddle {
namespace operators {
using phi::PADDLE_CUDA_NUM_THREADS;
using Tensor = phi::DenseTensor;
template <typename T>
__global__ void CvmComputeKernel(const bool use_cvm,
......
......@@ -19,8 +19,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
template <typename T>
void CvmComputeKernel(const bool use_cvm,
const int64_t item_width,
......
......@@ -23,7 +23,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
using DataLayout = phi::DataLayout;
template <typename T>
......@@ -483,9 +482,9 @@ class DataNormGradOp : public framework::OperatorWithKernel {
PADDLE_THROW(platform::errors::InvalidArgument(
"Y@GRAD can not be found for computation"));
}
const Tensor *t = nullptr;
if (var->IsType<Tensor>()) {
t = &var->Get<Tensor>();
const phi::DenseTensor *t = nullptr;
if (var->IsType<phi::DenseTensor>()) {
t = &var->Get<phi::DenseTensor>();
} else if (var->IsType<phi::DenseTensor>()) {
t = &var->Get<phi::DenseTensor>();
}
......@@ -523,7 +522,7 @@ class DataNormGradKernel<phi::CPUContext, T> : public framework::OpKernel<T> {
(data_layout == DataLayout::kNCHW ? x_dims[1]
: x_dims[x_dims.size() - 1]);
// init output
Tensor *d_x = nullptr;
phi::DenseTensor *d_x = nullptr;
if (ctx.HasOutput(framework::GradVarName("X"))) {
d_x = ctx.Output<phi::DenseTensor>(framework::GradVarName("X"));
}
......@@ -587,12 +586,12 @@ class DataNormGradKernel<phi::CPUContext, T> : public framework::OpKernel<T> {
EigenVectorArrayMap<T> d_bias_arr(d_bias_data, C);
EigenVectorArrayMap<T> d_scale_arr(d_scale_data, C);
Tensor dy_sum;
phi::DenseTensor dy_sum;
dy_sum.Resize({C});
dy_sum.mutable_data<T>(ctx.GetPlace());
EigenVectorArrayMap<T> dy_sum_arr(
dy_sum.mutable_data<T>(ctx.GetPlace()), C);
Tensor dy_mul_x_sub_mean_mul_invstd_sum;
phi::DenseTensor dy_mul_x_sub_mean_mul_invstd_sum;
dy_mul_x_sub_mean_mul_invstd_sum.Resize({C});
dy_mul_x_sub_mean_mul_invstd_sum.mutable_data<T>(ctx.GetPlace());
EigenVectorArrayMap<T> dy_mul_x_sub_mean_mul_invstd_sum_arr(
......
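The CPU gradient kernel above accumulates two per-channel reductions (dy_sum and dy_mul_x_sub_mean_mul_invstd_sum) before forming the parameter gradients. A plain-loop sketch of those reductions on a row-major [N, C] buffer (names are illustrative; the inverse-std factor is left out here for brevity):

#include <cstdio>
#include <vector>

// dy_sum[c]                = sum over rows of d_y[n][c]
// dy_mul_x_sub_mean_sum[c] = sum over rows of d_y[n][c] * (x[n][c] - mean[c])
void ChannelReductions(const std::vector<float>& x,
                       const std::vector<float>& d_y,
                       const std::vector<float>& mean, int n, int c,
                       std::vector<float>* dy_sum,
                       std::vector<float>* dy_mul_x_sub_mean_sum) {
  dy_sum->assign(c, 0.f);
  dy_mul_x_sub_mean_sum->assign(c, 0.f);
  for (int i = 0; i < n; ++i)
    for (int j = 0; j < c; ++j) {
      float dy = d_y[i * c + j];
      (*dy_sum)[j] += dy;
      (*dy_mul_x_sub_mean_sum)[j] += dy * (x[i * c + j] - mean[j]);
    }
}

int main() {
  std::vector<float> x = {1, 2, 3, 4}, d_y = {0.5f, 1.f, 0.5f, 1.f};
  std::vector<float> mean = {2, 3}, s1, s2;
  ChannelReductions(x, d_y, mean, /*n=*/2, /*c=*/2, &s1, &s2);
  std::printf("dy_sum = [%g, %g]\n", s1[0], s1[1]);
  std::printf("dy_mul_x_sub_mean_sum = [%g, %g]\n", s2[0], s2[1]);
  return 0;
}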
......@@ -26,7 +26,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
using DataLayout = phi::DataLayout;
using phi::PADDLE_CUDA_NUM_THREADS;
......@@ -166,7 +165,7 @@ class DataNormGradKernel<phi::GPUContext, T> : public framework::OpKernel<T> {
const int C = x_dims[1];
// init output
Tensor *d_x = nullptr;
phi::DenseTensor *d_x = nullptr;
if (ctx.HasOutput(framework::GradVarName("X"))) {
d_x = ctx.Output<phi::DenseTensor>(framework::GradVarName("X"));
}
......
......@@ -18,8 +18,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
template <typename T>
class DeformableConvMLUKernel : public framework::OpKernel<T> {
public:
......@@ -58,29 +56,29 @@ class DeformableConvMLUKernel : public framework::OpKernel<T> {
im2col_step);
const std::vector<int> perm_to_nhwc = {0, 2, 3, 1};
Tensor trans_input(input->dtype());
phi::DenseTensor trans_input(input->dtype());
TransposeFromMLUTensor<T>(
ctx, perm_to_nhwc, input, &trans_input, true /*need_reshape_or_alloc*/);
Tensor trans_offset(offset->dtype());
phi::DenseTensor trans_offset(offset->dtype());
TransposeFromMLUTensor<T>(ctx,
perm_to_nhwc,
offset,
&trans_offset,
true /*need_reshape_or_alloc*/);
Tensor trans_mask(mask->dtype());
phi::DenseTensor trans_mask(mask->dtype());
TransposeFromMLUTensor<T>(
ctx, perm_to_nhwc, mask, &trans_mask, true /*need_reshape_or_alloc*/);
Tensor trans_filter(filter->dtype());
phi::DenseTensor trans_filter(filter->dtype());
TransposeFromMLUTensor<T>(ctx,
perm_to_nhwc,
filter,
&trans_filter,
true /*need_reshape_or_alloc*/);
Tensor tmp_output(output->dtype());
phi::DenseTensor tmp_output(output->dtype());
auto output_dims = output->dims();
tmp_output.mutable_data<T>(
{output_dims[0], output_dims[2], output_dims[3], output_dims[1]},
......@@ -167,54 +165,54 @@ class DeformableConvGradMLUKernel : public framework::OpKernel<T> {
groups,
im2col_step);
Tensor tmp_input_grad;
phi::DenseTensor tmp_input_grad;
auto input_dims = input->dims();
tmp_input_grad.mutable_data<T>(
{input_dims[0], input_dims[2], input_dims[3], input_dims[1]},
ctx.GetPlace());
Tensor tmp_filter_grad;
phi::DenseTensor tmp_filter_grad;
auto filter_dims = filter->dims();
tmp_filter_grad.mutable_data<T>(
{filter_dims[0], filter_dims[2], filter_dims[3], filter_dims[1]},
ctx.GetPlace());
Tensor tmp_offset_grad;
phi::DenseTensor tmp_offset_grad;
auto offset_dims = offset->dims();
tmp_offset_grad.mutable_data<T>(
{offset_dims[0], offset_dims[2], offset_dims[3], offset_dims[1]},
ctx.GetPlace());
Tensor tmp_mask_grad;
phi::DenseTensor tmp_mask_grad;
auto mask_dims = mask->dims();
tmp_mask_grad.mutable_data<T>(
{mask_dims[0], mask_dims[2], mask_dims[3], mask_dims[1]},
ctx.GetPlace());
const std::vector<int> perm_to_nhwc = {0, 2, 3, 1};
Tensor trans_output_grad(output_grad->dtype());
phi::DenseTensor trans_output_grad(output_grad->dtype());
TransposeFromMLUTensor<T>(ctx,
perm_to_nhwc,
output_grad,
&trans_output_grad,
true /*need_reshape_or_alloc*/);
Tensor trans_input(input->dtype());
phi::DenseTensor trans_input(input->dtype());
TransposeFromMLUTensor<T>(
ctx, perm_to_nhwc, input, &trans_input, true /*need_reshape_or_alloc*/);
Tensor trans_offset(offset->dtype());
phi::DenseTensor trans_offset(offset->dtype());
TransposeFromMLUTensor<T>(ctx,
perm_to_nhwc,
offset,
&trans_offset,
true /*need_reshape_or_alloc*/);
Tensor trans_mask(mask->dtype());
phi::DenseTensor trans_mask(mask->dtype());
TransposeFromMLUTensor<T>(
ctx, perm_to_nhwc, mask, &trans_mask, true /*need_reshape_or_alloc*/);
Tensor trans_filter(filter->dtype());
phi::DenseTensor trans_filter(filter->dtype());
TransposeFromMLUTensor<T>(ctx,
perm_to_nhwc,
filter,
......
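Both MLU deformable-conv kernels above transpose every NCHW input to NHWC with perm = {0, 2, 3, 1} and allocate channel-last temporaries before calling the device op. A host-side sketch of that layout change on a plain float buffer (toy code, independent of TransposeFromMLUTensor):

#include <cstdio>
#include <vector>

// Reorder an NCHW buffer into NHWC, mirroring the perm {0, 2, 3, 1}
// applied before the MLU ops above.
std::vector<float> NchwToNhwc(const std::vector<float>& src,
                              int n, int c, int h, int w) {
  std::vector<float> dst(src.size());
  for (int in = 0; in < n; ++in)
    for (int ic = 0; ic < c; ++ic)
      for (int ih = 0; ih < h; ++ih)
        for (int iw = 0; iw < w; ++iw) {
          int src_idx = ((in * c + ic) * h + ih) * w + iw;
          int dst_idx = ((in * h + ih) * w + iw) * c + ic;
          dst[dst_idx] = src[src_idx];
        }
  return dst;
}

int main() {
  // 1 x 2 x 2 x 2 example: channel-major in, channel-last out.
  std::vector<float> x = {0, 1, 2, 3, 4, 5, 6, 7};
  std::vector<float> y = NchwToNhwc(x, 1, 2, 2, 2);
  for (float v : y) std::printf("%g ", v);
  std::printf("\n");  // prints: 0 4 1 5 2 6 3 7
  return 0;
}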
......@@ -39,7 +39,6 @@
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
using phi::PADDLE_CUDA_NUM_THREADS;
static inline int GET_BLOCKS(const int N) {
......
......@@ -33,8 +33,6 @@
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
template <typename T>
T bilinear_interp(
const T* data, const T x, const T y, const int width, const int height) {
......@@ -518,7 +516,7 @@ class DeformablePSROIPoolGradCPUKernel : public framework::OpKernel<T> {
const int num_classes = no_trans ? 1 : channels_trans / 2;
const int channels_each_class =
no_trans ? output_dim : output_dim / num_classes;
Tensor roi_batch_id_list;
phi::DenseTensor roi_batch_id_list;
roi_batch_id_list.Resize({num_rois});
int* roi_batch_id_data =
roi_batch_id_list.mutable_data<int>(ctx.GetPlace());
......
......@@ -30,8 +30,6 @@ namespace cub = hipcub;
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
int const kThreadsPerBlock = sizeof(uint64_t) * 8;
......@@ -47,11 +45,11 @@ struct RangeInitFunctor {
template <typename T>
static void SortDescending(const phi::GPUContext &ctx,
const Tensor &value,
Tensor *value_out,
Tensor *index_out) {
const phi::DenseTensor &value,
phi::DenseTensor *value_out,
phi::DenseTensor *index_out) {
int num = static_cast<int>(value.numel());
Tensor index_in_t;
phi::DenseTensor index_in_t;
int *idx_in = index_in_t.mutable_data<int>({num}, ctx.GetPlace());
platform::ForRange<phi::GPUContext> for_range(ctx, num);
for_range(RangeInitFunctor{0, 1, idx_in});
......@@ -287,10 +285,10 @@ static __global__ void NMSKernel(const int n_boxes,
template <typename T>
static void NMS(const phi::GPUContext &ctx,
const Tensor &proposals,
const Tensor &sorted_indices,
const phi::DenseTensor &proposals,
const phi::DenseTensor &sorted_indices,
const T nms_threshold,
Tensor *keep_out,
phi::DenseTensor *keep_out,
bool pixel_offset = true) {
int boxes_num = proposals.dims()[0];
const int col_blocks = DIVUP(boxes_num, kThreadsPerBlock);
......
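SortDescending above fills an index tensor 0..N-1 and key-sorts (score, index) pairs on the GPU so later steps can gather boxes in score order. A host-side analogue of the same idea, assuming plain std::vector inputs:

#include <algorithm>
#include <cstdio>
#include <numeric>
#include <vector>

// Sort scores in descending order and return the permutation that was
// applied, mirroring the (value_out, index_out) pair produced above.
void SortDescendingCpu(const std::vector<float>& value,
                       std::vector<float>* value_out,
                       std::vector<int>* index_out) {
  index_out->resize(value.size());
  std::iota(index_out->begin(), index_out->end(), 0);
  std::sort(index_out->begin(), index_out->end(),
            [&value](int a, int b) { return value[a] > value[b]; });
  value_out->resize(value.size());
  for (size_t i = 0; i < value.size(); ++i)
    (*value_out)[i] = value[(*index_out)[i]];
}

int main() {
  std::vector<float> scores = {0.2f, 0.9f, 0.5f};
  std::vector<float> sorted;
  std::vector<int> order;
  SortDescendingCpu(scores, &sorted, &order);
  for (size_t i = 0; i < sorted.size(); ++i)
    std::printf("%g (from %d)\n", sorted[i], order[i]);
  return 0;
}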
......@@ -18,8 +18,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
class BipartiteMatchOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
......@@ -234,7 +232,7 @@ class BipartiteMatchKernel : public framework::OpKernel<T> {
auto lod = dist_mat->lod().back();
for (size_t i = 0; i < lod.size() - 1; ++i) {
if (lod[i + 1] > lod[i]) {
Tensor one_ins = dist_mat->Slice(lod[i], lod[i + 1]);
phi::DenseTensor one_ins = dist_mat->Slice(lod[i], lod[i + 1]);
BipartiteMatch(one_ins, indices + i * col, dist + i * col);
if (type == "per_prediction") {
ArgMaxMatch(one_ins, indices + i * col, dist + i * col, threshold);
......
......@@ -22,7 +22,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
using LoDTensor = phi::DenseTensor;
static constexpr int ImInfoSize = 3;
......
......@@ -19,8 +19,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
template <typename DeviceContext, typename T>
class BoxClipKernel : public framework::OpKernel<T> {
public:
......@@ -42,9 +40,10 @@ class BoxClipKernel : public framework::OpKernel<T> {
auto box_lod = input_box->lod().back();
int64_t n = static_cast<int64_t>(box_lod.size() - 1);
for (int i = 0; i < n; ++i) {
Tensor im_info_slice = im_info->Slice(i, i + 1);
Tensor box_slice = input_box->Slice(box_lod[i], box_lod[i + 1]);
Tensor output_slice = output_box->Slice(box_lod[i], box_lod[i + 1]);
phi::DenseTensor im_info_slice = im_info->Slice(i, i + 1);
phi::DenseTensor box_slice = input_box->Slice(box_lod[i], box_lod[i + 1]);
phi::DenseTensor output_slice =
output_box->Slice(box_lod[i], box_lod[i + 1]);
ClipTiledBoxes<T>(dev_ctx, im_info_slice, box_slice, &output_slice);
}
}
......
......@@ -18,8 +18,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
template <typename T>
struct BoxCoderFunction {
public:
......@@ -28,31 +26,31 @@ struct BoxCoderFunction {
stream = ctx.template device_context<paddle::platform::NPUDeviceContext>()
.stream();
}
Tensor Adds(const phi::DenseTensor& x, float scalar) {
Tensor y;
phi::DenseTensor Adds(const phi::DenseTensor& x, float scalar) {
phi::DenseTensor y;
y.mutable_data<T>(x.dims(), place);
const auto& runner = NpuOpRunner("Adds", {x}, {y}, {{"value", scalar}});
runner.Run(stream);
return y;
}
Tensor Muls(const phi::DenseTensor& x, float scalar) {
Tensor y;
phi::DenseTensor Muls(const phi::DenseTensor& x, float scalar) {
phi::DenseTensor y;
y.mutable_data<T>(x.dims(), place);
const auto& runner = NpuOpRunner("Muls", {x}, {y}, {{"value", scalar}});
runner.Run(stream);
return y;
}
Tensor Mul(const phi::DenseTensor& x, const phi::DenseTensor& y) {
Tensor z;
phi::DenseTensor Mul(const phi::DenseTensor& x, const phi::DenseTensor& y) {
phi::DenseTensor z;
z.mutable_data<T>(x.dims(), place);
const auto& runner = NpuOpRunner("Mul", {x, y}, {z}, {});
runner.Run(stream);
return z;
}
Tensor SubWithBroadCast(const phi::DenseTensor& x,
const phi::DenseTensor& y,
const framework::DDim& shape) {
Tensor z;
phi::DenseTensor SubWithBroadCast(const phi::DenseTensor& x,
const phi::DenseTensor& y,
const framework::DDim& shape) {
phi::DenseTensor z;
z.mutable_data<T>(shape, place);
const auto& runner = NpuOpRunner("Sub", {x, y}, {z}, {});
runner.Run(stream);
......@@ -66,10 +64,10 @@ struct BoxCoderFunction {
const auto& runner = NpuOpRunner("Div", {x, y}, {*z}, {});
runner.Run(stream);
}
Tensor DivWithBroadCast(const phi::DenseTensor& x,
const phi::DenseTensor& y,
const framework::DDim& shape) {
Tensor z;
phi::DenseTensor DivWithBroadCast(const phi::DenseTensor& x,
const phi::DenseTensor& y,
const framework::DDim& shape) {
phi::DenseTensor z;
DivWithBroadCastVoid(x, y, shape, &z);
return z;
}
......@@ -81,10 +79,10 @@ struct BoxCoderFunction {
const auto& runner = NpuOpRunner("Mul", {x, y}, {*z}, {});
runner.Run(stream);
}
Tensor MulWithBroadCast(const phi::DenseTensor& x,
const phi::DenseTensor& y,
const framework::DDim& shape) {
Tensor z;
phi::DenseTensor MulWithBroadCast(const phi::DenseTensor& x,
const phi::DenseTensor& y,
const framework::DDim& shape) {
phi::DenseTensor z;
MulWithBroadCastVoid(x, y, shape, &z);
return z;
}
......@@ -96,36 +94,36 @@ struct BoxCoderFunction {
const auto& runner = NpuOpRunner("AddV2", {x, y}, {*z}, {});
runner.Run(stream);
}
Tensor AddWithBroadCast(const phi::DenseTensor& x,
const phi::DenseTensor& y,
const framework::DDim& shape) {
Tensor z;
phi::DenseTensor AddWithBroadCast(const phi::DenseTensor& x,
const phi::DenseTensor& y,
const framework::DDim& shape) {
phi::DenseTensor z;
AddWithBroadCastVoid(x, y, shape, &z);
return z;
}
Tensor Abs(const phi::DenseTensor& x) {
Tensor y;
phi::DenseTensor Abs(const phi::DenseTensor& x) {
phi::DenseTensor y;
y.mutable_data<T>(x.dims(), place);
const auto& runner = NpuOpRunner("Abs", {x}, {y}, {});
runner.Run(stream);
return y;
}
Tensor Log(const phi::DenseTensor& x) {
Tensor t_x_m1 = Adds(x, -1);
Tensor y;
phi::DenseTensor Log(const phi::DenseTensor& x) {
phi::DenseTensor t_x_m1 = Adds(x, -1);
phi::DenseTensor y;
y.mutable_data<T>(x.dims(), place);
const auto& runner = NpuOpRunner("Log1p", {t_x_m1}, {y}, {});
runner.Run(stream);
return y;
}
Tensor Exp(const phi::DenseTensor& x) {
Tensor y;
phi::DenseTensor Exp(const phi::DenseTensor& x) {
phi::DenseTensor y;
y.mutable_data<T>(x.dims(), place);
const auto& runner = NpuOpRunner("Exp", {x}, {y}, {});
runner.Run(stream);
return y;
}
Tensor Dot(const phi::DenseTensor& x, const phi::DenseTensor& y) {
phi::DenseTensor Dot(const phi::DenseTensor& x, const phi::DenseTensor& y) {
auto dim_x = x.dims();
auto dim_y = y.dims();
PADDLE_ENFORCE_EQ(
......@@ -145,7 +143,7 @@ struct BoxCoderFunction {
"got dim_x[1] = %d, dim_y[0] = %d.",
dim_x[1],
dim_y[0]));
Tensor z;
phi::DenseTensor z;
z.mutable_data<T>({dim_x[0], dim_y[1]}, place);
const auto& runner =
NpuOpRunner("MatMul",
......@@ -155,7 +153,7 @@ struct BoxCoderFunction {
runner.Run(stream);
return z;
}
void ConcatVoid(const std::vector<Tensor>& inputs,
void ConcatVoid(const std::vector<phi::DenseTensor>& inputs,
const framework::DDim& shape_out,
int axis,
phi::DenseTensor* output) {
......@@ -172,18 +170,18 @@ struct BoxCoderFunction {
runner.AddInputNames(names);
runner.Run(stream);
}
Tensor Concat(const std::vector<Tensor>& inputs,
const framework::DDim& shape_out,
int axis) {
Tensor output;
phi::DenseTensor Concat(const std::vector<phi::DenseTensor>& inputs,
const framework::DDim& shape_out,
int axis) {
phi::DenseTensor output;
ConcatVoid(inputs, shape_out, axis, &output);
return output;
}
Tensor Slice(const phi::DenseTensor& x,
const std::vector<int>& offsets,
const std::vector<int>& size,
const framework::DDim& shape) {
Tensor y;
phi::DenseTensor Slice(const phi::DenseTensor& x,
const std::vector<int>& offsets,
const std::vector<int>& size,
const framework::DDim& shape) {
phi::DenseTensor y;
y.mutable_data<T>(shape, place);
const auto& runner =
NpuOpRunner("SliceD", {x}, {y}, {{"offsets", offsets}, {"size", size}});
......@@ -218,8 +216,8 @@ void BoxCoderEnc(const framework::ExecutionContext& ctx,
auto M = pb->dims()[0];
auto N = tb->dims()[0];
auto shape_0 = phi::make_ddim({4, 2});
Tensor m_diff;
Tensor m_aver;
phi::DenseTensor m_diff;
phi::DenseTensor m_aver;
std::vector<T> vec_diff = {static_cast<T>(-1),
static_cast<T>(0),
static_cast<T>(0),
......@@ -240,10 +238,10 @@ void BoxCoderEnc(const framework::ExecutionContext& ctx,
Vector2Tensor<T>(ctx, vec_aver, shape_0, &m_aver);
BoxCoderFunction<T> F(ctx);
Tensor pb_xy = F.Adds(F.Dot(*pb, m_aver), (norm ? 0 : 0.5));
Tensor pb_wh = F.Adds(F.Dot(*pb, m_diff), (norm ? 0 : 1));
Tensor tb_xy = F.Dot(*tb, m_aver);
Tensor tb_wh = F.Adds(F.Dot(*tb, m_diff), (norm ? 0 : 1));
phi::DenseTensor pb_xy = F.Adds(F.Dot(*pb, m_aver), (norm ? 0 : 0.5));
phi::DenseTensor pb_wh = F.Adds(F.Dot(*pb, m_diff), (norm ? 0 : 1));
phi::DenseTensor tb_xy = F.Dot(*tb, m_aver);
phi::DenseTensor tb_wh = F.Adds(F.Dot(*tb, m_diff), (norm ? 0 : 1));
pb_xy.Resize({1, M, 2});
pb_wh.Resize({1, M, 2});
......@@ -253,15 +251,16 @@ void BoxCoderEnc(const framework::ExecutionContext& ctx,
auto shape_half = phi::make_ddim({N, M, 2});
auto shape_full = phi::make_ddim({N, M, 4});
Tensor out_xy_0 = F.DivWithBroadCast(
phi::DenseTensor out_xy_0 = F.DivWithBroadCast(
F.SubWithBroadCast(tb_xy, pb_xy, shape_half), pb_wh, shape_half);
Tensor out_wh_0 = F.Log(F.Abs(F.DivWithBroadCast(tb_wh, pb_wh, shape_half)));
Tensor out_0 = F.Concat({out_xy_0, out_wh_0}, shape_full, 2);
phi::DenseTensor out_wh_0 =
F.Log(F.Abs(F.DivWithBroadCast(tb_wh, pb_wh, shape_half)));
phi::DenseTensor out_0 = F.Concat({out_xy_0, out_wh_0}, shape_full, 2);
if (pbv) {
F.DivWithBroadCastVoid(out_0, *pbv, shape_full, out);
} else {
Tensor t_var;
phi::DenseTensor t_var;
std::vector<T> vec_var(4);
for (auto i = 0; i < 4; i++) {
vec_var[i] = static_cast<T>(variance[i]);
......@@ -281,8 +280,8 @@ void BoxCoderDec(const framework::ExecutionContext& ctx,
int axis,
phi::DenseTensor* out) {
auto shape_0 = phi::make_ddim({4, 2});
Tensor m_diff;
Tensor m_aver;
phi::DenseTensor m_diff;
phi::DenseTensor m_aver;
std::vector<T> vec_diff = {static_cast<T>(-1),
static_cast<T>(0),
static_cast<T>(0),
......@@ -303,8 +302,8 @@ void BoxCoderDec(const framework::ExecutionContext& ctx,
Vector2Tensor<T>(ctx, vec_aver, shape_0, &m_aver);
BoxCoderFunction<T> F(ctx);
Tensor pb_xy = F.Adds(F.Dot(*pb, m_aver), (norm ? 0 : 0.5));
Tensor pb_wh = F.Adds(F.Dot(*pb, m_diff), (norm ? 0 : 1));
phi::DenseTensor pb_xy = F.Adds(F.Dot(*pb, m_aver), (norm ? 0 : 0.5));
phi::DenseTensor pb_wh = F.Adds(F.Dot(*pb, m_diff), (norm ? 0 : 1));
auto pb_resize_shape = axis == 0 ? phi::make_ddim({1, pb->dims()[0], 2})
: phi::make_ddim({pb->dims()[0], 1, 2});
pb_xy.Resize(pb_resize_shape);
......@@ -313,18 +312,22 @@ void BoxCoderDec(const framework::ExecutionContext& ctx,
auto tbox_slice_shape = phi::make_ddim({tb->dims()[0], tb->dims()[1], 2});
std::vector<int> tbox_slice_size = {
static_cast<int>(tb->dims()[0]), static_cast<int>(tb->dims()[1]), 2};
Tensor tbox01 = F.Slice(*tb, {0, 0, 0}, tbox_slice_size, tbox_slice_shape);
Tensor tbox23 = F.Slice(*tb, {0, 0, 2}, tbox_slice_size, tbox_slice_shape);
phi::DenseTensor tbox01 =
F.Slice(*tb, {0, 0, 0}, tbox_slice_size, tbox_slice_shape);
phi::DenseTensor tbox23 =
F.Slice(*tb, {0, 0, 2}, tbox_slice_size, tbox_slice_shape);
Tensor tb_xy;
Tensor tb_wh;
phi::DenseTensor tb_xy;
phi::DenseTensor tb_wh;
if (pbv) {
auto pbvt_slice_shape = phi::make_ddim({pbv->dims()[0], 2});
auto pbvt_resize_shape = axis == 0 ? phi::make_ddim({1, pbv->dims()[0], 2})
: phi::make_ddim({pbv->dims()[0], 1, 2});
std::vector<int> pbvt_slice_size = {static_cast<int>(pbv->dims()[0]), 2};
Tensor pbv_t01 = F.Slice(*pbv, {0, 0}, pbvt_slice_size, pbvt_slice_shape);
Tensor pbv_t23 = F.Slice(*pbv, {0, 2}, pbvt_slice_size, pbvt_slice_shape);
phi::DenseTensor pbv_t01 =
F.Slice(*pbv, {0, 0}, pbvt_slice_size, pbvt_slice_shape);
phi::DenseTensor pbv_t23 =
F.Slice(*pbv, {0, 2}, pbvt_slice_size, pbvt_slice_shape);
pbv_t01.Resize(pbvt_resize_shape);
pbv_t23.Resize(pbvt_resize_shape);
......@@ -345,7 +348,7 @@ void BoxCoderDec(const framework::ExecutionContext& ctx,
&tb_xy);
F.MulWithBroadCastVoid(F.Exp(tbox23), pb_wh, tbox_slice_shape, &tb_wh);
} else {
Tensor t_var01, t_var23;
phi::DenseTensor t_var01, t_var23;
auto t_var_shape = phi::make_ddim({1, 1, 2});
std::vector<T> vec_var01 = {static_cast<T>(variance[0]),
static_cast<T>(variance[1])};
......@@ -366,9 +369,9 @@ void BoxCoderDec(const framework::ExecutionContext& ctx,
tbox_slice_shape,
&tb_wh);
}
Tensor obox01 =
phi::DenseTensor obox01 =
F.AddWithBroadCast(tb_xy, F.Muls(tb_wh, -0.5), tbox_slice_shape);
Tensor obox23 =
phi::DenseTensor obox23 =
F.Adds(F.AddWithBroadCast(tb_xy, F.Muls(tb_wh, 0.5), tbox_slice_shape),
(norm ? 0 : -1));
F.ConcatVoid({obox01, obox23}, out->dims(), 2, out);
......
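BoxCoderEnc above assembles the encoded deltas tensor-by-tensor: prior centers/sizes (pb_xy, pb_wh), target centers/sizes (tb_xy, tb_wh), then (tb_xy - pb_xy) / pb_wh and log(|tb_wh / pb_wh|), finally divided by the variances. A scalar sketch of the same arithmetic for one box pair (hypothetical helper, not the NPU path):

#include <cmath>
#include <cstdio>

// One prior/target box pair in [xmin, ymin, xmax, ymax] corner format.
struct Box { float xmin, ymin, xmax, ymax; };

// Scalar version of the encode path above: center/size of the prior and
// target, then normalized offsets and log size ratios, scaled by variance.
void EncodeBox(const Box& prior, const Box& target, bool norm,
               const float variance[4], float out[4]) {
  float off = norm ? 0.f : 1.f;  // the (norm ? 0 : 1) term above
  float pw = prior.xmax - prior.xmin + off;
  float ph = prior.ymax - prior.ymin + off;
  float px = (prior.xmin + prior.xmax) * 0.5f + (norm ? 0.f : 0.5f);
  float py = (prior.ymin + prior.ymax) * 0.5f + (norm ? 0.f : 0.5f);
  float tw = target.xmax - target.xmin + off;
  float th = target.ymax - target.ymin + off;
  float tx = (target.xmin + target.xmax) * 0.5f;
  float ty = (target.ymin + target.ymax) * 0.5f;
  out[0] = (tx - px) / pw / variance[0];
  out[1] = (ty - py) / ph / variance[1];
  out[2] = std::log(std::fabs(tw / pw)) / variance[2];
  out[3] = std::log(std::fabs(th / ph)) / variance[3];
}

int main() {
  Box prior{0.f, 0.f, 10.f, 10.f}, target{1.f, 1.f, 9.f, 9.f};
  float variance[4] = {0.1f, 0.1f, 0.2f, 0.2f};
  float delta[4];
  EncodeBox(prior, target, /*norm=*/true, variance, delta);
  std::printf("%g %g %g %g\n", delta[0], delta[1], delta[2], delta[3]);
  return 0;
}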
......@@ -16,7 +16,6 @@ limitations under the License.*/
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
class CollectFpnProposalsOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
......
......@@ -33,8 +33,6 @@ namespace cub = hipcub;
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
static constexpr int kNumCUDAThreads = 64;
static constexpr int kNumMaxinumNumBlocks = 4096;
......@@ -74,13 +72,13 @@ class GPUCollectFpnProposalsOpKernel : public framework::OpKernel<T> {
int real_post_num = min(post_nms_topN, total_roi_num);
fpn_rois->mutable_data<T>({real_post_num, kBBoxSize}, dev_ctx.GetPlace());
Tensor concat_rois;
Tensor concat_scores;
phi::DenseTensor concat_rois;
phi::DenseTensor concat_scores;
T* concat_rois_data = concat_rois.mutable_data<T>(
{total_roi_num, kBBoxSize}, dev_ctx.GetPlace());
T* concat_scores_data =
concat_scores.mutable_data<T>({total_roi_num, 1}, dev_ctx.GetPlace());
Tensor roi_batch_id_list;
phi::DenseTensor roi_batch_id_list;
roi_batch_id_list.Resize({total_roi_num});
int* roi_batch_id_data =
roi_batch_id_list.mutable_data<int>(platform::CPUPlace());
......@@ -130,20 +128,20 @@ class GPUCollectFpnProposalsOpKernel : public framework::OpKernel<T> {
}
// copy batch id list to GPU
Tensor roi_batch_id_list_gpu;
phi::DenseTensor roi_batch_id_list_gpu;
framework::TensorCopy(
roi_batch_id_list, dev_ctx.GetPlace(), &roi_batch_id_list_gpu);
Tensor index_in_t;
phi::DenseTensor index_in_t;
int* idx_in =
index_in_t.mutable_data<int>({total_roi_num}, dev_ctx.GetPlace());
platform::ForRange<phi::GPUContext> for_range_total(dev_ctx, total_roi_num);
for_range_total(RangeInitFunctor{0, 1, idx_in});
Tensor keys_out_t;
phi::DenseTensor keys_out_t;
T* keys_out =
keys_out_t.mutable_data<T>({total_roi_num}, dev_ctx.GetPlace());
Tensor index_out_t;
phi::DenseTensor index_out_t;
int* idx_out =
index_out_t.mutable_data<int>({total_roi_num}, dev_ctx.GetPlace());
......@@ -175,21 +173,21 @@ class GPUCollectFpnProposalsOpKernel : public framework::OpKernel<T> {
sizeof(T) * 8,
dev_ctx.stream());
index_out_t.Resize({real_post_num});
Tensor sorted_rois;
phi::DenseTensor sorted_rois;
sorted_rois.mutable_data<T>({real_post_num, kBBoxSize}, dev_ctx.GetPlace());
Tensor sorted_batch_id;
phi::DenseTensor sorted_batch_id;
sorted_batch_id.mutable_data<int>({real_post_num}, dev_ctx.GetPlace());
phi::funcs::GPUGather<T>(dev_ctx, concat_rois, index_out_t, &sorted_rois);
phi::funcs::GPUGather<int>(
dev_ctx, roi_batch_id_list_gpu, index_out_t, &sorted_batch_id);
Tensor batch_index_t;
phi::DenseTensor batch_index_t;
int* batch_idx_in =
batch_index_t.mutable_data<int>({real_post_num}, dev_ctx.GetPlace());
platform::ForRange<phi::GPUContext> for_range_post(dev_ctx, real_post_num);
for_range_post(RangeInitFunctor{0, 1, batch_idx_in});
Tensor out_id_t;
phi::DenseTensor out_id_t;
int* out_id_data =
out_id_t.mutable_data<int>({real_post_num}, dev_ctx.GetPlace());
// Determine temporary device storage requirements
......@@ -222,7 +220,7 @@ class GPUCollectFpnProposalsOpKernel : public framework::OpKernel<T> {
phi::funcs::GPUGather<T>(dev_ctx, sorted_rois, index_out_t, fpn_rois);
Tensor length_lod;
phi::DenseTensor length_lod;
int* length_lod_data =
length_lod.mutable_data<int>({lod_size}, dev_ctx.GetPlace());
phi::funcs::SetConstant<phi::GPUContext, int> set_zero;
......
......@@ -15,7 +15,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
using fp16 = paddle::platform::float16;
template <typename T>
......@@ -89,7 +88,7 @@ struct DensityPriorBoxFunction {
const auto& runner = NpuOpRunner("Minimum", {*x, *y}, {*z}, {});
runner.Run(stream);
}
void Concat(const std::vector<Tensor>& inputs,
void Concat(const std::vector<phi::DenseTensor>& inputs,
int axis,
phi::DenseTensor* output) {
// output should be init first
......@@ -131,14 +130,14 @@ struct DensityPriorBoxFunction {
platform::Place place;
aclrtStream stream;
const framework::ExecutionContext& ctx;
Tensor t0;
Tensor t1;
Tensor tn;
phi::DenseTensor t0;
phi::DenseTensor t1;
phi::DenseTensor tn;
};
template <>
void DensityPriorBoxFunction<fp16>::Arange(int n, phi::DenseTensor* x) {
Tensor x_fp32(experimental::DataType::FLOAT32);
phi::DenseTensor x_fp32(experimental::DataType::FLOAT32);
x_fp32.mutable_data<float>(x->dims(), place);
FillNpuTensorWithConstant<float>(&tn, static_cast<float>(n));
const auto& runner = NpuOpRunner("Range", {t0, tn, t1}, {x_fp32}, {});
......@@ -149,7 +148,7 @@ void DensityPriorBoxFunction<fp16>::Arange(int n, phi::DenseTensor* x) {
template <>
void DensityPriorBoxFunction<fp16>::FloatVec2Tsr(const std::vector<float>& vec,
phi::DenseTensor* tsr_dst) {
Tensor tsr_fp32(experimental::DataType::FLOAT32);
phi::DenseTensor tsr_fp32(experimental::DataType::FLOAT32);
tsr_fp32.mutable_data<float>(tsr_dst->dims(), place);
framework::TensorFromVector<float>(vec, ctx.device_context(), &tsr_fp32);
ctx.template device_context<paddle::platform::NPUDeviceContext>().Wait();
......@@ -185,9 +184,9 @@ class DensityPriorBoxOpNPUKernel : public framework::OpKernel<T> {
auto place = ctx.GetPlace();
DensityPriorBoxFunction<T> F(ctx);
Tensor h(_type);
phi::DenseTensor h(_type);
h.mutable_data<T>({layer_h}, place);
Tensor w(_type);
phi::DenseTensor w(_type);
w.mutable_data<T>({layer_w}, place);
F.Arange(layer_h, &h);
F.Arange(layer_w, &w);
......@@ -203,11 +202,11 @@ class DensityPriorBoxOpNPUKernel : public framework::OpKernel<T> {
for (size_t i = 0; i < densities.size(); ++i) {
num_priors_per_ratio += densities[i] * densities[i];
}
Tensor di(_type);
Tensor dj(_type);
Tensor shifts(_type);
Tensor box_w_ratio(_type);
Tensor box_h_ratio(_type);
phi::DenseTensor di(_type);
phi::DenseTensor dj(_type);
phi::DenseTensor shifts(_type);
phi::DenseTensor box_w_ratio(_type);
phi::DenseTensor box_h_ratio(_type);
di.mutable_data<T>({ratios_size * num_priors_per_ratio}, place);
dj.mutable_data<T>({ratios_size * num_priors_per_ratio}, place);
shifts.mutable_data<T>({ratios_size * num_priors_per_ratio}, place);
......@@ -220,19 +219,21 @@ class DensityPriorBoxOpNPUKernel : public framework::OpKernel<T> {
// Range = start:start+ratios_size*density_sqr, density = densities[i]
int density_sqr = densities[i] * densities[i];
// shifts[Range] = [step_average/density]*ratios_size*density_sqr
Tensor shifts_part =
phi::DenseTensor shifts_part =
shifts.Slice(start, start + ratios_size * density_sqr);
FillNpuTensorWithConstant<T>(&shifts_part,
static_cast<T>(step_average / densities[i]));
// di[Range] = [ i // density for i in range(density_sqr) ] * ratios_size
// dj[Range] = [ i % density for i in range(density_sqr) ] * ratios_size
Tensor di_part = di.Slice(start, start + ratios_size * density_sqr);
Tensor dj_part = dj.Slice(start, start + ratios_size * density_sqr);
phi::DenseTensor di_part =
di.Slice(start, start + ratios_size * density_sqr);
phi::DenseTensor dj_part =
dj.Slice(start, start + ratios_size * density_sqr);
if (densities[i] > 1) {
di_part.Resize({ratios_size, densities[i], densities[i]});
dj_part.Resize({ratios_size, densities[i], densities[i]});
Tensor range_n(_type);
phi::DenseTensor range_n(_type);
range_n.mutable_data<T>({densities[i]}, place);
F.Arange(densities[i], &range_n);
range_n.Resize({1, densities[i], 1});
......@@ -254,9 +255,9 @@ class DensityPriorBoxOpNPUKernel : public framework::OpKernel<T> {
// Range_mini = start_box_ratio:start_box_ratio+density_sqr
// box_h_ratio[Range_mini] = [fixed_sizes[i] * sqrt(ar)] * density_sqr
// box_w_ratio[Range_mini] = [fixed_sizes[i] / sqrt(ar)] * density_sqr
Tensor box_h_ratio_part =
phi::DenseTensor box_h_ratio_part =
box_h_ratio.Slice(start_box_ratio, start_box_ratio + density_sqr);
Tensor box_w_ratio_part =
phi::DenseTensor box_w_ratio_part =
box_w_ratio.Slice(start_box_ratio, start_box_ratio + density_sqr);
FillNpuTensorWithConstant<T>(&box_w_ratio_part,
static_cast<T>(fixed_sizes[i] * sqrt(ar)));
......@@ -274,8 +275,8 @@ class DensityPriorBoxOpNPUKernel : public framework::OpKernel<T> {
// c_x = (w+offset)*step_w - 0.5*step_average + 0.5*shifts + dj*shifts
// c_y = (h+offset)*step_h - 0.5*step_average + 0.5*shifts + di*shifts
Tensor c_x(_type);
Tensor c_y(_type);
phi::DenseTensor c_x(_type);
phi::DenseTensor c_y(_type);
auto dim0 =
phi::make_ddim({1, layer_w, ratios_size * num_priors_per_ratio, 1});
auto dim1 =
......@@ -301,17 +302,17 @@ class DensityPriorBoxOpNPUKernel : public framework::OpKernel<T> {
F.Muls(&box_w_ratio, static_cast<float>(0.5), &box_w_ratio);
F.Muls(&box_h_ratio, static_cast<float>(0.5), &box_h_ratio);
Tensor zero_t(_type);
Tensor one_t(_type);
phi::DenseTensor zero_t(_type);
phi::DenseTensor one_t(_type);
zero_t.mutable_data<T>({1}, place);
one_t.mutable_data<T>({1}, place);
FillNpuTensorWithConstant<T>(&zero_t, static_cast<T>(0));
FillNpuTensorWithConstant<T>(&one_t, static_cast<T>(1));
Tensor outbox0(_type);
Tensor outbox1(_type);
Tensor outbox2(_type);
Tensor outbox3(_type);
phi::DenseTensor outbox0(_type);
phi::DenseTensor outbox1(_type);
phi::DenseTensor outbox2(_type);
phi::DenseTensor outbox3(_type);
outbox0.mutable_data<T>(dim0, place);
outbox1.mutable_data<T>(dim1, place);
outbox2.mutable_data<T>(dim0, place);
......@@ -349,17 +350,17 @@ class DensityPriorBoxOpNPUKernel : public framework::OpKernel<T> {
{layer_h, layer_w, ratios_size * num_priors_per_ratio, 4});
boxes->mutable_data<T>(place);
vars->mutable_data<T>(place);
Tensor boxes_share(_type);
Tensor vars_share(_type);
phi::DenseTensor boxes_share(_type);
phi::DenseTensor vars_share(_type);
boxes_share.ShareDataWith(*boxes);
boxes_share.Resize(out_dim);
vars_share.ShareDataWith(*vars);
vars_share.Resize(out_dim);
Tensor box0(_type);
Tensor box1(_type);
Tensor box2(_type);
Tensor box3(_type);
phi::DenseTensor box0(_type);
phi::DenseTensor box1(_type);
phi::DenseTensor box2(_type);
phi::DenseTensor box3(_type);
// out_dim = {layer_h, layer_w, ratios_size*num_priors_per_ratio, 1}
out_dim[3] = 1;
box0.mutable_data<T>(out_dim, place);
......@@ -377,7 +378,7 @@ class DensityPriorBoxOpNPUKernel : public framework::OpKernel<T> {
std::vector<int> multiples = {
layer_h, layer_w, ratios_size * num_priors_per_ratio, 1};
Tensor variances_t(_type);
phi::DenseTensor variances_t(_type);
// variances.size() == 4
variances_t.mutable_data<T>({4}, place);
F.FloatVec2Tsr(variances, &variances_t);
......
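The kernel above encodes its center computation in the inline comments: c_x = (w + offset) * step_w - 0.5 * step_average + 0.5 * shifts + dj * shifts, and symmetrically for c_y with di. A scalar sketch that evaluates those formulas for one feature-map cell (names are illustrative):

#include <cstdio>

// Scalar form of the center formulas commented in the kernel above,
// where shift = step_average / density and (di, dj) index the density grid.
struct Center { float x, y; };

Center DensityPriorCenter(int w, int h, float offset, float step_w,
                          float step_h, float step_average, int density,
                          int di, int dj) {
  float shift = step_average / static_cast<float>(density);
  Center c;
  c.x = (w + offset) * step_w - 0.5f * step_average + 0.5f * shift + dj * shift;
  c.y = (h + offset) * step_h - 0.5f * step_average + 0.5f * shift + di * shift;
  return c;
}

int main() {
  // One feature-map cell (w = 3, h = 2) with a 2 x 2 density grid.
  for (int di = 0; di < 2; ++di)
    for (int dj = 0; dj < 2; ++dj) {
      Center c = DensityPriorCenter(3, 2, 0.5f, 16.f, 16.f, 16.f, 2, di, dj);
      std::printf("di=%d dj=%d -> (%g, %g)\n", di, dj, c.x, c.y);
    }
  return 0;
}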
......@@ -25,7 +25,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
const int kBoxDim = 4;
template <typename T>
......@@ -151,16 +150,17 @@ static inline void ExpandMaskTarget(const phi::CPUContext& ctx,
}
template <typename T>
std::vector<Tensor> SampleMaskForOneImage(const phi::CPUContext& ctx,
const phi::DenseTensor& im_info,
const phi::DenseTensor& gt_classes,
const phi::DenseTensor& is_crowd,
const phi::DenseTensor& gt_segms,
const phi::DenseTensor& rois,
const phi::DenseTensor& label_int32,
const int num_classes,
const int resolution,
const framework::LoD& segm_length) {
std::vector<phi::DenseTensor> SampleMaskForOneImage(
const phi::CPUContext& ctx,
const phi::DenseTensor& im_info,
const phi::DenseTensor& gt_classes,
const phi::DenseTensor& is_crowd,
const phi::DenseTensor& gt_segms,
const phi::DenseTensor& rois,
const phi::DenseTensor& label_int32,
const int num_classes,
const int resolution,
const framework::LoD& segm_length) {
// Prepare the mask targets by associating one gt mask to each training roi
// that has a fg (non-bg) class label.
const int64_t gt_size = static_cast<int64_t>(gt_classes.dims()[0]);
......@@ -218,15 +218,15 @@ std::vector<Tensor> SampleMaskForOneImage(const phi::CPUContext& ctx,
int gt_num = mask_gt_inds.size();
int fg_num = fg_inds.size();
Tensor boxes_from_polys;
phi::DenseTensor boxes_from_polys;
boxes_from_polys.mutable_data<T>({gt_num, 4}, platform::CPUPlace());
Poly2Boxes(gt_polys, boxes_from_polys.data<T>());
std::vector<int> roi_has_mask =
std::vector<int>(fg_inds.begin(), fg_inds.end());
Tensor mask_class_labels;
Tensor masks;
Tensor rois_fg;
phi::DenseTensor mask_class_labels;
phi::DenseTensor masks;
phi::DenseTensor rois_fg;
auto im_scale = im_info.data<T>()[2];
if (fg_num > 0) {
......@@ -251,7 +251,7 @@ std::vector<Tensor> SampleMaskForOneImage(const phi::CPUContext& ctx,
rois_fg_data[k] = rois_fg_data[k] / im_scale;
}
Tensor overlaps_bbfg_bbpolys;
phi::DenseTensor overlaps_bbfg_bbpolys;
overlaps_bbfg_bbpolys.mutable_data<T>({fg_num, gt_num}, ctx.GetPlace());
BboxOverlaps<T>(rois_fg, boxes_from_polys, &overlaps_bbfg_bbpolys);
......@@ -306,7 +306,7 @@ std::vector<Tensor> SampleMaskForOneImage(const phi::CPUContext& ctx,
roi_has_mask = std::vector<int>(bg_inds.begin(), bg_inds.end());
}
Tensor masks_expand;
phi::DenseTensor masks_expand;
ExpandMaskTarget<T>(
ctx, masks, mask_class_labels, resolution, num_classes, &masks_expand);
......@@ -315,13 +315,13 @@ std::vector<Tensor> SampleMaskForOneImage(const phi::CPUContext& ctx,
rois_fg_data[k] = rois_fg_data[k] * im_scale;
}
Tensor roi_has_mask_t;
phi::DenseTensor roi_has_mask_t;
int roi_has_mask_size = roi_has_mask.size();
int* roi_has_mask_data =
roi_has_mask_t.mutable_data<int>({roi_has_mask_size, 1}, ctx.GetPlace());
std::copy(roi_has_mask.begin(), roi_has_mask.end(), roi_has_mask_data);
std::vector<Tensor> res;
std::vector<phi::DenseTensor> res;
res.emplace_back(rois_fg);
res.emplace_back(roi_has_mask_t);
res.emplace_back(masks_expand);
......@@ -405,23 +405,23 @@ class GenerateMaskLabelsKernel : public framework::OpKernel<T> {
lod0.emplace_back(num_mask);
continue;
}
Tensor im_info_slice = im_info->Slice(i, i + 1);
Tensor gt_classes_slice =
phi::DenseTensor im_info_slice = im_info->Slice(i, i + 1);
phi::DenseTensor gt_classes_slice =
gt_classes->Slice(gt_classes_lod[i], gt_classes_lod[i + 1]);
Tensor is_crowd_slice =
phi::DenseTensor is_crowd_slice =
is_crowd->Slice(is_crowd_lod[i], is_crowd_lod[i + 1]);
Tensor label_int32_slice =
phi::DenseTensor label_int32_slice =
label_int32->Slice(label_int32_lod[i], label_int32_lod[i + 1]);
Tensor rois_slice = rois->Slice(rois_lod[i], rois_lod[i + 1]);
phi::DenseTensor rois_slice = rois->Slice(rois_lod[i], rois_lod[i + 1]);
auto sub_lod_and_offset =
framework::GetSubLoDAndAbsoluteOffset(gt_segms_lod, i, i + 1, 0);
auto lod_length = sub_lod_and_offset.first;
size_t s = sub_lod_and_offset.second.first;
size_t e = sub_lod_and_offset.second.second;
Tensor gt_segms_slice = gt_segms->Slice(s, e);
phi::DenseTensor gt_segms_slice = gt_segms->Slice(s, e);
std::vector<Tensor> tensor_output =
std::vector<phi::DenseTensor> tensor_output =
SampleMaskForOneImage<T>(dev_ctx,
im_info_slice,
gt_classes_slice,
......@@ -433,9 +433,9 @@ class GenerateMaskLabelsKernel : public framework::OpKernel<T> {
resolution,
lod_length);
Tensor sampled_mask_rois = tensor_output[0];
Tensor sampled_roi_has_mask_int32 = tensor_output[1];
Tensor sampled_mask_int32 = tensor_output[2];
phi::DenseTensor sampled_mask_rois = tensor_output[0];
phi::DenseTensor sampled_roi_has_mask_int32 = tensor_output[1];
phi::DenseTensor sampled_mask_int32 = tensor_output[2];
AppendMask<T>(mask_rois, kBoxDim * num_mask, &sampled_mask_rois);
AppendMask<int>(
......
......@@ -25,7 +25,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
const int kBoxDim = 4;
template <typename T>
......@@ -174,7 +173,7 @@ void Concat(const phi::CPUContext& context,
const phi::DenseTensor& in_tensor_b,
phi::DenseTensor* out_tensor) {
int axis = 0;
std::vector<Tensor> inputs;
std::vector<phi::DenseTensor> inputs;
inputs.emplace_back(in_tensor_a);
inputs.emplace_back(in_tensor_b);
math::ConcatFunctor<phi::CPUContext, T> concat_functor;
......@@ -300,7 +299,7 @@ void GatherBoxesLabels(const phi::CPUContext& context,
phi::DenseTensor* sampled_max_overlap) {
int fg_num = fg_inds.size();
int bg_num = bg_inds.size();
Tensor fg_inds_t, bg_inds_t, gt_box_inds_t, gt_label_inds_t;
phi::DenseTensor fg_inds_t, bg_inds_t, gt_box_inds_t, gt_label_inds_t;
int* fg_inds_data = fg_inds_t.mutable_data<int>({fg_num}, context.GetPlace());
int* bg_inds_data = bg_inds_t.mutable_data<int>({bg_num}, context.GetPlace());
int* gt_box_inds_data =
......@@ -312,7 +311,7 @@ void GatherBoxesLabels(const phi::CPUContext& context,
std::copy(gt_inds.begin(), gt_inds.end(), gt_box_inds_data);
std::copy(gt_inds.begin(), gt_inds.end(), gt_label_inds_data);
Tensor fg_boxes, bg_boxes, fg_labels, bg_labels;
phi::DenseTensor fg_boxes, bg_boxes, fg_labels, bg_labels;
fg_boxes.mutable_data<T>({fg_num, kBoxDim}, context.GetPlace());
phi::funcs::CPUGather<T>(context, boxes, fg_inds_t, &fg_boxes);
bg_boxes.mutable_data<T>({bg_num, kBoxDim}, context.GetPlace());
......@@ -325,7 +324,7 @@ void GatherBoxesLabels(const phi::CPUContext& context,
phi::funcs::set_constant(context, &bg_labels, 0);
Concat<int>(context, fg_labels, bg_labels, sampled_labels);
Tensor fg_max_overlap, bg_max_overlap;
phi::DenseTensor fg_max_overlap, bg_max_overlap;
fg_max_overlap.mutable_data<T>({fg_num}, context.GetPlace());
phi::funcs::CPUGather<T>(context, max_overlap, fg_inds_t, &fg_max_overlap);
bg_max_overlap.mutable_data<T>({bg_num}, context.GetPlace());
......@@ -334,7 +333,7 @@ void GatherBoxesLabels(const phi::CPUContext& context,
}
template <typename T>
std::vector<Tensor> SampleRoisForOneImage(
std::vector<phi::DenseTensor> SampleRoisForOneImage(
const phi::CPUContext& context,
const phi::DenseTensor& rpn_rois_in,
const phi::DenseTensor& gt_classes,
......@@ -355,7 +354,7 @@ std::vector<Tensor> SampleRoisForOneImage(
const phi::DenseTensor& max_overlap) {
// 1.1 map to original image
auto im_scale = im_info.data<T>()[2];
Tensor rpn_rois;
phi::DenseTensor rpn_rois;
rpn_rois.mutable_data<T>(rpn_rois_in.dims(), context.GetPlace());
const T* rpn_rois_in_dt = rpn_rois_in.data<T>();
T* rpn_rois_dt = rpn_rois.data<T>();
......@@ -367,10 +366,10 @@ std::vector<Tensor> SampleRoisForOneImage(
int proposals_num = 1;
if (is_cascade_rcnn) {
Tensor keep;
phi::DenseTensor keep;
FilterRoIs<T>(context, rpn_rois, max_overlap, &keep);
Tensor roi_filter;
// Tensor box_filter;
phi::DenseTensor roi_filter;
// phi::DenseTensor box_filter;
if (keep.numel() == 0) {
phi::funcs::SetConstant<phi::CPUContext, T> set_zero;
roi_filter.mutable_data<T>({proposals_num, kBoxDim}, context.GetPlace());
......@@ -389,16 +388,16 @@ std::vector<Tensor> SampleRoisForOneImage(
// 1.2 compute overlaps
proposals_num += gt_boxes.dims()[0];
Tensor proposal_to_gt_overlaps;
phi::DenseTensor proposal_to_gt_overlaps;
proposal_to_gt_overlaps.mutable_data<T>({proposals_num, gt_boxes.dims()[0]},
context.GetPlace());
Tensor boxes;
phi::DenseTensor boxes;
boxes.mutable_data<T>({proposals_num, kBoxDim}, context.GetPlace());
Concat<T>(context, gt_boxes, rpn_rois, &boxes);
BboxOverlaps<T>(boxes, gt_boxes, &proposal_to_gt_overlaps);
Tensor proposal_with_max_overlap;
phi::DenseTensor proposal_with_max_overlap;
proposal_with_max_overlap.mutable_data<T>({proposals_num},
context.GetPlace());
......@@ -423,7 +422,8 @@ std::vector<Tensor> SampleRoisForOneImage(
std::vector<int> mapped_gt_inds = fg_bg_gt[2]; // mapped_gt_labels
// Gather boxes and labels
Tensor sampled_boxes, sampled_labels, sampled_gts, sampled_max_overlap;
phi::DenseTensor sampled_boxes, sampled_labels, sampled_gts,
sampled_max_overlap;
int fg_num = fg_inds.size();
int bg_num = bg_inds.size();
int boxes_num = fg_num + bg_num;
......@@ -446,7 +446,7 @@ std::vector<Tensor> SampleRoisForOneImage(
&sampled_max_overlap);
// Compute targets
Tensor bbox_targets_single;
phi::DenseTensor bbox_targets_single;
bbox_targets_single.mutable_data<T>(bbox_dim, context.GetPlace());
BoxToDelta<T>(fg_num,
sampled_boxes,
......@@ -456,14 +456,14 @@ std::vector<Tensor> SampleRoisForOneImage(
&bbox_targets_single);
// Scale rois
Tensor sampled_rois;
phi::DenseTensor sampled_rois;
sampled_rois.mutable_data<T>(sampled_boxes.dims(), context.GetPlace());
auto sampled_rois_et = framework::EigenTensor<T, 2>::From(sampled_rois);
auto sampled_boxes_et = framework::EigenTensor<T, 2>::From(sampled_boxes);
sampled_rois_et = sampled_boxes_et * im_scale;
// Expand box targets
Tensor bbox_targets, bbox_inside_weights, bbox_outside_weights;
phi::DenseTensor bbox_targets, bbox_inside_weights, bbox_outside_weights;
framework::DDim bbox_expand_dim({boxes_num, kBoxDim * class_nums});
bbox_targets.mutable_data<T>(bbox_expand_dim, context.GetPlace());
bbox_inside_weights.mutable_data<T>(bbox_expand_dim, context.GetPlace());
......@@ -500,7 +500,7 @@ std::vector<Tensor> SampleRoisForOneImage(
bbox_outside_weights_data[dst_idx + 3] = 1;
}
}
std::vector<Tensor> res;
std::vector<phi::DenseTensor> res;
res.emplace_back(sampled_rois);
res.emplace_back(sampled_labels);
res.emplace_back(bbox_targets);
......@@ -610,16 +610,16 @@ class GenerateProposalLabelsKernel : public framework::OpKernel<T> {
lod0.emplace_back(num_rois);
continue;
}
Tensor rpn_rois_slice =
phi::DenseTensor rpn_rois_slice =
rpn_rois->Slice(rpn_rois_lod[i], rpn_rois_lod[i + 1]);
Tensor gt_classes_slice =
phi::DenseTensor gt_classes_slice =
gt_classes->Slice(gt_classes_lod[i], gt_classes_lod[i + 1]);
Tensor is_crowd_slice =
phi::DenseTensor is_crowd_slice =
is_crowd->Slice(is_crowd_lod[i], is_crowd_lod[i + 1]);
Tensor gt_boxes_slice =
phi::DenseTensor gt_boxes_slice =
gt_boxes->Slice(gt_boxes_lod[i], gt_boxes_lod[i + 1]);
Tensor im_info_slice = im_info->Slice(i, i + 1);
Tensor max_overlap_slice;
phi::DenseTensor im_info_slice = im_info->Slice(i, i + 1);
phi::DenseTensor max_overlap_slice;
if (is_cascade_rcnn) {
auto* max_overlap = context.Input<phi::DenseTensor>("MaxOverlap");
max_overlap_slice =
......@@ -628,7 +628,7 @@ class GenerateProposalLabelsKernel : public framework::OpKernel<T> {
max_overlap_slice.mutable_data<T>({rpn_rois_slice.dims()[0]},
context.GetPlace());
}
std::vector<Tensor> tensor_output =
std::vector<phi::DenseTensor> tensor_output =
SampleRoisForOneImage<T>(dev_ctx,
rpn_rois_slice,
gt_classes_slice,
......@@ -647,12 +647,12 @@ class GenerateProposalLabelsKernel : public framework::OpKernel<T> {
is_cascade_rcnn,
is_cls_agnostic,
max_overlap_slice);
Tensor sampled_rois = tensor_output[0];
Tensor sampled_labels_int32 = tensor_output[1];
Tensor sampled_bbox_targets = tensor_output[2];
Tensor sampled_bbox_inside_weights = tensor_output[3];
Tensor sampled_bbox_outside_weights = tensor_output[4];
Tensor sampled_max_overlap = tensor_output[5];
phi::DenseTensor sampled_rois = tensor_output[0];
phi::DenseTensor sampled_labels_int32 = tensor_output[1];
phi::DenseTensor sampled_bbox_targets = tensor_output[2];
phi::DenseTensor sampled_bbox_inside_weights = tensor_output[3];
phi::DenseTensor sampled_bbox_outside_weights = tensor_output[4];
phi::DenseTensor sampled_max_overlap = tensor_output[5];
AppendRois<T>(rois, kBoxDim * num_rois, &sampled_rois);
AppendRois<int>(labels_int32, num_rois, &sampled_labels_int32);
......
......@@ -27,8 +27,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
class GenerateProposalsOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
......@@ -115,7 +113,7 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
context.GetPlace());
rpn_roi_probs->mutable_data<T>({scores->numel(), 1}, context.GetPlace());
Tensor bbox_deltas_swap, scores_swap;
phi::DenseTensor bbox_deltas_swap, scores_swap;
bbox_deltas_swap.mutable_data<T>({num, h_bbox, w_bbox, c_bbox},
dev_ctx.GetPlace());
scores_swap.mutable_data<T>({num, h_score, w_score, c_score},
......@@ -136,14 +134,14 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
int64_t num_proposals = 0;
for (int64_t i = 0; i < num; ++i) {
Tensor im_info_slice = im_info->Slice(i, i + 1);
Tensor bbox_deltas_slice = bbox_deltas_swap.Slice(i, i + 1);
Tensor scores_slice = scores_swap.Slice(i, i + 1);
phi::DenseTensor im_info_slice = im_info->Slice(i, i + 1);
phi::DenseTensor bbox_deltas_slice = bbox_deltas_swap.Slice(i, i + 1);
phi::DenseTensor scores_slice = scores_swap.Slice(i, i + 1);
bbox_deltas_slice.Resize({h_bbox * w_bbox * c_bbox / 4, 4});
scores_slice.Resize({h_score * w_score * c_score, 1});
std::pair<Tensor, Tensor> tensor_pair =
std::pair<phi::DenseTensor, phi::DenseTensor> tensor_pair =
ProposalForOneImage(dev_ctx,
im_info_slice,
anchors,
......@@ -155,8 +153,8 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
nms_thresh,
min_size,
eta);
Tensor &proposals = tensor_pair.first;
Tensor &scores = tensor_pair.second;
phi::DenseTensor &proposals = tensor_pair.first;
phi::DenseTensor &scores = tensor_pair.second;
AppendProposals(rpn_rois, 4 * num_proposals, proposals);
AppendProposals(rpn_roi_probs, num_proposals, scores);
......@@ -179,13 +177,13 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
rpn_roi_probs->Resize({num_proposals, 1});
}
std::pair<Tensor, Tensor> ProposalForOneImage(
std::pair<phi::DenseTensor, phi::DenseTensor> ProposalForOneImage(
const phi::CPUContext &ctx,
const Tensor &im_info_slice,
const Tensor &anchors,
const Tensor &variances,
const Tensor &bbox_deltas_slice, // [M, 4]
const Tensor &scores_slice, // [N, 1]
const phi::DenseTensor &im_info_slice,
const phi::DenseTensor &anchors,
const phi::DenseTensor &variances,
const phi::DenseTensor &bbox_deltas_slice, // [M, 4]
const phi::DenseTensor &scores_slice, // [N, 1]
int pre_nms_top_n,
int post_nms_top_n,
float nms_thresh,
......@@ -194,7 +192,7 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
auto *scores_data = scores_slice.data<T>();
// Sort index
Tensor index_t;
phi::DenseTensor index_t;
index_t.Resize({scores_slice.numel()});
int *index = index_t.mutable_data<int>(ctx.GetPlace());
for (int i = 0; i < scores_slice.numel(); ++i) {
......@@ -212,7 +210,7 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
index_t.Resize({pre_nms_top_n});
}
Tensor scores_sel, bbox_sel, anchor_sel, var_sel;
phi::DenseTensor scores_sel, bbox_sel, anchor_sel, var_sel;
scores_sel.mutable_data<T>({index_t.numel(), 1}, ctx.GetPlace());
bbox_sel.mutable_data<T>({index_t.numel(), 4}, ctx.GetPlace());
anchor_sel.mutable_data<T>({index_t.numel(), 4}, ctx.GetPlace());
......@@ -223,26 +221,26 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
phi::funcs::CPUGather<T>(ctx, anchors, index_t, &anchor_sel);
phi::funcs::CPUGather<T>(ctx, variances, index_t, &var_sel);
Tensor proposals;
phi::DenseTensor proposals;
proposals.mutable_data<T>({index_t.numel(), 4}, ctx.GetPlace());
BoxCoder<T>(ctx, &anchor_sel, &bbox_sel, &var_sel, &proposals);
ClipTiledBoxes<T>(ctx, im_info_slice, proposals, &proposals, false);
Tensor keep;
phi::DenseTensor keep;
FilterBoxes<T>(ctx, &proposals, min_size, im_info_slice, true, &keep);
// Handle the case when there is no keep index left
if (keep.numel() == 0) {
phi::funcs::SetConstant<phi::CPUContext, T> set_zero;
bbox_sel.mutable_data<T>({1, 4}, ctx.GetPlace());
set_zero(ctx, &bbox_sel, static_cast<T>(0));
Tensor scores_filter;
phi::DenseTensor scores_filter;
scores_filter.mutable_data<T>({1, 1}, ctx.GetPlace());
set_zero(ctx, &scores_filter, static_cast<T>(0));
return std::make_pair(bbox_sel, scores_filter);
}
Tensor scores_filter;
phi::DenseTensor scores_filter;
bbox_sel.mutable_data<T>({keep.numel(), 4}, ctx.GetPlace());
scores_filter.mutable_data<T>({keep.numel(), 1}, ctx.GetPlace());
phi::funcs::CPUGather<T>(ctx, proposals, keep, &bbox_sel);
......@@ -251,7 +249,7 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
return std::make_pair(bbox_sel, scores_filter);
}
Tensor keep_nms =
phi::DenseTensor keep_nms =
phi::funcs::NMS<T>(ctx, &bbox_sel, &scores_filter, nms_thresh, eta);
if (post_nms_top_n > 0 && post_nms_top_n < keep_nms.numel()) {
......
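ProposalForOneImage above ends by handing the filtered boxes and scores to phi::funcs::NMS. A self-contained sketch of the greedy IoU-suppression idea behind that call (plain structs, not the phi implementation), assuming the boxes are already sorted by descending score:

#include <algorithm>
#include <cstdio>
#include <vector>

struct Box { float xmin, ymin, xmax, ymax; };

float Iou(const Box& a, const Box& b) {
  float ix = std::max(0.f, std::min(a.xmax, b.xmax) - std::max(a.xmin, b.xmin));
  float iy = std::max(0.f, std::min(a.ymax, b.ymax) - std::max(a.ymin, b.ymin));
  float inter = ix * iy;
  float area_a = (a.xmax - a.xmin) * (a.ymax - a.ymin);
  float area_b = (b.xmax - b.xmin) * (b.ymax - b.ymin);
  return inter / (area_a + area_b - inter);
}

// Greedy suppression: walk the score-sorted boxes and keep a box only if it
// does not overlap an already-kept box by more than nms_thresh.
std::vector<int> GreedyNms(const std::vector<Box>& sorted_boxes,
                           float nms_thresh) {
  std::vector<int> keep;
  for (int i = 0; i < static_cast<int>(sorted_boxes.size()); ++i) {
    bool suppressed = false;
    for (int k : keep) {
      if (Iou(sorted_boxes[i], sorted_boxes[k]) > nms_thresh) {
        suppressed = true;
        break;
      }
    }
    if (!suppressed) keep.push_back(i);
  }
  return keep;
}

int main() {
  std::vector<Box> boxes = {{0, 0, 10, 10}, {1, 1, 11, 11}, {20, 20, 30, 30}};
  for (int i : GreedyNms(boxes, 0.5f)) std::printf("keep %d\n", i);
  return 0;  // keeps box 0 and box 2; box 1 overlaps box 0 too much
}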
......@@ -28,24 +28,22 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
namespace {
template <typename T>
static std::pair<Tensor, Tensor> ProposalForOneImage(
static std::pair<phi::DenseTensor, phi::DenseTensor> ProposalForOneImage(
const phi::GPUContext &ctx,
const Tensor &im_info,
const Tensor &anchors,
const Tensor &variances,
const Tensor &bbox_deltas, // [M, 4]
const Tensor &scores, // [N, 1]
const phi::DenseTensor &im_info,
const phi::DenseTensor &anchors,
const phi::DenseTensor &variances,
const phi::DenseTensor &bbox_deltas, // [M, 4]
const phi::DenseTensor &scores, // [N, 1]
int pre_nms_top_n,
int post_nms_top_n,
float nms_thresh,
float min_size,
float eta) {
// 1. pre nms
Tensor scores_sort, index_sort;
phi::DenseTensor scores_sort, index_sort;
SortDescending<T>(ctx, scores, &scores_sort, &index_sort);
int num = scores.numel();
int pre_nms_num = (pre_nms_top_n <= 0 || pre_nms_top_n > num) ? scores.numel()
......@@ -54,7 +52,7 @@ static std::pair<Tensor, Tensor> ProposalForOneImage(
index_sort.Resize({pre_nms_num, 1});
// 2. box decode and clipping
Tensor proposals;
phi::DenseTensor proposals;
proposals.mutable_data<T>({pre_nms_num, 4}, ctx.GetPlace());
{
......@@ -68,7 +66,7 @@ static std::pair<Tensor, Tensor> ProposalForOneImage(
}
// 3. filter
Tensor keep_index, keep_num_t;
phi::DenseTensor keep_index, keep_num_t;
keep_index.mutable_data<int>({pre_nms_num}, ctx.GetPlace());
keep_num_t.mutable_data<int>({1}, ctx.GetPlace());
min_size = std::max(min_size, 1.0f);
......@@ -90,7 +88,7 @@ static std::pair<Tensor, Tensor> ProposalForOneImage(
ctx.Wait();
keep_index.Resize({keep_num});
Tensor scores_filter, proposals_filter;
phi::DenseTensor scores_filter, proposals_filter;
// Handle the case when there is no keep index left
if (keep_num == 0) {
phi::funcs::SetConstant<phi::GPUContext, T> set_zero;
......@@ -110,13 +108,13 @@ static std::pair<Tensor, Tensor> ProposalForOneImage(
}
// 4. nms
Tensor keep_nms;
phi::DenseTensor keep_nms;
NMS<T>(ctx, proposals_filter, keep_index, nms_thresh, &keep_nms);
if (post_nms_top_n > 0 && post_nms_top_n < keep_nms.numel()) {
keep_nms.Resize({post_nms_top_n});
}
Tensor scores_nms, proposals_nms;
phi::DenseTensor scores_nms, proposals_nms;
proposals_nms.mutable_data<T>({keep_nms.numel(), 4}, ctx.GetPlace());
scores_nms.mutable_data<T>({keep_nms.numel(), 1}, ctx.GetPlace());
phi::funcs::GPUGather<T>(ctx, proposals_filter, keep_nms, &proposals_nms);
......@@ -171,7 +169,7 @@ class CUDAGenerateProposalsKernel : public framework::OpKernel<T> {
int64_t h_bbox = bbox_dim[2];
int64_t w_bbox = bbox_dim[3];
Tensor bbox_deltas_swap, scores_swap;
phi::DenseTensor bbox_deltas_swap, scores_swap;
bbox_deltas_swap.mutable_data<T>({num, h_bbox, w_bbox, c_bbox},
dev_ctx.GetPlace());
scores_swap.mutable_data<T>({num, h_score, w_score, c_score},
......@@ -200,14 +198,14 @@ class CUDAGenerateProposalsKernel : public framework::OpKernel<T> {
std::vector<int> tmp_num;
for (int64_t i = 0; i < num; ++i) {
Tensor im_info_slice = im_info->Slice(i, i + 1);
Tensor bbox_deltas_slice = bbox_deltas_swap.Slice(i, i + 1);
Tensor scores_slice = scores_swap.Slice(i, i + 1);
phi::DenseTensor im_info_slice = im_info->Slice(i, i + 1);
phi::DenseTensor bbox_deltas_slice = bbox_deltas_swap.Slice(i, i + 1);
phi::DenseTensor scores_slice = scores_swap.Slice(i, i + 1);
bbox_deltas_slice.Resize({h_bbox * w_bbox * c_bbox / 4, 4});
scores_slice.Resize({h_score * w_score * c_score, 1});
std::pair<Tensor, Tensor> box_score_pair =
std::pair<phi::DenseTensor, phi::DenseTensor> box_score_pair =
ProposalForOneImage<T>(dev_ctx,
im_info_slice,
anchors,
......@@ -220,8 +218,8 @@ class CUDAGenerateProposalsKernel : public framework::OpKernel<T> {
min_size,
eta);
Tensor &proposals = box_score_pair.first;
Tensor &scores = box_score_pair.second;
phi::DenseTensor &proposals = box_score_pair.first;
phi::DenseTensor &scores = box_score_pair.second;
memory::Copy(place,
rpn_rois_data + num_proposals * 4,
......
......@@ -29,8 +29,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
class GenerateProposalsV2Op : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
......
......@@ -18,8 +18,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
template <typename T>
struct IouFunction {
public:
......@@ -182,21 +180,21 @@ class IouSimilarityMLUKernel : public framework::OpKernel<T> {
auto M = y->dims()[0];
out->mutable_data<T>({N, M}, place);
Tensor xt(_type);
Tensor yt(_type);
phi::DenseTensor xt(_type);
phi::DenseTensor yt(_type);
xt.mutable_data<T>({4, N}, place);
yt.mutable_data<T>({4, M}, place);
std::vector<int> vec_trans = {1, 0};
F.Transpose(x, &xt, vec_trans);
F.Transpose(y, &yt, vec_trans);
Tensor xmin1 = xt.Slice(0, 1);
Tensor ymin1 = xt.Slice(1, 2);
Tensor xmax1 = xt.Slice(2, 3);
Tensor ymax1 = xt.Slice(3, 4);
Tensor xmin2 = yt.Slice(0, 1);
Tensor ymin2 = yt.Slice(1, 2);
Tensor xmax2 = yt.Slice(2, 3);
Tensor ymax2 = yt.Slice(3, 4);
phi::DenseTensor xmin1 = xt.Slice(0, 1);
phi::DenseTensor ymin1 = xt.Slice(1, 2);
phi::DenseTensor xmax1 = xt.Slice(2, 3);
phi::DenseTensor ymax1 = xt.Slice(3, 4);
phi::DenseTensor xmin2 = yt.Slice(0, 1);
phi::DenseTensor ymin2 = yt.Slice(1, 2);
phi::DenseTensor xmax2 = yt.Slice(2, 3);
phi::DenseTensor ymax2 = yt.Slice(3, 4);
xmin1.Resize({N, 1});
ymin1.Resize({N, 1});
xmax1.Resize({N, 1});
......@@ -206,12 +204,12 @@ class IouSimilarityMLUKernel : public framework::OpKernel<T> {
xmax2.Resize({1, M});
ymax2.Resize({1, M});
Tensor w1(_type);
Tensor h1(_type);
Tensor w2(_type);
Tensor h2(_type);
Tensor area1(_type);
Tensor area2(_type);
phi::DenseTensor w1(_type);
phi::DenseTensor h1(_type);
phi::DenseTensor w2(_type);
phi::DenseTensor h2(_type);
phi::DenseTensor area1(_type);
phi::DenseTensor area2(_type);
w1.mutable_data<T>({N, 1}, place);
h1.mutable_data<T>({N, 1}, place);
w2.mutable_data<T>({1, M}, place);
......@@ -231,10 +229,10 @@ class IouSimilarityMLUKernel : public framework::OpKernel<T> {
F.Mul(&w1, &h1, &area1);
F.Mul(&w2, &h2, &area2);
Tensor inter_xmax(_type);
Tensor inter_ymax(_type);
Tensor inter_xmin(_type);
Tensor inter_ymin(_type);
phi::DenseTensor inter_xmax(_type);
phi::DenseTensor inter_ymax(_type);
phi::DenseTensor inter_xmin(_type);
phi::DenseTensor inter_ymin(_type);
inter_xmax.mutable_data<T>({N, M}, place);
inter_ymax.mutable_data<T>({N, M}, place);
inter_xmin.mutable_data<T>({N, M}, place);
......@@ -244,8 +242,8 @@ class IouSimilarityMLUKernel : public framework::OpKernel<T> {
F.Maximum(&xmin1, &xmin2, &inter_xmin);
F.Maximum(&ymin1, &ymin2, &inter_ymin);
Tensor inter_w(_type);
Tensor inter_h(_type);
phi::DenseTensor inter_w(_type);
phi::DenseTensor inter_h(_type);
inter_w.mutable_data<T>({N, M}, place);
inter_h.mutable_data<T>({N, M}, place);
F.Sub(&inter_xmax, &inter_xmin, &inter_w);
......@@ -255,14 +253,14 @@ class IouSimilarityMLUKernel : public framework::OpKernel<T> {
F.Adds(&inter_w, 1.0f, &inter_w);
F.Adds(&inter_h, 1.0f, &inter_h);
}
Tensor zeros(_type);
phi::DenseTensor zeros(_type);
zeros.mutable_data<T>({1}, place);
FillMLUTensorWithHostValue<T>(ctx, static_cast<T>(0), &zeros);
F.Maximum(&inter_w, &zeros, &inter_w);
F.Maximum(&inter_h, &zeros, &inter_h);
F.Mul(&inter_w, &inter_h, out);
Tensor union_area(_type);
phi::DenseTensor union_area(_type);
union_area.mutable_data<T>({N, M}, place);
F.Add(&area1, &area2, &union_area);
F.Sub(&union_area, out, &union_area);
......
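The MLU kernel above computes the full [N, M] IoU matrix with broadcasted tensor ops: per-box areas, clamped intersection widths/heights (with a +1 offset when the boxes are in pixel rather than normalized coordinates), and inter / (area1 + area2 - inter). A plain nested-loop sketch of the same computation (illustrative helper, not the MLU path):

#include <algorithm>
#include <cstdio>
#include <vector>

// Pairwise IoU between N boxes in `x` and M boxes in `y`, both stored as
// rows of [xmin, ymin, xmax, ymax]; out is row-major [N, M].  The
// `normalized` flag mirrors the kernel above: in pixel coordinates a +1
// is added to each width and height.
void IouSimilarity(const std::vector<float>& x, const std::vector<float>& y,
                   int n, int m, bool normalized, std::vector<float>* out) {
  float off = normalized ? 0.f : 1.f;
  out->assign(static_cast<size_t>(n) * m, 0.f);
  for (int i = 0; i < n; ++i) {
    const float* a = &x[i * 4];
    float area_a = (a[2] - a[0] + off) * (a[3] - a[1] + off);
    for (int j = 0; j < m; ++j) {
      const float* b = &y[j * 4];
      float area_b = (b[2] - b[0] + off) * (b[3] - b[1] + off);
      float iw = std::max(0.f, std::min(a[2], b[2]) - std::max(a[0], b[0]) + off);
      float ih = std::max(0.f, std::min(a[3], b[3]) - std::max(a[1], b[1]) + off);
      float inter = iw * ih;
      (*out)[i * m + j] = inter / (area_a + area_b - inter);
    }
  }
}

int main() {
  std::vector<float> x = {0, 0, 2, 2}, y = {1, 1, 3, 3, 4, 4, 5, 5};
  std::vector<float> out;
  IouSimilarity(x, y, 1, 2, /*normalized=*/true, &out);
  std::printf("%g %g\n", out[0], out[1]);  // ~0.142857 and 0
  return 0;
}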
......@@ -18,8 +18,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
template <typename T>
struct IouFunction {
public:
......@@ -108,21 +106,21 @@ class IouSimilarityNPUKernel : public framework::OpKernel<T> {
auto M = y->dims()[0];
out->mutable_data<T>({N, M}, place);
Tensor xt(_type);
Tensor yt(_type);
phi::DenseTensor xt(_type);
phi::DenseTensor yt(_type);
xt.mutable_data<T>({4, N}, place);
yt.mutable_data<T>({4, M}, place);
std::vector<int> vec_trans = {1, 0};
F.Transpose(x, &xt, vec_trans);
F.Transpose(y, &yt, vec_trans);
Tensor xmin1 = xt.Slice(0, 1);
Tensor ymin1 = xt.Slice(1, 2);
Tensor xmax1 = xt.Slice(2, 3);
Tensor ymax1 = xt.Slice(3, 4);
Tensor xmin2 = yt.Slice(0, 1);
Tensor ymin2 = yt.Slice(1, 2);
Tensor xmax2 = yt.Slice(2, 3);
Tensor ymax2 = yt.Slice(3, 4);
phi::DenseTensor xmin1 = xt.Slice(0, 1);
phi::DenseTensor ymin1 = xt.Slice(1, 2);
phi::DenseTensor xmax1 = xt.Slice(2, 3);
phi::DenseTensor ymax1 = xt.Slice(3, 4);
phi::DenseTensor xmin2 = yt.Slice(0, 1);
phi::DenseTensor ymin2 = yt.Slice(1, 2);
phi::DenseTensor xmax2 = yt.Slice(2, 3);
phi::DenseTensor ymax2 = yt.Slice(3, 4);
xmin1.Resize({N, 1});
ymin1.Resize({N, 1});
xmax1.Resize({N, 1});
......@@ -132,12 +130,12 @@ class IouSimilarityNPUKernel : public framework::OpKernel<T> {
xmax2.Resize({1, M});
ymax2.Resize({1, M});
Tensor w1(_type);
Tensor h1(_type);
Tensor w2(_type);
Tensor h2(_type);
Tensor area1(_type);
Tensor area2(_type);
phi::DenseTensor w1(_type);
phi::DenseTensor h1(_type);
phi::DenseTensor w2(_type);
phi::DenseTensor h2(_type);
phi::DenseTensor area1(_type);
phi::DenseTensor area2(_type);
w1.mutable_data<T>({N, 1}, place);
h1.mutable_data<T>({N, 1}, place);
w2.mutable_data<T>({1, M}, place);
......@@ -157,10 +155,10 @@ class IouSimilarityNPUKernel : public framework::OpKernel<T> {
F.Mul(&w1, &h1, &area1);
F.Mul(&w2, &h2, &area2);
Tensor inter_xmax(_type);
Tensor inter_ymax(_type);
Tensor inter_xmin(_type);
Tensor inter_ymin(_type);
phi::DenseTensor inter_xmax(_type);
phi::DenseTensor inter_ymax(_type);
phi::DenseTensor inter_xmin(_type);
phi::DenseTensor inter_ymin(_type);
inter_xmax.mutable_data<T>({N, M}, place);
inter_ymax.mutable_data<T>({N, M}, place);
inter_xmin.mutable_data<T>({N, M}, place);
......@@ -170,8 +168,8 @@ class IouSimilarityNPUKernel : public framework::OpKernel<T> {
F.Maximum(&xmin1, &xmin2, &inter_xmin);
F.Maximum(&ymin1, &ymin2, &inter_ymin);
Tensor inter_w(_type);
Tensor inter_h(_type);
phi::DenseTensor inter_w(_type);
phi::DenseTensor inter_h(_type);
inter_w.mutable_data<T>({N, M}, place);
inter_h.mutable_data<T>({N, M}, place);
F.Sub(&inter_xmax, &inter_xmin, &inter_w);
......@@ -181,14 +179,14 @@ class IouSimilarityNPUKernel : public framework::OpKernel<T> {
F.Adds(&inter_w, 1.0f, &inter_w);
F.Adds(&inter_h, 1.0f, &inter_h);
}
Tensor zeros(_type);
phi::DenseTensor zeros(_type);
zeros.mutable_data<T>({1}, place);
FillNpuTensorWithConstant<T>(&zeros, static_cast<T>(0));
F.Maximum(&inter_w, &zeros, &inter_w);
F.Maximum(&inter_h, &zeros, &inter_h);
F.Mul(&inter_w, &inter_h, out);
Tensor union_area(_type);
phi::DenseTensor union_area(_type);
union_area.mutable_data<T>({N, M}, place);
F.Add(&area1, &area2, &union_area);
F.Sub(&union_area, out, &union_area);
......
......@@ -19,8 +19,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
class LocalityAwareNMSOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
......@@ -252,7 +250,7 @@ class LocalityAwareNMSKernel : public framework::OpKernel<T> {
int num_det = 0;
int64_t class_num = scores->dims()[0];
Tensor bbox_slice, score_slice;
phi::DenseTensor bbox_slice, score_slice;
for (int64_t c = 0; c < class_num; ++c) {
if (c == background_label) continue;
......@@ -325,7 +323,7 @@ class LocalityAwareNMSKernel : public framework::OpKernel<T> {
auto* bboxes_data = bboxes.data<T>();
auto* odata = outs->data<T>();
const T* sdata;
Tensor bbox;
phi::DenseTensor bbox;
bbox.Resize({scores.dims()[0], box_size});
int count = 0;
for (const auto& it : selected_indices) {
......@@ -370,7 +368,7 @@ class LocalityAwareNMSKernel : public framework::OpKernel<T> {
int64_t box_dim = boxes.dims()[2];
int64_t out_dim = box_dim + 2;
int num_nmsed_out = 0;
Tensor boxes_slice, scores_slice;
phi::DenseTensor boxes_slice, scores_slice;
int n = batch_size;
for (int i = 0; i < n; ++i) {
scores_slice = scores.Slice(i, i + 1);
......@@ -407,7 +405,7 @@ class LocalityAwareNMSKernel : public framework::OpKernel<T> {
int64_t s = batch_starts[i];
int64_t e = batch_starts[i + 1];
if (e > s) {
Tensor out = outs->Slice(s, e);
phi::DenseTensor out = outs->Slice(s, e);
LocalityAwareNMSOutput(dev_ctx,
scores_slice,
boxes_slice,
......
......@@ -20,8 +20,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
class MatrixNMSOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
......
......@@ -21,8 +21,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
inline std::vector<size_t> GetNmsLodFromRoisNum(
const phi::DenseTensor* rois_num) {
std::vector<size_t> rois_lod;
......@@ -228,7 +226,7 @@ class MultiClassNMSKernel : public framework::OpKernel<T> {
int num_det = 0;
int64_t class_num = scores_size == 3 ? scores.dims()[0] : scores.dims()[1];
Tensor bbox_slice, score_slice;
phi::DenseTensor bbox_slice, score_slice;
for (int64_t c = 0; c < class_num; ++c) {
if (c == background_label) continue;
if (scores_size == 3) {
......@@ -319,7 +317,7 @@ class MultiClassNMSKernel : public framework::OpKernel<T> {
auto* bboxes_data = bboxes.data<T>();
auto* odata = outs->data<T>();
const T* sdata;
Tensor bbox;
phi::DenseTensor bbox;
bbox.Resize({scores.dims()[0], box_size});
int count = 0;
for (const auto& it : selected_indices) {
......@@ -373,7 +371,7 @@ class MultiClassNMSKernel : public framework::OpKernel<T> {
int64_t box_dim = boxes->dims()[2];
int64_t out_dim = box_dim + 2;
int num_nmsed_out = 0;
Tensor boxes_slice, scores_slice;
phi::DenseTensor boxes_slice, scores_slice;
int n = 0;
if (has_roisnum) {
n = score_size == 3 ? batch_size : rois_num->numel();
......@@ -449,7 +447,7 @@ class MultiClassNMSKernel : public framework::OpKernel<T> {
int64_t s = batch_starts[i];
int64_t e = batch_starts[i + 1];
if (e > s) {
Tensor out = outs->Slice(s, e);
phi::DenseTensor out = outs->Slice(s, e);
if (return_index) {
int* output_idx =
index->mutable_data<int>({num_kept, 1}, ctx.GetPlace());
......
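LocalityAwareNMSKernel and MultiClassNMSKernel above both slice out one class at a time and run an IoU-based suppression pass over score-sorted boxes. The core of that pass is classic greedy hard NMS; a self-contained sketch over a precomputed IoU matrix is shown below (the function name and parameters are illustrative, and the locality-aware and matrix variants adapt this basic scheme rather than reproduce it exactly):

#include <algorithm>
#include <numeric>
#include <vector>

// Greedy hard NMS: keep the highest-scoring surviving box, suppress every
// box whose IoU with it exceeds nms_threshold, repeat. iou[i][j] is the
// precomputed overlap of boxes i and j. Returns indices of kept boxes.
std::vector<int> GreedyNMS(const std::vector<std::vector<float>>& iou,
                           const std::vector<float>& scores,
                           float score_threshold,
                           float nms_threshold) {
  std::vector<int> order(scores.size());
  std::iota(order.begin(), order.end(), 0);
  std::sort(order.begin(), order.end(),
            [&](int a, int b) { return scores[a] > scores[b]; });
  std::vector<bool> suppressed(scores.size(), false);
  std::vector<int> keep;
  for (int i : order) {
    if (suppressed[i] || scores[i] < score_threshold) continue;
    keep.push_back(i);
    for (int j : order) {
      if (j != i && !suppressed[j] && iou[i][j] > nms_threshold) {
        suppressed[j] = true;
      }
    }
  }
  return keep;
}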
......@@ -17,8 +17,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
template <typename DeviceContext, typename T>
class PolygonBoxTransformCPUKernel : public framework::OpKernel<T> {
public:
......
......@@ -19,7 +19,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
using phi::PADDLE_CUDA_NUM_THREADS;
#define CUDA_BLOCK_SIZE 16
......
......@@ -18,8 +18,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
template <typename DeviceContext, typename T>
class PriorBoxNPUKernel : public framework::OpKernel<T> {
public:
......@@ -50,7 +48,7 @@ class PriorBoxNPUKernel : public framework::OpKernel<T> {
auto place = ctx.GetPlace();
Tensor out(input->type());
phi::DenseTensor out(input->type());
auto out_dims = phi::vectorize(boxes->dims());
out_dims.insert(out_dims.begin(), 2);
out.Resize(phi::make_ddim(out_dims));
......@@ -75,8 +73,8 @@ class PriorBoxNPUKernel : public framework::OpKernel<T> {
runner.Run(stream);
out.Resize(phi::make_ddim({out.numel()}));
Tensor out_boxes = out.Slice(0, boxes->numel());
Tensor out_variances = out.Slice(boxes->numel(), out.numel());
phi::DenseTensor out_boxes = out.Slice(0, boxes->numel());
phi::DenseTensor out_variances = out.Slice(boxes->numel(), out.numel());
out_boxes.Resize(boxes->dims());
out_variances.Resize(variances->dims());
......
......@@ -18,8 +18,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
class RetinanetDetectionOutputOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
......@@ -409,9 +407,9 @@ class RetinanetDetectionOutputKernel : public framework::OpKernel<T> {
}
void RetinanetDetectionOutput(const framework::ExecutionContext& ctx,
const std::vector<Tensor>& scores,
const std::vector<Tensor>& bboxes,
const std::vector<Tensor>& anchors,
const std::vector<phi::DenseTensor>& scores,
const std::vector<phi::DenseTensor>& bboxes,
const std::vector<phi::DenseTensor>& anchors,
const phi::DenseTensor& im_info,
std::vector<std::vector<T>>* nmsed_out,
int* num_nmsed_out) const {
......@@ -425,11 +423,11 @@ class RetinanetDetectionOutputKernel : public framework::OpKernel<T> {
std::map<int, std::vector<std::vector<T>>> preds;
for (size_t l = 0; l < scores.size(); ++l) {
// Fetch per level score
Tensor scores_per_level = scores[l];
phi::DenseTensor scores_per_level = scores[l];
// Fetch per level bbox
Tensor bboxes_per_level = bboxes[l];
phi::DenseTensor bboxes_per_level = bboxes[l];
// Fetch per level anchor
Tensor anchors_per_level = anchors[l];
phi::DenseTensor anchors_per_level = anchors[l];
int64_t scores_num = scores_per_level.numel();
int64_t bboxes_num = bboxes_per_level.numel();
......@@ -492,9 +490,9 @@ class RetinanetDetectionOutputKernel : public framework::OpKernel<T> {
auto* im_info = ctx.Input<phi::DenseTensor>("ImInfo");
auto* outs = ctx.Output<phi::DenseTensor>("Out");
std::vector<Tensor> boxes_list(boxes.size());
std::vector<Tensor> scores_list(scores.size());
std::vector<Tensor> anchors_list(anchors.size());
std::vector<phi::DenseTensor> boxes_list(boxes.size());
std::vector<phi::DenseTensor> scores_list(scores.size());
std::vector<phi::DenseTensor> anchors_list(anchors.size());
for (size_t j = 0; j < boxes_list.size(); ++j) {
boxes_list[j] = *boxes[j];
scores_list[j] = *scores[j];
......@@ -512,8 +510,8 @@ class RetinanetDetectionOutputKernel : public framework::OpKernel<T> {
std::vector<size_t> batch_starts = {0};
for (int i = 0; i < batch_size; ++i) {
int num_nmsed_out = 0;
std::vector<Tensor> box_per_batch_list(boxes_list.size());
std::vector<Tensor> score_per_batch_list(scores_list.size());
std::vector<phi::DenseTensor> box_per_batch_list(boxes_list.size());
std::vector<phi::DenseTensor> score_per_batch_list(scores_list.size());
for (size_t j = 0; j < boxes_list.size(); ++j) {
const auto& score_dims = scores_list[j].dims();
score_per_batch_list[j] = scores_list[j].Slice(i, i + 1);
......@@ -521,7 +519,7 @@ class RetinanetDetectionOutputKernel : public framework::OpKernel<T> {
box_per_batch_list[j] = boxes_list[j].Slice(i, i + 1);
box_per_batch_list[j].Resize({score_dims[1], box_dim});
}
Tensor im_info_slice = im_info->Slice(i, i + 1);
phi::DenseTensor im_info_slice = im_info->Slice(i, i + 1);
std::vector<std::vector<T>> nmsed_out;
RetinanetDetectionOutput(ctx,
......@@ -544,7 +542,7 @@ class RetinanetDetectionOutputKernel : public framework::OpKernel<T> {
int64_t s = batch_starts[i];
int64_t e = batch_starts[i + 1];
if (e > s) {
Tensor out = outs->Slice(s, e);
phi::DenseTensor out = outs->Slice(s, e);
MultiClassOutput(dev_ctx, all_nmsed_out[i], &out);
}
}
......@@ -563,7 +561,8 @@ class RetinanetDetectionOutputOpMaker
void Make() override {
AddInput("BBoxes",
"(List) A list of tensors from multiple FPN levels. Each "
"element is a 3-D Tensor with shape [N, Mi, 4] represents the "
"element is a 3-D phi::DenseTensor with shape [N, Mi, 4] "
"represents the "
"predicted locations of Mi bounding boxes, N is the batch size. "
"Mi is the number of bounding boxes from i-th FPN level. Each "
"bounding box has four coordinate values and the layout is "
......@@ -571,18 +570,20 @@ class RetinanetDetectionOutputOpMaker
.AsDuplicable();
AddInput("Scores",
"(List) A list of tensors from multiple FPN levels. Each "
"element is a 3-D Tensor with shape [N, Mi, C] represents the "
"element is a 3-D phi::DenseTensor with shape [N, Mi, C] "
"represents the "
"predicted confidence from its FPN level. N is the batch size, "
"C is the class number (excluding background), Mi is the number "
"of bounding boxes from i-th FPN level. For each bounding box, "
"there are total C scores.")
.AsDuplicable();
AddInput("Anchors",
"(List) A list of tensors from multiple FPN levels. Each"
"element is a 2-D Tensor with shape [Mi, 4] represents the "
"locations of Mi anchor boxes from i-th FPN level. Each "
"bounding box has four coordinate values and the layout is "
"[xmin, ymin, xmax, ymax].")
AddInput(
"Anchors",
"(List) A list of tensors from multiple FPN levels. Each"
"element is a 2-D phi::DenseTensor with shape [Mi, 4] represents the "
"locations of Mi anchor boxes from i-th FPN level. Each "
"bounding box has four coordinate values and the layout is "
"[xmin, ymin, xmax, ymax].")
.AsDuplicable();
AddInput("ImInfo",
"(phi::DenseTensor) A 2-D phi::DenseTensor with shape [N, 3] "
......
......@@ -22,8 +22,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
template <typename T>
bool GT_E(T a, T b) {
return (a > b) || fabs(a - b) < 1e-4;
......@@ -600,7 +598,7 @@ class ROIPerspectiveTransformOpMaker
public:
void Make() override {
AddInput("X",
"(Tensor), "
"(phi::DenseTensor), "
"the input of ROIPerspectiveTransformOp. "
"The format of input tensor is NCHW. Where N is batch size, "
"C is the number of input channels, "
......@@ -617,28 +615,28 @@ class ROIPerspectiveTransformOpMaker
"(x4, y4) is the bottom left coordinates.");
AddOutput(
"Out",
"(Tensor), "
"(phi::DenseTensor), "
"The output of ROIPerspectiveTransformOp is a 4-D tensor with shape "
"(num_rois, channels, transformed_h, transformed_w).");
AddOutput("Mask",
"(Tensor), "
"(phi::DenseTensor), "
"The output mask of ROIPerspectiveTransformOp is a 4-D tensor "
"with shape "
"(num_rois, 1, transformed_h, transformed_w).");
AddOutput("TransformMatrix",
"(Tensor), "
"(phi::DenseTensor), "
"The output transform matrix of ROIPerspectiveTransformOp is a "
"1-D tensor with shape "
"(num_rois, 9).");
AddOutput("Out2InIdx",
"(Tensor), "
"(phi::DenseTensor), "
"An intermediate tensor used to map indexes of input feature map "
"and indexes of output feature map."
"The shape of the tensor is [out_size, 4] and out_size is the "
"number of elements in output feature map.")
.AsIntermediate();
AddOutput("Out2InWeights",
"(Tensor), "
"(phi::DenseTensor), "
"An intermediate tensor used to record the weights of bilinear "
"interpolatein for each element in output. The shape of the "
"tensor is [out_size, 4] and out_size is the number of elements "
......
......@@ -21,7 +21,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
template <typename T,
int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
......@@ -113,11 +112,12 @@ void AppendRpns(phi::DenseTensor* out,
}
template <typename T>
std::vector<Tensor> FilterStraddleAnchor(const phi::CPUContext& context,
const phi::DenseTensor* anchor,
const float rpn_straddle_thresh,
T im_height,
T im_width) {
std::vector<phi::DenseTensor> FilterStraddleAnchor(
const phi::CPUContext& context,
const phi::DenseTensor* anchor,
const float rpn_straddle_thresh,
T im_height,
T im_width) {
std::vector<int> inds_inside;
int anchor_num = anchor->dims()[0];
auto* anchor_data = anchor->data<T>();
......@@ -138,25 +138,25 @@ std::vector<Tensor> FilterStraddleAnchor(const phi::CPUContext& context,
}
}
int inside_num = inds_inside.size();
Tensor inds_inside_t;
phi::DenseTensor inds_inside_t;
int* inds_inside_data =
inds_inside_t.mutable_data<int>({inside_num}, context.GetPlace());
std::copy(inds_inside.begin(), inds_inside.end(), inds_inside_data);
Tensor inside_anchor_t;
phi::DenseTensor inside_anchor_t;
T* inside_anchor_data =
inside_anchor_t.mutable_data<T>({inside_num, 4}, context.GetPlace());
Gather<T>(
anchor->data<T>(), 4, inds_inside_data, inside_num, inside_anchor_data);
std::vector<Tensor> res;
std::vector<phi::DenseTensor> res;
res.emplace_back(inds_inside_t);
res.emplace_back(inside_anchor_t);
return res;
}
template <typename T>
Tensor FilterCrowdGt(const phi::CPUContext& context,
phi::DenseTensor* gt_boxes,
phi::DenseTensor* is_crowd) {
phi::DenseTensor FilterCrowdGt(const phi::CPUContext& context,
phi::DenseTensor* gt_boxes,
phi::DenseTensor* is_crowd) {
int gt_num = gt_boxes->dims()[0];
std::vector<int> not_crowd_inds;
auto* is_crowd_data = is_crowd->data<int>();
......@@ -166,7 +166,7 @@ Tensor FilterCrowdGt(const phi::CPUContext& context,
}
}
int ncrowd_num = not_crowd_inds.size();
Tensor ncrowd_gt_boxes;
phi::DenseTensor ncrowd_gt_boxes;
T* ncrowd_gt_boxes_data =
ncrowd_gt_boxes.mutable_data<T>({ncrowd_num, 4}, context.GetPlace());
Gather<T>(gt_boxes->data<T>(),
......@@ -300,7 +300,7 @@ void ScoreAssign(const T* anchor_by_gt_overlap_data,
}
template <typename T>
std::vector<Tensor> SampleRpnFgBgGt(
std::vector<phi::DenseTensor> SampleRpnFgBgGt(
const phi::CPUContext& ctx,
const phi::DenseTensor& anchor_by_gt_overlap,
const int rpn_batch_size_per_im,
......@@ -322,7 +322,7 @@ std::vector<Tensor> SampleRpnFgBgGt(
// Calculate the max IoU between anchors and gt boxes
// Map from anchor to gt box that has highest overlap
auto place = ctx.GetPlace();
Tensor anchor_to_gt_max, anchor_to_gt_argmax, gt_to_anchor_max;
phi::DenseTensor anchor_to_gt_max, anchor_to_gt_argmax, gt_to_anchor_max;
anchor_to_gt_max.mutable_data<T>({anchor_num}, place);
int* argmax = anchor_to_gt_argmax.mutable_data<int>({anchor_num}, place);
gt_to_anchor_max.mutable_data<T>({gt_num}, place);
......@@ -365,7 +365,8 @@ std::vector<Tensor> SampleRpnFgBgGt(
for (int i = 0; i < fg_fake_num; ++i) {
gt_inds.emplace_back(argmax[fg_fake[i]]);
}
Tensor loc_index_t, score_index_t, tgt_lbl_t, gt_inds_t, bbox_inside_weight_t;
phi::DenseTensor loc_index_t, score_index_t, tgt_lbl_t, gt_inds_t,
bbox_inside_weight_t;
int* loc_index_data = loc_index_t.mutable_data<int>({fg_fake_num}, place);
int* score_index_data =
score_index_t.mutable_data<int>({fg_num + bg_num}, place);
......@@ -381,7 +382,7 @@ std::vector<Tensor> SampleRpnFgBgGt(
std::copy(bbox_inside_weight.begin(),
bbox_inside_weight.end(),
bbox_inside_weight_data);
std::vector<Tensor> loc_score_tgtlbl_gt;
std::vector<phi::DenseTensor> loc_score_tgtlbl_gt;
loc_score_tgtlbl_gt.emplace_back(loc_index_t);
loc_score_tgtlbl_gt.emplace_back(score_index_t);
loc_score_tgtlbl_gt.emplace_back(tgt_lbl_t);
......@@ -455,30 +456,30 @@ class RpnTargetAssignKernel : public framework::OpKernel<T> {
auto gt_boxes_lod = gt_boxes->lod().back();
auto is_crowd_lod = is_crowd->lod().back();
for (int i = 0; i < batch_num; ++i) {
Tensor gt_boxes_slice =
phi::DenseTensor gt_boxes_slice =
gt_boxes->Slice(gt_boxes_lod[i], gt_boxes_lod[i + 1]);
Tensor is_crowd_slice =
phi::DenseTensor is_crowd_slice =
is_crowd->Slice(is_crowd_lod[i], is_crowd_lod[i + 1]);
Tensor im_info_slice = im_info->Slice(i, i + 1);
phi::DenseTensor im_info_slice = im_info->Slice(i, i + 1);
auto* im_info_data = im_info_slice.data<T>();
auto im_height = im_info_data[0];
auto im_width = im_info_data[1];
auto im_scale = im_info_data[2];
// Filter straddle anchor
std::vector<Tensor> filter_output = FilterStraddleAnchor<T>(
std::vector<phi::DenseTensor> filter_output = FilterStraddleAnchor<T>(
dev_ctx, anchor, rpn_straddle_thresh, im_height, im_width);
Tensor inds_inside = filter_output[0];
Tensor inside_anchor = filter_output[1];
phi::DenseTensor inds_inside = filter_output[0];
phi::DenseTensor inside_anchor = filter_output[1];
// Filter crowd gt
Tensor ncrowd_gt_boxes =
phi::DenseTensor ncrowd_gt_boxes =
FilterCrowdGt<T>(dev_ctx, &gt_boxes_slice, &is_crowd_slice);
auto ncrowd_gt_boxes_et =
framework::EigenTensor<T, 2>::From(ncrowd_gt_boxes);
ncrowd_gt_boxes_et = ncrowd_gt_boxes_et * im_scale;
Tensor anchor_by_gt_overlap;
phi::DenseTensor anchor_by_gt_overlap;
anchor_by_gt_overlap.mutable_data<T>(
{inside_anchor.dims()[0], ncrowd_gt_boxes.dims()[0]}, place);
BboxOverlaps<T>(inside_anchor, ncrowd_gt_boxes, &anchor_by_gt_overlap);
......@@ -492,16 +493,16 @@ class RpnTargetAssignKernel : public framework::OpKernel<T> {
engine,
use_random);
Tensor sampled_loc_index = loc_score_tgtlbl_gt[0];
Tensor sampled_score_index = loc_score_tgtlbl_gt[1];
Tensor sampled_tgtlbl = loc_score_tgtlbl_gt[2];
Tensor sampled_gt_index = loc_score_tgtlbl_gt[3];
Tensor sampled_bbox_inside_weight = loc_score_tgtlbl_gt[4];
phi::DenseTensor sampled_loc_index = loc_score_tgtlbl_gt[0];
phi::DenseTensor sampled_score_index = loc_score_tgtlbl_gt[1];
phi::DenseTensor sampled_tgtlbl = loc_score_tgtlbl_gt[2];
phi::DenseTensor sampled_gt_index = loc_score_tgtlbl_gt[3];
phi::DenseTensor sampled_bbox_inside_weight = loc_score_tgtlbl_gt[4];
int loc_num = sampled_loc_index.dims()[0];
int score_num = sampled_score_index.dims()[0];
// unmap to all anchor
Tensor sampled_loc_index_unmap, sampled_score_index_unmap;
phi::DenseTensor sampled_loc_index_unmap, sampled_score_index_unmap;
sampled_loc_index_unmap.mutable_data<int>({loc_num}, place);
sampled_score_index_unmap.mutable_data<int>({score_num}, place);
Gather<int>(inds_inside.data<int>(),
......@@ -516,7 +517,7 @@ class RpnTargetAssignKernel : public framework::OpKernel<T> {
sampled_score_index_unmap.data<int>());
// get target bbox deltas
Tensor sampled_anchor, sampled_gt, sampled_tgt_bbox;
phi::DenseTensor sampled_anchor, sampled_gt, sampled_tgt_bbox;
auto* sampled_anchor_data =
sampled_anchor.mutable_data<T>({loc_num, 4}, place);
auto* sampled_gt_data = sampled_gt.mutable_data<T>({loc_num, 4}, place);
......@@ -859,10 +860,11 @@ class RetinanetTargetAssignOp : public framework::OperatorWithKernel {
};
template <typename T>
std::vector<Tensor> FilterCrowdGtBoxLabel(const phi::CPUContext& context,
phi::DenseTensor* gt_boxes,
phi::DenseTensor* gt_labels,
phi::DenseTensor* is_crowd) {
std::vector<phi::DenseTensor> FilterCrowdGtBoxLabel(
const phi::CPUContext& context,
phi::DenseTensor* gt_boxes,
phi::DenseTensor* gt_labels,
phi::DenseTensor* is_crowd) {
int gt_num = gt_boxes->dims()[0];
std::vector<int> not_crowd_inds;
auto* is_crowd_data = is_crowd->data<int>();
......@@ -872,7 +874,7 @@ std::vector<Tensor> FilterCrowdGtBoxLabel(const phi::CPUContext& context,
}
}
int ncrowd_num = not_crowd_inds.size();
Tensor ncrowd_gt_boxes, ncrowd_gt_labels;
phi::DenseTensor ncrowd_gt_boxes, ncrowd_gt_labels;
T* ncrowd_gt_boxes_data =
ncrowd_gt_boxes.mutable_data<T>({ncrowd_num, 4}, context.GetPlace());
int* ncrowd_gt_labels_data =
......@@ -887,19 +889,20 @@ std::vector<Tensor> FilterCrowdGtBoxLabel(const phi::CPUContext& context,
not_crowd_inds.data(),
ncrowd_num,
ncrowd_gt_labels_data);
std::vector<Tensor> res;
std::vector<phi::DenseTensor> res;
res.emplace_back(ncrowd_gt_boxes);
res.emplace_back(ncrowd_gt_labels);
return res;
}
template <typename T>
std::vector<Tensor> GetAllFgBgGt(const phi::CPUContext& ctx,
const phi::DenseTensor& anchor_by_gt_overlap,
const phi::DenseTensor& ncrowd_gt_labels,
const float positive_overlap,
const float negative_overlap,
std::minstd_rand engine) {
std::vector<phi::DenseTensor> GetAllFgBgGt(
const phi::CPUContext& ctx,
const phi::DenseTensor& anchor_by_gt_overlap,
const phi::DenseTensor& ncrowd_gt_labels,
const float positive_overlap,
const float negative_overlap,
std::minstd_rand engine) {
auto* anchor_by_gt_overlap_data = anchor_by_gt_overlap.data<T>();
int anchor_num = anchor_by_gt_overlap.dims()[0];
int gt_num = anchor_by_gt_overlap.dims()[1];
......@@ -913,7 +916,7 @@ std::vector<Tensor> GetAllFgBgGt(const phi::CPUContext& ctx,
// Calculate the max IoU between anchors and gt boxes
// Map from anchor to gt box that has highest overlap
auto place = ctx.GetPlace();
Tensor anchor_to_gt_max, anchor_to_gt_argmax, gt_to_anchor_max;
phi::DenseTensor anchor_to_gt_max, anchor_to_gt_argmax, gt_to_anchor_max;
anchor_to_gt_max.mutable_data<T>({anchor_num}, place);
int* argmax = anchor_to_gt_argmax.mutable_data<int>({anchor_num}, place);
gt_to_anchor_max.mutable_data<T>({gt_num}, place);
......@@ -961,8 +964,9 @@ std::vector<Tensor> GetAllFgBgGt(const phi::CPUContext& ctx,
gt_inds.emplace_back(argmax[fg_fake[i]]);
}
Tensor loc_index_t, score_index_t, tgt_lbl_t, gt_inds_t, bbox_inside_weight_t;
Tensor fg_num_t;
phi::DenseTensor loc_index_t, score_index_t, tgt_lbl_t, gt_inds_t,
bbox_inside_weight_t;
phi::DenseTensor fg_num_t;
int* loc_index_data = loc_index_t.mutable_data<int>({fg_fake_num}, place);
int* score_index_data =
score_index_t.mutable_data<int>({fg_num + bg_num}, place);
......@@ -980,7 +984,7 @@ std::vector<Tensor> GetAllFgBgGt(const phi::CPUContext& ctx,
bbox_inside_weight.end(),
bbox_inside_weight_data);
fg_num_data[0] = fg_fake.size() + 1;
std::vector<Tensor> loc_score_tgtlbl_gt;
std::vector<phi::DenseTensor> loc_score_tgtlbl_gt;
loc_score_tgtlbl_gt.emplace_back(loc_index_t);
loc_score_tgtlbl_gt.emplace_back(score_index_t);
loc_score_tgtlbl_gt.emplace_back(tgt_lbl_t);
......@@ -1065,35 +1069,35 @@ class RetinanetTargetAssignKernel : public framework::OpKernel<T> {
auto gt_labels_lod = gt_labels->lod().back();
auto is_crowd_lod = is_crowd->lod().back();
for (int i = 0; i < batch_num; ++i) {
Tensor gt_boxes_slice =
phi::DenseTensor gt_boxes_slice =
gt_boxes->Slice(gt_boxes_lod[i], gt_boxes_lod[i + 1]);
Tensor gt_labels_slice =
phi::DenseTensor gt_labels_slice =
gt_labels->Slice(gt_labels_lod[i], gt_labels_lod[i + 1]);
Tensor is_crowd_slice =
phi::DenseTensor is_crowd_slice =
is_crowd->Slice(is_crowd_lod[i], is_crowd_lod[i + 1]);
Tensor im_info_slice = im_info->Slice(i, i + 1);
phi::DenseTensor im_info_slice = im_info->Slice(i, i + 1);
auto* im_info_data = im_info_slice.data<T>();
auto im_height = im_info_data[0];
auto im_width = im_info_data[1];
auto im_scale = im_info_data[2];
// Filter straddle anchor
std::vector<Tensor> filter_output =
std::vector<phi::DenseTensor> filter_output =
FilterStraddleAnchor<T>(dev_ctx, anchor, -1, im_height, im_width);
Tensor inds_inside = filter_output[0];
Tensor inside_anchor = filter_output[1];
phi::DenseTensor inds_inside = filter_output[0];
phi::DenseTensor inside_anchor = filter_output[1];
// Filter crowd gt
std::vector<Tensor> ncrowd_output = FilterCrowdGtBoxLabel<T>(
std::vector<phi::DenseTensor> ncrowd_output = FilterCrowdGtBoxLabel<T>(
dev_ctx, &gt_boxes_slice, &gt_labels_slice, &is_crowd_slice);
Tensor ncrowd_gt_boxes = ncrowd_output[0];
Tensor ncrowd_gt_labels = ncrowd_output[1];
phi::DenseTensor ncrowd_gt_boxes = ncrowd_output[0];
phi::DenseTensor ncrowd_gt_labels = ncrowd_output[1];
auto ncrowd_gt_boxes_et =
framework::EigenTensor<T, 2>::From(ncrowd_gt_boxes);
ncrowd_gt_boxes_et = ncrowd_gt_boxes_et * im_scale;
Tensor anchor_by_gt_overlap;
phi::DenseTensor anchor_by_gt_overlap;
anchor_by_gt_overlap.mutable_data<T>(
{inside_anchor.dims()[0], ncrowd_gt_boxes.dims()[0]}, place);
BboxOverlaps<T>(inside_anchor, ncrowd_gt_boxes, &anchor_by_gt_overlap);
......@@ -1105,17 +1109,17 @@ class RetinanetTargetAssignKernel : public framework::OpKernel<T> {
negative_overlap,
engine);
Tensor sampled_loc_index = loc_score_tgtlbl_gt[0];
Tensor sampled_score_index = loc_score_tgtlbl_gt[1];
Tensor sampled_tgtlbl = loc_score_tgtlbl_gt[2];
Tensor sampled_gt_index = loc_score_tgtlbl_gt[3];
Tensor sampled_bbox_inside_weight = loc_score_tgtlbl_gt[4];
Tensor sampled_fg_num = loc_score_tgtlbl_gt[5];
phi::DenseTensor sampled_loc_index = loc_score_tgtlbl_gt[0];
phi::DenseTensor sampled_score_index = loc_score_tgtlbl_gt[1];
phi::DenseTensor sampled_tgtlbl = loc_score_tgtlbl_gt[2];
phi::DenseTensor sampled_gt_index = loc_score_tgtlbl_gt[3];
phi::DenseTensor sampled_bbox_inside_weight = loc_score_tgtlbl_gt[4];
phi::DenseTensor sampled_fg_num = loc_score_tgtlbl_gt[5];
int loc_num = sampled_loc_index.dims()[0];
int score_num = sampled_score_index.dims()[0];
// unmap to all anchor
Tensor sampled_loc_index_unmap, sampled_score_index_unmap;
phi::DenseTensor sampled_loc_index_unmap, sampled_score_index_unmap;
sampled_loc_index_unmap.mutable_data<int>({loc_num}, place);
sampled_score_index_unmap.mutable_data<int>({score_num}, place);
Gather<int>(inds_inside.data<int>(),
......@@ -1130,7 +1134,7 @@ class RetinanetTargetAssignKernel : public framework::OpKernel<T> {
sampled_score_index_unmap.data<int>());
// get target bbox deltas
Tensor sampled_anchor, sampled_gt, sampled_tgt_bbox;
phi::DenseTensor sampled_anchor, sampled_gt, sampled_tgt_bbox;
auto* sampled_anchor_data =
sampled_anchor.mutable_data<T>({loc_num, 4}, place);
auto* sampled_gt_data = sampled_gt.mutable_data<T>({loc_num, 4}, place);
......
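Both sampling helpers above start from the [num_anchors, num_gt] overlap matrix and assign labels with the usual RPN rule before any subsampling: an anchor is positive when its best overlap reaches positive_overlap, or when it is the best anchor for some ground-truth box (the gt_to_anchor_max pass); it is negative when its best overlap stays below negative_overlap; everything else is ignored. Written out, with t_pos and t_neg standing for positive_overlap and negative_overlap:

\[
\mathrm{label}(a_i)=
\begin{cases}
1, & \max_j \mathrm{IoU}(a_i,g_j)\ \ge\ t_{\mathrm{pos}}
      \ \ \text{or}\ \ \exists j:\ \mathrm{IoU}(a_i,g_j)=\max_k \mathrm{IoU}(a_k,g_j),\\
0, & \max_j \mathrm{IoU}(a_i,g_j)\ <\ t_{\mathrm{neg}},\\
\text{ignore}, & \text{otherwise.}
\end{cases}
\]

SampleRpnFgBgGt then appears to subsample a fixed budget of foreground and background anchors (rpn_batch_size_per_im with rpn_fg_fraction), while GetAllFgBgGt, as its name suggests, keeps every assigned anchor for the focal-loss-based RetinaNet target assignment.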
......@@ -19,8 +19,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
static constexpr int kNumCUDAThreads = 512;
static constexpr int kNumMaxinumNumBlocks = 4096;
......@@ -123,10 +121,10 @@ template <typename DeviceContext, typename T>
class GPUSigmoidFocalLossKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &context) const override {
const Tensor *X = context.Input<phi::DenseTensor>("X");
const Tensor *Labels = context.Input<phi::DenseTensor>("Label");
const Tensor *FgNum = context.Input<phi::DenseTensor>("FgNum");
Tensor *Out = context.Output<phi::DenseTensor>("Out");
const phi::DenseTensor *X = context.Input<phi::DenseTensor>("X");
const phi::DenseTensor *Labels = context.Input<phi::DenseTensor>("Label");
const phi::DenseTensor *FgNum = context.Input<phi::DenseTensor>("FgNum");
phi::DenseTensor *Out = context.Output<phi::DenseTensor>("Out");
T gamma = static_cast<T>(context.Attr<float>("gamma"));
T alpha = static_cast<T>(context.Attr<float>("alpha"));
auto x_dims = X->dims();
......@@ -154,12 +152,13 @@ template <typename DeviceContext, typename T>
class GPUSigmoidFocalLossGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &context) const override {
const Tensor *X = context.Input<phi::DenseTensor>("X");
const Tensor *Labels = context.Input<phi::DenseTensor>("Label");
const Tensor *FgNum = context.Input<phi::DenseTensor>("FgNum");
const Tensor *dOut =
const phi::DenseTensor *X = context.Input<phi::DenseTensor>("X");
const phi::DenseTensor *Labels = context.Input<phi::DenseTensor>("Label");
const phi::DenseTensor *FgNum = context.Input<phi::DenseTensor>("FgNum");
const phi::DenseTensor *dOut =
context.Input<phi::DenseTensor>(framework::GradVarName("Out"));
Tensor *dX = context.Output<phi::DenseTensor>(framework::GradVarName("X"));
phi::DenseTensor *dX =
context.Output<phi::DenseTensor>(framework::GradVarName("X"));
auto dx_data = dX->mutable_data<T>(context.GetPlace());
T gamma = static_cast<T>(context.Attr<float>("gamma"));
T alpha = static_cast<T>(context.Attr<float>("alpha"));
......
......@@ -22,16 +22,14 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
template <typename DeviceContext, typename T>
class SigmoidFocalLossKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &context) const override {
const Tensor *X = context.Input<phi::DenseTensor>("X");
const Tensor *Labels = context.Input<phi::DenseTensor>("Label");
const Tensor *FgNum = context.Input<phi::DenseTensor>("FgNum");
Tensor *Out = context.Output<phi::DenseTensor>("Out");
const phi::DenseTensor *X = context.Input<phi::DenseTensor>("X");
const phi::DenseTensor *Labels = context.Input<phi::DenseTensor>("Label");
const phi::DenseTensor *FgNum = context.Input<phi::DenseTensor>("FgNum");
phi::DenseTensor *Out = context.Output<phi::DenseTensor>("Out");
T gamma = static_cast<T>(context.Attr<float>("gamma"));
T alpha = static_cast<T>(context.Attr<float>("alpha"));
auto out_data = Out->mutable_data<T>(context.GetPlace());
......@@ -79,12 +77,13 @@ template <typename DeviceContext, typename T>
class SigmoidFocalLossGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &context) const override {
const Tensor *X = context.Input<phi::DenseTensor>("X");
const Tensor *Labels = context.Input<phi::DenseTensor>("Label");
const Tensor *FgNum = context.Input<phi::DenseTensor>("FgNum");
const Tensor *dOut =
const phi::DenseTensor *X = context.Input<phi::DenseTensor>("X");
const phi::DenseTensor *Labels = context.Input<phi::DenseTensor>("Label");
const phi::DenseTensor *FgNum = context.Input<phi::DenseTensor>("FgNum");
const phi::DenseTensor *dOut =
context.Input<phi::DenseTensor>(framework::GradVarName("Out"));
Tensor *dX = context.Output<phi::DenseTensor>(framework::GradVarName("X"));
phi::DenseTensor *dX =
context.Output<phi::DenseTensor>(framework::GradVarName("X"));
auto dx_data = dX->mutable_data<T>(context.GetPlace());
T gamma = static_cast<T>(context.Attr<float>("gamma"));
T alpha = static_cast<T>(context.Attr<float>("alpha"));
......
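Both the GPU and CPU kernels above evaluate the sigmoid focal loss of Lin et al. (2017), normalized by the foreground count passed in through FgNum. With p = sigmoid(x) for one class channel, a binary target t for that channel, and gamma and alpha taken from the op attributes, the per-element loss has the standard form below; the exact label encoding the kernels use (class indices vs. one-hot) is not visible in this diff:

\[
\mathrm{FL}(p,t) \;=\; -\frac{1}{N_{\mathrm{fg}}}\Big[\,\alpha\,t\,(1-p)^{\gamma}\log p \;+\; (1-\alpha)\,(1-t)\,p^{\gamma}\log(1-p)\Big],
\qquad p=\sigma(x).
\]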
......@@ -77,7 +77,7 @@ class YoloBoxMLUKernel : public framework::OpKernel<T> {
MLUOpTensorDesc x_desc(*x, MLUOP_LAYOUT_ARRAY, ToMluOpDataType<T>());
MLUOpTensorDesc img_size_desc(
*img_size, MLUOP_LAYOUT_ARRAY, ToMluOpDataType<int32_t>());
Tensor anchors_temp(framework::TransToPhiDataType(VT::INT32));
phi::DenseTensor anchors_temp(framework::TransToPhiDataType(VT::INT32));
anchors_temp.Resize({size});
paddle::framework::TensorFromVector(
anchors, ctx.device_context(), &anchors_temp);
......
......@@ -19,8 +19,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
class DetectionMAPOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
......
......@@ -21,8 +21,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
template <typename DeviceContext, typename T>
class DGCClipByNormKernel : public framework::OpKernel<T> {
public:
......
......@@ -18,8 +18,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
template <typename T>
class DropoutMLUKernel : public framework::OpKernel<T> {
public:
......@@ -106,8 +104,8 @@ class DropoutMLUKernel : public framework::OpKernel<T> {
}
// In downgrade_in_infer mode, need to multiply (1.0f - dropout_prob).
Tensor scale_tensor(x->dtype());
Tensor bias_tensor(x->dtype());
phi::DenseTensor scale_tensor(x->dtype());
phi::DenseTensor bias_tensor(x->dtype());
scale_tensor.mutable_data<T>({1}, ctx.GetPlace());
bias_tensor.mutable_data<T>({1}, ctx.GetPlace());
MLUCnnlTensorDesc scale_desc(scale_tensor);
......@@ -157,7 +155,7 @@ class DropoutGradMLUKernel : public framework::OpKernel<T> {
}
// cast mask from uint8 to float32/float16
Tensor cast_mask(grad_x->dtype());
phi::DenseTensor cast_mask(grad_x->dtype());
cast_mask.Resize(mask->dims());
cast_mask.mutable_data<T>(ctx.GetPlace());
......
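The scale and bias tensors above belong to the downgrade_in_infer branch of the MLU kernel, where activations are left unscaled during training and multiplied by (1.0f - dropout_prob) at inference. The NPU kernel below states that it only implements the default upscale_in_train mode, in which kept activations are divided by keep_prob during training and the input passes through unchanged at inference. A minimal CPU-side sketch of the two modes, purely for reference (the function and its signature are illustrative, not the kernel API, and it assumes dropout_prob < 1):

#include <random>
#include <vector>

// Illustrative dropout forward pass covering both Paddle modes.
std::vector<float> DropoutForward(const std::vector<float>& x,
                                  float dropout_prob,
                                  bool is_test,
                                  bool upscale_in_train,
                                  std::mt19937* rng) {
  std::vector<float> out(x.size());
  if (is_test) {
    // downgrade_in_infer rescales at inference; upscale_in_train does not.
    float scale = upscale_in_train ? 1.0f : (1.0f - dropout_prob);
    for (size_t i = 0; i < x.size(); ++i) out[i] = x[i] * scale;
    return out;
  }
  // upscale_in_train rescales the kept activations during training instead.
  float scale = upscale_in_train ? 1.0f / (1.0f - dropout_prob) : 1.0f;
  std::bernoulli_distribution keep(1.0 - dropout_prob);
  for (size_t i = 0; i < x.size(); ++i) {
    out[i] = keep(*rng) ? x[i] * scale : 0.0f;
  }
  return out;
}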
......@@ -23,8 +23,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
template <typename DeviceContext, typename T>
class DropoutNPUKernel : public framework::OpKernel<T> {
public:
......@@ -56,8 +54,8 @@ class DropoutNPUKernel : public framework::OpKernel<T> {
// only achieve the default `upscale_in_train` method
if (!is_test) {
Tensor tmp_x(x->dtype());
Tensor tmp_out(out->dtype());
phi::DenseTensor tmp_x(x->dtype());
phi::DenseTensor tmp_out(out->dtype());
tmp_x.ShareDataWith(*x);
tmp_out.ShareDataWith(*out);
if (x->dims().size() == 1) {
......@@ -80,7 +78,7 @@ class DropoutNPUKernel : public framework::OpKernel<T> {
seed = ctx.Attr<bool>("fix_seed") ? ctx.Attr<int>("seed") : 0;
}
Tensor keep_prob_tensor(x->dtype());
phi::DenseTensor keep_prob_tensor(x->dtype());
keep_prob_tensor.mutable_data<T>({1}, ctx.GetPlace());
FillNpuTensorWithConstant<T>(&keep_prob_tensor,
static_cast<T>(keep_prob));
......@@ -89,14 +87,14 @@ class DropoutNPUKernel : public framework::OpKernel<T> {
// mask used in `DropOutGenMask` NPU OP is different from
// the output `Mask`.
Tensor npu_mask(experimental::DataType::UINT8);
phi::DenseTensor npu_mask(experimental::DataType::UINT8);
uint32_t length = (x->numel() + 128 - 1) / 128 * 128;
npu_mask.Resize(phi::make_ddim({length / 8}));
npu_mask.mutable_data<uint8_t>(ctx.GetPlace());
// TODO(pangyoki): `keep_prob` used in `DropOutGenMask` NPU
// OP must be a scalar with shape[0]. At present, the shape
// of the `prob` Tensor of this OP is forced to be set to 0
// of the `prob` phi::DenseTensor of this OP is forced to be set to 0
// in `npu_op_runner.cc`, which needs to be optimized later.
NpuOpRunner runner_gen_mask;
runner_gen_mask.SetType("DropOutGenMask")
......@@ -116,7 +114,7 @@ class DropoutNPUKernel : public framework::OpKernel<T> {
runner_dropout.Run(stream);
// cast `out` from float/float16 to bool
Tensor cast_mask(experimental::DataType::BOOL);
phi::DenseTensor cast_mask(experimental::DataType::BOOL);
cast_mask.Resize(mask->dims());
cast_mask.mutable_data<bool>(ctx.GetPlace());
auto dst_dtype_bool =
......@@ -176,7 +174,7 @@ class DropoutGradNPUKernel : public framework::OpKernel<T> {
}
// cast mask from uint8 to float32/float16
Tensor cast_mask(dx->dtype());
phi::DenseTensor cast_mask(dx->dtype());
cast_mask.Resize(mask->dims());
cast_mask.mutable_data<T>(ctx.GetPlace());
auto dst_dtype =
......
......@@ -16,7 +16,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
template <typename T>
class ElementwiseAddMLUKernel : public framework::OpKernel<T> {
......
......@@ -21,7 +21,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
template <typename T>
class ElementwiseAddNPUKernel : public framework::OpKernel<T> {
......@@ -53,7 +52,7 @@ class ElementwiseAddNPUKernel : public framework::OpKernel<T> {
const auto& runner = NpuOpRunner("Add", {*x, *y}, {*out}, {});
runner.Run(dev_ctx.stream());
} else {
Tensor transformed_x, transformed_y;
phi::DenseTensor transformed_x, transformed_y;
NpuElementWiseOpBroadcast<T>(
dev_ctx, x, y, axis, &transformed_x, &transformed_y);
const auto& runner =
......@@ -96,7 +95,7 @@ class ElementwiseAddGradNPUKernel : public framework::OpKernel<T> {
}
}
if (!reduce_axes.empty()) {
Tensor tmp;
phi::DenseTensor tmp;
tmp.ShareDataWith(*dx);
tmp.Resize(phi::make_ddim(dst_dims_vec));
const auto& runner =
......@@ -128,7 +127,7 @@ class ElementwiseAddGradNPUKernel : public framework::OpKernel<T> {
}
}
if (!reduce_axes.empty()) {
Tensor tmp;
phi::DenseTensor tmp;
tmp.ShareDataWith(*dy);
tmp.Resize(phi::make_ddim(dst_dims_vec));
const auto& runner =
......
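The reduce_axes handling above is the gradient rule for a broadcast add: each input's gradient is the incoming gradient summed over every axis along which that input was broadcast, then viewed back in the input's own shape (the ShareDataWith/Resize pair followed by the reduce runner). In formula form:

\[
\frac{\partial L}{\partial x}=\sum_{\text{axes where }x\text{ was broadcast}}\frac{\partial L}{\partial \mathrm{out}},
\qquad
\frac{\partial L}{\partial y}=\sum_{\text{axes where }y\text{ was broadcast}}\frac{\partial L}{\partial \mathrm{out}}.
\]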
......@@ -24,7 +24,6 @@ namespace operators {
class ElementwiseDivOpDoubleGrad : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
using Tensor = phi::DenseTensor;
void InferShape(framework::InferShapeContext* ctx) const override {
auto y_grad_name = framework::GradVarName("Y");
......
......@@ -21,8 +21,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
template <typename T>
class ElementwiseDivMLUKernel : public framework::OpKernel<T> {
public:
......@@ -66,7 +64,7 @@ class ElementwiseDivGradMLUKernel : public framework::OpKernel<T> {
CNNL_OP_TENSOR_MUL, ToCnnlDataType<T>(), CNNL_NOT_PROPAGATE_NAN);
// compute dout/y == 1/y * dout
Tensor dout_div_y(dout->dtype());
phi::DenseTensor dout_div_y(dout->dtype());
dout_div_y.Resize(dout->dims());
dout_div_y.mutable_data<T>(ctx.GetPlace());
MLUBinary<DIV>(ctx,
......@@ -110,7 +108,7 @@ class ElementwiseDivGradMLUKernel : public framework::OpKernel<T> {
if (dy) {
// compute dy = -out * (dout/y) = -out/y * dout
Tensor neg_out(out->type());
phi::DenseTensor neg_out(out->type());
neg_out.mutable_data<T>(out->dims(), ctx.GetPlace());
MLUCnnlTensorDesc out_desc(*out);
......@@ -121,7 +119,7 @@ class ElementwiseDivGradMLUKernel : public framework::OpKernel<T> {
out_desc.get(),
GetBasePtr(&neg_out));
Tensor dy_temp(y->dtype());
phi::DenseTensor dy_temp(y->dtype());
dy_temp.Resize(dout->dims());
dy_temp.mutable_data<T>(ctx.GetPlace());
......
......@@ -21,8 +21,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
template <typename DeviceContext, typename T>
class ElementwiseDivNPUKernel : public framework::OpKernel<T> {
public:
......@@ -66,38 +64,38 @@ class ElementwiseDivGradNPUKernel : public framework::OpKernel<T> {
if (dx) {
dx->mutable_data<T>(place);
Tensor tensor_one(y->type());
phi::DenseTensor tensor_one(y->type());
tensor_one.mutable_data<float>({1}, place);
FillNpuTensorWithConstant<float>(&tensor_one, static_cast<float>(1.0));
// Use `Div` CANN OP to achieve `1/y` instead of `Power` CANN OP.
// Because `Power` will cause precision overflow, that is, `float_status`
// will be set to 1.
Tensor y_div(y->type());
phi::DenseTensor y_div(y->type());
y_div.mutable_data<T>(y->dims(), place);
const auto& runner_one_div_y =
NpuOpRunner("Div", {tensor_one, *y}, {y_div}, {});
runner_one_div_y.Run(stream);
Tensor tensor_zeros(x->type());
phi::DenseTensor tensor_zeros(x->type());
tensor_zeros.mutable_data<T>(x->dims(), place);
const auto& runner_tensor_zeros =
NpuOpRunner("ZerosLike", {*x}, {tensor_zeros}, {});
runner_tensor_zeros.Run(stream);
Tensor x_zero(experimental::DataType::BOOL);
phi::DenseTensor x_zero(experimental::DataType::BOOL);
x_zero.mutable_data<bool>(x->dims(), place);
const auto& runner_x_zero =
NpuOpRunner("Equal", {*x, tensor_zeros}, {x_zero}, {});
runner_x_zero.Run(stream);
Tensor x_nozero(experimental::DataType::BOOL);
phi::DenseTensor x_nozero(experimental::DataType::BOOL);
x_nozero.mutable_data<bool>(x->dims(), place);
const auto& runner_x_nonzero =
NpuOpRunner("LogicalNot", {x_zero}, {x_nozero}, {});
runner_x_nonzero.Run(stream);
Tensor x_nozero_f(x->type());
phi::DenseTensor x_nozero_f(x->type());
x_nozero_f.mutable_data<T>(x->dims(), place);
const auto& runner_x_nonzero_f =
NpuOpRunner("Cast",
......@@ -106,7 +104,7 @@ class ElementwiseDivGradNPUKernel : public framework::OpKernel<T> {
{{"dst_type", static_cast<int32_t>(0)}});
runner_x_nonzero_f.Run(stream);
Tensor x_grad_w(x->type());
phi::DenseTensor x_grad_w(x->type());
x_grad_w.mutable_data<T>(x->dims(), place);
const auto& runner_x_grad_w =
NpuOpRunner("Mul", {x_nozero_f, y_div}, {x_grad_w}, {});
......@@ -120,19 +118,19 @@ class ElementwiseDivGradNPUKernel : public framework::OpKernel<T> {
if (dy) {
dy->mutable_data<T>(place);
Tensor neg_out(out->type());
phi::DenseTensor neg_out(out->type());
neg_out.mutable_data<T>(out->dims(), place);
const auto& runner_neg_out = NpuOpRunner("Neg", {*out}, {neg_out}, {});
runner_neg_out.Run(stream);
Tensor tmp_mul(out->type());
phi::DenseTensor tmp_mul(out->type());
tmp_mul.mutable_data<T>(out->dims(), place);
const auto& runner_mul =
NpuOpRunner("Mul", {neg_out, *dout}, {tmp_mul}, {});
runner_mul.Run(stream);
if (dy->dims() != dout->dims()) {
Tensor reduced_tmp_mul(y->type());
phi::DenseTensor reduced_tmp_mul(y->type());
reduced_tmp_mul.mutable_data<T>(y->dims(), place);
std::vector<int64_t> axes;
......
......@@ -21,8 +21,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
template <typename T>
class ElementwiseFloorDivNPUKernel : public framework::OpKernel<T> {
public:
......
......@@ -18,8 +18,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using Tensor = phi::DenseTensor;
template <typename DeviceContext, typename T>
class ElementwiseMaxNPUKernel : public framework::OpKernel<T> {
public:
......@@ -51,7 +49,7 @@ class ElementwiseMaxNPUKernel : public framework::OpKernel<T> {
const auto& runner = NpuOpRunner("Maximum", {*x, *y}, {*out}, {});
runner.Run(stream);
} else {
Tensor transformed_x, transformed_y;
phi::DenseTensor transformed_x, transformed_y;
NpuElementWiseOpBroadcast<T>(
dev_ctx, x, y, axis, &transformed_x, &transformed_y);
const auto& runner =
......@@ -85,7 +83,7 @@ class ElementwiseMaxGradNPUKernel : public framework::OpKernel<T> {
auto x_dims = x->dims();
auto y_dims = y->dims();
axis = (axis == -1 ? std::abs(x_dims.size() - y_dims.size()) : axis);
Tensor transformed_x, transformed_y;
phi::DenseTensor transformed_x, transformed_y;
NpuElementWiseOpBroadcast<T>(
dev_ctx, x, y, axis, &transformed_x, &transformed_y);
......@@ -99,9 +97,9 @@ class ElementwiseMaxGradNPUKernel : public framework::OpKernel<T> {
if (dx && dy) {
dx->mutable_data<T>(ctx.GetPlace());
dy->mutable_data<T>(ctx.GetPlace());
Tensor tmp_dx;
phi::DenseTensor tmp_dx;
tmp_dx.mutable_data<T>(dout_dims, ctx.GetPlace());
Tensor tmp_dy;
phi::DenseTensor tmp_dy;
tmp_dy.mutable_data<T>(dout_dims, ctx.GetPlace());
const auto& runner = NpuOpRunner("MaximumGrad",
......@@ -153,12 +151,12 @@ class ElementwiseMaxGradNPUKernel : public framework::OpKernel<T> {
}
} else if (dx) {
Tensor zero_tensor(dout->type());
phi::DenseTensor zero_tensor(dout->type());
zero_tensor.mutable_data<T>(dout_dims, ctx.GetPlace());
FillNpuTensorWithConstant<T>(&zero_tensor, static_cast<T>(0));
dx->mutable_data<T>(ctx.GetPlace());
Tensor tmp_dx;
phi::DenseTensor tmp_dx;
tmp_dx.mutable_data<T>(dout_dims, ctx.GetPlace());
const auto& runner = NpuOpRunner("MaximumGrad",
......@@ -190,12 +188,12 @@ class ElementwiseMaxGradNPUKernel : public framework::OpKernel<T> {
}
} else if (dy) {
Tensor zero_tensor(dout->type());
phi::DenseTensor zero_tensor(dout->type());
zero_tensor.mutable_data<T>(dout_dims, ctx.GetPlace());
FillNpuTensorWithConstant<T>(&zero_tensor, static_cast<T>(0));
dy->mutable_data<T>(ctx.GetPlace());
Tensor tmp_dy;
phi::DenseTensor tmp_dy;
tmp_dy.mutable_data<T>(dout_dims, ctx.GetPlace());
const auto& runner = NpuOpRunner("MaximumGrad",
......
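MaximumGrad routes the incoming gradient to whichever input attains the elementwise maximum; the zero tensor built above supplies the placeholder gradient when only one of dx/dy is requested, and broadcast axes are reduced afterwards just as in the add and div gradients. In formula form (the convention at exact ties x_i = y_i is decided inside the CANN op and is not visible in this diff):

\[
\frac{\partial L}{\partial x_i}=\frac{\partial L}{\partial \mathrm{out}_i}\,\mathbf{1}[x_i \ge y_i],
\qquad
\frac{\partial L}{\partial y_i}=\frac{\partial L}{\partial \mathrm{out}_i}\,\mathbf{1}[x_i < y_i].
\]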
(Diffs for 21 additional files are collapsed and not shown.)