Unverified commit 65420271, authored by 张春乔, committed by GitHub

[phi::DenseTensor] Replace Tensor with phi::DenseTensor (#48682)

Parent 693de9f0
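The change is mechanical across the operator files below: each file's local alias `using Tensor = phi::DenseTensor;` is deleted, and every use site spells out the concrete `phi::DenseTensor` type instead. A minimal sketch of the pattern, assuming the Paddle phi headers; the function and variable names here are illustrative, not taken from any file in this diff:

#include "paddle/phi/core/ddim.h"
#include "paddle/phi/core/dense_tensor.h"

// Before (per-file alias, now removed):
//   using Tensor = phi::DenseTensor;
//   Tensor tmp;
// After (concrete phi type written out at the use site):
void ExampleUseSite() {
  phi::DenseTensor tmp;              // was: Tensor tmp;
  tmp.Resize(phi::make_ddim({8}));   // behavior unchanged; only the name is.
}

Because the alias and the concrete type are identical, the refactor is purely textual; no kernel behavior changes in this commit.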
@@ -644,11 +644,11 @@ void GradientAccumulator::CallGradientHooks() {
       true,
       platform::errors::PreconditionNotMet(
           "Only can call gradient hooks after sum gradient completed."));
-  PADDLE_ENFORCE_EQ(
-      HasInnerVar(),
-      true,
-      platform::errors::PreconditionNotMet(
-          "Leaf Tensor's inner var is nullptr when call gradient hook."));
+  PADDLE_ENFORCE_EQ(HasInnerVar(),
+                    true,
+                    platform::errors::PreconditionNotMet(
+                        "Leaf Tensor's inner var is nullptr when "
+                        "call gradient hook."));
   PADDLE_ENFORCE_EQ(
       inner_var_->Var().IsInitialized(),
       true,
...
@@ -18,8 +18,6 @@ limitations under the Licnse. */
 namespace paddle {
 namespace operators {
 
-using Tensor = phi::DenseTensor;
-
 template <typename T>
 class AbsMLUKernel : public framework::OpKernel<T> {
  public:
@@ -54,7 +52,7 @@ class AbsGradMLUKernel : public framework::OpKernel<T> {
     MLUCnnlOpTensorDesc mul_op_desc(
         CNNL_OP_TENSOR_MUL, ToCnnlDataType<T>(), CNNL_NOT_PROPAGATE_NAN);
-    Tensor sign_x;
+    phi::DenseTensor sign_x;
     sign_x.mutable_data<T>(x->dims(), ctx.GetPlace());
     MLUCnnl::Sign(ctx,
...
@@ -18,8 +18,6 @@ limitations under the Licnse. */
 namespace paddle {
 namespace operators {
 
-using Tensor = phi::DenseTensor;
-
 template <typename DeviceContext, typename T>
 class AbsNPUKernel : public framework::OpKernel<T> {
  public:
...
@@ -21,8 +21,6 @@ limitations under the Licnse. */
 namespace paddle {
 namespace operators {
 
-using Tensor = phi::DenseTensor;
-
 template <cnnlActivationMode_t act_mode, typename T>
 class ActivationMLUKernel : public framework::OpKernel<T> {
  public:
@@ -442,7 +440,7 @@ class ReciprocalGradMLUKernel : public framework::OpKernel<T> {
     auto* dx = ctx.Output<phi::DenseTensor>(framework::GradVarName("X"));
     auto place = ctx.GetPlace();
     dx->mutable_data<T>(place);
-    Tensor square_out;
+    phi::DenseTensor square_out;
     square_out.Resize(out->dims());
     square_out.mutable_data<T>(place);
     MLUCnnlTensorDesc out_desc(*out);
...
@@ -28,8 +28,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-using Tensor = phi::DenseTensor;
-
 class AffineGridOp : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;
...
@@ -21,8 +21,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-using Tensor = phi::DenseTensor;
-
 template <typename DeviceContext, typename T>
 class AllocFloatStatusKernel : public framework::OpKernel<T> {
  public:
...
@@ -19,8 +19,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-using Tensor = phi::DenseTensor;
-
 template <typename T>
 class CheckFiniteAndUnscaleMLUKernel : public framework::OpKernel<T> {
   using MPDType = typename details::MPTypeTrait<T>::Type;
@@ -45,7 +43,7 @@ class CheckFiniteAndUnscaleMLUKernel : public framework::OpKernel<T> {
       out->mutable_data<T>(ctx.GetPlace());
 
       // check is_finite or is_nan
-      Tensor is_finite(found_inf->type());
+      phi::DenseTensor is_finite(found_inf->type());
       if (i != 0) {
         is_finite.Resize(phi::make_ddim({1}));
         is_finite.mutable_data<bool>(ctx.GetPlace());
@@ -78,8 +76,8 @@ class CheckFiniteAndUnscaleMLUKernel : public framework::OpKernel<T> {
       // out = in/scale, if found_inf = false
       // But when found_inf is true, the data of Out should not be used.
       // So, on MLU, we always compute out with in/scale.
-      Tensor float_x;
-      Tensor float_out;
+      phi::DenseTensor float_x;
+      phi::DenseTensor float_out;
       if (std::is_same<T, paddle::platform::float16>::value) {
         float_x.Resize(x->dims());
         float_out.Resize(out->dims());
...
@@ -22,8 +22,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-using Tensor = phi::DenseTensor;
-
 // NOTE(zhiqiu): The CheckFiniteAndUnscaleNPUKernel is different from CUDA.
 // On NPU, we do not really check the data of input tensors,
 // but use NPUGetFloatStatus to check whether the nan/inf occurs on device,
@@ -47,13 +45,13 @@ class CheckFiniteAndUnscaleNPUKernel : public framework::OpKernel<T> {
             .stream();
 
     // step1: inverse scale
-    Tensor const_tensor;
+    phi::DenseTensor const_tensor;
     const_tensor.mutable_data<T>({1}, ctx.GetPlace());
     FillNpuTensorWithConstant<T>(&const_tensor, static_cast<T>(1.0));
 
     // Inverse(1.0/scale)
     phi::DenseTensor* tmp_inverse_out = const_cast<phi::DenseTensor*>(scale);
-    Tensor inverse_out(scale->type());
+    phi::DenseTensor inverse_out(scale->type());
     inverse_out.Resize(scale->dims());
     inverse_out.mutable_data<T>(ctx.GetPlace());
     const auto& runner_inverse =
@@ -62,7 +60,7 @@ class CheckFiniteAndUnscaleNPUKernel : public framework::OpKernel<T> {
     tmp_inverse_out = &inverse_out;
 
     // NOTE(zhiqiu):
-    Tensor tmp;
+    phi::DenseTensor tmp;
     tmp.mutable_data<float>({8}, ctx.GetPlace());
     // NOTE(zhiqiu): NPUGetFloatStatus updates data on input in-place.
     // tmp is only placeholder.
@@ -73,7 +71,7 @@ class CheckFiniteAndUnscaleNPUKernel : public framework::OpKernel<T> {
         {{"message", std::string("check_nan_and_inf")}});
     runner_float_status.Run(stream);
 
-    Tensor sum;
+    phi::DenseTensor sum;
     sum.mutable_data<float>({1}, ctx.GetPlace());
     const auto& runner_reduce_sum =
         NpuOpRunner("ReduceSumD",
...
@@ -31,8 +31,6 @@ limitations under the License. */
 namespace f = paddle::framework;
 namespace p = paddle::platform;
 
-using Tensor = phi::DenseTensor;
-
 USE_OP_ITSELF(check_finite_and_unscale);
 USE_OP_DEVICE_KERNEL(check_finite_and_unscale, NPU);
@@ -110,7 +108,7 @@ void Compare(f::Scope *scope, const p::DeviceContext &ctx) {
   ctx.Wait();
 
   // out found_inf
-  Tensor found_inf_tensor;
+  phi::DenseTensor found_inf_tensor;
   found_inf_tensor.Resize({1});
   bool *found_inf_data =
       found_inf_tensor.mutable_data<bool>(paddle::platform::CPUPlace());
...
@@ -21,8 +21,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-using Tensor = phi::DenseTensor;
-
 template <typename DeviceContext, typename T>
 class ClearFloatStatusKernel : public framework::OpKernel<T> {
  public:
@@ -35,7 +33,7 @@ class ClearFloatStatusKernel : public framework::OpKernel<T> {
         platform::errors::PreconditionNotMet(
             "The input(FloatStatus) and Output(FloatStatusOut) "
             "should be the same."));
-    Tensor tmp;
+    phi::DenseTensor tmp;
     tmp.mutable_data<float>({8}, ctx.GetPlace());
     const auto& runner =
         NpuOpRunner("NPUClearFloatStatus", {tmp}, {*float_status_out});
...
@@ -21,8 +21,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-using Tensor = phi::DenseTensor;
-
 template <typename DeviceContext, typename T>
 class GetFloatStatusKernel : public framework::OpKernel<T> {
  public:
@@ -35,7 +33,7 @@ class GetFloatStatusKernel : public framework::OpKernel<T> {
         platform::errors::PreconditionNotMet(
             "The input(FloatStatus) and Output(FloatStatusOut) "
             "should be the same."));
-    Tensor tmp;
+    phi::DenseTensor tmp;
     tmp.mutable_data<float>({8}, ctx.GetPlace());
     auto stream =
         ctx.template device_context<paddle::platform::NPUDeviceContext>()
...
@@ -25,8 +25,6 @@ DECLARE_int32(min_loss_scaling);
 namespace paddle {
 namespace operators {
 
-using Tensor = phi::DenseTensor;
-
 template <typename T>
 void Update(const platform::NPUDeviceContext& ctx,
             const std::vector<bool> found_inf_vec,
@@ -50,7 +48,7 @@ void Update(const platform::NPUDeviceContext& ctx,
                     good_out_tensor->numel() * sizeof(int),
                     stream);
     // bad_out_data = bad_in_data + 1
-    Tensor factor_tensor(bad_out_tensor->dtype());
+    phi::DenseTensor factor_tensor(bad_out_tensor->dtype());
     factor_tensor.mutable_data<int>({1}, place);
     FillNpuTensorWithConstant<int>(&factor_tensor, static_cast<int>(1));
     const auto& runner_p2 = NpuOpRunner(
@@ -106,7 +104,7 @@ void Update(const platform::NPUDeviceContext& ctx,
                     stream);
 
     // good_out_data = good_in_data + 1
-    Tensor factor_tensor(good_out_tensor->dtype());
+    phi::DenseTensor factor_tensor(good_out_tensor->dtype());
     factor_tensor.mutable_data<int>({1}, place);
     FillNpuTensorWithConstant<int>(&factor_tensor, static_cast<int>(1));
     const auto& runner_p2 = NpuOpRunner(
...
@@ -18,7 +18,6 @@ limitations under the Licnse. */
 namespace paddle {
 namespace operators {
 
-using Tensor = phi::DenseTensor;
 using NPUDeviceContext = platform::NPUDeviceContext;
 
 template <typename T>
@@ -36,7 +35,7 @@ struct VisitDataArgNPUMaxFunctor {
     auto dtype = ctx.Attr<int>("dtype");
     const bool& flatten = ctx.Attr<bool>("flatten");
 
-    Tensor transformed_x(x.type());
+    phi::DenseTensor transformed_x(x.type());
     transformed_x.ShareDataWith(x);
     if (flatten) {
       transformed_x.Resize(phi::make_ddim({x.numel()}));
...
@@ -17,7 +17,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-using Tensor = phi::DenseTensor;
 template <typename DeviceContext, typename T>
 class ArgMinNPUKernel : public framework::OpKernel<T> {
...
@@ -18,7 +18,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-using Tensor = phi::DenseTensor;
 using NPUDeviceContext = platform::NPUDeviceContext;
 
 template <typename T>
@@ -79,16 +78,16 @@ class ArgsortNPUKernel : public framework::OpKernel<T> {
     framework::NPUAttributeMap attr = {{"axis", -1},
                                        {"descending", descending}};
 
-    Tensor indices_tmp(experimental::DataType::INT32);
+    phi::DenseTensor indices_tmp(experimental::DataType::INT32);
     indices_tmp.Resize(indices->dims());
 
     if (framework::TransToProtoVarType(input->dtype()) ==
         framework::proto::VarType::INT64) {
-      Tensor input_fp32(experimental::DataType::FLOAT32);
+      phi::DenseTensor input_fp32(experimental::DataType::FLOAT32);
       input_fp32.Resize(input->dims());
       CastToFP32(ctx, stream, *input, &input_fp32);
-      Tensor output_fp32(experimental::DataType::FLOAT32);
+      phi::DenseTensor output_fp32(experimental::DataType::FLOAT32);
       output_fp32.Resize(output->dims());
 
       if (axis == -1 || axis + 1 == in_dims.size()) {
@@ -112,12 +111,12 @@ class ArgsortNPUKernel : public framework::OpKernel<T> {
         }
         auto trans_dims = phi::make_ddim(shape);
 
-        Tensor trans_input(input_fp32.type());
+        phi::DenseTensor trans_input(input_fp32.type());
         trans_input.Resize(trans_dims);
         TranposeNPU<float>(ctx, stream, &perm, input_fp32, &trans_input);
 
-        Tensor trans_output(input_fp32.type());
-        Tensor trans_indices(experimental::DataType::INT32);
+        phi::DenseTensor trans_output(input_fp32.type());
+        phi::DenseTensor trans_indices(experimental::DataType::INT32);
         trans_output.mutable_data<float>(trans_dims, ctx.GetPlace());
         trans_indices.mutable_data<int32_t>(trans_dims, ctx.GetPlace());
@@ -150,12 +149,12 @@ class ArgsortNPUKernel : public framework::OpKernel<T> {
         }
         auto trans_dims = phi::make_ddim(shape);
 
-        Tensor trans_input(input->type());
+        phi::DenseTensor trans_input(input->type());
         trans_input.Resize(trans_dims);
         TranposeNPU<T>(ctx, stream, &perm, *input, &trans_input);
 
-        Tensor trans_output(input->type());
-        Tensor trans_indices(experimental::DataType::INT32);
+        phi::DenseTensor trans_output(input->type());
+        phi::DenseTensor trans_indices(experimental::DataType::INT32);
         trans_output.mutable_data<T>(trans_dims, ctx.GetPlace());
         trans_indices.mutable_data<int32_t>(trans_dims, ctx.GetPlace());
@@ -183,12 +182,12 @@ static void FullAssignNPU(const framework::ExecutionContext& ctx,
       phi::product(phi::slice_ddim(in_dims, 0, in_dims.size() - 1));
   const int64_t input_width = in_dims[in_dims.size() - 1];
 
-  Tensor input_tmp;
+  phi::DenseTensor input_tmp;
   input_tmp.ShareDataWith(input);
   input_tmp.Resize(
       phi::make_ddim(std::vector<int64_t>{input_height * input_width}));
 
-  Tensor indices_tmp;
+  phi::DenseTensor indices_tmp;
   indices_tmp.ShareDataWith(indices);
   indices_tmp.Resize(
       phi::make_ddim(std::vector<int64_t>{input_height, input_width}));
@@ -197,12 +196,12 @@ static void FullAssignNPU(const framework::ExecutionContext& ctx,
   for (Type i = 0; i < input_height; i++) {
     indexs_value.push_back(i * input_width);
   }
-  Tensor indexs_tmp(indices.type());
+  phi::DenseTensor indexs_tmp(indices.type());
   framework::TensorFromVector<int64_t>(
       indexs_value, ctx.device_context(), &indexs_tmp);
   indexs_tmp.Resize(phi::make_ddim(std::vector<int64_t>{input_height, 1}));
 
-  Tensor indices_index(indices.type());
+  phi::DenseTensor indices_index(indices.type());
   indices_index.mutable_data<int64_t>(indices_tmp.dims(), ctx.GetPlace());
   const auto& runner_add =
       NpuOpRunner("Add", {indices_tmp, indexs_tmp}, {indices_index}, {});
@@ -212,7 +211,7 @@ static void FullAssignNPU(const framework::ExecutionContext& ctx,
       phi::make_ddim(std::vector<int64_t>{input_height * input_width}));
   t_out->mutable_data<T>(ctx.GetPlace());
 
-  Tensor out_tmp(t_out->type());
+  phi::DenseTensor out_tmp(t_out->type());
   out_tmp.ShareDataWith(*t_out);
 
   const auto& runner = NpuOpRunner("TensorScatterUpdate",
@@ -252,15 +251,15 @@ class ArgsortGradNPUKernel : public framework::OpKernel<T> {
     }
     auto trans_dims = phi::make_ddim(shape);
 
-    Tensor trans_dout(dO->type());
-    Tensor trans_ids(indices->type());
+    phi::DenseTensor trans_dout(dO->type());
+    phi::DenseTensor trans_ids(indices->type());
     trans_dout.Resize(trans_dims);
     trans_ids.Resize(trans_dims);
     TranposeNPU<T>(ctx, stream, &perm, *dO, &trans_dout);
     TranposeNPU<int64_t>(ctx, stream, &perm, *indices, &trans_ids);
 
-    Tensor trans_dx(dO->type());
+    phi::DenseTensor trans_dx(dO->type());
    trans_dx.Resize(trans_dims);
     FullAssignNPU<T, int64_t>(
         ctx, stream, trans_dims, trans_dout, trans_ids, &trans_dx);
...
@@ -212,39 +212,41 @@ void AttentionLSTMOpMaker::Make() {
            "this phi::DenseTensor is a matrix with shape (T X M), where T is the "
            "total time steps in this mini-batch, M is the dim size of x.");
   AddInput("C0",
-           "(Tensor) LSTM C0"
+           "(phi::DenseTensor) LSTM C0"
            "This is a tensor with shape (N x D), where N is the batch size, D "
            "is the gate size."
            "C0 is necessary because of attention.");
   AddInput("H0",
-           "(Tensor, optional) LSTM H0"
+           "(phi::DenseTensor, optional) LSTM H0"
            "This is a tensor with shape (N x D), where N is the "
            "batch size and D is the gate size.")
       .AsDispensable();
   AddInput("AttentionWeight",
-           "(Tensor) the weights of attention fc. Always relu the fc result."
+           "(phi::DenseTensor) the weights of attention fc. Always relu the fc "
+           "result."
            "The shape is ((M+D) x 1), where M is the dim size of x, D is the "
            "gate size of LSTM.");
   AddInput("AttentionBias",
-           "(Tensor, optional) the bias of attention fc."
+           "(phi::DenseTensor, optional) the bias of attention fc."
            "The shape is (1 x 1)")
       .AsDispensable();
   AddInput("AttentionScalar",
-           "(Tensor, optional) the scalar on the result of attentioned fc. "
+           "(phi::DenseTensor, optional) the scalar on the result of "
+           "attentioned fc. "
            "Always relu the Scalar."
            "The shape is (1 x 1)")
       .AsDispensable();
   AddInput("AttentionScalarBias",
-           "(Tensor, optional) the scalar bias of attention fc."
+           "(phi::DenseTensor, optional) the scalar bias of attention fc."
            "The shape is (1 x 1)")
       .AsDispensable();
   AddInput("LSTMWeight",
-           "(Tensor) the combined weight of LSTM"
+           "(phi::DenseTensor) the combined weight of LSTM"
            " - The shape is ((D+M) x 4D), where D is the hidden gate size, M "
            "is the dim size of x"
            " - Weight = {W_forget, W_input, W_output, W_cell}");
   AddInput("LSTMBias",
-           "(Tensor) the combined bias of LSTM, shape (1x4D)."
+           "(phi::DenseTensor) the combined bias of LSTM, shape (1x4D)."
            "Note: we should add the bias of hidden and context accorindg to "
            "the same gate: "
            "{B_forget, B_input, B_output, B_cell}");
@@ -257,21 +259,22 @@ void AttentionLSTMOpMaker::Make() {
             "(phi::DenseTensor) (same as LSTMOp) the cell state of LSTM operator. "
             "The shape is (T x D), and lod is the same with the `Input`.");
   AddOutput("AttentionedX",
-            "(Tensor) shape is (T x 1), the result after X * AttentionWeight,"
+            "(phi::DenseTensor) shape is (T x 1), the result after X * "
+            "AttentionWeight,"
             " where T is the total time steps in this mini-batch,"
             " D is the hidden size.")
       .AsIntermediate();
   AddOutput("AttentionFCOut",
-            "(Tensor) (max_seq_len, 1), compute at each step.")
+            "(phi::DenseTensor) (max_seq_len, 1), compute at each step.")
       .AsIntermediate();
   AddOutput("LSTMX",
-            "(Tensor) the input X of LSTM for each step."
+            "(phi::DenseTensor) the input X of LSTM for each step."
             "Shape is (1 x M), where M is the x frame size")
       .AsIntermediate();
-  AddOutput(
-      "LSTMOUT",
-      "(Tensor) the output of LSTM X(1*(D+M))* weight((D+M)*4D) for each step."
-      "Shape is (1 x 4D), where M is the x frame size")
+  AddOutput("LSTMOUT",
+            "(phi::DenseTensor) the output of LSTM X(1*(D+M))* "
+            "weight((D+M)*4D) for each step."
+            "Shape is (1 x 4D), where M is the x frame size")
       .AsIntermediate();
   AddAttr<std::string>("gate_activation",
                        "(string, default: sigmoid)"
...
@@ -18,8 +18,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-using Tensor = phi::DenseTensor;
-
 class AttentionLSTMOp : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;
...
@@ -207,7 +207,7 @@ framework::OpKernelType BatchNormOp::GetExpectedKernelType(
 framework::OpKernelType BatchNormOp::GetKernelTypeForVar(
     const std::string &var_name,
-    const Tensor &tensor,
+    const phi::DenseTensor &tensor,
     const framework::OpKernelType &expected_kernel_type) const {
 #ifdef PADDLE_WITH_MKLDNN
   // Only input require reshaping, weights and
@@ -265,7 +265,7 @@ void BatchNormOpMaker::Make() {
            "The global variance (for training) "
            "or estimated Variance (for testing)");
   AddInput("MomentumTensor",
-           "(Tensor<float32>, optional) If provided, batch_norm will "
+           "(phi::DenseTensor<float32>, optional) If provided, batch_norm will "
            "use this as momentum, this has a higher priority than "
            "attr(momentum), the shape of this tensor MUST BE [1].")
       .AsDispensable();
@@ -380,9 +380,9 @@ framework::OpKernelType BatchNormGradOp::GetExpectedKernelType(
     PADDLE_THROW(
         platform::errors::InvalidArgument("can't find gradient variable of Y"));
   }
-  const Tensor *t = nullptr;
-  if (var->IsType<Tensor>()) {
-    t = &var->Get<Tensor>();
+  const phi::DenseTensor *t = nullptr;
+  if (var->IsType<phi::DenseTensor>()) {
+    t = &var->Get<phi::DenseTensor>();
   } else if (var->IsType<phi::DenseTensor>()) {
     t = &var->Get<phi::DenseTensor>();
   }
@@ -397,7 +397,7 @@ framework::OpKernelType BatchNormGradOp::GetExpectedKernelType(
 framework::OpKernelType BatchNormGradOp::GetKernelTypeForVar(
     const std::string &var_name,
-    const Tensor &tensor,
+    const phi::DenseTensor &tensor,
     const framework::OpKernelType &expected_kernel_type) const {
 #ifdef PADDLE_WITH_MKLDNN
   // Only input require reshaping, weights and
@@ -522,9 +522,9 @@ framework::OpKernelType BatchNormDoubleGradOp::GetExpectedKernelType(
     PADDLE_THROW(
         platform::errors::NotFound("cannot find gradient variable of Y"));
   }
-  const Tensor *t = nullptr;
-  if (var->IsType<Tensor>()) {
-    t = &var->Get<Tensor>();
+  const phi::DenseTensor *t = nullptr;
+  if (var->IsType<phi::DenseTensor>()) {
+    t = &var->Get<phi::DenseTensor>();
   } else if (var->IsType<phi::DenseTensor>()) {
     t = &var->Get<phi::DenseTensor>();
   }
...
@@ -34,7 +34,6 @@ DECLARE_bool(cudnn_batchnorm_spatial_persistent);
 namespace paddle {
 namespace operators {
 
-using Tensor = phi::DenseTensor;
 using DataLayout = phi::DataLayout;
 template <typename T>
 using CudnnDataType = platform::CudnnDataType<T>;
...
@@ -27,7 +27,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-using Tensor = phi::DenseTensor;
 using DataLayout = phi::DataLayout;
 
 template <typename T>
...
@@ -78,8 +78,8 @@ class MLUBatchNormOpKernel : public framework::OpKernel<T> {
     saved_mean->mutable_data<MPDType>(place);
     saved_variance->mutable_data<MPDType>(place);
 
-    Tensor transformed_x;
-    Tensor transformed_y;
+    phi::DenseTensor transformed_x;
+    phi::DenseTensor transformed_y;
     const int transformed_dim_size = 4;
     const int transformed_shape[transformed_dim_size] = {N, sample_size, 1, C};
     MLUCnnlTensorDesc transformed_desc(transformed_dim_size,
@@ -116,7 +116,7 @@ class MLUBatchNormOpKernel : public framework::OpKernel<T> {
       if (ctx.HasInput("MomentumTensor")) {
         const auto *mom_tensor = ctx.Input<phi::DenseTensor>("MomentumTensor");
-        Tensor mom_cpu;
+        phi::DenseTensor mom_cpu;
         framework::TensorCopySync(*mom_tensor, platform::CPUPlace(), &mom_cpu);
         momentum = mom_cpu.data<float>()[0];
       }
@@ -226,9 +226,9 @@ class MLUBatchNormGradOpKernel : public framework::OpKernel<T> {
                        : x_dims[x_dims.size() - 1]);
     const int sample_size = x->numel() / N / C;
 
-    Tensor transformed_d_y;
-    Tensor transformed_x;
-    Tensor transformed_d_x;
+    phi::DenseTensor transformed_d_y;
+    phi::DenseTensor transformed_x;
+    phi::DenseTensor transformed_d_x;
     const int transformed_dim_size = 4;
     const int transformed_shape[transformed_dim_size] = {N, sample_size, 1, C};
...
@@ -89,7 +89,7 @@ class NPUBatchNormOpKernel : public framework::OpKernel<T> {
       // is only used in this training branch
       if (ctx.HasInput("MomentumTensor")) {
         const auto *mom_tensor = ctx.Input<phi::DenseTensor>("MomentumTensor");
-        Tensor mom_cpu;
+        phi::DenseTensor mom_cpu;
         paddle::framework::TensorCopySync(
             *mom_tensor, platform::CPUPlace(), &mom_cpu);
         momentum = mom_cpu.data<float>()[0];
...
@@ -18,8 +18,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-using Tensor = phi::DenseTensor;
-
 template <typename T>
 class BCELossMLUKernel : public framework::OpKernel<T> {
  public:
...
@@ -18,8 +18,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-using Tensor = phi::DenseTensor;
-
 template <typename DeviceContext, typename T>
 class BCELossNPUKernel : public framework::OpKernel<T> {
  public:
...
@@ -40,7 +40,7 @@ class CastOpProtoMaker : public framework::OpProtoAndCheckerMaker {
 Cast Operator.
 
 This Operator casts the input tensor to another data type and
-returns the Output Tensor. It's meaningless if the output dtype equals
+returns the Output phi::DenseTensor. It's meaningless if the output dtype equals
 the input dtype, but it's fine if you do so.
 
 )DOC");
...
@@ -19,8 +19,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-using Tensor = phi::DenseTensor;
-
 template <typename T>
 class CastMLUKernel : public framework::OpKernel<T> {
  public:
...
@@ -32,8 +32,6 @@ static std::map<framework::proto::VarType::Type, aclDataType>
         {framework::proto::VarType::FP64, ACL_DOUBLE},
 };
 
-using Tensor = phi::DenseTensor;
-
 template <typename DeviceContext, typename T>
 class CastNPUKernel : public framework::OpKernel<T> {
  public:
...
@@ -26,7 +26,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-using Tensor = phi::DenseTensor;
 template <typename T,
           int MajorType = Eigen::RowMajor,
           typename IndexType = Eigen::DenseIndex>
@@ -81,7 +80,7 @@ class CenterLossKernel : public framework::OpKernel<T> {
     auto loss_data = out_loss->mutable_data<T>(ctx.GetPlace());
 
-    Tensor centers_diffacc;  // used to accumulate all diff
+    phi::DenseTensor centers_diffacc;  // used to accumulate all diff
     auto centers_diffacc_data =
         centers_diffacc.mutable_data<T>(centers_dim, ctx.GetPlace());
     int numel = centers_diffacc.numel();
...
@@ -23,7 +23,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-using Tensor = phi::DenseTensor;
 // using SelectedRows = phi::SelectedRows;
 template <typename T,
           int MajorType = Eigen::RowMajor,
...
@@ -18,8 +18,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-using Tensor = phi::DenseTensor;
-
 template <typename DeviceContext, typename T>
 class NPUClipByNormKernel : public framework::OpKernel<T> {
  public:
@@ -48,7 +46,7 @@ class NPUClipByNormKernel : public framework::OpKernel<T> {
             "Input(X) of ClipByNormOp should not be null. "
             "Please check if it is created correctly."));
 
-    Tensor square_sum(input->type());
+    phi::DenseTensor square_sum(input->type());
     square_sum.mutable_data<T>(framework::DDim({1}), place);
     const auto& x_dims = input->dims();
     std::vector<int> axis;
@@ -62,12 +60,12 @@ class NPUClipByNormKernel : public framework::OpKernel<T> {
         {{"axis", axis}, {"keep_dims", false}});
     square_sum_runner.Run(stream);
 
-    Tensor x_norm(input->type());
+    phi::DenseTensor x_norm(input->type());
     x_norm.mutable_data<T>(framework::DDim({1}), place);
     const auto& x_norm_runner = NpuOpRunner("Sqrt", {square_sum}, {x_norm}, {});
     x_norm_runner.Run(stream);
 
-    Tensor x_norm_t;
+    phi::DenseTensor x_norm_t;
     framework::TensorCopySync(x_norm, platform::CPUPlace(), &x_norm_t);
     auto x_norm_v = static_cast<float>(*x_norm_t.data<T>());
     if (x_norm_v <= max_norm) {
...
@@ -29,7 +29,7 @@ class ClipMLUKernel : public framework::OpKernel<T> {
     auto max = static_cast<T>(ctx.Attr<float>("max"));
 
     if (ctx.HasInput("Min")) {
-      Tensor min_cpu;
+      phi::DenseTensor min_cpu;
       auto* min_tensor = ctx.Input<phi::DenseTensor>("Min");
       auto* min_data = min_tensor->data<T>();
       if (platform::is_mlu_place(min_tensor->place())) {
@@ -41,7 +41,7 @@ class ClipMLUKernel : public framework::OpKernel<T> {
     }
 
     if (ctx.HasInput("Max")) {
-      Tensor max_cpu;
+      phi::DenseTensor max_cpu;
       auto* max_tensor = ctx.Input<phi::DenseTensor>("Max");
       auto* max_data = max_tensor->data<T>();
       if (platform::is_mlu_place(max_tensor->place())) {
@@ -80,7 +80,7 @@ class ClipGradMLUKernel : public framework::OpKernel<T> {
     auto min_val = ctx.Attr<float>("min");
     if (min_tensor) {
-      Tensor min_data;
+      phi::DenseTensor min_data;
       framework::TensorCopy(
           *min_tensor,
           platform::CPUPlace(),
@@ -91,7 +91,7 @@ class ClipGradMLUKernel : public framework::OpKernel<T> {
     }
 
     auto max_val = ctx.Attr<float>("max");
     if (max_tensor) {
-      Tensor max_data;
+      phi::DenseTensor max_data;
       framework::TensorCopy(
           *max_tensor,
           platform::CPUPlace(),
...
@@ -18,8 +18,6 @@
 namespace paddle {
 namespace operators {
 
-using Tensor = phi::DenseTensor;
-
 template <typename DeviceContext, typename T>
 class ClipNPUKernel : public framework::OpKernel<T> {
  public:
@@ -33,8 +31,8 @@ class ClipNPUKernel : public framework::OpKernel<T> {
     auto max_tensor =
         ctx.HasInput("Max") ? ctx.Input<phi::DenseTensor>("Max") : nullptr;
 
-    Tensor min_tensor_temp(x->type());
-    Tensor max_tensor_temp(x->type());
+    phi::DenseTensor min_tensor_temp(x->type());
+    phi::DenseTensor max_tensor_temp(x->type());
     if (min_tensor == nullptr) {
       auto min_value = static_cast<T>(ctx.Attr<float>("min"));
       min_tensor_temp.mutable_data<T>({1}, ctx.GetPlace());
@@ -74,7 +72,7 @@ class ClipGradNPUKernel : public framework::OpKernel<T> {
     auto min_val = ctx.Attr<float>("min");
     if (min_tensor) {
-      Tensor min_data;
+      phi::DenseTensor min_data;
       framework::TensorCopy(
           *min_tensor,
           platform::CPUPlace(),
@@ -86,7 +84,7 @@ class ClipGradNPUKernel : public framework::OpKernel<T> {
     auto max_val = ctx.Attr<float>("max");
     if (max_tensor) {
-      Tensor max_data;
+      phi::DenseTensor max_data;
       framework::TensorCopy(
           *max_tensor,
           platform::CPUPlace(),
...
@@ -61,7 +61,7 @@ struct FillConstantVisitor {
                   * = nullptr) const {
 #ifdef PADDLE_WITH_ASCEND_CL
     if (platform::is_npu_place(dev_ctx_.GetPlace())) {
-      Tensor tensor_tmp(framework::TransToPhiDataType(dtype_));
+      phi::DenseTensor tensor_tmp(framework::TransToPhiDataType(dtype_));
       tensor_tmp.mutable_data<T>({1}, context_.GetPlace());
       FillNpuTensorWithConstant<T>(&tensor_tmp, static_cast<T>(value_));
...
@@ -151,10 +151,9 @@ class CAllReduceOpCPUKernel : public framework::OpKernel<T> {
 inline bool ContainsNan(const paddle::platform::NPUDeviceContext& dev_ctx,
                         aclrtStream stream,
                         const phi::DenseTensor* in) {
-  using Tensor = phi::DenseTensor;
-  Tensor out(in->type());
+  phi::DenseTensor out(in->type());
 
-  Tensor mean(in->type());
+  phi::DenseTensor mean(in->type());
   mean.Resize({1});
   mean.mutable_data<float>(dev_ctx.GetPlace());
   std::vector<int> axes;
...
@@ -24,8 +24,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-using Tensor = phi::DenseTensor;
-
 static constexpr int kNumCUDAThreads = 512;
 static constexpr int kNumMaxinumNumBlocks = 4096;
@@ -126,7 +124,7 @@ struct CSoftmaxWithCrossEntropyFunctor<phi::GPUContext, T> {
     const int N = phi::funcs::SizeToAxis(axis, logits_dims);
     const int D = phi::funcs::SizeFromAxis(axis, logits_dims);
 
-    Tensor logits_2d, softmax_2d, loss_2d;
+    phi::DenseTensor logits_2d, softmax_2d, loss_2d;
     logits_2d.ShareDataWith(*logits).Resize({N, D});
     softmax_2d.ShareDataWith(*softmax).Resize({N, D});
     loss_2d.ShareDataWith(*loss).Resize({N, 1});
@@ -135,7 +133,7 @@ struct CSoftmaxWithCrossEntropyFunctor<phi::GPUContext, T> {
     auto eigen_softmax = math::EigenMatrix<T>::From(softmax_2d);
 
     // step 1, obtain logit_max
-    Tensor logits_max;
+    phi::DenseTensor logits_max;
     logits_max = ctx.AllocateTmpTensor<T, phi::GPUContext>({N, 1}, dev_ctx);
     void* logits_max_buff = logits_max.mutable_data<T>(place);
@@ -163,7 +161,7 @@ struct CSoftmaxWithCrossEntropyFunctor<phi::GPUContext, T> {
             .unaryExpr(math::ValueClip<T>());
 
     // step 3, obtain predict target
-    Tensor predicted_logits;
+    phi::DenseTensor predicted_logits;
     predicted_logits =
         ctx.AllocateTmpTensor<T, phi::GPUContext>({N, 1}, dev_ctx);
     predicted_logits.mutable_data<T>(place);
@@ -215,7 +213,7 @@ struct CSoftmaxWithCrossEntropyFunctor<phi::GPUContext, T> {
     eigen_softmax.device(*dev_ctx.eigen_device()) = eigen_softmax.exp();
 
     // step 5, obtain sum_exp_logits
-    Tensor sum_exp_logits;
+    phi::DenseTensor sum_exp_logits;
     sum_exp_logits = ctx.AllocateTmpTensor<T, phi::GPUContext>({N, 1}, dev_ctx);
     void* sum_exp_logits_buff = sum_exp_logits.mutable_data<T>(place);
@@ -278,7 +276,7 @@ struct CSoftmaxWithCrossEntropyProcessGroupFunctor<phi::GPUContext, T> {
     const int N = phi::funcs::SizeToAxis(axis, logits_dims);
     const int D = phi::funcs::SizeFromAxis(axis, logits_dims);
 
-    Tensor logits_2d, softmax_2d, loss_2d;
+    phi::DenseTensor logits_2d, softmax_2d, loss_2d;
     logits_2d.ShareDataWith(*logits).Resize({N, D});
     softmax_2d.ShareDataWith(*softmax).Resize({N, D});
     loss_2d.ShareDataWith(*loss).Resize({N, 1});
@@ -287,7 +285,7 @@ struct CSoftmaxWithCrossEntropyProcessGroupFunctor<phi::GPUContext, T> {
     auto eigen_softmax = math::EigenMatrix<T>::From(softmax_2d);
 
     // step 1, obtain logit_max
-    Tensor logits_max;
+    phi::DenseTensor logits_max;
     logits_max = ctx.AllocateTmpTensor<T, phi::GPUContext>({N, 1}, dev_ctx);
     auto eigen_logits_max = math::EigenMatrix<T>::From(logits_max);
@@ -309,7 +307,7 @@ struct CSoftmaxWithCrossEntropyProcessGroupFunctor<phi::GPUContext, T> {
             .unaryExpr(math::ValueClip<T>());
 
     // step 3, obtain predict target
-    Tensor predicted_logits;
+    phi::DenseTensor predicted_logits;
     predicted_logits =
         ctx.AllocateTmpTensor<T, phi::GPUContext>({N, 1}, dev_ctx);
     predicted_logits.mutable_data<T>(place);
@@ -355,7 +353,7 @@ struct CSoftmaxWithCrossEntropyProcessGroupFunctor<phi::GPUContext, T> {
     eigen_softmax.device(*dev_ctx.eigen_device()) = eigen_softmax.exp();
 
     // step 5, obtain sum_exp_logits
-    Tensor sum_exp_logits;
+    phi::DenseTensor sum_exp_logits;
     sum_exp_logits = ctx.AllocateTmpTensor<T, phi::GPUContext>({N, 1}, dev_ctx);
     void* sum_exp_logits_buff = sum_exp_logits.mutable_data<T>(place);
@@ -405,7 +403,7 @@ class CSoftmaxWithCrossEntropyGradCUDAKernel : public framework::OpKernel<T> {
     const int N = phi::funcs::SizeToAxis(axis, sofrmax_dims);
     const int D = phi::funcs::SizeFromAxis(axis, sofrmax_dims);
 
-    Tensor logit_grad_2d;
+    phi::DenseTensor logit_grad_2d;
     logit_grad_2d.ShareDataWith(*logit_grad).Resize({N, D});
 
     int blocks = NumBlocks(N * D);
...
@@ -26,7 +26,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-using Tensor = phi::DenseTensor;
 class ConcatOp : public framework::OperatorWithKernel {
  public:
...
@@ -119,7 +119,7 @@ class ConcatGradMLUKernel : public framework::OpKernel<T> {
                           out_grad->dims().size()));
     // get output tensor that the name is not kEmptyVarName
     std::vector<void*> outputs_vec;
-    std::vector<Tensor> tmp_outputs_vec;
+    std::vector<phi::DenseTensor> tmp_outputs_vec;
     std::vector<MLUCnnlTensorDesc> output_descs;
     std::vector<cnnlTensorDescriptor_t> descs_vec;
     for (size_t j = 0; j < outs.size(); ++j) {
@@ -129,7 +129,7 @@ class ConcatGradMLUKernel : public framework::OpKernel<T> {
         output_descs.emplace_back(MLUCnnlTensorDesc(*outs[j]));
         outputs_vec.push_back(GetBasePtr(outs[j]));
       } else {
-        Tensor tmp_tensor;
+        phi::DenseTensor tmp_tensor;
         tmp_tensor.mutable_data<T>(ins[j]->dims(), ctx.GetPlace());
         tmp_outputs_vec.push_back(tmp_tensor);
         output_descs.emplace_back(MLUCnnlTensorDesc(*ins[j]));
...
@@ -18,8 +18,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-using Tensor = phi::DenseTensor;
-
 template <typename T, cnnlLogicOp_t log_method>
 class LogicalMLUKernel : public framework::OpKernel<T> {
  public:
...
@@ -15,8 +15,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-using Tensor = phi::DenseTensor;
-
 template <typename DeviceContext, typename T>
 class LogicalNotNPUKernel : public framework::OpKernel<T> {
  public:
...
...@@ -29,8 +29,6 @@ limitations under the License. */ ...@@ -29,8 +29,6 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = phi::DenseTensor;
// Base convolution operator definitions for other conv // Base convolution operator definitions for other conv
// like operators to reuse the implementation. // like operators to reuse the implementation.
inline int ConvOutputSize( inline int ConvOutputSize(
......
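conv_op.h centers on ConvOutputSize. A sketch of the arithmetic such a helper conventionally performs (assumed formula with explicit front/back padding, not copied from the header):

#include <cassert>
#include <iostream>

// out = (in + pad_0 + pad_1 - (dilation * (ksize - 1) + 1)) / stride + 1
int ConvOutputSize(
    int input, int ksize, int dilation, int pad_0, int pad_1, int stride) {
  const int dkernel = dilation * (ksize - 1) + 1;  // effective kernel extent
  return (input + pad_0 + pad_1 - dkernel) / stride + 1;
}

int main() {
  // 224 input, 3x3 kernel, stride 2, pad 1, no dilation -> 112.
  assert(ConvOutputSize(224, 3, /*dilation=*/1, 1, 1, /*stride=*/2) == 112);
  std::cout << ConvOutputSize(224, 3, 1, 1, 1, 2) << "\n";
}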
...@@ -18,7 +18,6 @@ ...@@ -18,7 +18,6 @@
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = phi::DenseTensor;
using DataLayout = phi::DataLayout; using DataLayout = phi::DataLayout;
template <typename T> template <typename T>
...@@ -56,8 +55,8 @@ class MLUConvOpKernel : public framework::OpKernel<T> { ...@@ -56,8 +55,8 @@ class MLUConvOpKernel : public framework::OpKernel<T> {
UpdatePaddingAndDilation( UpdatePaddingAndDilation(
&paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize); &paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize);
Tensor input_tensor(input->type()); phi::DenseTensor input_tensor(input->type());
Tensor output_tensor(output->type()); phi::DenseTensor output_tensor(output->type());
const std::vector<int> perm_to_nhwc = {0, 2, 3, 1}; const std::vector<int> perm_to_nhwc = {0, 2, 3, 1};
if (channel_last) { if (channel_last) {
input_tensor.ShareDataWith(*input); input_tensor.ShareDataWith(*input);
...@@ -78,7 +77,7 @@ class MLUConvOpKernel : public framework::OpKernel<T> { ...@@ -78,7 +77,7 @@ class MLUConvOpKernel : public framework::OpKernel<T> {
output_tensor.set_layout(DataLayout::kNHWC); output_tensor.set_layout(DataLayout::kNHWC);
// transpose filter from MCHW to MHWC // transpose filter from MCHW to MHWC
Tensor trans_filter(filter->type()); phi::DenseTensor trans_filter(filter->type());
TransposeFromMLUTensor<T>(ctx, TransposeFromMLUTensor<T>(ctx,
perm_to_nhwc, perm_to_nhwc,
filter, filter,
...@@ -166,8 +165,8 @@ class MLUConvGradOpKernel : public framework::OpKernel<T> { ...@@ -166,8 +165,8 @@ class MLUConvGradOpKernel : public framework::OpKernel<T> {
UpdatePaddingAndDilation( UpdatePaddingAndDilation(
&paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize); &paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize);
Tensor input_tensor(input->type()); phi::DenseTensor input_tensor(input->type());
Tensor output_grad_tensor(output_grad->type()); phi::DenseTensor output_grad_tensor(output_grad->type());
const std::vector<int> perm_to_nhwc = {0, 2, 3, 1}; const std::vector<int> perm_to_nhwc = {0, 2, 3, 1};
const std::vector<int> perm_to_nchw = {0, 3, 1, 2}; const std::vector<int> perm_to_nchw = {0, 3, 1, 2};
if (channel_last) { if (channel_last) {
...@@ -193,7 +192,7 @@ class MLUConvGradOpKernel : public framework::OpKernel<T> { ...@@ -193,7 +192,7 @@ class MLUConvGradOpKernel : public framework::OpKernel<T> {
filter_grad->mutable_data<T>(ctx.GetPlace()); filter_grad->mutable_data<T>(ctx.GetPlace());
auto filter_grad_dims = filter_grad->dims(); auto filter_grad_dims = filter_grad->dims();
Tensor temp_filter_grad(filter_grad->type()); phi::DenseTensor temp_filter_grad(filter_grad->type());
temp_filter_grad.mutable_data<T>({filter_grad_dims[0], temp_filter_grad.mutable_data<T>({filter_grad_dims[0],
filter_grad_dims[2], filter_grad_dims[2],
filter_grad_dims[3], filter_grad_dims[3],
...@@ -234,7 +233,7 @@ class MLUConvGradOpKernel : public framework::OpKernel<T> { ...@@ -234,7 +233,7 @@ class MLUConvGradOpKernel : public framework::OpKernel<T> {
if (input_grad) { if (input_grad) {
input_grad->mutable_data<T>(ctx.GetPlace()); input_grad->mutable_data<T>(ctx.GetPlace());
Tensor input_grad_tensor(input_grad->type()); phi::DenseTensor input_grad_tensor(input_grad->type());
if (channel_last) { if (channel_last) {
input_grad_tensor.ShareDataWith(*input_grad); input_grad_tensor.ShareDataWith(*input_grad);
} else { } else {
...@@ -248,7 +247,7 @@ class MLUConvGradOpKernel : public framework::OpKernel<T> { ...@@ -248,7 +247,7 @@ class MLUConvGradOpKernel : public framework::OpKernel<T> {
input_grad_tensor.set_layout(DataLayout::kNHWC); input_grad_tensor.set_layout(DataLayout::kNHWC);
// transpose filter from MCHW to MHWC // transpose filter from MCHW to MHWC
Tensor trans_filter(filter->type()); phi::DenseTensor trans_filter(filter->type());
TransposeFromMLUTensor<T>(ctx, TransposeFromMLUTensor<T>(ctx,
perm_to_nhwc, perm_to_nhwc,
filter, filter,
...@@ -326,8 +325,8 @@ class MLUDepthwiseConvOpKernel : public framework::OpKernel<T> { ...@@ -326,8 +325,8 @@ class MLUDepthwiseConvOpKernel : public framework::OpKernel<T> {
UpdatePaddingAndDilation( UpdatePaddingAndDilation(
&paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize); &paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize);
Tensor input_tensor(input->type()); phi::DenseTensor input_tensor(input->type());
Tensor output_tensor(output->type()); phi::DenseTensor output_tensor(output->type());
const std::vector<int> perm_to_nhwc = {0, 2, 3, 1}; const std::vector<int> perm_to_nhwc = {0, 2, 3, 1};
if (channel_last) { if (channel_last) {
groups = in_dims[3]; groups = in_dims[3];
...@@ -350,7 +349,7 @@ class MLUDepthwiseConvOpKernel : public framework::OpKernel<T> { ...@@ -350,7 +349,7 @@ class MLUDepthwiseConvOpKernel : public framework::OpKernel<T> {
output_tensor.set_layout(DataLayout::kNHWC); output_tensor.set_layout(DataLayout::kNHWC);
// transpose filter from MCHW to MHWC // transpose filter from MCHW to MHWC
Tensor trans_filter(filter->type()); phi::DenseTensor trans_filter(filter->type());
TransposeFromMLUTensor<T>(ctx, TransposeFromMLUTensor<T>(ctx,
perm_to_nhwc, perm_to_nhwc,
filter, filter,
...@@ -438,8 +437,8 @@ class MLUDepthwiseConvGradOpKernel : public framework::OpKernel<T> { ...@@ -438,8 +437,8 @@ class MLUDepthwiseConvGradOpKernel : public framework::OpKernel<T> {
UpdatePaddingAndDilation( UpdatePaddingAndDilation(
&paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize); &paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize);
Tensor input_tensor(input->type()); phi::DenseTensor input_tensor(input->type());
Tensor output_grad_tensor(output_grad->type()); phi::DenseTensor output_grad_tensor(output_grad->type());
const std::vector<int> perm_to_nhwc = {0, 2, 3, 1}; const std::vector<int> perm_to_nhwc = {0, 2, 3, 1};
const std::vector<int> perm_to_nchw = {0, 3, 1, 2}; const std::vector<int> perm_to_nchw = {0, 3, 1, 2};
const std::vector<int> perm_hwcm_to_mchw = {3, 2, 0, 1}; const std::vector<int> perm_hwcm_to_mchw = {3, 2, 0, 1};
...@@ -469,7 +468,7 @@ class MLUDepthwiseConvGradOpKernel : public framework::OpKernel<T> { ...@@ -469,7 +468,7 @@ class MLUDepthwiseConvGradOpKernel : public framework::OpKernel<T> {
filter_grad->mutable_data<T>(ctx.GetPlace()); filter_grad->mutable_data<T>(ctx.GetPlace());
auto filter_grad_dims = filter_grad->dims(); auto filter_grad_dims = filter_grad->dims();
Tensor temp_filter_grad(filter_grad->type()); phi::DenseTensor temp_filter_grad(filter_grad->type());
// Details about setting diff_w hwcn for better performance, see the CNNL // Details about setting diff_w hwcn for better performance, see the CNNL
// documentation. // documentation.
temp_filter_grad.mutable_data<T>({filter_grad_dims[perm_mchw_to_hwcm[0]], temp_filter_grad.mutable_data<T>({filter_grad_dims[perm_mchw_to_hwcm[0]],
...@@ -512,7 +511,7 @@ class MLUDepthwiseConvGradOpKernel : public framework::OpKernel<T> { ...@@ -512,7 +511,7 @@ class MLUDepthwiseConvGradOpKernel : public framework::OpKernel<T> {
if (input_grad) { if (input_grad) {
input_grad->mutable_data<T>(ctx.GetPlace()); input_grad->mutable_data<T>(ctx.GetPlace());
Tensor input_grad_tensor(input_grad->type()); phi::DenseTensor input_grad_tensor(input_grad->type());
if (channel_last) { if (channel_last) {
input_grad_tensor.ShareDataWith(*input_grad); input_grad_tensor.ShareDataWith(*input_grad);
} else { } else {
...@@ -526,7 +525,7 @@ class MLUDepthwiseConvGradOpKernel : public framework::OpKernel<T> { ...@@ -526,7 +525,7 @@ class MLUDepthwiseConvGradOpKernel : public framework::OpKernel<T> {
input_grad_tensor.set_layout(DataLayout::kNHWC); input_grad_tensor.set_layout(DataLayout::kNHWC);
// transpose filter from MCHW to MHWC // transpose filter from MCHW to MHWC
Tensor trans_filter(filter->type()); phi::DenseTensor trans_filter(filter->type());
TransposeFromMLUTensor<T>(ctx, TransposeFromMLUTensor<T>(ctx,
perm_to_nhwc, perm_to_nhwc,
filter, filter,
......
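The MLU conv kernels above repeatedly transpose NCHW data to NHWC with perm_to_nhwc = {0, 2, 3, 1}. A standalone sketch of that index permutation on a flat buffer (TransposeFromMLUTensor itself runs on the device; this shows only the layout math):

#include <cassert>
#include <cstddef>
#include <iostream>
#include <vector>

std::vector<float> ToNHWC(const std::vector<float>& src,
                          int n, int c, int h, int w) {
  std::vector<float> dst(src.size());
  for (int in = 0; in < n; ++in)
    for (int ic = 0; ic < c; ++ic)
      for (int ih = 0; ih < h; ++ih)
        for (int iw = 0; iw < w; ++iw) {
          const std::size_t from = ((in * c + ic) * h + ih) * w + iw;  // NCHW
          const std::size_t to = ((in * h + ih) * w + iw) * c + ic;    // NHWC
          dst[to] = src[from];
        }
  return dst;
}

int main() {
  // 1x2x2x2 tensor: channel-major in, channel-minor out.
  std::vector<float> nchw = {0, 1, 2, 3,   // channel 0
                             4, 5, 6, 7};  // channel 1
  auto nhwc = ToNHWC(nchw, 1, 2, 2, 2);
  assert(nhwc[0] == 0 && nhwc[1] == 4);  // pixel (0,0): channels now adjacent
  std::cout << "ok\n";
}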
...@@ -18,7 +18,6 @@ ...@@ -18,7 +18,6 @@
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = phi::DenseTensor;
using NPUDeviceContext = platform::NPUDeviceContext; using NPUDeviceContext = platform::NPUDeviceContext;
static void CastToFP16(const framework::ExecutionContext& ctx, static void CastToFP16(const framework::ExecutionContext& ctx,
const aclrtStream& stream, const aclrtStream& stream,
...@@ -104,7 +103,7 @@ class DepthwiseConvNPUKernel : public framework::OpKernel<T> { ...@@ -104,7 +103,7 @@ class DepthwiseConvNPUKernel : public framework::OpKernel<T> {
std::vector<int> strides(4, 1); std::vector<int> strides(4, 1);
std::vector<int> dilations(4, 1); std::vector<int> dilations(4, 1);
Tensor input_tensor, output_tensor; phi::DenseTensor input_tensor, output_tensor;
input_tensor.ShareDataWith(*input); input_tensor.ShareDataWith(*input);
output_tensor.ShareDataWith(*output); output_tensor.ShareDataWith(*output);
...@@ -125,7 +124,7 @@ class DepthwiseConvNPUKernel : public framework::OpKernel<T> { ...@@ -125,7 +124,7 @@ class DepthwiseConvNPUKernel : public framework::OpKernel<T> {
auto stream = ctx.template device_context<NPUDeviceContext>().stream(); auto stream = ctx.template device_context<NPUDeviceContext>().stream();
// Transform filter (n, 1, h, w) --> (1, n, h, w) // Transform filter (n, 1, h, w) --> (1, n, h, w)
Tensor transformed_filter(filter->type()); phi::DenseTensor transformed_filter(filter->type());
transformed_filter.mutable_data<T>({filter->dims()[1], transformed_filter.mutable_data<T>({filter->dims()[1],
filter->dims()[0], filter->dims()[0],
filter->dims()[2], filter->dims()[2],
...@@ -189,7 +188,7 @@ class DepthwiseConvGradNPUKernel : public framework::OpKernel<T> { ...@@ -189,7 +188,7 @@ class DepthwiseConvGradNPUKernel : public framework::OpKernel<T> {
auto stream = ctx.template device_context<NPUDeviceContext>().stream(); auto stream = ctx.template device_context<NPUDeviceContext>().stream();
// Transform filter (n, 1, h, w) --> (1, n, h, w) // Transform filter (n, 1, h, w) --> (1, n, h, w)
Tensor transformed_filter(filter->type()); phi::DenseTensor transformed_filter(filter->type());
transformed_filter.mutable_data<T>({filter->dims()[1], transformed_filter.mutable_data<T>({filter->dims()[1],
filter->dims()[0], filter->dims()[0],
filter->dims()[2], filter->dims()[2],
...@@ -204,7 +203,7 @@ class DepthwiseConvGradNPUKernel : public framework::OpKernel<T> { ...@@ -204,7 +203,7 @@ class DepthwiseConvGradNPUKernel : public framework::OpKernel<T> {
std::vector<int> strides(4, 1); std::vector<int> strides(4, 1);
std::vector<int> dilations(4, 1); std::vector<int> dilations(4, 1);
Tensor input_tensor, output_grad_tensor; phi::DenseTensor input_tensor, output_grad_tensor;
input_tensor.ShareDataWith(*input); input_tensor.ShareDataWith(*input);
output_grad_tensor.ShareDataWith(*output_grad); output_grad_tensor.ShareDataWith(*output_grad);
if (channel_last) { if (channel_last) {
...@@ -247,7 +246,7 @@ class DepthwiseConvGradNPUKernel : public framework::OpKernel<T> { ...@@ -247,7 +246,7 @@ class DepthwiseConvGradNPUKernel : public framework::OpKernel<T> {
} }
if (input_grad) { if (input_grad) {
input_grad->mutable_data<T>(ctx.GetPlace()); input_grad->mutable_data<T>(ctx.GetPlace());
Tensor input_grad_tensor; phi::DenseTensor input_grad_tensor;
input_grad_tensor.ShareDataWith(*input_grad); input_grad_tensor.ShareDataWith(*input_grad);
if (channel_last) { if (channel_last) {
input_grad_tensor.set_layout(DataLayout::kNHWC); input_grad_tensor.set_layout(DataLayout::kNHWC);
...@@ -305,7 +304,7 @@ class NPUConvOpKernel : public framework::OpKernel<T> { ...@@ -305,7 +304,7 @@ class NPUConvOpKernel : public framework::OpKernel<T> {
std::vector<int> strides_vec(4, 1); std::vector<int> strides_vec(4, 1);
std::vector<int> dilations_vec(4, 1); std::vector<int> dilations_vec(4, 1);
Tensor input_tensor, output_tensor; phi::DenseTensor input_tensor, output_tensor;
input_tensor.ShareDataWith(*input); input_tensor.ShareDataWith(*input);
output_tensor.ShareDataWith(*output); output_tensor.ShareDataWith(*output);
if (channel_last) { if (channel_last) {
...@@ -378,7 +377,7 @@ class NPUConvGradOpKernel : public framework::OpKernel<T> { ...@@ -378,7 +377,7 @@ class NPUConvGradOpKernel : public framework::OpKernel<T> {
std::vector<int> strides_vec(4, 1); std::vector<int> strides_vec(4, 1);
std::vector<int> dilations_vec(4, 1); std::vector<int> dilations_vec(4, 1);
Tensor input_tensor, output_grad_tensor; phi::DenseTensor input_tensor, output_grad_tensor;
input_tensor.ShareDataWith(*input); input_tensor.ShareDataWith(*input);
output_grad_tensor.ShareDataWith(*output_grad); output_grad_tensor.ShareDataWith(*output_grad);
if (channel_last) { if (channel_last) {
...@@ -400,7 +399,7 @@ class NPUConvGradOpKernel : public framework::OpKernel<T> { ...@@ -400,7 +399,7 @@ class NPUConvGradOpKernel : public framework::OpKernel<T> {
filter_grad->mutable_data<T>(ctx.GetPlace()); filter_grad->mutable_data<T>(ctx.GetPlace());
std::vector<int> filter_shape_vec = phi::vectorize<int>(filter->dims()); std::vector<int> filter_shape_vec = phi::vectorize<int>(filter->dims());
Tensor filter_grad_fp32(experimental::DataType::FLOAT32); phi::DenseTensor filter_grad_fp32(experimental::DataType::FLOAT32);
filter_grad_fp32.Resize(filter_grad->dims()); filter_grad_fp32.Resize(filter_grad->dims());
if (framework::TransToProtoVarType(input->dtype()) == if (framework::TransToProtoVarType(input->dtype()) ==
...@@ -430,7 +429,7 @@ class NPUConvGradOpKernel : public framework::OpKernel<T> { ...@@ -430,7 +429,7 @@ class NPUConvGradOpKernel : public framework::OpKernel<T> {
input_grad->mutable_data<T>(ctx.GetPlace()); input_grad->mutable_data<T>(ctx.GetPlace());
std::vector<int> input_shape_vec = phi::vectorize<int>(input->dims()); std::vector<int> input_shape_vec = phi::vectorize<int>(input->dims());
Tensor input_grad_tensor; phi::DenseTensor input_grad_tensor;
input_grad_tensor.ShareDataWith(*input_grad); input_grad_tensor.ShareDataWith(*input_grad);
if (channel_last) { if (channel_last) {
input_grad_tensor.set_layout(DataLayout::kNHWC); input_grad_tensor.set_layout(DataLayout::kNHWC);
...@@ -617,8 +616,9 @@ class NPUConv3dGradKernel : public framework::OpKernel<T> { ...@@ -617,8 +616,9 @@ class NPUConv3dGradKernel : public framework::OpKernel<T> {
filter_grad->mutable_data<T>(ctx.GetPlace()); filter_grad->mutable_data<T>(ctx.GetPlace());
std::vector<int> filter_shape_vec = phi::vectorize<int>(filter->dims()); std::vector<int> filter_shape_vec = phi::vectorize<int>(filter->dims());
Tensor filter_grad_tensor = ctx.AllocateTmpTensor<T, NPUDeviceContext>( phi::DenseTensor filter_grad_tensor =
filter_grad->dims(), dev_ctx); ctx.AllocateTmpTensor<T, NPUDeviceContext>(filter_grad->dims(),
dev_ctx);
filter_grad_tensor.ShareDataWith(*filter_grad); filter_grad_tensor.ShareDataWith(*filter_grad);
filter_grad_tensor.set_layout(DataLayout::kNCDHW); filter_grad_tensor.set_layout(DataLayout::kNCDHW);
...@@ -638,8 +638,9 @@ class NPUConv3dGradKernel : public framework::OpKernel<T> { ...@@ -638,8 +638,9 @@ class NPUConv3dGradKernel : public framework::OpKernel<T> {
input_grad->mutable_data<T>(ctx.GetPlace()); input_grad->mutable_data<T>(ctx.GetPlace());
std::vector<int> input_shape_vec = phi::vectorize<int>(input->dims()); std::vector<int> input_shape_vec = phi::vectorize<int>(input->dims());
Tensor input_grad_tensor = ctx.AllocateTmpTensor<T, NPUDeviceContext>( phi::DenseTensor input_grad_tensor =
input_grad->dims(), dev_ctx); ctx.AllocateTmpTensor<T, NPUDeviceContext>(input_grad->dims(),
dev_ctx);
input_grad_tensor.ShareDataWith(*input_grad); input_grad_tensor.ShareDataWith(*input_grad);
input_grad_tensor.set_layout(DataLayout::kNCDHW); input_grad_tensor.set_layout(DataLayout::kNCDHW);
......
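The NPU kernels above transform a depthwise filter from (n, 1, h, w) to (1, n, h, w). One small observation, checked numerically below: because the moved axis has size 1, the linear offsets are identical, so the transform is logically a reshape; whether the device still needs an explicit transpose op for metadata reasons is left to the real kernel and assumed here.

#include <cassert>
#include <cstddef>
#include <iostream>
#include <vector>

std::size_t Index4(const std::vector<int>& dims, int a, int b, int c, int d) {
  return ((static_cast<std::size_t>(a) * dims[1] + b) * dims[2] + c) * dims[3] + d;
}

int main() {
  std::vector<int> nchw = {3, 1, 2, 2};     // (n, 1, h, w)
  std::vector<int> swapped = {1, 3, 2, 2};  // (1, n, h, w)
  for (int i = 0; i < 3; ++i)
    for (int y = 0; y < 2; ++y)
      for (int x = 0; x < 2; ++x)
        assert(Index4(nchw, i, 0, y, x) == Index4(swapped, 0, i, y, x));
  std::cout << "same linear offsets -> pure reshape\n";
}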
...@@ -20,7 +20,6 @@ limitations under the License. */ ...@@ -20,7 +20,6 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = phi::DenseTensor;
using DataLayout = phi::DataLayout; using DataLayout = phi::DataLayout;
template <typename T> template <typename T>
...@@ -61,8 +60,8 @@ class Conv2DTransposeMLUKernel : public framework::OpKernel<T> { ...@@ -61,8 +60,8 @@ class Conv2DTransposeMLUKernel : public framework::OpKernel<T> {
phi::UpdatePaddingAndDilation( phi::UpdatePaddingAndDilation(
&paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize); &paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize);
Tensor input_tensor(input->type()); phi::DenseTensor input_tensor(input->type());
Tensor output_tensor(output->type()); phi::DenseTensor output_tensor(output->type());
input_tensor.set_layout(DataLayout::kNHWC); input_tensor.set_layout(DataLayout::kNHWC);
output_tensor.set_layout(DataLayout::kNHWC); output_tensor.set_layout(DataLayout::kNHWC);
const std::vector<int> perm_to_nhwc = {0, 2, 3, 1}; const std::vector<int> perm_to_nhwc = {0, 2, 3, 1};
...@@ -84,7 +83,7 @@ class Conv2DTransposeMLUKernel : public framework::OpKernel<T> { ...@@ -84,7 +83,7 @@ class Conv2DTransposeMLUKernel : public framework::OpKernel<T> {
} }
// transpose filter from MCHW to MHWC // transpose filter from MCHW to MHWC
Tensor trans_filter(filter->type()); phi::DenseTensor trans_filter(filter->type());
TransposeFromMLUTensor<T>(ctx, TransposeFromMLUTensor<T>(ctx,
perm_to_nhwc, perm_to_nhwc,
filter, filter,
...@@ -168,8 +167,8 @@ class Conv2DTransposeGradMLUKernel : public framework::OpKernel<T> { ...@@ -168,8 +167,8 @@ class Conv2DTransposeGradMLUKernel : public framework::OpKernel<T> {
phi::UpdatePaddingAndDilation( phi::UpdatePaddingAndDilation(
&paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize); &paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize);
Tensor input_tensor(input->type()); phi::DenseTensor input_tensor(input->type());
Tensor output_grad_tensor(output_grad->type()); phi::DenseTensor output_grad_tensor(output_grad->type());
output_grad_tensor.set_layout(DataLayout::kNHWC); output_grad_tensor.set_layout(DataLayout::kNHWC);
const std::vector<int> perm_to_nhwc = {0, 2, 3, 1}; const std::vector<int> perm_to_nhwc = {0, 2, 3, 1};
...@@ -191,7 +190,7 @@ class Conv2DTransposeGradMLUKernel : public framework::OpKernel<T> { ...@@ -191,7 +190,7 @@ class Conv2DTransposeGradMLUKernel : public framework::OpKernel<T> {
} }
// transpose filter from MCHW to MHWC // transpose filter from MCHW to MHWC
Tensor trans_filter(filter->type()); phi::DenseTensor trans_filter(filter->type());
TransposeFromMLUTensor<T>(ctx, TransposeFromMLUTensor<T>(ctx,
perm_to_nhwc, perm_to_nhwc,
filter, filter,
...@@ -217,7 +216,7 @@ class Conv2DTransposeGradMLUKernel : public framework::OpKernel<T> { ...@@ -217,7 +216,7 @@ class Conv2DTransposeGradMLUKernel : public framework::OpKernel<T> {
if (filter_grad) { if (filter_grad) {
filter_grad->mutable_data<T>(ctx.GetPlace()); filter_grad->mutable_data<T>(ctx.GetPlace());
Tensor filter_grad_tensor(filter_grad->type()); phi::DenseTensor filter_grad_tensor(filter_grad->type());
// filter_grad always MCHW // filter_grad always MCHW
// filter_grad_tensor always MHWC // filter_grad_tensor always MHWC
auto filter_grad_dims = filter_grad->dims(); auto filter_grad_dims = filter_grad->dims();
...@@ -253,7 +252,7 @@ class Conv2DTransposeGradMLUKernel : public framework::OpKernel<T> { ...@@ -253,7 +252,7 @@ class Conv2DTransposeGradMLUKernel : public framework::OpKernel<T> {
if (input_grad) { if (input_grad) {
input_grad->mutable_data<T>(ctx.GetPlace()); input_grad->mutable_data<T>(ctx.GetPlace());
Tensor input_grad_tensor(input_grad->type()); phi::DenseTensor input_grad_tensor(input_grad->type());
input_tensor.set_layout(DataLayout::kNHWC); input_tensor.set_layout(DataLayout::kNHWC);
if (channel_last) { if (channel_last) {
......
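The transpose-conv kernels above invert the forward conv size formula, and that inverse is ambiguous up to stride - 1 pixels, which is why frameworks expose an output_padding attribute or infer the size from a given output shape. A hedged sketch of the arithmetic (assumed formula, not copied from the kernel):

#include <iostream>

int ConvTransposeOutputSize(
    int input, int ksize, int dilation, int pad_0, int pad_1, int stride) {
  const int dkernel = dilation * (ksize - 1) + 1;
  return (input - 1) * stride - pad_0 - pad_1 + dkernel;
}

int main() {
  // Inverting the 224 -> 112 conv (k=3, s=2, p=1) gives 223, one pixel short
  // of 224: both 223 and 224 map to 112 forward, and output_padding selects
  // which one the transpose restores.
  std::cout << ConvTransposeOutputSize(112, 3, 1, 1, 1, 2) << "\n";  // 223
}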
...@@ -20,7 +20,6 @@ limitations under the License. */ ...@@ -20,7 +20,6 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = phi::DenseTensor;
using NPUDeviceContext = platform::NPUDeviceContext; using NPUDeviceContext = platform::NPUDeviceContext;
template <typename T> template <typename T>
...@@ -65,7 +64,7 @@ class Conv2DTransposeNPUKernel : public framework::OpKernel<T> { ...@@ -65,7 +64,7 @@ class Conv2DTransposeNPUKernel : public framework::OpKernel<T> {
std::vector<int> strides(4, 1); std::vector<int> strides(4, 1);
std::vector<int> dilations(4, 1); std::vector<int> dilations(4, 1);
Tensor input_tensor, output_tensor; phi::DenseTensor input_tensor, output_tensor;
input_tensor.ShareDataWith(*input); input_tensor.ShareDataWith(*input);
output_tensor.ShareDataWith(*output); output_tensor.ShareDataWith(*output);
...@@ -148,7 +147,7 @@ class Conv2DTransposeGradNPUKernel : public framework::OpKernel<T> { ...@@ -148,7 +147,7 @@ class Conv2DTransposeGradNPUKernel : public framework::OpKernel<T> {
std::vector<int> strides_vec(4, 1); std::vector<int> strides_vec(4, 1);
std::vector<int> dilations_vec(4, 1); std::vector<int> dilations_vec(4, 1);
Tensor input_tensor, output_grad_tensor; phi::DenseTensor input_tensor, output_grad_tensor;
input_tensor.ShareDataWith(*input); input_tensor.ShareDataWith(*input);
output_grad_tensor.ShareDataWith(*output_grad); output_grad_tensor.ShareDataWith(*output_grad);
if (channel_last) { if (channel_last) {
...@@ -182,7 +181,7 @@ class Conv2DTransposeGradNPUKernel : public framework::OpKernel<T> { ...@@ -182,7 +181,7 @@ class Conv2DTransposeGradNPUKernel : public framework::OpKernel<T> {
} }
if (input_grad) { if (input_grad) {
input_grad->mutable_data<T>(ctx.GetPlace()); input_grad->mutable_data<T>(ctx.GetPlace());
Tensor input_grad_tensor; phi::DenseTensor input_grad_tensor;
input_grad_tensor.ShareDataWith(*input_grad); input_grad_tensor.ShareDataWith(*input_grad);
if (channel_last) { if (channel_last) {
input_grad_tensor.set_layout(DataLayout::kNHWC); input_grad_tensor.set_layout(DataLayout::kNHWC);
...@@ -248,7 +247,7 @@ class Conv3DTransposeNPUKernel : public framework::OpKernel<T> { ...@@ -248,7 +247,7 @@ class Conv3DTransposeNPUKernel : public framework::OpKernel<T> {
std::vector<int> strides(5, 1); std::vector<int> strides(5, 1);
std::vector<int> dilations(5, 1); std::vector<int> dilations(5, 1);
Tensor input_tensor, output_tensor, filter_tensor; phi::DenseTensor input_tensor, output_tensor, filter_tensor;
input_tensor.Resize(input->dims()); input_tensor.Resize(input->dims());
input_tensor.ShareDataWith(*input); input_tensor.ShareDataWith(*input);
output_tensor.Resize(output->dims()); output_tensor.Resize(output->dims());
......
...@@ -30,8 +30,6 @@ class OpBase; ...@@ -30,8 +30,6 @@ class OpBase;
} // namespace imperative } // namespace imperative
} // namespace paddle } // namespace paddle
using Tensor = phi::DenseTensor;
namespace paddle { namespace paddle {
namespace operators { namespace operators {
......
...@@ -22,8 +22,6 @@ limitations under the License. */ ...@@ -22,8 +22,6 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = phi::DenseTensor;
inline std::vector<int64_t> CorrelationOutputSize(int batch, inline std::vector<int64_t> CorrelationOutputSize(int batch,
int input_height, int input_height,
int input_width, int input_width,
......
...@@ -21,13 +21,11 @@ limitations under the License. */ ...@@ -21,13 +21,11 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = phi::DenseTensor;
template <typename DeviceContext, typename T> template <typename DeviceContext, typename T>
class CosSimKernel : public framework::OpKernel<T> { class CosSimKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
// get Tensor // get phi::DenseTensor
auto* in_x = context.Input<phi::DenseTensor>("X"); auto* in_x = context.Input<phi::DenseTensor>("X");
auto* in_y = context.Input<phi::DenseTensor>("Y"); auto* in_y = context.Input<phi::DenseTensor>("Y");
auto* out_z = context.Output<phi::DenseTensor>("Out"); auto* out_z = context.Output<phi::DenseTensor>("Out");
...@@ -74,7 +72,7 @@ template <typename DeviceContext, typename T> ...@@ -74,7 +72,7 @@ template <typename DeviceContext, typename T>
class CosSimGradKernel : public framework::OpKernel<T> { class CosSimGradKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
// get Tensor // get phi::DenseTensor
auto* in_x = context.Input<phi::DenseTensor>("X"); auto* in_x = context.Input<phi::DenseTensor>("X");
auto* in_y = context.Input<phi::DenseTensor>("Y"); auto* in_y = context.Input<phi::DenseTensor>("Y");
auto* in_z = context.Input<phi::DenseTensor>("Out"); auto* in_z = context.Input<phi::DenseTensor>("Out");
......
...@@ -18,8 +18,6 @@ limitations under the License. */ ...@@ -18,8 +18,6 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = phi::DenseTensor;
template <typename DeviceContext, typename T> template <typename DeviceContext, typename T>
class CropNPUKernel : public framework::OpKernel<T> { class CropNPUKernel : public framework::OpKernel<T> {
public: public:
...@@ -71,7 +69,7 @@ class CropNPUKernel : public framework::OpKernel<T> { ...@@ -71,7 +69,7 @@ class CropNPUKernel : public framework::OpKernel<T> {
x->dims().size())); x->dims().size()));
// the shape tensor's memory may have been garbage-collected. // the shape tensor's memory may have been garbage-collected.
Tensor tmp_shape(*shape); phi::DenseTensor tmp_shape(*shape);
tmp_shape.mutable_data<T>(ctx.GetPlace()); tmp_shape.mutable_data<T>(ctx.GetPlace());
const auto& runner = const auto& runner =
...@@ -90,7 +88,7 @@ class CropNPUKernel : public framework::OpKernel<T> { ...@@ -90,7 +88,7 @@ class CropNPUKernel : public framework::OpKernel<T> {
"(%d) of the Input(X).", "(%d) of the Input(X).",
shape_size.size(), shape_size.size(),
x->dims().size())); x->dims().size()));
Tensor tmp_shape(x->dtype()); phi::DenseTensor tmp_shape(x->dtype());
tmp_shape.Resize(phi::make_ddim(shape_size)); tmp_shape.Resize(phi::make_ddim(shape_size));
tmp_shape.mutable_data<T>(ctx.GetPlace()); tmp_shape.mutable_data<T>(ctx.GetPlace());
const auto& runner = const auto& runner =
......
...@@ -23,8 +23,6 @@ limitations under the License. */ ...@@ -23,8 +23,6 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = phi::DenseTensor;
template <typename DeviceContext, typename T> template <typename DeviceContext, typename T>
class CrossEntropyOpKernel : public framework::OpKernel<T> { class CrossEntropyOpKernel : public framework::OpKernel<T> {
public: public:
...@@ -36,8 +34,8 @@ class CrossEntropyOpKernel : public framework::OpKernel<T> { ...@@ -36,8 +34,8 @@ class CrossEntropyOpKernel : public framework::OpKernel<T> {
int rank = x->dims().size(); int rank = x->dims().size();
auto label_dims = labels->dims(); auto label_dims = labels->dims();
Tensor x_2d = framework::ReshapeToMatrix(*x, rank - 1); phi::DenseTensor x_2d = framework::ReshapeToMatrix(*x, rank - 1);
Tensor labels_2d, y_2d; phi::DenseTensor labels_2d, y_2d;
if (label_dims.size() < rank) { if (label_dims.size() < rank) {
labels_2d.ShareDataWith(*labels); labels_2d.ShareDataWith(*labels);
labels_2d.Resize({phi::product(label_dims), 1}); labels_2d.Resize({phi::product(label_dims), 1});
......
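CrossEntropyOpKernel above views the input as a 2-D matrix via framework::ReshapeToMatrix(*x, rank - 1). A sketch of the assumed folding convention (the first `num_col_dims` axes become rows, the rest columns):

#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

std::pair<int64_t, int64_t> ToMatrixShape(const std::vector<int64_t>& dims,
                                          int num_col_dims) {
  int64_t rows = 1, cols = 1;
  for (int i = 0; i < num_col_dims; ++i) rows *= dims[i];
  for (int i = num_col_dims; i < static_cast<int>(dims.size()); ++i)
    cols *= dims[i];
  return {rows, cols};
}

int main() {
  // rank 3, fold rank - 1 = 2 leading axes: [8, 16, 10] -> [128, 10]
  auto [rows, cols] = ToMatrixShape({8, 16, 10}, /*num_col_dims=*/2);
  std::cout << rows << " x " << cols << "\n";  // 128 x 10
}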
...@@ -24,8 +24,6 @@ limitations under the License. */ ...@@ -24,8 +24,6 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = phi::DenseTensor;
template <typename DeviceContext, typename T> template <typename DeviceContext, typename T>
class CTCAlignKernel : public framework::OpKernel<T> { class CTCAlignKernel : public framework::OpKernel<T> {
public: public:
......
...@@ -26,8 +26,6 @@ limitations under the License. */ ...@@ -26,8 +26,6 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = phi::DenseTensor;
template <typename T, typename Type> template <typename T, typename Type>
bool is_continuous(const Type &weight_list) { bool is_continuous(const Type &weight_list) {
bool continuous = true; bool continuous = true;
...@@ -41,7 +39,7 @@ bool is_continuous(const Type &weight_list) { ...@@ -41,7 +39,7 @@ bool is_continuous(const Type &weight_list) {
return continuous; return continuous;
} }
int size_sum(const std::vector<const Tensor *> &weight_list) { int size_sum(const std::vector<const phi::DenseTensor *> &weight_list) {
int size = 0; int size = 0;
for (size_t i = 0; i < weight_list.size(); ++i) { for (size_t i = 0; i < weight_list.size(); ++i) {
auto in_size = weight_list[i]->numel(); auto in_size = weight_list[i]->numel();
...@@ -53,8 +51,8 @@ int size_sum(const std::vector<const Tensor *> &weight_list) { ...@@ -53,8 +51,8 @@ int size_sum(const std::vector<const Tensor *> &weight_list) {
template <typename T> template <typename T>
void weight_to_tensor(const platform::Place &place, void weight_to_tensor(const platform::Place &place,
gpuStream_t stream, gpuStream_t stream,
const std::vector<const Tensor *> &weight_list, const std::vector<const phi::DenseTensor *> &weight_list,
Tensor *weight) { phi::DenseTensor *weight) {
auto weight_data = weight->data<T>(); auto weight_data = weight->data<T>();
int weight_offset = 0; int weight_offset = 0;
for (size_t i = 0; i < weight_list.size(); ++i) { for (size_t i = 0; i < weight_list.size(); ++i) {
...@@ -72,11 +70,12 @@ void weight_to_tensor(const platform::Place &place, ...@@ -72,11 +70,12 @@ void weight_to_tensor(const platform::Place &place,
} }
template <typename T> template <typename T>
void weight_to_tensor_list(const platform::Place &place, void weight_to_tensor_list(
gpuStream_t stream, const platform::Place &place,
std::vector<Tensor *> *weight_grad, gpuStream_t stream,
const std::vector<const Tensor *> &weight_input, std::vector<phi::DenseTensor *> *weight_grad,
const Tensor *weight) { const std::vector<const phi::DenseTensor *> &weight_input,
const phi::DenseTensor *weight) {
int weight_offset = 0; int weight_offset = 0;
auto *weight_data = weight->data<T>(); auto *weight_data = weight->data<T>();
for (size_t i = 0; i < weight_input.size(); ++i) { for (size_t i = 0; i < weight_input.size(); ++i) {
...@@ -204,15 +203,15 @@ template <typename T> ...@@ -204,15 +203,15 @@ template <typename T>
class CudnnLSTMGPUKernel : public framework::OpKernel<T> { class CudnnLSTMGPUKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext &ctx) const override { void Compute(const framework::ExecutionContext &ctx) const override {
const Tensor *x = ctx.Input<phi::DenseTensor>("Input"); const phi::DenseTensor *x = ctx.Input<phi::DenseTensor>("Input");
const Tensor *init_h = ctx.Input<phi::DenseTensor>("InitH"); const phi::DenseTensor *init_h = ctx.Input<phi::DenseTensor>("InitH");
const Tensor *init_c = ctx.Input<phi::DenseTensor>("InitC"); const phi::DenseTensor *init_c = ctx.Input<phi::DenseTensor>("InitC");
Tensor *out = ctx.Output<phi::DenseTensor>("Out"); phi::DenseTensor *out = ctx.Output<phi::DenseTensor>("Out");
Tensor *last_h = ctx.Output<phi::DenseTensor>("LastH"); phi::DenseTensor *last_h = ctx.Output<phi::DenseTensor>("LastH");
Tensor *last_c = ctx.Output<phi::DenseTensor>("LastC"); phi::DenseTensor *last_c = ctx.Output<phi::DenseTensor>("LastC");
Tensor *reserve = ctx.Output<phi::DenseTensor>("Reserve"); phi::DenseTensor *reserve = ctx.Output<phi::DenseTensor>("Reserve");
Tensor *state_out = ctx.Output<phi::DenseTensor>("StateOut"); phi::DenseTensor *state_out = ctx.Output<phi::DenseTensor>("StateOut");
const T *x_data = x->data<T>(); const T *x_data = x->data<T>();
const T *init_h_data = init_h->data<T>(); const T *init_h_data = init_h->data<T>();
...@@ -256,7 +255,7 @@ class CudnnLSTMGPUKernel : public framework::OpKernel<T> { ...@@ -256,7 +255,7 @@ class CudnnLSTMGPUKernel : public framework::OpKernel<T> {
size_t workspace_size; size_t workspace_size;
size_t reserve_size; size_t reserve_size;
Tensor weight_whole; phi::DenseTensor weight_whole;
T *w_data = nullptr; T *w_data = nullptr;
int weight_numel; int weight_numel;
bool w_initialized = false; bool w_initialized = false;
...@@ -272,7 +271,7 @@ class CudnnLSTMGPUKernel : public framework::OpKernel<T> { ...@@ -272,7 +271,7 @@ class CudnnLSTMGPUKernel : public framework::OpKernel<T> {
if (!w_initialized) { if (!w_initialized) {
auto weight_list = ctx.MultiInput<phi::DenseTensor>("WeightList"); auto weight_list = ctx.MultiInput<phi::DenseTensor>("WeightList");
bool continuous = bool continuous =
is_continuous<T, std::vector<const Tensor *>>(weight_list); is_continuous<T, std::vector<const phi::DenseTensor *>>(weight_list);
weight_numel = size_sum(weight_list); weight_numel = size_sum(weight_list);
if (!continuous) { if (!continuous) {
...@@ -288,7 +287,7 @@ class CudnnLSTMGPUKernel : public framework::OpKernel<T> { ...@@ -288,7 +287,7 @@ class CudnnLSTMGPUKernel : public framework::OpKernel<T> {
for (size_t i = 0; i < weight_list.size(); ++i) { for (size_t i = 0; i < weight_list.size(); ++i) {
size_t len = weight_list[i]->numel(); size_t len = weight_list[i]->numel();
auto dim = weight_list[i]->dims(); auto dim = weight_list[i]->dims();
const_cast<Tensor *>(weight_list[i]) const_cast<phi::DenseTensor *>(weight_list[i])
->ShareDataWith( ->ShareDataWith(
weight_whole.Slice(static_cast<int64_t>(offset), weight_whole.Slice(static_cast<int64_t>(offset),
static_cast<int64_t>(offset + len))) static_cast<int64_t>(offset + len)))
...@@ -481,12 +480,12 @@ class CudnnLSTMGPUGradKernel : public framework::OpKernel<T> { ...@@ -481,12 +480,12 @@ class CudnnLSTMGPUGradKernel : public framework::OpKernel<T> {
auto place = ctx.GetPlace(); auto place = ctx.GetPlace();
int weight_numel = size_sum(weight_list); int weight_numel = size_sum(weight_list);
bool continuous = bool continuous =
is_continuous<T, std::vector<const Tensor *>>(weight_list); is_continuous<T, std::vector<const phi::DenseTensor *>>(weight_list);
auto stream = auto stream =
reinterpret_cast<const phi::GPUContext &>(ctx.device_context()) reinterpret_cast<const phi::GPUContext &>(ctx.device_context())
.stream(); .stream();
Tensor weight_whole; phi::DenseTensor weight_whole;
T *weight_data = nullptr; T *weight_data = nullptr;
if (!continuous) { if (!continuous) {
...@@ -497,7 +496,7 @@ class CudnnLSTMGPUGradKernel : public framework::OpKernel<T> { ...@@ -497,7 +496,7 @@ class CudnnLSTMGPUGradKernel : public framework::OpKernel<T> {
weight_data = const_cast<T *>(weight_list[0]->data<T>()); weight_data = const_cast<T *>(weight_list[0]->data<T>());
} }
Tensor weight_grad; phi::DenseTensor weight_grad;
phi::funcs::SetConstant<phi::GPUContext, T> zero; phi::funcs::SetConstant<phi::GPUContext, T> zero;
weight_grad.mutable_data<T>({weight_numel}, ctx.GetPlace()); weight_grad.mutable_data<T>({weight_numel}, ctx.GetPlace());
zero(dev_ctx, &weight_grad, static_cast<T>(0.0)); zero(dev_ctx, &weight_grad, static_cast<T>(0.0));
...@@ -559,7 +558,7 @@ class CudnnLSTMGPUGradKernel : public framework::OpKernel<T> { ...@@ -559,7 +558,7 @@ class CudnnLSTMGPUGradKernel : public framework::OpKernel<T> {
SequenceLength, SequenceLength,
&workspace_size, &workspace_size,
&reserve_size, &reserve_size,
const_cast<Tensor *>(state_out)); const_cast<phi::DenseTensor *>(state_out));
phi::DenseTensor workspace_data_; phi::DenseTensor workspace_data_;
workspace_data_.mutable_data<uint8_t>( workspace_data_.mutable_data<uint8_t>(
......
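The cuDNN LSTM kernels above fuse a list of weight tensors into one contiguous buffer (weight_whole) before calling cuDNN, then slice gradients back out at the same offsets. A host-side sketch of the flattening step; the real code does the same with GPU memcpys on a stream:

#include <cstring>
#include <iostream>
#include <vector>

int main() {
  std::vector<std::vector<float>> weight_list = {{1, 2}, {3, 4, 5}, {6}};
  int total = 0;
  for (const auto& w : weight_list) total += static_cast<int>(w.size());

  std::vector<float> weight_whole(total);  // the fused, contiguous buffer
  int offset = 0;                          // running element offset per slice
  for (const auto& w : weight_list) {
    std::memcpy(weight_whole.data() + offset, w.data(),
                w.size() * sizeof(float));
    offset += static_cast<int>(w.size());
  }
  for (float v : weight_whole) std::cout << v << ' ';  // 1 2 3 4 5 6
  std::cout << '\n';
}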
...@@ -18,8 +18,6 @@ limitations under the License. */ ...@@ -18,8 +18,6 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = phi::DenseTensor;
template <typename T> template <typename T>
class CumSumMLUKernel : public framework::OpKernel<T> { class CumSumMLUKernel : public framework::OpKernel<T> {
public: public:
...@@ -34,7 +32,7 @@ class CumSumMLUKernel : public framework::OpKernel<T> { ...@@ -34,7 +32,7 @@ class CumSumMLUKernel : public framework::OpKernel<T> {
out->mutable_data<T>(ctx.GetPlace()); out->mutable_data<T>(ctx.GetPlace());
phi::DenseTensor* input_ptr = const_cast<phi::DenseTensor*>(x); phi::DenseTensor* input_ptr = const_cast<phi::DenseTensor*>(x);
Tensor flat_x(x->type()); phi::DenseTensor flat_x(x->type());
if (flatten) { if (flatten) {
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
axis, axis,
......
...@@ -19,8 +19,6 @@ limitations under the License. */ ...@@ -19,8 +19,6 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = phi::DenseTensor;
static void CumsumImp(const phi::DenseTensor& input, static void CumsumImp(const phi::DenseTensor& input,
phi::DenseTensor* output, phi::DenseTensor* output,
const framework::NPUAttributeMap& attr_input, const framework::NPUAttributeMap& attr_input,
...@@ -30,7 +28,7 @@ static void CumsumImp(const phi::DenseTensor& input, ...@@ -30,7 +28,7 @@ static void CumsumImp(const phi::DenseTensor& input,
.stream(); .stream();
if (framework::TransToProtoVarType(input.dtype()) == if (framework::TransToProtoVarType(input.dtype()) ==
framework::proto::VarType::INT64) { framework::proto::VarType::INT64) {
Tensor tmp_input; phi::DenseTensor tmp_input;
tmp_input.mutable_data<float>(input.dims(), ctx.GetPlace()); tmp_input.mutable_data<float>(input.dims(), ctx.GetPlace());
auto dst_acl_dtype = auto dst_acl_dtype =
ConvertToNpuDtype(framework::TransToProtoVarType(tmp_input.type())); ConvertToNpuDtype(framework::TransToProtoVarType(tmp_input.type()));
...@@ -41,7 +39,7 @@ static void CumsumImp(const phi::DenseTensor& input, ...@@ -41,7 +39,7 @@ static void CumsumImp(const phi::DenseTensor& input,
{{"dst_type", static_cast<int>(dst_acl_dtype)}}); {{"dst_type", static_cast<int>(dst_acl_dtype)}});
cast_runner_1.Run(stream); cast_runner_1.Run(stream);
Tensor tmp_output; phi::DenseTensor tmp_output;
tmp_output.mutable_data<float>(output->dims(), ctx.GetPlace()); tmp_output.mutable_data<float>(output->dims(), ctx.GetPlace());
const auto& runner = const auto& runner =
NpuOpRunner("CumsumD", {tmp_input}, {tmp_output}, attr_input); NpuOpRunner("CumsumD", {tmp_input}, {tmp_output}, attr_input);
...@@ -86,7 +84,7 @@ class CumSumNPUKernel : public framework::OpKernel<T> { ...@@ -86,7 +84,7 @@ class CumSumNPUKernel : public framework::OpKernel<T> {
-1, -1,
axis)); axis));
Tensor new_x(x->type()); phi::DenseTensor new_x(x->type());
new_x.ShareDataWith(*x); new_x.ShareDataWith(*x);
new_x.Resize(phi::make_ddim({x->numel()})); new_x.Resize(phi::make_ddim({x->numel()}));
......
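Beyond the INT64-to-float casting shown above, the cumsum kernels honor flatten/exclusive/reverse attributes. A sketch of the assumed scan semantics (inclusive by default, `exclusive` shifting the window, `reverse` scanning from the back):

#include <iostream>
#include <vector>

std::vector<int> CumSum(const std::vector<int>& x, bool exclusive, bool reverse) {
  const int n = static_cast<int>(x.size());
  std::vector<int> out(n);
  int acc = 0;
  for (int k = 0; k < n; ++k) {
    const int i = reverse ? n - 1 - k : k;  // scan direction
    if (exclusive) { out[i] = acc; acc += x[i]; }
    else           { acc += x[i]; out[i] = acc; }
  }
  return out;
}

int main() {
  for (int v : CumSum({1, 2, 3, 4}, /*exclusive=*/false, /*reverse=*/false))
    std::cout << v << ' ';  // 1 3 6 10
  std::cout << '\n';
  for (int v : CumSum({1, 2, 3, 4}, /*exclusive=*/true, /*reverse=*/true))
    std::cout << v << ' ';  // 9 7 4 0
  std::cout << '\n';
}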
...@@ -21,8 +21,6 @@ limitations under the License. */ ...@@ -21,8 +21,6 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = phi::DenseTensor;
class CVMOp : public framework::OperatorWithKernel { class CVMOp : public framework::OperatorWithKernel {
public: public:
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
......
...@@ -22,7 +22,6 @@ namespace paddle { ...@@ -22,7 +22,6 @@ namespace paddle {
namespace operators { namespace operators {
using phi::PADDLE_CUDA_NUM_THREADS; using phi::PADDLE_CUDA_NUM_THREADS;
using Tensor = phi::DenseTensor;
template <typename T> template <typename T>
__global__ void CvmComputeKernel(const bool use_cvm, __global__ void CvmComputeKernel(const bool use_cvm,
......
...@@ -19,8 +19,6 @@ limitations under the License. */ ...@@ -19,8 +19,6 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = phi::DenseTensor;
template <typename T> template <typename T>
void CvmComputeKernel(const bool use_cvm, void CvmComputeKernel(const bool use_cvm,
const int64_t item_width, const int64_t item_width,
......
...@@ -23,7 +23,6 @@ limitations under the License. */ ...@@ -23,7 +23,6 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = phi::DenseTensor;
using DataLayout = phi::DataLayout; using DataLayout = phi::DataLayout;
template <typename T> template <typename T>
...@@ -483,9 +482,9 @@ class DataNormGradOp : public framework::OperatorWithKernel { ...@@ -483,9 +482,9 @@ class DataNormGradOp : public framework::OperatorWithKernel {
PADDLE_THROW(platform::errors::InvalidArgument( PADDLE_THROW(platform::errors::InvalidArgument(
"Y@GRAD can not be found for computation")); "Y@GRAD can not be found for computation"));
} }
const Tensor *t = nullptr; const phi::DenseTensor *t = nullptr;
if (var->IsType<Tensor>()) { if (var->IsType<phi::DenseTensor>()) {
t = &var->Get<Tensor>(); t = &var->Get<phi::DenseTensor>();
} else if (var->IsType<phi::DenseTensor>()) { } else if (var->IsType<phi::DenseTensor>()) {
t = &var->Get<phi::DenseTensor>(); t = &var->Get<phi::DenseTensor>();
} }
...@@ -523,7 +522,7 @@ class DataNormGradKernel<phi::CPUContext, T> : public framework::OpKernel<T> { ...@@ -523,7 +522,7 @@ class DataNormGradKernel<phi::CPUContext, T> : public framework::OpKernel<T> {
(data_layout == DataLayout::kNCHW ? x_dims[1] (data_layout == DataLayout::kNCHW ? x_dims[1]
: x_dims[x_dims.size() - 1]); : x_dims[x_dims.size() - 1]);
// init output // init output
Tensor *d_x = nullptr; phi::DenseTensor *d_x = nullptr;
if (ctx.HasOutput(framework::GradVarName("X"))) { if (ctx.HasOutput(framework::GradVarName("X"))) {
d_x = ctx.Output<phi::DenseTensor>(framework::GradVarName("X")); d_x = ctx.Output<phi::DenseTensor>(framework::GradVarName("X"));
} }
...@@ -587,12 +586,12 @@ class DataNormGradKernel<phi::CPUContext, T> : public framework::OpKernel<T> { ...@@ -587,12 +586,12 @@ class DataNormGradKernel<phi::CPUContext, T> : public framework::OpKernel<T> {
EigenVectorArrayMap<T> d_bias_arr(d_bias_data, C); EigenVectorArrayMap<T> d_bias_arr(d_bias_data, C);
EigenVectorArrayMap<T> d_scale_arr(d_scale_data, C); EigenVectorArrayMap<T> d_scale_arr(d_scale_data, C);
Tensor dy_sum; phi::DenseTensor dy_sum;
dy_sum.Resize({C}); dy_sum.Resize({C});
dy_sum.mutable_data<T>(ctx.GetPlace()); dy_sum.mutable_data<T>(ctx.GetPlace());
EigenVectorArrayMap<T> dy_sum_arr( EigenVectorArrayMap<T> dy_sum_arr(
dy_sum.mutable_data<T>(ctx.GetPlace()), C); dy_sum.mutable_data<T>(ctx.GetPlace()), C);
Tensor dy_mul_x_sub_mean_mul_invstd_sum; phi::DenseTensor dy_mul_x_sub_mean_mul_invstd_sum;
dy_mul_x_sub_mean_mul_invstd_sum.Resize({C}); dy_mul_x_sub_mean_mul_invstd_sum.Resize({C});
dy_mul_x_sub_mean_mul_invstd_sum.mutable_data<T>(ctx.GetPlace()); dy_mul_x_sub_mean_mul_invstd_sum.mutable_data<T>(ctx.GetPlace());
EigenVectorArrayMap<T> dy_mul_x_sub_mean_mul_invstd_sum_arr( EigenVectorArrayMap<T> dy_mul_x_sub_mean_mul_invstd_sum_arr(
......
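DataNormGradKernel above stages two per-channel buffers, dy_sum and dy_mul_x_sub_mean_mul_invstd_sum, before forming the parameter gradients. A hedged sketch of those column reductions for an N x C activation (names and layout are assumptions for illustration):

#include <iostream>
#include <vector>

int main() {
  const int N = 2, C = 3;
  const float x[N * C]  = {1, 2, 3, 4, 5, 6};
  const float dy[N * C] = {1, 1, 1, 2, 2, 2};
  const float mean[C] = {2.5f, 3.5f, 4.5f}, invstd[C] = {1.f, 1.f, 1.f};

  std::vector<float> dy_sum(C, 0.f), dy_x_sub_mean_sum(C, 0.f);
  for (int n = 0; n < N; ++n)
    for (int c = 0; c < C; ++c) {
      const float d = dy[n * C + c];
      dy_sum[c] += d;                                            // sum of dy
      dy_x_sub_mean_sum[c] += d * (x[n * C + c] - mean[c]) * invstd[c];
    }
  for (int c = 0; c < C; ++c)
    std::cout << dy_sum[c] << '/' << dy_x_sub_mean_sum[c] << ' ';
  std::cout << '\n';  // 3/1.5 3/1.5 3/1.5
}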
...@@ -26,7 +26,6 @@ limitations under the License. */ ...@@ -26,7 +26,6 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = phi::DenseTensor;
using DataLayout = phi::DataLayout; using DataLayout = phi::DataLayout;
using phi::PADDLE_CUDA_NUM_THREADS; using phi::PADDLE_CUDA_NUM_THREADS;
...@@ -166,7 +165,7 @@ class DataNormGradKernel<phi::GPUContext, T> : public framework::OpKernel<T> { ...@@ -166,7 +165,7 @@ class DataNormGradKernel<phi::GPUContext, T> : public framework::OpKernel<T> {
const int C = x_dims[1]; const int C = x_dims[1];
// init output // init output
Tensor *d_x = nullptr; phi::DenseTensor *d_x = nullptr;
if (ctx.HasOutput(framework::GradVarName("X"))) { if (ctx.HasOutput(framework::GradVarName("X"))) {
d_x = ctx.Output<phi::DenseTensor>(framework::GradVarName("X")); d_x = ctx.Output<phi::DenseTensor>(framework::GradVarName("X"));
} }
......
...@@ -18,8 +18,6 @@ limitations under the License. */ ...@@ -18,8 +18,6 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = phi::DenseTensor;
template <typename T> template <typename T>
class DeformableConvMLUKernel : public framework::OpKernel<T> { class DeformableConvMLUKernel : public framework::OpKernel<T> {
public: public:
...@@ -58,29 +56,29 @@ class DeformableConvMLUKernel : public framework::OpKernel<T> { ...@@ -58,29 +56,29 @@ class DeformableConvMLUKernel : public framework::OpKernel<T> {
im2col_step); im2col_step);
const std::vector<int> perm_to_nhwc = {0, 2, 3, 1}; const std::vector<int> perm_to_nhwc = {0, 2, 3, 1};
Tensor trans_input(input->dtype()); phi::DenseTensor trans_input(input->dtype());
TransposeFromMLUTensor<T>( TransposeFromMLUTensor<T>(
ctx, perm_to_nhwc, input, &trans_input, true /*need_reshape_or_alloc*/); ctx, perm_to_nhwc, input, &trans_input, true /*need_reshape_or_alloc*/);
Tensor trans_offset(offset->dtype()); phi::DenseTensor trans_offset(offset->dtype());
TransposeFromMLUTensor<T>(ctx, TransposeFromMLUTensor<T>(ctx,
perm_to_nhwc, perm_to_nhwc,
offset, offset,
&trans_offset, &trans_offset,
true /*need_reshape_or_alloc*/); true /*need_reshape_or_alloc*/);
Tensor trans_mask(mask->dtype()); phi::DenseTensor trans_mask(mask->dtype());
TransposeFromMLUTensor<T>( TransposeFromMLUTensor<T>(
ctx, perm_to_nhwc, mask, &trans_mask, true /*need_reshape_or_alloc*/); ctx, perm_to_nhwc, mask, &trans_mask, true /*need_reshape_or_alloc*/);
Tensor trans_filter(filter->dtype()); phi::DenseTensor trans_filter(filter->dtype());
TransposeFromMLUTensor<T>(ctx, TransposeFromMLUTensor<T>(ctx,
perm_to_nhwc, perm_to_nhwc,
filter, filter,
&trans_filter, &trans_filter,
true /*need_reshape_or_alloc*/); true /*need_reshape_or_alloc*/);
Tensor tmp_output(output->dtype()); phi::DenseTensor tmp_output(output->dtype());
auto output_dims = output->dims(); auto output_dims = output->dims();
tmp_output.mutable_data<T>( tmp_output.mutable_data<T>(
{output_dims[0], output_dims[2], output_dims[3], output_dims[1]}, {output_dims[0], output_dims[2], output_dims[3], output_dims[1]},
...@@ -167,54 +165,54 @@ class DeformableConvGradMLUKernel : public framework::OpKernel<T> { ...@@ -167,54 +165,54 @@ class DeformableConvGradMLUKernel : public framework::OpKernel<T> {
groups, groups,
im2col_step); im2col_step);
Tensor tmp_input_grad; phi::DenseTensor tmp_input_grad;
auto input_dims = input->dims(); auto input_dims = input->dims();
tmp_input_grad.mutable_data<T>( tmp_input_grad.mutable_data<T>(
{input_dims[0], input_dims[2], input_dims[3], input_dims[1]}, {input_dims[0], input_dims[2], input_dims[3], input_dims[1]},
ctx.GetPlace()); ctx.GetPlace());
Tensor tmp_filter_grad; phi::DenseTensor tmp_filter_grad;
auto filter_dims = filter->dims(); auto filter_dims = filter->dims();
tmp_filter_grad.mutable_data<T>( tmp_filter_grad.mutable_data<T>(
{filter_dims[0], filter_dims[2], filter_dims[3], filter_dims[1]}, {filter_dims[0], filter_dims[2], filter_dims[3], filter_dims[1]},
ctx.GetPlace()); ctx.GetPlace());
Tensor tmp_offset_grad; phi::DenseTensor tmp_offset_grad;
auto offset_dims = offset->dims(); auto offset_dims = offset->dims();
tmp_offset_grad.mutable_data<T>( tmp_offset_grad.mutable_data<T>(
{offset_dims[0], offset_dims[2], offset_dims[3], offset_dims[1]}, {offset_dims[0], offset_dims[2], offset_dims[3], offset_dims[1]},
ctx.GetPlace()); ctx.GetPlace());
Tensor tmp_mask_grad; phi::DenseTensor tmp_mask_grad;
auto mask_dims = mask->dims(); auto mask_dims = mask->dims();
tmp_mask_grad.mutable_data<T>( tmp_mask_grad.mutable_data<T>(
{mask_dims[0], mask_dims[2], mask_dims[3], mask_dims[1]}, {mask_dims[0], mask_dims[2], mask_dims[3], mask_dims[1]},
ctx.GetPlace()); ctx.GetPlace());
const std::vector<int> perm_to_nhwc = {0, 2, 3, 1}; const std::vector<int> perm_to_nhwc = {0, 2, 3, 1};
Tensor trans_output_grad(output_grad->dtype()); phi::DenseTensor trans_output_grad(output_grad->dtype());
TransposeFromMLUTensor<T>(ctx, TransposeFromMLUTensor<T>(ctx,
perm_to_nhwc, perm_to_nhwc,
output_grad, output_grad,
&trans_output_grad, &trans_output_grad,
true /*need_reshape_or_alloc*/); true /*need_reshape_or_alloc*/);
Tensor trans_input(input->dtype()); phi::DenseTensor trans_input(input->dtype());
TransposeFromMLUTensor<T>( TransposeFromMLUTensor<T>(
ctx, perm_to_nhwc, input, &trans_input, true /*need_reshape_or_alloc*/); ctx, perm_to_nhwc, input, &trans_input, true /*need_reshape_or_alloc*/);
Tensor trans_offset(offset->dtype()); phi::DenseTensor trans_offset(offset->dtype());
TransposeFromMLUTensor<T>(ctx, TransposeFromMLUTensor<T>(ctx,
perm_to_nhwc, perm_to_nhwc,
offset, offset,
&trans_offset, &trans_offset,
true /*need_reshape_or_alloc*/); true /*need_reshape_or_alloc*/);
Tensor trans_mask(mask->dtype()); phi::DenseTensor trans_mask(mask->dtype());
TransposeFromMLUTensor<T>( TransposeFromMLUTensor<T>(
ctx, perm_to_nhwc, mask, &trans_mask, true /*need_reshape_or_alloc*/); ctx, perm_to_nhwc, mask, &trans_mask, true /*need_reshape_or_alloc*/);
Tensor trans_filter(filter->dtype()); phi::DenseTensor trans_filter(filter->dtype());
TransposeFromMLUTensor<T>(ctx, TransposeFromMLUTensor<T>(ctx,
perm_to_nhwc, perm_to_nhwc,
filter, filter,
......
...@@ -39,7 +39,6 @@ ...@@ -39,7 +39,6 @@
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = phi::DenseTensor;
using phi::PADDLE_CUDA_NUM_THREADS; using phi::PADDLE_CUDA_NUM_THREADS;
static inline int GET_BLOCKS(const int N) { static inline int GET_BLOCKS(const int N) {
......
...@@ -33,8 +33,6 @@ ...@@ -33,8 +33,6 @@
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = phi::DenseTensor;
template <typename T> template <typename T>
T bilinear_interp( T bilinear_interp(
const T* data, const T x, const T y, const int width, const int height) { const T* data, const T x, const T y, const int width, const int height) {
...@@ -518,7 +516,7 @@ class DeformablePSROIPoolGradCPUKernel : public framework::OpKernel<T> { ...@@ -518,7 +516,7 @@ class DeformablePSROIPoolGradCPUKernel : public framework::OpKernel<T> {
const int num_classes = no_trans ? 1 : channels_trans / 2; const int num_classes = no_trans ? 1 : channels_trans / 2;
const int channels_each_class = const int channels_each_class =
no_trans ? output_dim : output_dim / num_classes; no_trans ? output_dim : output_dim / num_classes;
Tensor roi_batch_id_list; phi::DenseTensor roi_batch_id_list;
roi_batch_id_list.Resize({num_rois}); roi_batch_id_list.Resize({num_rois});
int* roi_batch_id_data = int* roi_batch_id_data =
roi_batch_id_list.mutable_data<int>(ctx.GetPlace()); roi_batch_id_list.mutable_data<int>(ctx.GetPlace());
......
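deformable_psroi_pooling declares a bilinear_interp helper with the signature shown above. A standalone sketch of what such a sampler conventionally computes, with the clamping convention assumed:

#include <algorithm>
#include <cmath>
#include <iostream>

// Sample a height x width grid at fractional (x, y) by blending the four
// nearest neighbors, clamping indices to the grid.
float BilinearInterp(const float* data, float x, float y, int width, int height) {
  const int x0 = std::max(0, std::min(static_cast<int>(std::floor(x)), width - 1));
  const int y0 = std::max(0, std::min(static_cast<int>(std::floor(y)), height - 1));
  const int x1 = std::min(x0 + 1, width - 1);
  const int y1 = std::min(y0 + 1, height - 1);
  const float lx = x - x0, ly = y - y0;  // fractional offsets
  const float v00 = data[y0 * width + x0], v01 = data[y0 * width + x1];
  const float v10 = data[y1 * width + x0], v11 = data[y1 * width + x1];
  return (1 - ly) * ((1 - lx) * v00 + lx * v01) +
         ly * ((1 - lx) * v10 + lx * v11);
}

int main() {
  const float img[4] = {0.f, 1.f,   // 2x2 image, row-major
                        2.f, 3.f};
  std::cout << BilinearInterp(img, 0.5f, 0.5f, 2, 2) << "\n";  // 1.5
}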
...@@ -30,8 +30,6 @@ namespace cub = hipcub; ...@@ -30,8 +30,6 @@ namespace cub = hipcub;
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = phi::DenseTensor;
#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0)) #define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
int const kThreadsPerBlock = sizeof(uint64_t) * 8; int const kThreadsPerBlock = sizeof(uint64_t) * 8;
...@@ -47,11 +45,11 @@ struct RangeInitFunctor { ...@@ -47,11 +45,11 @@ struct RangeInitFunctor {
template <typename T> template <typename T>
static void SortDescending(const phi::GPUContext &ctx, static void SortDescending(const phi::GPUContext &ctx,
const Tensor &value, const phi::DenseTensor &value,
Tensor *value_out, phi::DenseTensor *value_out,
Tensor *index_out) { phi::DenseTensor *index_out) {
int num = static_cast<int>(value.numel()); int num = static_cast<int>(value.numel());
Tensor index_in_t; phi::DenseTensor index_in_t;
int *idx_in = index_in_t.mutable_data<int>({num}, ctx.GetPlace()); int *idx_in = index_in_t.mutable_data<int>({num}, ctx.GetPlace());
platform::ForRange<phi::GPUContext> for_range(ctx, num); platform::ForRange<phi::GPUContext> for_range(ctx, num);
for_range(RangeInitFunctor{0, 1, idx_in}); for_range(RangeInitFunctor{0, 1, idx_in});
...@@ -287,10 +285,10 @@ static __global__ void NMSKernel(const int n_boxes, ...@@ -287,10 +285,10 @@ static __global__ void NMSKernel(const int n_boxes,
template <typename T> template <typename T>
static void NMS(const phi::GPUContext &ctx, static void NMS(const phi::GPUContext &ctx,
const Tensor &proposals, const phi::DenseTensor &proposals,
const Tensor &sorted_indices, const phi::DenseTensor &sorted_indices,
const T nms_threshold, const T nms_threshold,
Tensor *keep_out, phi::DenseTensor *keep_out,
bool pixel_offset = true) { bool pixel_offset = true) {
int boxes_num = proposals.dims()[0]; int boxes_num = proposals.dims()[0];
const int col_blocks = DIVUP(boxes_num, kThreadsPerBlock); const int col_blocks = DIVUP(boxes_num, kThreadsPerBlock);
......
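The proposal NMS code above sizes its bitmasks with DIVUP and kThreadsPerBlock = sizeof(uint64_t) * 8: each box owns col_blocks 64-bit words, one bit per box it suppresses. A host-side sketch of that bookkeeping and the final suppression scan; the IoU test itself is omitted:

#include <cstdint>
#include <iostream>
#include <vector>

constexpr int kThreadsPerBlock = sizeof(uint64_t) * 8;  // 64 bits per word
inline int DivUp(int m, int n) { return m / n + (m % n > 0); }

int main() {
  const int boxes = 130;
  const int col_blocks = DivUp(boxes, kThreadsPerBlock);  // 3 words per row
  std::vector<uint64_t> mask(static_cast<size_t>(boxes) * col_blocks, 0);
  // Pretend box 0 overlaps box 70 past the threshold: set bit 70 in row 0.
  mask[0 * col_blocks + 70 / kThreadsPerBlock] |= 1ULL << (70 % kThreadsPerBlock);

  std::vector<uint64_t> removed(col_blocks, 0);
  std::vector<int> keep;
  for (int i = 0; i < boxes; ++i) {
    const int block = i / kThreadsPerBlock, bit = i % kThreadsPerBlock;
    if (removed[block] & (1ULL << bit)) continue;  // already suppressed
    keep.push_back(i);
    for (int b = 0; b < col_blocks; ++b) removed[b] |= mask[i * col_blocks + b];
  }
  std::cout << "kept " << keep.size() << " of " << boxes << "\n";  // 129
}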
...@@ -18,8 +18,6 @@ limitations under the License. */ ...@@ -18,8 +18,6 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = phi::DenseTensor;
class BipartiteMatchOp : public framework::OperatorWithKernel { class BipartiteMatchOp : public framework::OperatorWithKernel {
public: public:
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
...@@ -234,7 +232,7 @@ class BipartiteMatchKernel : public framework::OpKernel<T> { ...@@ -234,7 +232,7 @@ class BipartiteMatchKernel : public framework::OpKernel<T> {
auto lod = dist_mat->lod().back(); auto lod = dist_mat->lod().back();
for (size_t i = 0; i < lod.size() - 1; ++i) { for (size_t i = 0; i < lod.size() - 1; ++i) {
if (lod[i + 1] > lod[i]) { if (lod[i + 1] > lod[i]) {
Tensor one_ins = dist_mat->Slice(lod[i], lod[i + 1]); phi::DenseTensor one_ins = dist_mat->Slice(lod[i], lod[i + 1]);
BipartiteMatch(one_ins, indices + i * col, dist + i * col); BipartiteMatch(one_ins, indices + i * col, dist + i * col);
if (type == "per_prediction") { if (type == "per_prediction") {
ArgMaxMatch(one_ins, indices + i * col, dist + i * col, threshold); ArgMaxMatch(one_ins, indices + i * col, dist + i * col, threshold);
......
...@@ -22,7 +22,6 @@ limitations under the License. */ ...@@ -22,7 +22,6 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = phi::DenseTensor;
using LoDTenso = phi::DenseTensor; using LoDTenso = phi::DenseTensor;
static constexpr int ImInfoSize = 3; static constexpr int ImInfoSize = 3;
......
...@@ -19,8 +19,6 @@ limitations under the License. */ ...@@ -19,8 +19,6 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = phi::DenseTensor;
template <typename DeviceContext, typename T> template <typename DeviceContext, typename T>
class BoxClipKernel : public framework::OpKernel<T> { class BoxClipKernel : public framework::OpKernel<T> {
public: public:
...@@ -42,9 +40,10 @@ class BoxClipKernel : public framework::OpKernel<T> { ...@@ -42,9 +40,10 @@ class BoxClipKernel : public framework::OpKernel<T> {
auto box_lod = input_box->lod().back(); auto box_lod = input_box->lod().back();
int64_t n = static_cast<int64_t>(box_lod.size() - 1); int64_t n = static_cast<int64_t>(box_lod.size() - 1);
for (int i = 0; i < n; ++i) { for (int i = 0; i < n; ++i) {
Tensor im_info_slice = im_info->Slice(i, i + 1); phi::DenseTensor im_info_slice = im_info->Slice(i, i + 1);
Tensor box_slice = input_box->Slice(box_lod[i], box_lod[i + 1]); phi::DenseTensor box_slice = input_box->Slice(box_lod[i], box_lod[i + 1]);
Tensor output_slice = output_box->Slice(box_lod[i], box_lod[i + 1]); phi::DenseTensor output_slice =
output_box->Slice(box_lod[i], box_lod[i + 1]);
ClipTiledBoxes<T>(dev_ctx, im_info_slice, box_slice, &output_slice); ClipTiledBoxes<T>(dev_ctx, im_info_slice, box_slice, &output_slice);
} }
} }
......
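BoxClipKernel above slices boxes per image via LoD offsets and calls ClipTiledBoxes on each slice. A sketch of the per-box clamp such a helper conventionally applies; the coordinate order and the end-inclusive size - 1 bound are assumptions:

#include <algorithm>
#include <iostream>

// Clamp an (xmin, ymin, xmax, ymax) box to an im_h x im_w image.
void ClipBox(float* box, float im_h, float im_w) {
  box[0] = std::max(std::min(box[0], im_w - 1), 0.f);  // xmin
  box[1] = std::max(std::min(box[1], im_h - 1), 0.f);  // ymin
  box[2] = std::max(std::min(box[2], im_w - 1), 0.f);  // xmax
  box[3] = std::max(std::min(box[3], im_h - 1), 0.f);  // ymax
}

int main() {
  float box[4] = {-4.f, 2.f, 650.f, 700.f};
  ClipBox(box, /*im_h=*/480.f, /*im_w=*/640.f);
  std::cout << box[0] << ' ' << box[1] << ' '
            << box[2] << ' ' << box[3] << "\n";  // 0 2 639 479
}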
...@@ -18,8 +18,6 @@ limitations under the License. */ ...@@ -18,8 +18,6 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = phi::DenseTensor;
template <typename T> template <typename T>
struct BoxCoderFunction { struct BoxCoderFunction {
public: public:
...@@ -28,31 +26,31 @@ struct BoxCoderFunction { ...@@ -28,31 +26,31 @@ struct BoxCoderFunction {
stream = ctx.template device_context<paddle::platform::NPUDeviceContext>() stream = ctx.template device_context<paddle::platform::NPUDeviceContext>()
.stream(); .stream();
} }
Tensor Adds(const phi::DenseTensor& x, float scalar) { phi::DenseTensor Adds(const phi::DenseTensor& x, float scalar) {
Tensor y; phi::DenseTensor y;
y.mutable_data<T>(x.dims(), place); y.mutable_data<T>(x.dims(), place);
const auto& runner = NpuOpRunner("Adds", {x}, {y}, {{"value", scalar}}); const auto& runner = NpuOpRunner("Adds", {x}, {y}, {{"value", scalar}});
runner.Run(stream); runner.Run(stream);
return y; return y;
} }
Tensor Muls(const phi::DenseTensor& x, float scalar) { phi::DenseTensor Muls(const phi::DenseTensor& x, float scalar) {
Tensor y; phi::DenseTensor y;
y.mutable_data<T>(x.dims(), place); y.mutable_data<T>(x.dims(), place);
const auto& runner = NpuOpRunner("Muls", {x}, {y}, {{"value", scalar}}); const auto& runner = NpuOpRunner("Muls", {x}, {y}, {{"value", scalar}});
runner.Run(stream); runner.Run(stream);
return y; return y;
} }
Tensor Mul(const phi::DenseTensor& x, const phi::DenseTensor& y) { phi::DenseTensor Mul(const phi::DenseTensor& x, const phi::DenseTensor& y) {
Tensor z; phi::DenseTensor z;
z.mutable_data<T>(x.dims(), place); z.mutable_data<T>(x.dims(), place);
const auto& runner = NpuOpRunner("Mul", {x, y}, {z}, {}); const auto& runner = NpuOpRunner("Mul", {x, y}, {z}, {});
runner.Run(stream); runner.Run(stream);
return z; return z;
} }
Tensor SubWithBroadCast(const phi::DenseTensor& x, phi::DenseTensor SubWithBroadCast(const phi::DenseTensor& x,
const phi::DenseTensor& y, const phi::DenseTensor& y,
const framework::DDim& shape) { const framework::DDim& shape) {
Tensor z; phi::DenseTensor z;
z.mutable_data<T>(shape, place); z.mutable_data<T>(shape, place);
const auto& runner = NpuOpRunner("Sub", {x, y}, {z}, {}); const auto& runner = NpuOpRunner("Sub", {x, y}, {z}, {});
runner.Run(stream); runner.Run(stream);
...@@ -66,10 +64,10 @@ struct BoxCoderFunction { ...@@ -66,10 +64,10 @@ struct BoxCoderFunction {
const auto& runner = NpuOpRunner("Div", {x, y}, {*z}, {}); const auto& runner = NpuOpRunner("Div", {x, y}, {*z}, {});
runner.Run(stream); runner.Run(stream);
} }
Tensor DivWithBroadCast(const phi::DenseTensor& x, phi::DenseTensor DivWithBroadCast(const phi::DenseTensor& x,
const phi::DenseTensor& y, const phi::DenseTensor& y,
const framework::DDim& shape) { const framework::DDim& shape) {
Tensor z; phi::DenseTensor z;
DivWithBroadCastVoid(x, y, shape, &z); DivWithBroadCastVoid(x, y, shape, &z);
return z; return z;
} }
...@@ -81,10 +79,10 @@ struct BoxCoderFunction { ...@@ -81,10 +79,10 @@ struct BoxCoderFunction {
const auto& runner = NpuOpRunner("Mul", {x, y}, {*z}, {}); const auto& runner = NpuOpRunner("Mul", {x, y}, {*z}, {});
runner.Run(stream); runner.Run(stream);
} }
Tensor MulWithBroadCast(const phi::DenseTensor& x, phi::DenseTensor MulWithBroadCast(const phi::DenseTensor& x,
const phi::DenseTensor& y, const phi::DenseTensor& y,
const framework::DDim& shape) { const framework::DDim& shape) {
Tensor z; phi::DenseTensor z;
MulWithBroadCastVoid(x, y, shape, &z); MulWithBroadCastVoid(x, y, shape, &z);
return z; return z;
} }
...@@ -96,36 +94,36 @@ struct BoxCoderFunction { ...@@ -96,36 +94,36 @@ struct BoxCoderFunction {
const auto& runner = NpuOpRunner("AddV2", {x, y}, {*z}, {}); const auto& runner = NpuOpRunner("AddV2", {x, y}, {*z}, {});
runner.Run(stream); runner.Run(stream);
} }
Tensor AddWithBroadCast(const phi::DenseTensor& x, phi::DenseTensor AddWithBroadCast(const phi::DenseTensor& x,
const phi::DenseTensor& y, const phi::DenseTensor& y,
const framework::DDim& shape) { const framework::DDim& shape) {
Tensor z; phi::DenseTensor z;
AddWithBroadCastVoid(x, y, shape, &z); AddWithBroadCastVoid(x, y, shape, &z);
return z; return z;
} }
Tensor Abs(const phi::DenseTensor& x) { phi::DenseTensor Abs(const phi::DenseTensor& x) {
Tensor y; phi::DenseTensor y;
y.mutable_data<T>(x.dims(), place); y.mutable_data<T>(x.dims(), place);
const auto& runner = NpuOpRunner("Abs", {x}, {y}, {}); const auto& runner = NpuOpRunner("Abs", {x}, {y}, {});
runner.Run(stream); runner.Run(stream);
return y; return y;
} }
Tensor Log(const phi::DenseTensor& x) { phi::DenseTensor Log(const phi::DenseTensor& x) {
Tensor t_x_m1 = Adds(x, -1); phi::DenseTensor t_x_m1 = Adds(x, -1);
Tensor y; phi::DenseTensor y;
y.mutable_data<T>(x.dims(), place); y.mutable_data<T>(x.dims(), place);
const auto& runner = NpuOpRunner("Log1p", {t_x_m1}, {y}, {}); const auto& runner = NpuOpRunner("Log1p", {t_x_m1}, {y}, {});
runner.Run(stream); runner.Run(stream);
return y; return y;
} }
Tensor Exp(const phi::DenseTensor& x) { phi::DenseTensor Exp(const phi::DenseTensor& x) {
Tensor y; phi::DenseTensor y;
y.mutable_data<T>(x.dims(), place); y.mutable_data<T>(x.dims(), place);
const auto& runner = NpuOpRunner("Exp", {x}, {y}, {}); const auto& runner = NpuOpRunner("Exp", {x}, {y}, {});
runner.Run(stream); runner.Run(stream);
return y; return y;
} }
Tensor Dot(const phi::DenseTensor& x, const phi::DenseTensor& y) { phi::DenseTensor Dot(const phi::DenseTensor& x, const phi::DenseTensor& y) {
auto dim_x = x.dims(); auto dim_x = x.dims();
auto dim_y = y.dims(); auto dim_y = y.dims();
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
...@@ -145,7 +143,7 @@ struct BoxCoderFunction { ...@@ -145,7 +143,7 @@ struct BoxCoderFunction {
"got dim_x[1] = %d, dim_y[0] = %d.", "got dim_x[1] = %d, dim_y[0] = %d.",
dim_x[1], dim_x[1],
dim_y[0])); dim_y[0]));
Tensor z; phi::DenseTensor z;
z.mutable_data<T>({dim_x[0], dim_y[1]}, place); z.mutable_data<T>({dim_x[0], dim_y[1]}, place);
const auto& runner = const auto& runner =
NpuOpRunner("MatMul", NpuOpRunner("MatMul",
...@@ -155,7 +153,7 @@ struct BoxCoderFunction { ...@@ -155,7 +153,7 @@ struct BoxCoderFunction {
runner.Run(stream); runner.Run(stream);
return z; return z;
} }
void ConcatVoid(const std::vector<Tensor>& inputs, void ConcatVoid(const std::vector<phi::DenseTensor>& inputs,
const framework::DDim& shape_out, const framework::DDim& shape_out,
int axis, int axis,
phi::DenseTensor* output) { phi::DenseTensor* output) {
...@@ -172,18 +170,18 @@ struct BoxCoderFunction { ...@@ -172,18 +170,18 @@ struct BoxCoderFunction {
runner.AddInputNames(names); runner.AddInputNames(names);
runner.Run(stream); runner.Run(stream);
} }
Tensor Concat(const std::vector<Tensor>& inputs, phi::DenseTensor Concat(const std::vector<phi::DenseTensor>& inputs,
const framework::DDim& shape_out, const framework::DDim& shape_out,
int axis) { int axis) {
Tensor output; phi::DenseTensor output;
ConcatVoid(inputs, shape_out, axis, &output); ConcatVoid(inputs, shape_out, axis, &output);
return output; return output;
} }
Tensor Slice(const phi::DenseTensor& x, phi::DenseTensor Slice(const phi::DenseTensor& x,
const std::vector<int>& offsets, const std::vector<int>& offsets,
const std::vector<int>& size, const std::vector<int>& size,
const framework::DDim& shape) { const framework::DDim& shape) {
Tensor y; phi::DenseTensor y;
y.mutable_data<T>(shape, place); y.mutable_data<T>(shape, place);
const auto& runner = const auto& runner =
NpuOpRunner("SliceD", {x}, {y}, {{"offsets", offsets}, {"size", size}}); NpuOpRunner("SliceD", {x}, {y}, {{"offsets", offsets}, {"size", size}});
...@@ -218,8 +216,8 @@ void BoxCoderEnc(const framework::ExecutionContext& ctx, ...@@ -218,8 +216,8 @@ void BoxCoderEnc(const framework::ExecutionContext& ctx,
auto M = pb->dims()[0]; auto M = pb->dims()[0];
auto N = tb->dims()[0]; auto N = tb->dims()[0];
auto shape_0 = phi::make_ddim({4, 2}); auto shape_0 = phi::make_ddim({4, 2});
Tensor m_diff; phi::DenseTensor m_diff;
Tensor m_aver; phi::DenseTensor m_aver;
std::vector<T> vec_diff = {static_cast<T>(-1), std::vector<T> vec_diff = {static_cast<T>(-1),
static_cast<T>(0), static_cast<T>(0),
static_cast<T>(0), static_cast<T>(0),
...@@ -240,10 +238,10 @@ void BoxCoderEnc(const framework::ExecutionContext& ctx, ...@@ -240,10 +238,10 @@ void BoxCoderEnc(const framework::ExecutionContext& ctx,
Vector2Tensor<T>(ctx, vec_aver, shape_0, &m_aver); Vector2Tensor<T>(ctx, vec_aver, shape_0, &m_aver);
BoxCoderFunction<T> F(ctx); BoxCoderFunction<T> F(ctx);
Tensor pb_xy = F.Adds(F.Dot(*pb, m_aver), (norm ? 0 : 0.5)); phi::DenseTensor pb_xy = F.Adds(F.Dot(*pb, m_aver), (norm ? 0 : 0.5));
Tensor pb_wh = F.Adds(F.Dot(*pb, m_diff), (norm ? 0 : 1)); phi::DenseTensor pb_wh = F.Adds(F.Dot(*pb, m_diff), (norm ? 0 : 1));
Tensor tb_xy = F.Dot(*tb, m_aver); phi::DenseTensor tb_xy = F.Dot(*tb, m_aver);
Tensor tb_wh = F.Adds(F.Dot(*tb, m_diff), (norm ? 0 : 1)); phi::DenseTensor tb_wh = F.Adds(F.Dot(*tb, m_diff), (norm ? 0 : 1));
pb_xy.Resize({1, M, 2}); pb_xy.Resize({1, M, 2});
pb_wh.Resize({1, M, 2}); pb_wh.Resize({1, M, 2});
...@@ -253,15 +251,16 @@ void BoxCoderEnc(const framework::ExecutionContext& ctx, ...@@ -253,15 +251,16 @@ void BoxCoderEnc(const framework::ExecutionContext& ctx,
auto shape_half = phi::make_ddim({N, M, 2}); auto shape_half = phi::make_ddim({N, M, 2});
auto shape_full = phi::make_ddim({N, M, 4}); auto shape_full = phi::make_ddim({N, M, 4});
Tensor out_xy_0 = F.DivWithBroadCast( phi::DenseTensor out_xy_0 = F.DivWithBroadCast(
F.SubWithBroadCast(tb_xy, pb_xy, shape_half), pb_wh, shape_half); F.SubWithBroadCast(tb_xy, pb_xy, shape_half), pb_wh, shape_half);
Tensor out_wh_0 = F.Log(F.Abs(F.DivWithBroadCast(tb_wh, pb_wh, shape_half))); phi::DenseTensor out_wh_0 =
Tensor out_0 = F.Concat({out_xy_0, out_wh_0}, shape_full, 2); F.Log(F.Abs(F.DivWithBroadCast(tb_wh, pb_wh, shape_half)));
phi::DenseTensor out_0 = F.Concat({out_xy_0, out_wh_0}, shape_full, 2);
if (pbv) { if (pbv) {
F.DivWithBroadCastVoid(out_0, *pbv, shape_full, out); F.DivWithBroadCastVoid(out_0, *pbv, shape_full, out);
} else { } else {
Tensor t_var; phi::DenseTensor t_var;
std::vector<T> vec_var(4); std::vector<T> vec_var(4);
for (auto i = 0; i < 4; i++) { for (auto i = 0; i < 4; i++) {
vec_var[i] = static_cast<T>(variance[i]); vec_var[i] = static_cast<T>(variance[i]);
...@@ -281,8 +280,8 @@ void BoxCoderDec(const framework::ExecutionContext& ctx, ...@@ -281,8 +280,8 @@ void BoxCoderDec(const framework::ExecutionContext& ctx,
int axis, int axis,
phi::DenseTensor* out) { phi::DenseTensor* out) {
auto shape_0 = phi::make_ddim({4, 2}); auto shape_0 = phi::make_ddim({4, 2});
Tensor m_diff; phi::DenseTensor m_diff;
Tensor m_aver; phi::DenseTensor m_aver;
std::vector<T> vec_diff = {static_cast<T>(-1), std::vector<T> vec_diff = {static_cast<T>(-1),
static_cast<T>(0), static_cast<T>(0),
static_cast<T>(0), static_cast<T>(0),
...@@ -303,8 +302,8 @@ void BoxCoderDec(const framework::ExecutionContext& ctx, ...@@ -303,8 +302,8 @@ void BoxCoderDec(const framework::ExecutionContext& ctx,
Vector2Tensor<T>(ctx, vec_aver, shape_0, &m_aver); Vector2Tensor<T>(ctx, vec_aver, shape_0, &m_aver);
BoxCoderFunction<T> F(ctx); BoxCoderFunction<T> F(ctx);
Tensor pb_xy = F.Adds(F.Dot(*pb, m_aver), (norm ? 0 : 0.5)); phi::DenseTensor pb_xy = F.Adds(F.Dot(*pb, m_aver), (norm ? 0 : 0.5));
Tensor pb_wh = F.Adds(F.Dot(*pb, m_diff), (norm ? 0 : 1)); phi::DenseTensor pb_wh = F.Adds(F.Dot(*pb, m_diff), (norm ? 0 : 1));
auto pb_resize_shape = axis == 0 ? phi::make_ddim({1, pb->dims()[0], 2}) auto pb_resize_shape = axis == 0 ? phi::make_ddim({1, pb->dims()[0], 2})
: phi::make_ddim({pb->dims()[0], 1, 2}); : phi::make_ddim({pb->dims()[0], 1, 2});
pb_xy.Resize(pb_resize_shape); pb_xy.Resize(pb_resize_shape);
...@@ -313,18 +312,22 @@ void BoxCoderDec(const framework::ExecutionContext& ctx, ...@@ -313,18 +312,22 @@ void BoxCoderDec(const framework::ExecutionContext& ctx,
auto tbox_slice_shape = phi::make_ddim({tb->dims()[0], tb->dims()[1], 2}); auto tbox_slice_shape = phi::make_ddim({tb->dims()[0], tb->dims()[1], 2});
std::vector<int> tbox_slice_size = { std::vector<int> tbox_slice_size = {
static_cast<int>(tb->dims()[0]), static_cast<int>(tb->dims()[1]), 2}; static_cast<int>(tb->dims()[0]), static_cast<int>(tb->dims()[1]), 2};
Tensor tbox01 = F.Slice(*tb, {0, 0, 0}, tbox_slice_size, tbox_slice_shape); phi::DenseTensor tbox01 =
Tensor tbox23 = F.Slice(*tb, {0, 0, 2}, tbox_slice_size, tbox_slice_shape); F.Slice(*tb, {0, 0, 0}, tbox_slice_size, tbox_slice_shape);
phi::DenseTensor tbox23 =
F.Slice(*tb, {0, 0, 2}, tbox_slice_size, tbox_slice_shape);
Tensor tb_xy; phi::DenseTensor tb_xy;
Tensor tb_wh; phi::DenseTensor tb_wh;
if (pbv) { if (pbv) {
auto pbvt_slice_shape = phi::make_ddim({pbv->dims()[0], 2}); auto pbvt_slice_shape = phi::make_ddim({pbv->dims()[0], 2});
auto pbvt_resize_shape = axis == 0 ? phi::make_ddim({1, pbv->dims()[0], 2}) auto pbvt_resize_shape = axis == 0 ? phi::make_ddim({1, pbv->dims()[0], 2})
: phi::make_ddim({pbv->dims()[0], 1, 2}); : phi::make_ddim({pbv->dims()[0], 1, 2});
std::vector<int> pbvt_slice_size = {static_cast<int>(pbv->dims()[0]), 2}; std::vector<int> pbvt_slice_size = {static_cast<int>(pbv->dims()[0]), 2};
Tensor pbv_t01 = F.Slice(*pbv, {0, 0}, pbvt_slice_size, pbvt_slice_shape); phi::DenseTensor pbv_t01 =
Tensor pbv_t23 = F.Slice(*pbv, {0, 2}, pbvt_slice_size, pbvt_slice_shape); F.Slice(*pbv, {0, 0}, pbvt_slice_size, pbvt_slice_shape);
phi::DenseTensor pbv_t23 =
F.Slice(*pbv, {0, 2}, pbvt_slice_size, pbvt_slice_shape);
pbv_t01.Resize(pbvt_resize_shape); pbv_t01.Resize(pbvt_resize_shape);
pbv_t23.Resize(pbvt_resize_shape); pbv_t23.Resize(pbvt_resize_shape);
...@@ -345,7 +348,7 @@ void BoxCoderDec(const framework::ExecutionContext& ctx, ...@@ -345,7 +348,7 @@ void BoxCoderDec(const framework::ExecutionContext& ctx,
&tb_xy); &tb_xy);
F.MulWithBroadCastVoid(F.Exp(tbox23), pb_wh, tbox_slice_shape, &tb_wh); F.MulWithBroadCastVoid(F.Exp(tbox23), pb_wh, tbox_slice_shape, &tb_wh);
} else { } else {
Tensor t_var01, t_var23; phi::DenseTensor t_var01, t_var23;
auto t_var_shape = phi::make_ddim({1, 1, 2}); auto t_var_shape = phi::make_ddim({1, 1, 2});
std::vector<T> vec_var01 = {static_cast<T>(variance[0]), std::vector<T> vec_var01 = {static_cast<T>(variance[0]),
static_cast<T>(variance[1])}; static_cast<T>(variance[1])};
...@@ -366,9 +369,9 @@ void BoxCoderDec(const framework::ExecutionContext& ctx, ...@@ -366,9 +369,9 @@ void BoxCoderDec(const framework::ExecutionContext& ctx,
tbox_slice_shape, tbox_slice_shape,
&tb_wh); &tb_wh);
} }
Tensor obox01 = phi::DenseTensor obox01 =
F.AddWithBroadCast(tb_xy, F.Muls(tb_wh, -0.5), tbox_slice_shape); F.AddWithBroadCast(tb_xy, F.Muls(tb_wh, -0.5), tbox_slice_shape);
Tensor obox23 = phi::DenseTensor obox23 =
F.Adds(F.AddWithBroadCast(tb_xy, F.Muls(tb_wh, 0.5), tbox_slice_shape), F.Adds(F.AddWithBroadCast(tb_xy, F.Muls(tb_wh, 0.5), tbox_slice_shape),
(norm ? 0 : -1)); (norm ? 0 : -1));
F.ConcatVoid({obox01, obox23}, out->dims(), 2, out); F.ConcatVoid({obox01, obox23}, out->dims(), 2, out);
......
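The broadcasted tensor ops in BoxCoderEnc implement the usual corner-to-delta encoding: centers and sizes come from the m_aver/m_diff matmuls, then out_xy = (tb_xy - pb_xy) / pb_wh and out_wh = log(|tb_wh / pb_wh|). Written out per scalar box, and leaving aside the final division by the variances, the arithmetic is (a sketch; Box is a hypothetical corner-format struct):

#include <cmath>

struct Box { float xmin, ymin, xmax, ymax; };

// Scalar form of the encoding above; `norm` mirrors the kernel's
// (norm ? 0 : 0.5) and (norm ? 0 : 1) offsets for non-normalized boxes.
inline void EncodeBox(const Box& p, const Box& t, bool norm, float out[4]) {
  const float half = norm ? 0.0f : 0.5f;
  const float one = norm ? 0.0f : 1.0f;
  const float pxc = (p.xmin + p.xmax) * 0.5f + half;
  const float pyc = (p.ymin + p.ymax) * 0.5f + half;
  const float pw = p.xmax - p.xmin + one;
  const float ph = p.ymax - p.ymin + one;
  const float txc = (t.xmin + t.xmax) * 0.5f;
  const float tyc = (t.ymin + t.ymax) * 0.5f;
  const float tw = t.xmax - t.xmin + one;
  const float th = t.ymax - t.ymin + one;
  out[0] = (txc - pxc) / pw;
  out[1] = (tyc - pyc) / ph;
  out[2] = std::log(std::fabs(tw / pw));
  out[3] = std::log(std::fabs(th / ph));
}

BoxCoderDec inverts this: tb_xy = out_xy * pb_wh + pb_xy and tb_wh = exp(out_wh) * pb_wh, with the variances multiplied back in first.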
...@@ -16,7 +16,6 @@ limitations under the License.*/ ...@@ -16,7 +16,6 @@ limitations under the License.*/
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = phi::DenseTensor;
class CollectFpnProposalsOp : public framework::OperatorWithKernel { class CollectFpnProposalsOp : public framework::OperatorWithKernel {
public: public:
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
......
...@@ -33,8 +33,6 @@ namespace cub = hipcub; ...@@ -33,8 +33,6 @@ namespace cub = hipcub;
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = phi::DenseTensor;
static constexpr int kNumCUDAThreads = 64; static constexpr int kNumCUDAThreads = 64;
static constexpr int kNumMaxinumNumBlocks = 4096; static constexpr int kNumMaxinumNumBlocks = 4096;
...@@ -74,13 +72,13 @@ class GPUCollectFpnProposalsOpKernel : public framework::OpKernel<T> { ...@@ -74,13 +72,13 @@ class GPUCollectFpnProposalsOpKernel : public framework::OpKernel<T> {
int real_post_num = min(post_nms_topN, total_roi_num); int real_post_num = min(post_nms_topN, total_roi_num);
fpn_rois->mutable_data<T>({real_post_num, kBBoxSize}, dev_ctx.GetPlace()); fpn_rois->mutable_data<T>({real_post_num, kBBoxSize}, dev_ctx.GetPlace());
Tensor concat_rois; phi::DenseTensor concat_rois;
Tensor concat_scores; phi::DenseTensor concat_scores;
T* concat_rois_data = concat_rois.mutable_data<T>( T* concat_rois_data = concat_rois.mutable_data<T>(
{total_roi_num, kBBoxSize}, dev_ctx.GetPlace()); {total_roi_num, kBBoxSize}, dev_ctx.GetPlace());
T* concat_scores_data = T* concat_scores_data =
concat_scores.mutable_data<T>({total_roi_num, 1}, dev_ctx.GetPlace()); concat_scores.mutable_data<T>({total_roi_num, 1}, dev_ctx.GetPlace());
Tensor roi_batch_id_list; phi::DenseTensor roi_batch_id_list;
roi_batch_id_list.Resize({total_roi_num}); roi_batch_id_list.Resize({total_roi_num});
int* roi_batch_id_data = int* roi_batch_id_data =
roi_batch_id_list.mutable_data<int>(platform::CPUPlace()); roi_batch_id_list.mutable_data<int>(platform::CPUPlace());
...@@ -130,20 +128,20 @@ class GPUCollectFpnProposalsOpKernel : public framework::OpKernel<T> { ...@@ -130,20 +128,20 @@ class GPUCollectFpnProposalsOpKernel : public framework::OpKernel<T> {
} }
// copy batch id list to GPU // copy batch id list to GPU
Tensor roi_batch_id_list_gpu; phi::DenseTensor roi_batch_id_list_gpu;
framework::TensorCopy( framework::TensorCopy(
roi_batch_id_list, dev_ctx.GetPlace(), &roi_batch_id_list_gpu); roi_batch_id_list, dev_ctx.GetPlace(), &roi_batch_id_list_gpu);
Tensor index_in_t; phi::DenseTensor index_in_t;
int* idx_in = int* idx_in =
index_in_t.mutable_data<int>({total_roi_num}, dev_ctx.GetPlace()); index_in_t.mutable_data<int>({total_roi_num}, dev_ctx.GetPlace());
platform::ForRange<phi::GPUContext> for_range_total(dev_ctx, total_roi_num); platform::ForRange<phi::GPUContext> for_range_total(dev_ctx, total_roi_num);
for_range_total(RangeInitFunctor{0, 1, idx_in}); for_range_total(RangeInitFunctor{0, 1, idx_in});
Tensor keys_out_t; phi::DenseTensor keys_out_t;
T* keys_out = T* keys_out =
keys_out_t.mutable_data<T>({total_roi_num}, dev_ctx.GetPlace()); keys_out_t.mutable_data<T>({total_roi_num}, dev_ctx.GetPlace());
Tensor index_out_t; phi::DenseTensor index_out_t;
int* idx_out = int* idx_out =
index_out_t.mutable_data<int>({total_roi_num}, dev_ctx.GetPlace()); index_out_t.mutable_data<int>({total_roi_num}, dev_ctx.GetPlace());
...@@ -175,21 +173,21 @@ class GPUCollectFpnProposalsOpKernel : public framework::OpKernel<T> { ...@@ -175,21 +173,21 @@ class GPUCollectFpnProposalsOpKernel : public framework::OpKernel<T> {
sizeof(T) * 8, sizeof(T) * 8,
dev_ctx.stream()); dev_ctx.stream());
index_out_t.Resize({real_post_num}); index_out_t.Resize({real_post_num});
Tensor sorted_rois; phi::DenseTensor sorted_rois;
sorted_rois.mutable_data<T>({real_post_num, kBBoxSize}, dev_ctx.GetPlace()); sorted_rois.mutable_data<T>({real_post_num, kBBoxSize}, dev_ctx.GetPlace());
Tensor sorted_batch_id; phi::DenseTensor sorted_batch_id;
sorted_batch_id.mutable_data<int>({real_post_num}, dev_ctx.GetPlace()); sorted_batch_id.mutable_data<int>({real_post_num}, dev_ctx.GetPlace());
phi::funcs::GPUGather<T>(dev_ctx, concat_rois, index_out_t, &sorted_rois); phi::funcs::GPUGather<T>(dev_ctx, concat_rois, index_out_t, &sorted_rois);
phi::funcs::GPUGather<int>( phi::funcs::GPUGather<int>(
dev_ctx, roi_batch_id_list_gpu, index_out_t, &sorted_batch_id); dev_ctx, roi_batch_id_list_gpu, index_out_t, &sorted_batch_id);
Tensor batch_index_t; phi::DenseTensor batch_index_t;
int* batch_idx_in = int* batch_idx_in =
batch_index_t.mutable_data<int>({real_post_num}, dev_ctx.GetPlace()); batch_index_t.mutable_data<int>({real_post_num}, dev_ctx.GetPlace());
platform::ForRange<phi::GPUContext> for_range_post(dev_ctx, real_post_num); platform::ForRange<phi::GPUContext> for_range_post(dev_ctx, real_post_num);
for_range_post(RangeInitFunctor{0, 1, batch_idx_in}); for_range_post(RangeInitFunctor{0, 1, batch_idx_in});
Tensor out_id_t; phi::DenseTensor out_id_t;
int* out_id_data = int* out_id_data =
out_id_t.mutable_data<int>({real_post_num}, dev_ctx.GetPlace()); out_id_t.mutable_data<int>({real_post_num}, dev_ctx.GetPlace());
// Determine temporary device storage requirements // Determine temporary device storage requirements
...@@ -222,7 +220,7 @@ class GPUCollectFpnProposalsOpKernel : public framework::OpKernel<T> { ...@@ -222,7 +220,7 @@ class GPUCollectFpnProposalsOpKernel : public framework::OpKernel<T> {
phi::funcs::GPUGather<T>(dev_ctx, sorted_rois, index_out_t, fpn_rois); phi::funcs::GPUGather<T>(dev_ctx, sorted_rois, index_out_t, fpn_rois);
Tensor length_lod; phi::DenseTensor length_lod;
int* length_lod_data = int* length_lod_data =
length_lod.mutable_data<int>({lod_size}, dev_ctx.GetPlace()); length_lod.mutable_data<int>({lod_size}, dev_ctx.GetPlace());
phi::funcs::SetConstant<phi::GPUContext, int> set_zero; phi::funcs::SetConstant<phi::GPUContext, int> set_zero;
......
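Both cub sorts above follow CUB's two-phase convention: the first call passes a null temp-storage pointer and only reports the required scratch size, and the second call performs the sort. A minimal sketch of that pattern for the descending sort by score (hypothetical device buffers prepared by the caller):

#include <cub/cub.cuh>

// Phase 1 sizes the scratch buffer, phase 2 does the actual sort; keys are
// scores, values are the roi indices being reordered.
void SortByScoreDescending(const float* d_keys_in, float* d_keys_out,
                           const int* d_vals_in, int* d_vals_out,
                           int n, cudaStream_t stream) {
  void* d_temp = nullptr;
  size_t temp_bytes = 0;
  cub::DeviceRadixSort::SortPairsDescending(
      d_temp, temp_bytes, d_keys_in, d_keys_out, d_vals_in, d_vals_out,
      n, 0, sizeof(float) * 8, stream);
  cudaMalloc(&d_temp, temp_bytes);
  cub::DeviceRadixSort::SortPairsDescending(
      d_temp, temp_bytes, d_keys_in, d_keys_out, d_vals_in, d_vals_out,
      n, 0, sizeof(float) * 8, stream);
  cudaFree(d_temp);
}

In-tree code would typically take the scratch buffer from the framework allocator instead of raw cudaMalloc, but the calling sequence is the same.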
...@@ -15,7 +15,6 @@ limitations under the License. */ ...@@ -15,7 +15,6 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = phi::DenseTensor;
using fp16 = paddle::platform::float16; using fp16 = paddle::platform::float16;
template <typename T> template <typename T>
...@@ -89,7 +88,7 @@ struct DensityPriorBoxFunction { ...@@ -89,7 +88,7 @@ struct DensityPriorBoxFunction {
const auto& runner = NpuOpRunner("Minimum", {*x, *y}, {*z}, {}); const auto& runner = NpuOpRunner("Minimum", {*x, *y}, {*z}, {});
runner.Run(stream); runner.Run(stream);
} }
void Concat(const std::vector<Tensor>& inputs, void Concat(const std::vector<phi::DenseTensor>& inputs,
int axis, int axis,
phi::DenseTensor* output) { phi::DenseTensor* output) {
// output should be initialized first // output should be initialized first
...@@ -131,14 +130,14 @@ struct DensityPriorBoxFunction { ...@@ -131,14 +130,14 @@ struct DensityPriorBoxFunction {
platform::Place place; platform::Place place;
aclrtStream stream; aclrtStream stream;
const framework::ExecutionContext& ctx; const framework::ExecutionContext& ctx;
Tensor t0; phi::DenseTensor t0;
Tensor t1; phi::DenseTensor t1;
Tensor tn; phi::DenseTensor tn;
}; };
template <> template <>
void DensityPriorBoxFunction<fp16>::Arange(int n, phi::DenseTensor* x) { void DensityPriorBoxFunction<fp16>::Arange(int n, phi::DenseTensor* x) {
Tensor x_fp32(experimental::DataType::FLOAT32); phi::DenseTensor x_fp32(experimental::DataType::FLOAT32);
x_fp32.mutable_data<float>(x->dims(), place); x_fp32.mutable_data<float>(x->dims(), place);
FillNpuTensorWithConstant<float>(&tn, static_cast<float>(n)); FillNpuTensorWithConstant<float>(&tn, static_cast<float>(n));
const auto& runner = NpuOpRunner("Range", {t0, tn, t1}, {x_fp32}, {}); const auto& runner = NpuOpRunner("Range", {t0, tn, t1}, {x_fp32}, {});
...@@ -149,7 +148,7 @@ void DensityPriorBoxFunction<fp16>::Arange(int n, phi::DenseTensor* x) { ...@@ -149,7 +148,7 @@ void DensityPriorBoxFunction<fp16>::Arange(int n, phi::DenseTensor* x) {
template <> template <>
void DensityPriorBoxFunction<fp16>::FloatVec2Tsr(const std::vector<float>& vec, void DensityPriorBoxFunction<fp16>::FloatVec2Tsr(const std::vector<float>& vec,
phi::DenseTensor* tsr_dst) { phi::DenseTensor* tsr_dst) {
Tensor tsr_fp32(experimental::DataType::FLOAT32); phi::DenseTensor tsr_fp32(experimental::DataType::FLOAT32);
tsr_fp32.mutable_data<float>(tsr_dst->dims(), place); tsr_fp32.mutable_data<float>(tsr_dst->dims(), place);
framework::TensorFromVector<float>(vec, ctx.device_context(), &tsr_fp32); framework::TensorFromVector<float>(vec, ctx.device_context(), &tsr_fp32);
ctx.template device_context<paddle::platform::NPUDeviceContext>().Wait(); ctx.template device_context<paddle::platform::NPUDeviceContext>().Wait();
...@@ -185,9 +184,9 @@ class DensityPriorBoxOpNPUKernel : public framework::OpKernel<T> { ...@@ -185,9 +184,9 @@ class DensityPriorBoxOpNPUKernel : public framework::OpKernel<T> {
auto place = ctx.GetPlace(); auto place = ctx.GetPlace();
DensityPriorBoxFunction<T> F(ctx); DensityPriorBoxFunction<T> F(ctx);
Tensor h(_type); phi::DenseTensor h(_type);
h.mutable_data<T>({layer_h}, place); h.mutable_data<T>({layer_h}, place);
Tensor w(_type); phi::DenseTensor w(_type);
w.mutable_data<T>({layer_w}, place); w.mutable_data<T>({layer_w}, place);
F.Arange(layer_h, &h); F.Arange(layer_h, &h);
F.Arange(layer_w, &w); F.Arange(layer_w, &w);
...@@ -203,11 +202,11 @@ class DensityPriorBoxOpNPUKernel : public framework::OpKernel<T> { ...@@ -203,11 +202,11 @@ class DensityPriorBoxOpNPUKernel : public framework::OpKernel<T> {
for (size_t i = 0; i < densities.size(); ++i) { for (size_t i = 0; i < densities.size(); ++i) {
num_priors_per_ratio += densities[i] * densities[i]; num_priors_per_ratio += densities[i] * densities[i];
} }
Tensor di(_type); phi::DenseTensor di(_type);
Tensor dj(_type); phi::DenseTensor dj(_type);
Tensor shifts(_type); phi::DenseTensor shifts(_type);
Tensor box_w_ratio(_type); phi::DenseTensor box_w_ratio(_type);
Tensor box_h_ratio(_type); phi::DenseTensor box_h_ratio(_type);
di.mutable_data<T>({ratios_size * num_priors_per_ratio}, place); di.mutable_data<T>({ratios_size * num_priors_per_ratio}, place);
dj.mutable_data<T>({ratios_size * num_priors_per_ratio}, place); dj.mutable_data<T>({ratios_size * num_priors_per_ratio}, place);
shifts.mutable_data<T>({ratios_size * num_priors_per_ratio}, place); shifts.mutable_data<T>({ratios_size * num_priors_per_ratio}, place);
...@@ -220,19 +219,21 @@ class DensityPriorBoxOpNPUKernel : public framework::OpKernel<T> { ...@@ -220,19 +219,21 @@ class DensityPriorBoxOpNPUKernel : public framework::OpKernel<T> {
// Range = start:start+ratios_size*density_sqr, density = densities[i] // Range = start:start+ratios_size*density_sqr, density = densities[i]
int density_sqr = densities[i] * densities[i]; int density_sqr = densities[i] * densities[i];
// shifts[Range] = [step_average/density]*ratios_size*density_sqr // shifts[Range] = [step_average/density]*ratios_size*density_sqr
Tensor shifts_part = phi::DenseTensor shifts_part =
shifts.Slice(start, start + ratios_size * density_sqr); shifts.Slice(start, start + ratios_size * density_sqr);
FillNpuTensorWithConstant<T>(&shifts_part, FillNpuTensorWithConstant<T>(&shifts_part,
static_cast<T>(step_average / densities[i])); static_cast<T>(step_average / densities[i]));
// di[Range] = [ i // density for i in range(density_sqr) ] * ratios_size // di[Range] = [ i // density for i in range(density_sqr) ] * ratios_size
// dj[Range] = [ i % density for i in range(density_sqr) ] * ratios_size // dj[Range] = [ i % density for i in range(density_sqr) ] * ratios_size
Tensor di_part = di.Slice(start, start + ratios_size * density_sqr); phi::DenseTensor di_part =
Tensor dj_part = dj.Slice(start, start + ratios_size * density_sqr); di.Slice(start, start + ratios_size * density_sqr);
phi::DenseTensor dj_part =
dj.Slice(start, start + ratios_size * density_sqr);
if (densities[i] > 1) { if (densities[i] > 1) {
di_part.Resize({ratios_size, densities[i], densities[i]}); di_part.Resize({ratios_size, densities[i], densities[i]});
dj_part.Resize({ratios_size, densities[i], densities[i]}); dj_part.Resize({ratios_size, densities[i], densities[i]});
Tensor range_n(_type); phi::DenseTensor range_n(_type);
range_n.mutable_data<T>({densities[i]}, place); range_n.mutable_data<T>({densities[i]}, place);
F.Arange(densities[i], &range_n); F.Arange(densities[i], &range_n);
range_n.Resize({1, densities[i], 1}); range_n.Resize({1, densities[i], 1});
...@@ -254,9 +255,9 @@ class DensityPriorBoxOpNPUKernel : public framework::OpKernel<T> { ...@@ -254,9 +255,9 @@ class DensityPriorBoxOpNPUKernel : public framework::OpKernel<T> {
// Range_mini = start_box_ratio:start_box_ratio+density_sqr // Range_mini = start_box_ratio:start_box_ratio+density_sqr
// box_h_ratio[Range_mini] = [fixed_sizes[i] * sqrt(ar)] * density_sqr // box_h_ratio[Range_mini] = [fixed_sizes[i] * sqrt(ar)] * density_sqr
// box_w_ratio[Range_mini] = [fixed_sizes[i] / sqrt(ar)] * density_sqr // box_w_ratio[Range_mini] = [fixed_sizes[i] / sqrt(ar)] * density_sqr
Tensor box_h_ratio_part = phi::DenseTensor box_h_ratio_part =
box_h_ratio.Slice(start_box_ratio, start_box_ratio + density_sqr); box_h_ratio.Slice(start_box_ratio, start_box_ratio + density_sqr);
Tensor box_w_ratio_part = phi::DenseTensor box_w_ratio_part =
box_w_ratio.Slice(start_box_ratio, start_box_ratio + density_sqr); box_w_ratio.Slice(start_box_ratio, start_box_ratio + density_sqr);
FillNpuTensorWithConstant<T>(&box_w_ratio_part, FillNpuTensorWithConstant<T>(&box_w_ratio_part,
static_cast<T>(fixed_sizes[i] * sqrt(ar))); static_cast<T>(fixed_sizes[i] * sqrt(ar)));
...@@ -274,8 +275,8 @@ class DensityPriorBoxOpNPUKernel : public framework::OpKernel<T> { ...@@ -274,8 +275,8 @@ class DensityPriorBoxOpNPUKernel : public framework::OpKernel<T> {
// c_x = (w+offset)*step_w - 0.5*step_average + 0.5*shifts + dj*shifts // c_x = (w+offset)*step_w - 0.5*step_average + 0.5*shifts + dj*shifts
// c_y = (h+offset)*step_h - 0.5*step_average + 0.5*shifts + di*shifts // c_y = (h+offset)*step_h - 0.5*step_average + 0.5*shifts + di*shifts
Tensor c_x(_type); phi::DenseTensor c_x(_type);
Tensor c_y(_type); phi::DenseTensor c_y(_type);
auto dim0 = auto dim0 =
phi::make_ddim({1, layer_w, ratios_size * num_priors_per_ratio, 1}); phi::make_ddim({1, layer_w, ratios_size * num_priors_per_ratio, 1});
auto dim1 = auto dim1 =
...@@ -301,17 +302,17 @@ class DensityPriorBoxOpNPUKernel : public framework::OpKernel<T> { ...@@ -301,17 +302,17 @@ class DensityPriorBoxOpNPUKernel : public framework::OpKernel<T> {
F.Muls(&box_w_ratio, static_cast<float>(0.5), &box_w_ratio); F.Muls(&box_w_ratio, static_cast<float>(0.5), &box_w_ratio);
F.Muls(&box_h_ratio, static_cast<float>(0.5), &box_h_ratio); F.Muls(&box_h_ratio, static_cast<float>(0.5), &box_h_ratio);
Tensor zero_t(_type); phi::DenseTensor zero_t(_type);
Tensor one_t(_type); phi::DenseTensor one_t(_type);
zero_t.mutable_data<T>({1}, place); zero_t.mutable_data<T>({1}, place);
one_t.mutable_data<T>({1}, place); one_t.mutable_data<T>({1}, place);
FillNpuTensorWithConstant<T>(&zero_t, static_cast<T>(0)); FillNpuTensorWithConstant<T>(&zero_t, static_cast<T>(0));
FillNpuTensorWithConstant<T>(&one_t, static_cast<T>(1)); FillNpuTensorWithConstant<T>(&one_t, static_cast<T>(1));
Tensor outbox0(_type); phi::DenseTensor outbox0(_type);
Tensor outbox1(_type); phi::DenseTensor outbox1(_type);
Tensor outbox2(_type); phi::DenseTensor outbox2(_type);
Tensor outbox3(_type); phi::DenseTensor outbox3(_type);
outbox0.mutable_data<T>(dim0, place); outbox0.mutable_data<T>(dim0, place);
outbox1.mutable_data<T>(dim1, place); outbox1.mutable_data<T>(dim1, place);
outbox2.mutable_data<T>(dim0, place); outbox2.mutable_data<T>(dim0, place);
...@@ -349,17 +350,17 @@ class DensityPriorBoxOpNPUKernel : public framework::OpKernel<T> { ...@@ -349,17 +350,17 @@ class DensityPriorBoxOpNPUKernel : public framework::OpKernel<T> {
{layer_h, layer_w, ratios_size * num_priors_per_ratio, 4}); {layer_h, layer_w, ratios_size * num_priors_per_ratio, 4});
boxes->mutable_data<T>(place); boxes->mutable_data<T>(place);
vars->mutable_data<T>(place); vars->mutable_data<T>(place);
Tensor boxes_share(_type); phi::DenseTensor boxes_share(_type);
Tensor vars_share(_type); phi::DenseTensor vars_share(_type);
boxes_share.ShareDataWith(*boxes); boxes_share.ShareDataWith(*boxes);
boxes_share.Resize(out_dim); boxes_share.Resize(out_dim);
vars_share.ShareDataWith(*vars); vars_share.ShareDataWith(*vars);
vars_share.Resize(out_dim); vars_share.Resize(out_dim);
Tensor box0(_type); phi::DenseTensor box0(_type);
Tensor box1(_type); phi::DenseTensor box1(_type);
Tensor box2(_type); phi::DenseTensor box2(_type);
Tensor box3(_type); phi::DenseTensor box3(_type);
// out_dim = {layer_h, layer_w, ratios_size*num_priors_per_ratio, 1} // out_dim = {layer_h, layer_w, ratios_size*num_priors_per_ratio, 1}
out_dim[3] = 1; out_dim[3] = 1;
box0.mutable_data<T>(out_dim, place); box0.mutable_data<T>(out_dim, place);
...@@ -377,7 +378,7 @@ class DensityPriorBoxOpNPUKernel : public framework::OpKernel<T> { ...@@ -377,7 +378,7 @@ class DensityPriorBoxOpNPUKernel : public framework::OpKernel<T> {
std::vector<int> multiples = { std::vector<int> multiples = {
layer_h, layer_w, ratios_size * num_priors_per_ratio, 1}; layer_h, layer_w, ratios_size * num_priors_per_ratio, 1};
Tensor variances_t(_type); phi::DenseTensor variances_t(_type);
// variances.size() == 4 // variances.size() == 4
variances_t.mutable_data<T>({4}, place); variances_t.mutable_data<T>({4}, place);
F.FloatVec2Tsr(variances, &variances_t); F.FloatVec2Tsr(variances, &variances_t);
......
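The di/dj/shifts tables built above read more easily in scalar form. Following the kernel's own comments, for each density d the kernel fills ratios_size * d * d slots with shift = step_average / d, di = slot / d, and dj = slot % d; the prior centered at feature cell (h, w) then gets c_x = (w + offset) * step_w - 0.5 * step_average + (dj + 0.5) * shift, and c_y analogously with di. A sketch of the table fill (hypothetical CPU loop):

#include <vector>

// Fill the flattened shift/di/dj tables exactly as described in the kernel
// comments: the (s / d, s % d) grid is repeated once per aspect ratio.
void FillShiftTables(const std::vector<int>& densities, int ratios_size,
                     float step_average, std::vector<float>* shifts,
                     std::vector<int>* di, std::vector<int>* dj) {
  for (int d : densities) {
    const float shift = step_average / d;
    for (int r = 0; r < ratios_size; ++r) {
      for (int s = 0; s < d * d; ++s) {
        shifts->push_back(shift);
        di->push_back(s / d);
        dj->push_back(s % d);
      }
    }
  }
}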
...@@ -25,7 +25,6 @@ limitations under the License. */ ...@@ -25,7 +25,6 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = phi::DenseTensor;
const int kBoxDim = 4; const int kBoxDim = 4;
template <typename T> template <typename T>
...@@ -151,16 +150,17 @@ static inline void ExpandMaskTarget(const phi::CPUContext& ctx, ...@@ -151,16 +150,17 @@ static inline void ExpandMaskTarget(const phi::CPUContext& ctx,
} }
template <typename T> template <typename T>
std::vector<Tensor> SampleMaskForOneImage(const phi::CPUContext& ctx, std::vector<phi::DenseTensor> SampleMaskForOneImage(
const phi::DenseTensor& im_info, const phi::CPUContext& ctx,
const phi::DenseTensor& gt_classes, const phi::DenseTensor& im_info,
const phi::DenseTensor& is_crowd, const phi::DenseTensor& gt_classes,
const phi::DenseTensor& gt_segms, const phi::DenseTensor& is_crowd,
const phi::DenseTensor& rois, const phi::DenseTensor& gt_segms,
const phi::DenseTensor& label_int32, const phi::DenseTensor& rois,
const int num_classes, const phi::DenseTensor& label_int32,
const int resolution, const int num_classes,
const framework::LoD& segm_length) { const int resolution,
const framework::LoD& segm_length) {
// Prepare the mask targets by associating one gt mask to each training roi // Prepare the mask targets by associating one gt mask to each training roi
// that has a fg (non-bg) class label. // that has a fg (non-bg) class label.
const int64_t gt_size = static_cast<int64_t>(gt_classes.dims()[0]); const int64_t gt_size = static_cast<int64_t>(gt_classes.dims()[0]);
...@@ -218,15 +218,15 @@ std::vector<Tensor> SampleMaskForOneImage(const phi::CPUContext& ctx, ...@@ -218,15 +218,15 @@ std::vector<Tensor> SampleMaskForOneImage(const phi::CPUContext& ctx,
int gt_num = mask_gt_inds.size(); int gt_num = mask_gt_inds.size();
int fg_num = fg_inds.size(); int fg_num = fg_inds.size();
Tensor boxes_from_polys; phi::DenseTensor boxes_from_polys;
boxes_from_polys.mutable_data<T>({gt_num, 4}, platform::CPUPlace()); boxes_from_polys.mutable_data<T>({gt_num, 4}, platform::CPUPlace());
Poly2Boxes(gt_polys, boxes_from_polys.data<T>()); Poly2Boxes(gt_polys, boxes_from_polys.data<T>());
std::vector<int> roi_has_mask = std::vector<int> roi_has_mask =
std::vector<int>(fg_inds.begin(), fg_inds.end()); std::vector<int>(fg_inds.begin(), fg_inds.end());
Tensor mask_class_labels; phi::DenseTensor mask_class_labels;
Tensor masks; phi::DenseTensor masks;
Tensor rois_fg; phi::DenseTensor rois_fg;
auto im_scale = im_info.data<T>()[2]; auto im_scale = im_info.data<T>()[2];
if (fg_num > 0) { if (fg_num > 0) {
...@@ -251,7 +251,7 @@ std::vector<Tensor> SampleMaskForOneImage(const phi::CPUContext& ctx, ...@@ -251,7 +251,7 @@ std::vector<Tensor> SampleMaskForOneImage(const phi::CPUContext& ctx,
rois_fg_data[k] = rois_fg_data[k] / im_scale; rois_fg_data[k] = rois_fg_data[k] / im_scale;
} }
Tensor overlaps_bbfg_bbpolys; phi::DenseTensor overlaps_bbfg_bbpolys;
overlaps_bbfg_bbpolys.mutable_data<T>({fg_num, gt_num}, ctx.GetPlace()); overlaps_bbfg_bbpolys.mutable_data<T>({fg_num, gt_num}, ctx.GetPlace());
BboxOverlaps<T>(rois_fg, boxes_from_polys, &overlaps_bbfg_bbpolys); BboxOverlaps<T>(rois_fg, boxes_from_polys, &overlaps_bbfg_bbpolys);
...@@ -306,7 +306,7 @@ std::vector<Tensor> SampleMaskForOneImage(const phi::CPUContext& ctx, ...@@ -306,7 +306,7 @@ std::vector<Tensor> SampleMaskForOneImage(const phi::CPUContext& ctx,
roi_has_mask = std::vector<int>(bg_inds.begin(), bg_inds.end()); roi_has_mask = std::vector<int>(bg_inds.begin(), bg_inds.end());
} }
Tensor masks_expand; phi::DenseTensor masks_expand;
ExpandMaskTarget<T>( ExpandMaskTarget<T>(
ctx, masks, mask_class_labels, resolution, num_classes, &masks_expand); ctx, masks, mask_class_labels, resolution, num_classes, &masks_expand);
...@@ -315,13 +315,13 @@ std::vector<Tensor> SampleMaskForOneImage(const phi::CPUContext& ctx, ...@@ -315,13 +315,13 @@ std::vector<Tensor> SampleMaskForOneImage(const phi::CPUContext& ctx,
rois_fg_data[k] = rois_fg_data[k] * im_scale; rois_fg_data[k] = rois_fg_data[k] * im_scale;
} }
Tensor roi_has_mask_t; phi::DenseTensor roi_has_mask_t;
int roi_has_mask_size = roi_has_mask.size(); int roi_has_mask_size = roi_has_mask.size();
int* roi_has_mask_data = int* roi_has_mask_data =
roi_has_mask_t.mutable_data<int>({roi_has_mask_size, 1}, ctx.GetPlace()); roi_has_mask_t.mutable_data<int>({roi_has_mask_size, 1}, ctx.GetPlace());
std::copy(roi_has_mask.begin(), roi_has_mask.end(), roi_has_mask_data); std::copy(roi_has_mask.begin(), roi_has_mask.end(), roi_has_mask_data);
std::vector<Tensor> res; std::vector<phi::DenseTensor> res;
res.emplace_back(rois_fg); res.emplace_back(rois_fg);
res.emplace_back(roi_has_mask_t); res.emplace_back(roi_has_mask_t);
res.emplace_back(masks_expand); res.emplace_back(masks_expand);
...@@ -405,23 +405,23 @@ class GenerateMaskLabelsKernel : public framework::OpKernel<T> { ...@@ -405,23 +405,23 @@ class GenerateMaskLabelsKernel : public framework::OpKernel<T> {
lod0.emplace_back(num_mask); lod0.emplace_back(num_mask);
continue; continue;
} }
Tensor im_info_slice = im_info->Slice(i, i + 1); phi::DenseTensor im_info_slice = im_info->Slice(i, i + 1);
Tensor gt_classes_slice = phi::DenseTensor gt_classes_slice =
gt_classes->Slice(gt_classes_lod[i], gt_classes_lod[i + 1]); gt_classes->Slice(gt_classes_lod[i], gt_classes_lod[i + 1]);
Tensor is_crowd_slice = phi::DenseTensor is_crowd_slice =
is_crowd->Slice(is_crowd_lod[i], is_crowd_lod[i + 1]); is_crowd->Slice(is_crowd_lod[i], is_crowd_lod[i + 1]);
Tensor label_int32_slice = phi::DenseTensor label_int32_slice =
label_int32->Slice(label_int32_lod[i], label_int32_lod[i + 1]); label_int32->Slice(label_int32_lod[i], label_int32_lod[i + 1]);
Tensor rois_slice = rois->Slice(rois_lod[i], rois_lod[i + 1]); phi::DenseTensor rois_slice = rois->Slice(rois_lod[i], rois_lod[i + 1]);
auto sub_lod_and_offset = auto sub_lod_and_offset =
framework::GetSubLoDAndAbsoluteOffset(gt_segms_lod, i, i + 1, 0); framework::GetSubLoDAndAbsoluteOffset(gt_segms_lod, i, i + 1, 0);
auto lod_length = sub_lod_and_offset.first; auto lod_length = sub_lod_and_offset.first;
size_t s = sub_lod_and_offset.second.first; size_t s = sub_lod_and_offset.second.first;
size_t e = sub_lod_and_offset.second.second; size_t e = sub_lod_and_offset.second.second;
Tensor gt_segms_slice = gt_segms->Slice(s, e); phi::DenseTensor gt_segms_slice = gt_segms->Slice(s, e);
std::vector<Tensor> tensor_output = std::vector<phi::DenseTensor> tensor_output =
SampleMaskForOneImage<T>(dev_ctx, SampleMaskForOneImage<T>(dev_ctx,
im_info_slice, im_info_slice,
gt_classes_slice, gt_classes_slice,
...@@ -433,9 +433,9 @@ class GenerateMaskLabelsKernel : public framework::OpKernel<T> { ...@@ -433,9 +433,9 @@ class GenerateMaskLabelsKernel : public framework::OpKernel<T> {
resolution, resolution,
lod_length); lod_length);
Tensor sampled_mask_rois = tensor_output[0]; phi::DenseTensor sampled_mask_rois = tensor_output[0];
Tensor sampled_roi_has_mask_int32 = tensor_output[1]; phi::DenseTensor sampled_roi_has_mask_int32 = tensor_output[1];
Tensor sampled_mask_int32 = tensor_output[2]; phi::DenseTensor sampled_mask_int32 = tensor_output[2];
AppendMask<T>(mask_rois, kBoxDim * num_mask, &sampled_mask_rois); AppendMask<T>(mask_rois, kBoxDim * num_mask, &sampled_mask_rois);
AppendMask<int>( AppendMask<int>(
......
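Poly2Boxes is called above but defined elsewhere; the reduction it performs amounts to taking a tight axis-aligned box over all polygon vertices. A hypothetical scalar version, assuming each polygon is a flat [x0, y0, x1, y1, ...] coordinate list:

#include <algorithm>
#include <cstddef>
#include <limits>
#include <vector>

// Hypothetical stand-in for the per-polygon part of Poly2Boxes: the output
// box is [xmin, ymin, xmax, ymax] over every (x, y) vertex.
inline void PolyToBox(const std::vector<float>& poly, float box[4]) {
  box[0] = box[1] = std::numeric_limits<float>::max();
  box[2] = box[3] = std::numeric_limits<float>::lowest();
  for (std::size_t i = 0; i + 1 < poly.size(); i += 2) {
    box[0] = std::min(box[0], poly[i]);      // xmin
    box[1] = std::min(box[1], poly[i + 1]);  // ymin
    box[2] = std::max(box[2], poly[i]);      // xmax
    box[3] = std::max(box[3], poly[i + 1]);  // ymax
  }
}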
...@@ -25,7 +25,6 @@ limitations under the License. */ ...@@ -25,7 +25,6 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = phi::DenseTensor;
const int kBoxDim = 4; const int kBoxDim = 4;
template <typename T> template <typename T>
...@@ -174,7 +173,7 @@ void Concat(const phi::CPUContext& context, ...@@ -174,7 +173,7 @@ void Concat(const phi::CPUContext& context,
const phi::DenseTensor& in_tensor_b, const phi::DenseTensor& in_tensor_b,
phi::DenseTensor* out_tensor) { phi::DenseTensor* out_tensor) {
int axis = 0; int axis = 0;
std::vector<Tensor> inputs; std::vector<phi::DenseTensor> inputs;
inputs.emplace_back(in_tensor_a); inputs.emplace_back(in_tensor_a);
inputs.emplace_back(in_tensor_b); inputs.emplace_back(in_tensor_b);
math::ConcatFunctor<phi::CPUContext, T> concat_functor; math::ConcatFunctor<phi::CPUContext, T> concat_functor;
...@@ -300,7 +299,7 @@ void GatherBoxesLabels(const phi::CPUContext& context, ...@@ -300,7 +299,7 @@ void GatherBoxesLabels(const phi::CPUContext& context,
phi::DenseTensor* sampled_max_overlap) { phi::DenseTensor* sampled_max_overlap) {
int fg_num = fg_inds.size(); int fg_num = fg_inds.size();
int bg_num = bg_inds.size(); int bg_num = bg_inds.size();
Tensor fg_inds_t, bg_inds_t, gt_box_inds_t, gt_label_inds_t; phi::DenseTensor fg_inds_t, bg_inds_t, gt_box_inds_t, gt_label_inds_t;
int* fg_inds_data = fg_inds_t.mutable_data<int>({fg_num}, context.GetPlace()); int* fg_inds_data = fg_inds_t.mutable_data<int>({fg_num}, context.GetPlace());
int* bg_inds_data = bg_inds_t.mutable_data<int>({bg_num}, context.GetPlace()); int* bg_inds_data = bg_inds_t.mutable_data<int>({bg_num}, context.GetPlace());
int* gt_box_inds_data = int* gt_box_inds_data =
...@@ -312,7 +311,7 @@ void GatherBoxesLabels(const phi::CPUContext& context, ...@@ -312,7 +311,7 @@ void GatherBoxesLabels(const phi::CPUContext& context,
std::copy(gt_inds.begin(), gt_inds.end(), gt_box_inds_data); std::copy(gt_inds.begin(), gt_inds.end(), gt_box_inds_data);
std::copy(gt_inds.begin(), gt_inds.end(), gt_label_inds_data); std::copy(gt_inds.begin(), gt_inds.end(), gt_label_inds_data);
Tensor fg_boxes, bg_boxes, fg_labels, bg_labels; phi::DenseTensor fg_boxes, bg_boxes, fg_labels, bg_labels;
fg_boxes.mutable_data<T>({fg_num, kBoxDim}, context.GetPlace()); fg_boxes.mutable_data<T>({fg_num, kBoxDim}, context.GetPlace());
phi::funcs::CPUGather<T>(context, boxes, fg_inds_t, &fg_boxes); phi::funcs::CPUGather<T>(context, boxes, fg_inds_t, &fg_boxes);
bg_boxes.mutable_data<T>({bg_num, kBoxDim}, context.GetPlace()); bg_boxes.mutable_data<T>({bg_num, kBoxDim}, context.GetPlace());
...@@ -325,7 +324,7 @@ void GatherBoxesLabels(const phi::CPUContext& context, ...@@ -325,7 +324,7 @@ void GatherBoxesLabels(const phi::CPUContext& context,
phi::funcs::set_constant(context, &bg_labels, 0); phi::funcs::set_constant(context, &bg_labels, 0);
Concat<int>(context, fg_labels, bg_labels, sampled_labels); Concat<int>(context, fg_labels, bg_labels, sampled_labels);
Tensor fg_max_overlap, bg_max_overlap; phi::DenseTensor fg_max_overlap, bg_max_overlap;
fg_max_overlap.mutable_data<T>({fg_num}, context.GetPlace()); fg_max_overlap.mutable_data<T>({fg_num}, context.GetPlace());
phi::funcs::CPUGather<T>(context, max_overlap, fg_inds_t, &fg_max_overlap); phi::funcs::CPUGather<T>(context, max_overlap, fg_inds_t, &fg_max_overlap);
bg_max_overlap.mutable_data<T>({bg_num}, context.GetPlace()); bg_max_overlap.mutable_data<T>({bg_num}, context.GetPlace());
...@@ -334,7 +333,7 @@ void GatherBoxesLabels(const phi::CPUContext& context, ...@@ -334,7 +333,7 @@ void GatherBoxesLabels(const phi::CPUContext& context,
} }
template <typename T> template <typename T>
std::vector<Tensor> SampleRoisForOneImage( std::vector<phi::DenseTensor> SampleRoisForOneImage(
const phi::CPUContext& context, const phi::CPUContext& context,
const phi::DenseTensor& rpn_rois_in, const phi::DenseTensor& rpn_rois_in,
const phi::DenseTensor& gt_classes, const phi::DenseTensor& gt_classes,
...@@ -355,7 +354,7 @@ std::vector<Tensor> SampleRoisForOneImage( ...@@ -355,7 +354,7 @@ std::vector<Tensor> SampleRoisForOneImage(
const phi::DenseTensor& max_overlap) { const phi::DenseTensor& max_overlap) {
// 1.1 map to original image // 1.1 map to original image
auto im_scale = im_info.data<T>()[2]; auto im_scale = im_info.data<T>()[2];
Tensor rpn_rois; phi::DenseTensor rpn_rois;
rpn_rois.mutable_data<T>(rpn_rois_in.dims(), context.GetPlace()); rpn_rois.mutable_data<T>(rpn_rois_in.dims(), context.GetPlace());
const T* rpn_rois_in_dt = rpn_rois_in.data<T>(); const T* rpn_rois_in_dt = rpn_rois_in.data<T>();
T* rpn_rois_dt = rpn_rois.data<T>(); T* rpn_rois_dt = rpn_rois.data<T>();
...@@ -367,10 +366,10 @@ std::vector<Tensor> SampleRoisForOneImage( ...@@ -367,10 +366,10 @@ std::vector<Tensor> SampleRoisForOneImage(
int proposals_num = 1; int proposals_num = 1;
if (is_cascade_rcnn) { if (is_cascade_rcnn) {
Tensor keep; phi::DenseTensor keep;
FilterRoIs<T>(context, rpn_rois, max_overlap, &keep); FilterRoIs<T>(context, rpn_rois, max_overlap, &keep);
Tensor roi_filter; phi::DenseTensor roi_filter;
// Tensor box_filter; // phi::DenseTensor box_filter;
if (keep.numel() == 0) { if (keep.numel() == 0) {
phi::funcs::SetConstant<phi::CPUContext, T> set_zero; phi::funcs::SetConstant<phi::CPUContext, T> set_zero;
roi_filter.mutable_data<T>({proposals_num, kBoxDim}, context.GetPlace()); roi_filter.mutable_data<T>({proposals_num, kBoxDim}, context.GetPlace());
...@@ -389,16 +388,16 @@ std::vector<Tensor> SampleRoisForOneImage( ...@@ -389,16 +388,16 @@ std::vector<Tensor> SampleRoisForOneImage(
// 1.2 compute overlaps // 1.2 compute overlaps
proposals_num += gt_boxes.dims()[0]; proposals_num += gt_boxes.dims()[0];
Tensor proposal_to_gt_overlaps; phi::DenseTensor proposal_to_gt_overlaps;
proposal_to_gt_overlaps.mutable_data<T>({proposals_num, gt_boxes.dims()[0]}, proposal_to_gt_overlaps.mutable_data<T>({proposals_num, gt_boxes.dims()[0]},
context.GetPlace()); context.GetPlace());
Tensor boxes; phi::DenseTensor boxes;
boxes.mutable_data<T>({proposals_num, kBoxDim}, context.GetPlace()); boxes.mutable_data<T>({proposals_num, kBoxDim}, context.GetPlace());
Concat<T>(context, gt_boxes, rpn_rois, &boxes); Concat<T>(context, gt_boxes, rpn_rois, &boxes);
BboxOverlaps<T>(boxes, gt_boxes, &proposal_to_gt_overlaps); BboxOverlaps<T>(boxes, gt_boxes, &proposal_to_gt_overlaps);
Tensor proposal_with_max_overlap; phi::DenseTensor proposal_with_max_overlap;
proposal_with_max_overlap.mutable_data<T>({proposals_num}, proposal_with_max_overlap.mutable_data<T>({proposals_num},
context.GetPlace()); context.GetPlace());
...@@ -423,7 +422,8 @@ std::vector<Tensor> SampleRoisForOneImage( ...@@ -423,7 +422,8 @@ std::vector<Tensor> SampleRoisForOneImage(
std::vector<int> mapped_gt_inds = fg_bg_gt[2]; // mapped_gt_labels std::vector<int> mapped_gt_inds = fg_bg_gt[2]; // mapped_gt_labels
// Gather boxes and labels // Gather boxes and labels
Tensor sampled_boxes, sampled_labels, sampled_gts, sampled_max_overlap; phi::DenseTensor sampled_boxes, sampled_labels, sampled_gts,
sampled_max_overlap;
int fg_num = fg_inds.size(); int fg_num = fg_inds.size();
int bg_num = bg_inds.size(); int bg_num = bg_inds.size();
int boxes_num = fg_num + bg_num; int boxes_num = fg_num + bg_num;
...@@ -446,7 +446,7 @@ std::vector<Tensor> SampleRoisForOneImage( ...@@ -446,7 +446,7 @@ std::vector<Tensor> SampleRoisForOneImage(
&sampled_max_overlap); &sampled_max_overlap);
// Compute targets // Compute targets
Tensor bbox_targets_single; phi::DenseTensor bbox_targets_single;
bbox_targets_single.mutable_data<T>(bbox_dim, context.GetPlace()); bbox_targets_single.mutable_data<T>(bbox_dim, context.GetPlace());
BoxToDelta<T>(fg_num, BoxToDelta<T>(fg_num,
sampled_boxes, sampled_boxes,
...@@ -456,14 +456,14 @@ std::vector<Tensor> SampleRoisForOneImage( ...@@ -456,14 +456,14 @@ std::vector<Tensor> SampleRoisForOneImage(
&bbox_targets_single); &bbox_targets_single);
// Scale rois // Scale rois
Tensor sampled_rois; phi::DenseTensor sampled_rois;
sampled_rois.mutable_data<T>(sampled_boxes.dims(), context.GetPlace()); sampled_rois.mutable_data<T>(sampled_boxes.dims(), context.GetPlace());
auto sampled_rois_et = framework::EigenTensor<T, 2>::From(sampled_rois); auto sampled_rois_et = framework::EigenTensor<T, 2>::From(sampled_rois);
auto sampled_boxes_et = framework::EigenTensor<T, 2>::From(sampled_boxes); auto sampled_boxes_et = framework::EigenTensor<T, 2>::From(sampled_boxes);
sampled_rois_et = sampled_boxes_et * im_scale; sampled_rois_et = sampled_boxes_et * im_scale;
// Expand box targets // Expand box targets
Tensor bbox_targets, bbox_inside_weights, bbox_outside_weights; phi::DenseTensor bbox_targets, bbox_inside_weights, bbox_outside_weights;
framework::DDim bbox_expand_dim({boxes_num, kBoxDim * class_nums}); framework::DDim bbox_expand_dim({boxes_num, kBoxDim * class_nums});
bbox_targets.mutable_data<T>(bbox_expand_dim, context.GetPlace()); bbox_targets.mutable_data<T>(bbox_expand_dim, context.GetPlace());
bbox_inside_weights.mutable_data<T>(bbox_expand_dim, context.GetPlace()); bbox_inside_weights.mutable_data<T>(bbox_expand_dim, context.GetPlace());
...@@ -500,7 +500,7 @@ std::vector<Tensor> SampleRoisForOneImage( ...@@ -500,7 +500,7 @@ std::vector<Tensor> SampleRoisForOneImage(
bbox_outside_weights_data[dst_idx + 3] = 1; bbox_outside_weights_data[dst_idx + 3] = 1;
} }
} }
std::vector<Tensor> res; std::vector<phi::DenseTensor> res;
res.emplace_back(sampled_rois); res.emplace_back(sampled_rois);
res.emplace_back(sampled_labels); res.emplace_back(sampled_labels);
res.emplace_back(bbox_targets); res.emplace_back(bbox_targets);
...@@ -610,16 +610,16 @@ class GenerateProposalLabelsKernel : public framework::OpKernel<T> { ...@@ -610,16 +610,16 @@ class GenerateProposalLabelsKernel : public framework::OpKernel<T> {
lod0.emplace_back(num_rois); lod0.emplace_back(num_rois);
continue; continue;
} }
Tensor rpn_rois_slice = phi::DenseTensor rpn_rois_slice =
rpn_rois->Slice(rpn_rois_lod[i], rpn_rois_lod[i + 1]); rpn_rois->Slice(rpn_rois_lod[i], rpn_rois_lod[i + 1]);
Tensor gt_classes_slice = phi::DenseTensor gt_classes_slice =
gt_classes->Slice(gt_classes_lod[i], gt_classes_lod[i + 1]); gt_classes->Slice(gt_classes_lod[i], gt_classes_lod[i + 1]);
Tensor is_crowd_slice = phi::DenseTensor is_crowd_slice =
is_crowd->Slice(is_crowd_lod[i], is_crowd_lod[i + 1]); is_crowd->Slice(is_crowd_lod[i], is_crowd_lod[i + 1]);
Tensor gt_boxes_slice = phi::DenseTensor gt_boxes_slice =
gt_boxes->Slice(gt_boxes_lod[i], gt_boxes_lod[i + 1]); gt_boxes->Slice(gt_boxes_lod[i], gt_boxes_lod[i + 1]);
Tensor im_info_slice = im_info->Slice(i, i + 1); phi::DenseTensor im_info_slice = im_info->Slice(i, i + 1);
Tensor max_overlap_slice; phi::DenseTensor max_overlap_slice;
if (is_cascade_rcnn) { if (is_cascade_rcnn) {
auto* max_overlap = context.Input<phi::DenseTensor>("MaxOverlap"); auto* max_overlap = context.Input<phi::DenseTensor>("MaxOverlap");
max_overlap_slice = max_overlap_slice =
...@@ -628,7 +628,7 @@ class GenerateProposalLabelsKernel : public framework::OpKernel<T> { ...@@ -628,7 +628,7 @@ class GenerateProposalLabelsKernel : public framework::OpKernel<T> {
max_overlap_slice.mutable_data<T>({rpn_rois_slice.dims()[0]}, max_overlap_slice.mutable_data<T>({rpn_rois_slice.dims()[0]},
context.GetPlace()); context.GetPlace());
} }
std::vector<Tensor> tensor_output = std::vector<phi::DenseTensor> tensor_output =
SampleRoisForOneImage<T>(dev_ctx, SampleRoisForOneImage<T>(dev_ctx,
rpn_rois_slice, rpn_rois_slice,
gt_classes_slice, gt_classes_slice,
...@@ -647,12 +647,12 @@ class GenerateProposalLabelsKernel : public framework::OpKernel<T> { ...@@ -647,12 +647,12 @@ class GenerateProposalLabelsKernel : public framework::OpKernel<T> {
is_cascade_rcnn, is_cascade_rcnn,
is_cls_agnostic, is_cls_agnostic,
max_overlap_slice); max_overlap_slice);
Tensor sampled_rois = tensor_output[0]; phi::DenseTensor sampled_rois = tensor_output[0];
Tensor sampled_labels_int32 = tensor_output[1]; phi::DenseTensor sampled_labels_int32 = tensor_output[1];
Tensor sampled_bbox_targets = tensor_output[2]; phi::DenseTensor sampled_bbox_targets = tensor_output[2];
Tensor sampled_bbox_inside_weights = tensor_output[3]; phi::DenseTensor sampled_bbox_inside_weights = tensor_output[3];
Tensor sampled_bbox_outside_weights = tensor_output[4]; phi::DenseTensor sampled_bbox_outside_weights = tensor_output[4];
Tensor sampled_max_overlap = tensor_output[5]; phi::DenseTensor sampled_max_overlap = tensor_output[5];
AppendRois<T>(rois, kBoxDim * num_rois, &sampled_rois); AppendRois<T>(rois, kBoxDim * num_rois, &sampled_rois);
AppendRois<int>(labels_int32, num_rois, &sampled_labels_int32); AppendRois<int>(labels_int32, num_rois, &sampled_labels_int32);
......
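The expansion step at the end of SampleRoisForOneImage scatters each foreground roi's 4-dim regression target into the 4 * class_nums-wide row for its class: background rois (label 0) keep an all-zero row, and is_cls_agnostic collapses every foreground class onto a single slot. A scalar sketch under those assumptions:

// bbox_targets_single is [boxes_num, 4]; targets / inside_w / outside_w are
// pre-zeroed [boxes_num, 4 * class_nums] buffers, as in the kernel above.
void ExpandBoxTargets(const float* single, const int* labels, int boxes_num,
                      int class_nums, bool is_cls_agnostic,
                      float* targets, float* inside_w, float* outside_w) {
  const int kBoxDim = 4;
  for (int i = 0; i < boxes_num; ++i) {
    int label = labels[i];
    if (label <= 0) continue;  // background roi: leave the row at zero
    if (is_cls_agnostic) label = 1;  // assumption: all fg classes share slot 1
    const int dst = i * kBoxDim * class_nums + kBoxDim * label;
    for (int k = 0; k < kBoxDim; ++k) {
      targets[dst + k] = single[i * kBoxDim + k];
      inside_w[dst + k] = 1.0f;
      outside_w[dst + k] = 1.0f;
    }
  }
}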
...@@ -27,8 +27,6 @@ limitations under the License. */ ...@@ -27,8 +27,6 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = phi::DenseTensor;
class GenerateProposalsOp : public framework::OperatorWithKernel { class GenerateProposalsOp : public framework::OperatorWithKernel {
public: public:
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
...@@ -115,7 +113,7 @@ class GenerateProposalsKernel : public framework::OpKernel<T> { ...@@ -115,7 +113,7 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
context.GetPlace()); context.GetPlace());
rpn_roi_probs->mutable_data<T>({scores->numel(), 1}, context.GetPlace()); rpn_roi_probs->mutable_data<T>({scores->numel(), 1}, context.GetPlace());
Tensor bbox_deltas_swap, scores_swap; phi::DenseTensor bbox_deltas_swap, scores_swap;
bbox_deltas_swap.mutable_data<T>({num, h_bbox, w_bbox, c_bbox}, bbox_deltas_swap.mutable_data<T>({num, h_bbox, w_bbox, c_bbox},
dev_ctx.GetPlace()); dev_ctx.GetPlace());
scores_swap.mutable_data<T>({num, h_score, w_score, c_score}, scores_swap.mutable_data<T>({num, h_score, w_score, c_score},
...@@ -136,14 +134,14 @@ class GenerateProposalsKernel : public framework::OpKernel<T> { ...@@ -136,14 +134,14 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
int64_t num_proposals = 0; int64_t num_proposals = 0;
for (int64_t i = 0; i < num; ++i) { for (int64_t i = 0; i < num; ++i) {
Tensor im_info_slice = im_info->Slice(i, i + 1); phi::DenseTensor im_info_slice = im_info->Slice(i, i + 1);
Tensor bbox_deltas_slice = bbox_deltas_swap.Slice(i, i + 1); phi::DenseTensor bbox_deltas_slice = bbox_deltas_swap.Slice(i, i + 1);
Tensor scores_slice = scores_swap.Slice(i, i + 1); phi::DenseTensor scores_slice = scores_swap.Slice(i, i + 1);
bbox_deltas_slice.Resize({h_bbox * w_bbox * c_bbox / 4, 4}); bbox_deltas_slice.Resize({h_bbox * w_bbox * c_bbox / 4, 4});
scores_slice.Resize({h_score * w_score * c_score, 1}); scores_slice.Resize({h_score * w_score * c_score, 1});
std::pair<Tensor, Tensor> tensor_pair = std::pair<phi::DenseTensor, phi::DenseTensor> tensor_pair =
ProposalForOneImage(dev_ctx, ProposalForOneImage(dev_ctx,
im_info_slice, im_info_slice,
anchors, anchors,
...@@ -155,8 +153,8 @@ class GenerateProposalsKernel : public framework::OpKernel<T> { ...@@ -155,8 +153,8 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
nms_thresh, nms_thresh,
min_size, min_size,
eta); eta);
Tensor &proposals = tensor_pair.first; phi::DenseTensor &proposals = tensor_pair.first;
Tensor &scores = tensor_pair.second; phi::DenseTensor &scores = tensor_pair.second;
AppendProposals(rpn_rois, 4 * num_proposals, proposals); AppendProposals(rpn_rois, 4 * num_proposals, proposals);
AppendProposals(rpn_roi_probs, num_proposals, scores); AppendProposals(rpn_roi_probs, num_proposals, scores);
...@@ -179,13 +177,13 @@ class GenerateProposalsKernel : public framework::OpKernel<T> { ...@@ -179,13 +177,13 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
rpn_roi_probs->Resize({num_proposals, 1}); rpn_roi_probs->Resize({num_proposals, 1});
} }
std::pair<Tensor, Tensor> ProposalForOneImage( std::pair<phi::DenseTensor, phi::DenseTensor> ProposalForOneImage(
const phi::CPUContext &ctx, const phi::CPUContext &ctx,
const Tensor &im_info_slice, const phi::DenseTensor &im_info_slice,
const Tensor &anchors, const phi::DenseTensor &anchors,
const Tensor &variances, const phi::DenseTensor &variances,
const Tensor &bbox_deltas_slice, // [M, 4] const phi::DenseTensor &bbox_deltas_slice, // [M, 4]
const Tensor &scores_slice, // [N, 1] const phi::DenseTensor &scores_slice, // [N, 1]
int pre_nms_top_n, int pre_nms_top_n,
int post_nms_top_n, int post_nms_top_n,
float nms_thresh, float nms_thresh,
...@@ -194,7 +192,7 @@ class GenerateProposalsKernel : public framework::OpKernel<T> { ...@@ -194,7 +192,7 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
auto *scores_data = scores_slice.data<T>(); auto *scores_data = scores_slice.data<T>();
// Sort index // Sort index
Tensor index_t; phi::DenseTensor index_t;
index_t.Resize({scores_slice.numel()}); index_t.Resize({scores_slice.numel()});
int *index = index_t.mutable_data<int>(ctx.GetPlace()); int *index = index_t.mutable_data<int>(ctx.GetPlace());
for (int i = 0; i < scores_slice.numel(); ++i) { for (int i = 0; i < scores_slice.numel(); ++i) {
...@@ -212,7 +210,7 @@ class GenerateProposalsKernel : public framework::OpKernel<T> { ...@@ -212,7 +210,7 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
index_t.Resize({pre_nms_top_n}); index_t.Resize({pre_nms_top_n});
} }
Tensor scores_sel, bbox_sel, anchor_sel, var_sel; phi::DenseTensor scores_sel, bbox_sel, anchor_sel, var_sel;
scores_sel.mutable_data<T>({index_t.numel(), 1}, ctx.GetPlace()); scores_sel.mutable_data<T>({index_t.numel(), 1}, ctx.GetPlace());
bbox_sel.mutable_data<T>({index_t.numel(), 4}, ctx.GetPlace()); bbox_sel.mutable_data<T>({index_t.numel(), 4}, ctx.GetPlace());
anchor_sel.mutable_data<T>({index_t.numel(), 4}, ctx.GetPlace()); anchor_sel.mutable_data<T>({index_t.numel(), 4}, ctx.GetPlace());
...@@ -223,26 +221,26 @@ class GenerateProposalsKernel : public framework::OpKernel<T> { ...@@ -223,26 +221,26 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
phi::funcs::CPUGather<T>(ctx, anchors, index_t, &anchor_sel); phi::funcs::CPUGather<T>(ctx, anchors, index_t, &anchor_sel);
phi::funcs::CPUGather<T>(ctx, variances, index_t, &var_sel); phi::funcs::CPUGather<T>(ctx, variances, index_t, &var_sel);
Tensor proposals; phi::DenseTensor proposals;
proposals.mutable_data<T>({index_t.numel(), 4}, ctx.GetPlace()); proposals.mutable_data<T>({index_t.numel(), 4}, ctx.GetPlace());
BoxCoder<T>(ctx, &anchor_sel, &bbox_sel, &var_sel, &proposals); BoxCoder<T>(ctx, &anchor_sel, &bbox_sel, &var_sel, &proposals);
ClipTiledBoxes<T>(ctx, im_info_slice, proposals, &proposals, false); ClipTiledBoxes<T>(ctx, im_info_slice, proposals, &proposals, false);
Tensor keep; phi::DenseTensor keep;
FilterBoxes<T>(ctx, &proposals, min_size, im_info_slice, true, &keep); FilterBoxes<T>(ctx, &proposals, min_size, im_info_slice, true, &keep);
// Handle the case when there is no keep index left // Handle the case when there is no keep index left
if (keep.numel() == 0) { if (keep.numel() == 0) {
phi::funcs::SetConstant<phi::CPUContext, T> set_zero; phi::funcs::SetConstant<phi::CPUContext, T> set_zero;
bbox_sel.mutable_data<T>({1, 4}, ctx.GetPlace()); bbox_sel.mutable_data<T>({1, 4}, ctx.GetPlace());
set_zero(ctx, &bbox_sel, static_cast<T>(0)); set_zero(ctx, &bbox_sel, static_cast<T>(0));
Tensor scores_filter; phi::DenseTensor scores_filter;
scores_filter.mutable_data<T>({1, 1}, ctx.GetPlace()); scores_filter.mutable_data<T>({1, 1}, ctx.GetPlace());
set_zero(ctx, &scores_filter, static_cast<T>(0)); set_zero(ctx, &scores_filter, static_cast<T>(0));
return std::make_pair(bbox_sel, scores_filter); return std::make_pair(bbox_sel, scores_filter);
} }
Tensor scores_filter; phi::DenseTensor scores_filter;
bbox_sel.mutable_data<T>({keep.numel(), 4}, ctx.GetPlace()); bbox_sel.mutable_data<T>({keep.numel(), 4}, ctx.GetPlace());
scores_filter.mutable_data<T>({keep.numel(), 1}, ctx.GetPlace()); scores_filter.mutable_data<T>({keep.numel(), 1}, ctx.GetPlace());
phi::funcs::CPUGather<T>(ctx, proposals, keep, &bbox_sel); phi::funcs::CPUGather<T>(ctx, proposals, keep, &bbox_sel);
...@@ -251,7 +249,7 @@ class GenerateProposalsKernel : public framework::OpKernel<T> { ...@@ -251,7 +249,7 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
return std::make_pair(bbox_sel, scores_filter); return std::make_pair(bbox_sel, scores_filter);
} }
Tensor keep_nms = phi::DenseTensor keep_nms =
phi::funcs::NMS<T>(ctx, &bbox_sel, &scores_filter, nms_thresh, eta); phi::funcs::NMS<T>(ctx, &bbox_sel, &scores_filter, nms_thresh, eta);
if (post_nms_top_n > 0 && post_nms_top_n < keep_nms.numel()) { if (post_nms_top_n > 0 && post_nms_top_n < keep_nms.numel()) {
......
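The CPU ProposalForOneImage above builds an index array over the scores and keeps only the pre_nms_top_n highest entries before gathering boxes, anchors, and variances. A minimal standalone sketch of that selection step, using plain std:: containers instead of phi::DenseTensor; TopScoringIndices and its signature are illustrative, not part of the Paddle API.

    #include <algorithm>
    #include <numeric>
    #include <vector>

    // Indices of the pre_nms_top_n highest scores, in descending score order.
    // A full sort is used when pre_nms_top_n is non-positive or >= the count,
    // mirroring the pre-NMS branch of the kernel above.
    std::vector<int> TopScoringIndices(const std::vector<float>& scores,
                                       int pre_nms_top_n) {
      std::vector<int> index(scores.size());
      std::iota(index.begin(), index.end(), 0);  // 0, 1, ..., N-1
      auto by_score_desc = [&](int a, int b) { return scores[a] > scores[b]; };
      if (pre_nms_top_n <= 0 ||
          pre_nms_top_n >= static_cast<int>(scores.size())) {
        std::sort(index.begin(), index.end(), by_score_desc);
      } else {
        std::partial_sort(index.begin(), index.begin() + pre_nms_top_n,
                          index.end(), by_score_desc);
        index.resize(pre_nms_top_n);  // keep only the top-n candidates
      }
      return index;
    }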
...@@ -28,24 +28,22 @@ limitations under the License. */ ...@@ -28,24 +28,22 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = phi::DenseTensor;
namespace { namespace {
template <typename T> template <typename T>
static std::pair<Tensor, Tensor> ProposalForOneImage( static std::pair<phi::DenseTensor, phi::DenseTensor> ProposalForOneImage(
const phi::GPUContext &ctx, const phi::GPUContext &ctx,
const Tensor &im_info, const phi::DenseTensor &im_info,
const Tensor &anchors, const phi::DenseTensor &anchors,
const Tensor &variances, const phi::DenseTensor &variances,
const Tensor &bbox_deltas, // [M, 4] const phi::DenseTensor &bbox_deltas, // [M, 4]
const Tensor &scores, // [N, 1] const phi::DenseTensor &scores, // [N, 1]
int pre_nms_top_n, int pre_nms_top_n,
int post_nms_top_n, int post_nms_top_n,
float nms_thresh, float nms_thresh,
float min_size, float min_size,
float eta) { float eta) {
// 1. pre nms // 1. pre nms
Tensor scores_sort, index_sort; phi::DenseTensor scores_sort, index_sort;
SortDescending<T>(ctx, scores, &scores_sort, &index_sort); SortDescending<T>(ctx, scores, &scores_sort, &index_sort);
int num = scores.numel(); int num = scores.numel();
int pre_nms_num = (pre_nms_top_n <= 0 || pre_nms_top_n > num) ? scores.numel() int pre_nms_num = (pre_nms_top_n <= 0 || pre_nms_top_n > num) ? scores.numel()
...@@ -54,7 +52,7 @@ static std::pair<Tensor, Tensor> ProposalForOneImage( ...@@ -54,7 +52,7 @@ static std::pair<Tensor, Tensor> ProposalForOneImage(
index_sort.Resize({pre_nms_num, 1}); index_sort.Resize({pre_nms_num, 1});
// 2. box decode and clipping // 2. box decode and clipping
Tensor proposals; phi::DenseTensor proposals;
proposals.mutable_data<T>({pre_nms_num, 4}, ctx.GetPlace()); proposals.mutable_data<T>({pre_nms_num, 4}, ctx.GetPlace());
{ {
...@@ -68,7 +66,7 @@ static std::pair<Tensor, Tensor> ProposalForOneImage( ...@@ -68,7 +66,7 @@ static std::pair<Tensor, Tensor> ProposalForOneImage(
} }
// 3. filter // 3. filter
Tensor keep_index, keep_num_t; phi::DenseTensor keep_index, keep_num_t;
keep_index.mutable_data<int>({pre_nms_num}, ctx.GetPlace()); keep_index.mutable_data<int>({pre_nms_num}, ctx.GetPlace());
keep_num_t.mutable_data<int>({1}, ctx.GetPlace()); keep_num_t.mutable_data<int>({1}, ctx.GetPlace());
min_size = std::max(min_size, 1.0f); min_size = std::max(min_size, 1.0f);
...@@ -90,7 +88,7 @@ static std::pair<Tensor, Tensor> ProposalForOneImage( ...@@ -90,7 +88,7 @@ static std::pair<Tensor, Tensor> ProposalForOneImage(
ctx.Wait(); ctx.Wait();
keep_index.Resize({keep_num}); keep_index.Resize({keep_num});
Tensor scores_filter, proposals_filter; phi::DenseTensor scores_filter, proposals_filter;
// Handle the case when there is no keep index left // Handle the case when there is no keep index left
if (keep_num == 0) { if (keep_num == 0) {
phi::funcs::SetConstant<phi::GPUContext, T> set_zero; phi::funcs::SetConstant<phi::GPUContext, T> set_zero;
...@@ -110,13 +108,13 @@ static std::pair<Tensor, Tensor> ProposalForOneImage( ...@@ -110,13 +108,13 @@ static std::pair<Tensor, Tensor> ProposalForOneImage(
} }
// 4. nms // 4. nms
Tensor keep_nms; phi::DenseTensor keep_nms;
NMS<T>(ctx, proposals_filter, keep_index, nms_thresh, &keep_nms); NMS<T>(ctx, proposals_filter, keep_index, nms_thresh, &keep_nms);
if (post_nms_top_n > 0 && post_nms_top_n < keep_nms.numel()) { if (post_nms_top_n > 0 && post_nms_top_n < keep_nms.numel()) {
keep_nms.Resize({post_nms_top_n}); keep_nms.Resize({post_nms_top_n});
} }
Tensor scores_nms, proposals_nms; phi::DenseTensor scores_nms, proposals_nms;
proposals_nms.mutable_data<T>({keep_nms.numel(), 4}, ctx.GetPlace()); proposals_nms.mutable_data<T>({keep_nms.numel(), 4}, ctx.GetPlace());
scores_nms.mutable_data<T>({keep_nms.numel(), 1}, ctx.GetPlace()); scores_nms.mutable_data<T>({keep_nms.numel(), 1}, ctx.GetPlace());
phi::funcs::GPUGather<T>(ctx, proposals_filter, keep_nms, &proposals_nms); phi::funcs::GPUGather<T>(ctx, proposals_filter, keep_nms, &proposals_nms);
...@@ -171,7 +169,7 @@ class CUDAGenerateProposalsKernel : public framework::OpKernel<T> { ...@@ -171,7 +169,7 @@ class CUDAGenerateProposalsKernel : public framework::OpKernel<T> {
int64_t h_bbox = bbox_dim[2]; int64_t h_bbox = bbox_dim[2];
int64_t w_bbox = bbox_dim[3]; int64_t w_bbox = bbox_dim[3];
Tensor bbox_deltas_swap, scores_swap; phi::DenseTensor bbox_deltas_swap, scores_swap;
bbox_deltas_swap.mutable_data<T>({num, h_bbox, w_bbox, c_bbox}, bbox_deltas_swap.mutable_data<T>({num, h_bbox, w_bbox, c_bbox},
dev_ctx.GetPlace()); dev_ctx.GetPlace());
scores_swap.mutable_data<T>({num, h_score, w_score, c_score}, scores_swap.mutable_data<T>({num, h_score, w_score, c_score},
...@@ -200,14 +198,14 @@ class CUDAGenerateProposalsKernel : public framework::OpKernel<T> { ...@@ -200,14 +198,14 @@ class CUDAGenerateProposalsKernel : public framework::OpKernel<T> {
std::vector<int> tmp_num; std::vector<int> tmp_num;
for (int64_t i = 0; i < num; ++i) { for (int64_t i = 0; i < num; ++i) {
Tensor im_info_slice = im_info->Slice(i, i + 1); phi::DenseTensor im_info_slice = im_info->Slice(i, i + 1);
Tensor bbox_deltas_slice = bbox_deltas_swap.Slice(i, i + 1); phi::DenseTensor bbox_deltas_slice = bbox_deltas_swap.Slice(i, i + 1);
Tensor scores_slice = scores_swap.Slice(i, i + 1); phi::DenseTensor scores_slice = scores_swap.Slice(i, i + 1);
bbox_deltas_slice.Resize({h_bbox * w_bbox * c_bbox / 4, 4}); bbox_deltas_slice.Resize({h_bbox * w_bbox * c_bbox / 4, 4});
scores_slice.Resize({h_score * w_score * c_score, 1}); scores_slice.Resize({h_score * w_score * c_score, 1});
std::pair<Tensor, Tensor> box_score_pair = std::pair<phi::DenseTensor, phi::DenseTensor> box_score_pair =
ProposalForOneImage<T>(dev_ctx, ProposalForOneImage<T>(dev_ctx,
im_info_slice, im_info_slice,
anchors, anchors,
...@@ -220,8 +218,8 @@ class CUDAGenerateProposalsKernel : public framework::OpKernel<T> { ...@@ -220,8 +218,8 @@ class CUDAGenerateProposalsKernel : public framework::OpKernel<T> {
min_size, min_size,
eta); eta);
Tensor &proposals = box_score_pair.first; phi::DenseTensor &proposals = box_score_pair.first;
Tensor &scores = box_score_pair.second; phi::DenseTensor &scores = box_score_pair.second;
memory::Copy(place, memory::Copy(place,
rpn_rois_data + num_proposals * 4, rpn_rois_data + num_proposals * 4,
......
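The GPU path ends with the same greedy suppression that the CPU kernel delegates to phi::funcs::NMS. A self-contained CPU sketch of greedy NMS over score-sorted, corner-encoded boxes; Box, IoU, and GreedyNMS are illustrative names, not the phi implementation.

    #include <cmath>
    #include <vector>

    struct Box { float xmin, ymin, xmax, ymax; };

    // Intersection-over-union of two corner-encoded boxes.
    static float IoU(const Box& a, const Box& b) {
      float ix = std::fmax(0.f, std::fmin(a.xmax, b.xmax) - std::fmax(a.xmin, b.xmin));
      float iy = std::fmax(0.f, std::fmin(a.ymax, b.ymax) - std::fmax(a.ymin, b.ymin));
      float inter = ix * iy;
      float area_a = (a.xmax - a.xmin) * (a.ymax - a.ymin);
      float area_b = (b.xmax - b.xmin) * (b.ymax - b.ymin);
      float denom = area_a + area_b - inter;
      return denom > 0.f ? inter / denom : 0.f;
    }

    // Greedy NMS over boxes already sorted by descending score; returns the
    // indices that survive suppression.
    std::vector<int> GreedyNMS(const std::vector<Box>& sorted_boxes,
                               float nms_thresh) {
      std::vector<int> keep;
      std::vector<bool> suppressed(sorted_boxes.size(), false);
      for (size_t i = 0; i < sorted_boxes.size(); ++i) {
        if (suppressed[i]) continue;
        keep.push_back(static_cast<int>(i));
        for (size_t j = i + 1; j < sorted_boxes.size(); ++j) {
          if (!suppressed[j] &&
              IoU(sorted_boxes[i], sorted_boxes[j]) > nms_thresh)
            suppressed[j] = true;
        }
      }
      return keep;
    }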
...@@ -29,8 +29,6 @@ limitations under the License. */ ...@@ -29,8 +29,6 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = phi::DenseTensor;
class GenerateProposalsV2Op : public framework::OperatorWithKernel { class GenerateProposalsV2Op : public framework::OperatorWithKernel {
public: public:
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
......
...@@ -18,8 +18,6 @@ limitations under the License. */ ...@@ -18,8 +18,6 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = phi::DenseTensor;
template <typename T> template <typename T>
struct IouFunction { struct IouFunction {
public: public:
...@@ -182,21 +180,21 @@ class IouSimilarityMLUKernel : public framework::OpKernel<T> { ...@@ -182,21 +180,21 @@ class IouSimilarityMLUKernel : public framework::OpKernel<T> {
auto M = y->dims()[0]; auto M = y->dims()[0];
out->mutable_data<T>({N, M}, place); out->mutable_data<T>({N, M}, place);
Tensor xt(_type); phi::DenseTensor xt(_type);
Tensor yt(_type); phi::DenseTensor yt(_type);
xt.mutable_data<T>({4, N}, place); xt.mutable_data<T>({4, N}, place);
yt.mutable_data<T>({4, M}, place); yt.mutable_data<T>({4, M}, place);
std::vector<int> vec_trans = {1, 0}; std::vector<int> vec_trans = {1, 0};
F.Transpose(x, &xt, vec_trans); F.Transpose(x, &xt, vec_trans);
F.Transpose(y, &yt, vec_trans); F.Transpose(y, &yt, vec_trans);
Tensor xmin1 = xt.Slice(0, 1); phi::DenseTensor xmin1 = xt.Slice(0, 1);
Tensor ymin1 = xt.Slice(1, 2); phi::DenseTensor ymin1 = xt.Slice(1, 2);
Tensor xmax1 = xt.Slice(2, 3); phi::DenseTensor xmax1 = xt.Slice(2, 3);
Tensor ymax1 = xt.Slice(3, 4); phi::DenseTensor ymax1 = xt.Slice(3, 4);
Tensor xmin2 = yt.Slice(0, 1); phi::DenseTensor xmin2 = yt.Slice(0, 1);
Tensor ymin2 = yt.Slice(1, 2); phi::DenseTensor ymin2 = yt.Slice(1, 2);
Tensor xmax2 = yt.Slice(2, 3); phi::DenseTensor xmax2 = yt.Slice(2, 3);
Tensor ymax2 = yt.Slice(3, 4); phi::DenseTensor ymax2 = yt.Slice(3, 4);
xmin1.Resize({N, 1}); xmin1.Resize({N, 1});
ymin1.Resize({N, 1}); ymin1.Resize({N, 1});
xmax1.Resize({N, 1}); xmax1.Resize({N, 1});
...@@ -206,12 +204,12 @@ class IouSimilarityMLUKernel : public framework::OpKernel<T> { ...@@ -206,12 +204,12 @@ class IouSimilarityMLUKernel : public framework::OpKernel<T> {
xmax2.Resize({1, M}); xmax2.Resize({1, M});
ymax2.Resize({1, M}); ymax2.Resize({1, M});
Tensor w1(_type); phi::DenseTensor w1(_type);
Tensor h1(_type); phi::DenseTensor h1(_type);
Tensor w2(_type); phi::DenseTensor w2(_type);
Tensor h2(_type); phi::DenseTensor h2(_type);
Tensor area1(_type); phi::DenseTensor area1(_type);
Tensor area2(_type); phi::DenseTensor area2(_type);
w1.mutable_data<T>({N, 1}, place); w1.mutable_data<T>({N, 1}, place);
h1.mutable_data<T>({N, 1}, place); h1.mutable_data<T>({N, 1}, place);
w2.mutable_data<T>({1, M}, place); w2.mutable_data<T>({1, M}, place);
...@@ -231,10 +229,10 @@ class IouSimilarityMLUKernel : public framework::OpKernel<T> { ...@@ -231,10 +229,10 @@ class IouSimilarityMLUKernel : public framework::OpKernel<T> {
F.Mul(&w1, &h1, &area1); F.Mul(&w1, &h1, &area1);
F.Mul(&w2, &h2, &area2); F.Mul(&w2, &h2, &area2);
Tensor inter_xmax(_type); phi::DenseTensor inter_xmax(_type);
Tensor inter_ymax(_type); phi::DenseTensor inter_ymax(_type);
Tensor inter_xmin(_type); phi::DenseTensor inter_xmin(_type);
Tensor inter_ymin(_type); phi::DenseTensor inter_ymin(_type);
inter_xmax.mutable_data<T>({N, M}, place); inter_xmax.mutable_data<T>({N, M}, place);
inter_ymax.mutable_data<T>({N, M}, place); inter_ymax.mutable_data<T>({N, M}, place);
inter_xmin.mutable_data<T>({N, M}, place); inter_xmin.mutable_data<T>({N, M}, place);
...@@ -244,8 +242,8 @@ class IouSimilarityMLUKernel : public framework::OpKernel<T> { ...@@ -244,8 +242,8 @@ class IouSimilarityMLUKernel : public framework::OpKernel<T> {
F.Maximum(&xmin1, &xmin2, &inter_xmin); F.Maximum(&xmin1, &xmin2, &inter_xmin);
F.Maximum(&ymin1, &ymin2, &inter_ymin); F.Maximum(&ymin1, &ymin2, &inter_ymin);
Tensor inter_w(_type); phi::DenseTensor inter_w(_type);
Tensor inter_h(_type); phi::DenseTensor inter_h(_type);
inter_w.mutable_data<T>({N, M}, place); inter_w.mutable_data<T>({N, M}, place);
inter_h.mutable_data<T>({N, M}, place); inter_h.mutable_data<T>({N, M}, place);
F.Sub(&inter_xmax, &inter_xmin, &inter_w); F.Sub(&inter_xmax, &inter_xmin, &inter_w);
...@@ -255,14 +253,14 @@ class IouSimilarityMLUKernel : public framework::OpKernel<T> { ...@@ -255,14 +253,14 @@ class IouSimilarityMLUKernel : public framework::OpKernel<T> {
F.Adds(&inter_w, 1.0f, &inter_w); F.Adds(&inter_w, 1.0f, &inter_w);
F.Adds(&inter_h, 1.0f, &inter_h); F.Adds(&inter_h, 1.0f, &inter_h);
} }
Tensor zeros(_type); phi::DenseTensor zeros(_type);
zeros.mutable_data<T>({1}, place); zeros.mutable_data<T>({1}, place);
FillMLUTensorWithHostValue<T>(ctx, static_cast<T>(0), &zeros); FillMLUTensorWithHostValue<T>(ctx, static_cast<T>(0), &zeros);
F.Maximum(&inter_w, &zeros, &inter_w); F.Maximum(&inter_w, &zeros, &inter_w);
F.Maximum(&inter_h, &zeros, &inter_h); F.Maximum(&inter_h, &zeros, &inter_h);
F.Mul(&inter_w, &inter_h, out); F.Mul(&inter_w, &inter_h, out);
Tensor union_area(_type); phi::DenseTensor union_area(_type);
union_area.mutable_data<T>({N, M}, place); union_area.mutable_data<T>({N, M}, place);
F.Add(&area1, &area2, &union_area); F.Add(&area1, &area2, &union_area);
F.Sub(&union_area, out, &union_area); F.Sub(&union_area, out, &union_area);
......
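The MLU kernel composes the N x M IoU matrix out of elementwise Transpose/Maximum/Minimum/Mul/Sub/Div calls on sliced coordinate rows. The same arithmetic as straight C++ loops, assuming normalized coordinates (the F.Adds(..., 1.0f, ...) pixel offset for non-normalized boxes is omitted); Box4 and IouMatrix are illustrative names.

    #include <algorithm>
    #include <array>
    #include <vector>

    using Box4 = std::array<float, 4>;  // {xmin, ymin, xmax, ymax}

    // N x M IoU matrix (row-major), following the same elementwise recipe as
    // the kernel above: per-box areas, pairwise intersection corners via
    // max/min, negative widths/heights clamped to zero, then inter / union.
    std::vector<float> IouMatrix(const std::vector<Box4>& x,
                                 const std::vector<Box4>& y) {
      const size_t n = x.size(), m = y.size();
      std::vector<float> out(n * m);
      for (size_t i = 0; i < n; ++i) {
        const float area1 = (x[i][2] - x[i][0]) * (x[i][3] - x[i][1]);
        for (size_t j = 0; j < m; ++j) {
          const float area2 = (y[j][2] - y[j][0]) * (y[j][3] - y[j][1]);
          const float w = std::max(0.f, std::min(x[i][2], y[j][2]) -
                                        std::max(x[i][0], y[j][0]));
          const float h = std::max(0.f, std::min(x[i][3], y[j][3]) -
                                        std::max(x[i][1], y[j][1]));
          const float inter = w * h;
          const float uni = area1 + area2 - inter;
          out[i * m + j] = uni > 0.f ? inter / uni : 0.f;
        }
      }
      return out;
    }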
...@@ -18,8 +18,6 @@ limitations under the License. */ ...@@ -18,8 +18,6 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = phi::DenseTensor;
template <typename T> template <typename T>
struct IouFunction { struct IouFunction {
public: public:
...@@ -108,21 +106,21 @@ class IouSimilarityNPUKernel : public framework::OpKernel<T> { ...@@ -108,21 +106,21 @@ class IouSimilarityNPUKernel : public framework::OpKernel<T> {
auto M = y->dims()[0]; auto M = y->dims()[0];
out->mutable_data<T>({N, M}, place); out->mutable_data<T>({N, M}, place);
Tensor xt(_type); phi::DenseTensor xt(_type);
Tensor yt(_type); phi::DenseTensor yt(_type);
xt.mutable_data<T>({4, N}, place); xt.mutable_data<T>({4, N}, place);
yt.mutable_data<T>({4, M}, place); yt.mutable_data<T>({4, M}, place);
std::vector<int> vec_trans = {1, 0}; std::vector<int> vec_trans = {1, 0};
F.Transpose(x, &xt, vec_trans); F.Transpose(x, &xt, vec_trans);
F.Transpose(y, &yt, vec_trans); F.Transpose(y, &yt, vec_trans);
Tensor xmin1 = xt.Slice(0, 1); phi::DenseTensor xmin1 = xt.Slice(0, 1);
Tensor ymin1 = xt.Slice(1, 2); phi::DenseTensor ymin1 = xt.Slice(1, 2);
Tensor xmax1 = xt.Slice(2, 3); phi::DenseTensor xmax1 = xt.Slice(2, 3);
Tensor ymax1 = xt.Slice(3, 4); phi::DenseTensor ymax1 = xt.Slice(3, 4);
Tensor xmin2 = yt.Slice(0, 1); phi::DenseTensor xmin2 = yt.Slice(0, 1);
Tensor ymin2 = yt.Slice(1, 2); phi::DenseTensor ymin2 = yt.Slice(1, 2);
Tensor xmax2 = yt.Slice(2, 3); phi::DenseTensor xmax2 = yt.Slice(2, 3);
Tensor ymax2 = yt.Slice(3, 4); phi::DenseTensor ymax2 = yt.Slice(3, 4);
xmin1.Resize({N, 1}); xmin1.Resize({N, 1});
ymin1.Resize({N, 1}); ymin1.Resize({N, 1});
xmax1.Resize({N, 1}); xmax1.Resize({N, 1});
...@@ -132,12 +130,12 @@ class IouSimilarityNPUKernel : public framework::OpKernel<T> { ...@@ -132,12 +130,12 @@ class IouSimilarityNPUKernel : public framework::OpKernel<T> {
xmax2.Resize({1, M}); xmax2.Resize({1, M});
ymax2.Resize({1, M}); ymax2.Resize({1, M});
Tensor w1(_type); phi::DenseTensor w1(_type);
Tensor h1(_type); phi::DenseTensor h1(_type);
Tensor w2(_type); phi::DenseTensor w2(_type);
Tensor h2(_type); phi::DenseTensor h2(_type);
Tensor area1(_type); phi::DenseTensor area1(_type);
Tensor area2(_type); phi::DenseTensor area2(_type);
w1.mutable_data<T>({N, 1}, place); w1.mutable_data<T>({N, 1}, place);
h1.mutable_data<T>({N, 1}, place); h1.mutable_data<T>({N, 1}, place);
w2.mutable_data<T>({1, M}, place); w2.mutable_data<T>({1, M}, place);
...@@ -157,10 +155,10 @@ class IouSimilarityNPUKernel : public framework::OpKernel<T> { ...@@ -157,10 +155,10 @@ class IouSimilarityNPUKernel : public framework::OpKernel<T> {
F.Mul(&w1, &h1, &area1); F.Mul(&w1, &h1, &area1);
F.Mul(&w2, &h2, &area2); F.Mul(&w2, &h2, &area2);
Tensor inter_xmax(_type); phi::DenseTensor inter_xmax(_type);
Tensor inter_ymax(_type); phi::DenseTensor inter_ymax(_type);
Tensor inter_xmin(_type); phi::DenseTensor inter_xmin(_type);
Tensor inter_ymin(_type); phi::DenseTensor inter_ymin(_type);
inter_xmax.mutable_data<T>({N, M}, place); inter_xmax.mutable_data<T>({N, M}, place);
inter_ymax.mutable_data<T>({N, M}, place); inter_ymax.mutable_data<T>({N, M}, place);
inter_xmin.mutable_data<T>({N, M}, place); inter_xmin.mutable_data<T>({N, M}, place);
...@@ -170,8 +168,8 @@ class IouSimilarityNPUKernel : public framework::OpKernel<T> { ...@@ -170,8 +168,8 @@ class IouSimilarityNPUKernel : public framework::OpKernel<T> {
F.Maximum(&xmin1, &xmin2, &inter_xmin); F.Maximum(&xmin1, &xmin2, &inter_xmin);
F.Maximum(&ymin1, &ymin2, &inter_ymin); F.Maximum(&ymin1, &ymin2, &inter_ymin);
Tensor inter_w(_type); phi::DenseTensor inter_w(_type);
Tensor inter_h(_type); phi::DenseTensor inter_h(_type);
inter_w.mutable_data<T>({N, M}, place); inter_w.mutable_data<T>({N, M}, place);
inter_h.mutable_data<T>({N, M}, place); inter_h.mutable_data<T>({N, M}, place);
F.Sub(&inter_xmax, &inter_xmin, &inter_w); F.Sub(&inter_xmax, &inter_xmin, &inter_w);
...@@ -181,14 +179,14 @@ class IouSimilarityNPUKernel : public framework::OpKernel<T> { ...@@ -181,14 +179,14 @@ class IouSimilarityNPUKernel : public framework::OpKernel<T> {
F.Adds(&inter_w, 1.0f, &inter_w); F.Adds(&inter_w, 1.0f, &inter_w);
F.Adds(&inter_h, 1.0f, &inter_h); F.Adds(&inter_h, 1.0f, &inter_h);
} }
Tensor zeros(_type); phi::DenseTensor zeros(_type);
zeros.mutable_data<T>({1}, place); zeros.mutable_data<T>({1}, place);
FillNpuTensorWithConstant<T>(&zeros, static_cast<T>(0)); FillNpuTensorWithConstant<T>(&zeros, static_cast<T>(0));
F.Maximum(&inter_w, &zeros, &inter_w); F.Maximum(&inter_w, &zeros, &inter_w);
F.Maximum(&inter_h, &zeros, &inter_h); F.Maximum(&inter_h, &zeros, &inter_h);
F.Mul(&inter_w, &inter_h, out); F.Mul(&inter_w, &inter_h, out);
Tensor union_area(_type); phi::DenseTensor union_area(_type);
union_area.mutable_data<T>({N, M}, place); union_area.mutable_data<T>({N, M}, place);
F.Add(&area1, &area2, &union_area); F.Add(&area1, &area2, &union_area);
F.Sub(&union_area, out, &union_area); F.Sub(&union_area, out, &union_area);
......
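The NPU variant repeats that recipe; the only geometric subtlety is the normalized flag handled by the Adds(..., 1.0f, ...) calls above: in pixel coordinates a box spanning xmin..xmax covers xmax - xmin + 1 columns. A one-line restatement, with the helper name being illustrative:

    // Width of a corner-encoded box; pixel (non-normalized) coordinates
    // count both endpoints, hence the extra 1.
    inline float BoxWidth(float xmin, float xmax, bool normalized) {
      return normalized ? (xmax - xmin) : (xmax - xmin + 1.0f);
    }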
...@@ -19,8 +19,6 @@ limitations under the License. */ ...@@ -19,8 +19,6 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = phi::DenseTensor;
class LocalityAwareNMSOp : public framework::OperatorWithKernel { class LocalityAwareNMSOp : public framework::OperatorWithKernel {
public: public:
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
...@@ -252,7 +250,7 @@ class LocalityAwareNMSKernel : public framework::OpKernel<T> { ...@@ -252,7 +250,7 @@ class LocalityAwareNMSKernel : public framework::OpKernel<T> {
int num_det = 0; int num_det = 0;
int64_t class_num = scores->dims()[0]; int64_t class_num = scores->dims()[0];
Tensor bbox_slice, score_slice; phi::DenseTensor bbox_slice, score_slice;
for (int64_t c = 0; c < class_num; ++c) { for (int64_t c = 0; c < class_num; ++c) {
if (c == background_label) continue; if (c == background_label) continue;
...@@ -325,7 +323,7 @@ class LocalityAwareNMSKernel : public framework::OpKernel<T> { ...@@ -325,7 +323,7 @@ class LocalityAwareNMSKernel : public framework::OpKernel<T> {
auto* bboxes_data = bboxes.data<T>(); auto* bboxes_data = bboxes.data<T>();
auto* odata = outs->data<T>(); auto* odata = outs->data<T>();
const T* sdata; const T* sdata;
Tensor bbox; phi::DenseTensor bbox;
bbox.Resize({scores.dims()[0], box_size}); bbox.Resize({scores.dims()[0], box_size});
int count = 0; int count = 0;
for (const auto& it : selected_indices) { for (const auto& it : selected_indices) {
...@@ -370,7 +368,7 @@ class LocalityAwareNMSKernel : public framework::OpKernel<T> { ...@@ -370,7 +368,7 @@ class LocalityAwareNMSKernel : public framework::OpKernel<T> {
int64_t box_dim = boxes.dims()[2]; int64_t box_dim = boxes.dims()[2];
int64_t out_dim = box_dim + 2; int64_t out_dim = box_dim + 2;
int num_nmsed_out = 0; int num_nmsed_out = 0;
Tensor boxes_slice, scores_slice; phi::DenseTensor boxes_slice, scores_slice;
int n = batch_size; int n = batch_size;
for (int i = 0; i < n; ++i) { for (int i = 0; i < n; ++i) {
scores_slice = scores.Slice(i, i + 1); scores_slice = scores.Slice(i, i + 1);
...@@ -407,7 +405,7 @@ class LocalityAwareNMSKernel : public framework::OpKernel<T> { ...@@ -407,7 +405,7 @@ class LocalityAwareNMSKernel : public framework::OpKernel<T> {
int64_t s = batch_starts[i]; int64_t s = batch_starts[i];
int64_t e = batch_starts[i + 1]; int64_t e = batch_starts[i + 1];
if (e > s) { if (e > s) {
Tensor out = outs->Slice(s, e); phi::DenseTensor out = outs->Slice(s, e);
LocalityAwareNMSOutput(dev_ctx, LocalityAwareNMSOutput(dev_ctx,
scores_slice, scores_slice,
boxes_slice, boxes_slice,
......
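LocalityAwareNMS accumulates per-image detection counts into batch_starts and then carves the packed output with outs->Slice(s, e). A sketch of that LoD-style offset bookkeeping, assuming plain std:: types and an illustrative function name:

    #include <cstddef>
    #include <vector>

    // LoD-style offsets from per-image detection counts:
    // {0, n0, n0+n1, ...}; image i owns rows [starts[i], starts[i+1]).
    std::vector<size_t> BatchStarts(const std::vector<size_t>& counts) {
      std::vector<size_t> starts = {0};
      for (size_t c : counts) starts.push_back(starts.back() + c);
      return starts;
    }

The final offset is the total number of kept detections, which is what the kernel uses when it resizes the packed output tensor.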
...@@ -20,8 +20,6 @@ limitations under the License. */ ...@@ -20,8 +20,6 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = phi::DenseTensor;
class MatrixNMSOp : public framework::OperatorWithKernel { class MatrixNMSOp : public framework::OperatorWithKernel {
public: public:
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
......
...@@ -21,8 +21,6 @@ limitations under the License. */ ...@@ -21,8 +21,6 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = phi::DenseTensor;
inline std::vector<size_t> GetNmsLodFromRoisNum( inline std::vector<size_t> GetNmsLodFromRoisNum(
const phi::DenseTensor* rois_num) { const phi::DenseTensor* rois_num) {
std::vector<size_t> rois_lod; std::vector<size_t> rois_lod;
...@@ -228,7 +226,7 @@ class MultiClassNMSKernel : public framework::OpKernel<T> { ...@@ -228,7 +226,7 @@ class MultiClassNMSKernel : public framework::OpKernel<T> {
int num_det = 0; int num_det = 0;
int64_t class_num = scores_size == 3 ? scores.dims()[0] : scores.dims()[1]; int64_t class_num = scores_size == 3 ? scores.dims()[0] : scores.dims()[1];
Tensor bbox_slice, score_slice; phi::DenseTensor bbox_slice, score_slice;
for (int64_t c = 0; c < class_num; ++c) { for (int64_t c = 0; c < class_num; ++c) {
if (c == background_label) continue; if (c == background_label) continue;
if (scores_size == 3) { if (scores_size == 3) {
...@@ -319,7 +317,7 @@ class MultiClassNMSKernel : public framework::OpKernel<T> { ...@@ -319,7 +317,7 @@ class MultiClassNMSKernel : public framework::OpKernel<T> {
auto* bboxes_data = bboxes.data<T>(); auto* bboxes_data = bboxes.data<T>();
auto* odata = outs->data<T>(); auto* odata = outs->data<T>();
const T* sdata; const T* sdata;
Tensor bbox; phi::DenseTensor bbox;
bbox.Resize({scores.dims()[0], box_size}); bbox.Resize({scores.dims()[0], box_size});
int count = 0; int count = 0;
for (const auto& it : selected_indices) { for (const auto& it : selected_indices) {
...@@ -373,7 +371,7 @@ class MultiClassNMSKernel : public framework::OpKernel<T> { ...@@ -373,7 +371,7 @@ class MultiClassNMSKernel : public framework::OpKernel<T> {
int64_t box_dim = boxes->dims()[2]; int64_t box_dim = boxes->dims()[2];
int64_t out_dim = box_dim + 2; int64_t out_dim = box_dim + 2;
int num_nmsed_out = 0; int num_nmsed_out = 0;
Tensor boxes_slice, scores_slice; phi::DenseTensor boxes_slice, scores_slice;
int n = 0; int n = 0;
if (has_roisnum) { if (has_roisnum) {
n = score_size == 3 ? batch_size : rois_num->numel(); n = score_size == 3 ? batch_size : rois_num->numel();
...@@ -449,7 +447,7 @@ class MultiClassNMSKernel : public framework::OpKernel<T> { ...@@ -449,7 +447,7 @@ class MultiClassNMSKernel : public framework::OpKernel<T> {
int64_t s = batch_starts[i]; int64_t s = batch_starts[i];
int64_t e = batch_starts[i + 1]; int64_t e = batch_starts[i + 1];
if (e > s) { if (e > s) {
Tensor out = outs->Slice(s, e); phi::DenseTensor out = outs->Slice(s, e);
if (return_index) { if (return_index) {
int* output_idx = int* output_idx =
index->mutable_data<int>({num_kept, 1}, ctx.GetPlace()); index->mutable_data<int>({num_kept, 1}, ctx.GetPlace());
......
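MultiClassNMS runs single-class NMS for every non-background class and then trims to the keep_top_k highest-scoring detections across classes. A compact sketch of that driver, assuming a caller-supplied single-class NMS routine; every name here is illustrative rather than the kernel's actual helper set.

    #include <algorithm>
    #include <map>
    #include <utility>
    #include <vector>

    // Per-image multiclass selection: class -> kept box indices. Skips the
    // background class, pools all per-class survivors, and keeps only the
    // keep_top_k best by score, as in the kernel above.
    std::map<int, std::vector<int>> MultiClassSelect(
        int class_num, int background_label, int keep_top_k,
        const std::vector<std::vector<float>>& scores,  // [class][box]
        std::vector<int> (*NMSForOneClass)(const std::vector<float>&)) {
      std::map<int, std::vector<int>> selected;
      std::vector<std::pair<float, std::pair<int, int>>> all;  // (score, (class, idx))
      for (int c = 0; c < class_num; ++c) {
        if (c == background_label) continue;
        for (int idx : NMSForOneClass(scores[c]))
          all.push_back({scores[c][idx], {c, idx}});
      }
      if (keep_top_k > 0 && static_cast<int>(all.size()) > keep_top_k) {
        std::partial_sort(all.begin(), all.begin() + keep_top_k, all.end(),
                          [](const std::pair<float, std::pair<int, int>>& a,
                             const std::pair<float, std::pair<int, int>>& b) {
                            return a.first > b.first;
                          });
        all.resize(keep_top_k);
      }
      for (const auto& det : all)
        selected[det.second.first].push_back(det.second.second);
      return selected;
    }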
...@@ -17,8 +17,6 @@ limitations under the License. */ ...@@ -17,8 +17,6 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = phi::DenseTensor;
template <typename DeviceContext, typename T> template <typename DeviceContext, typename T>
class PolygonBoxTransformCPUKernel : public framework::OpKernel<T> { class PolygonBoxTransformCPUKernel : public framework::OpKernel<T> {
public: public:
......
...@@ -19,7 +19,6 @@ limitations under the License. */ ...@@ -19,7 +19,6 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = phi::DenseTensor;
using phi::PADDLE_CUDA_NUM_THREADS; using phi::PADDLE_CUDA_NUM_THREADS;
#define CUDA_BLOCK_SIZE 16 #define CUDA_BLOCK_SIZE 16
......
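The CUDA port sizes its launches from PADDLE_CUDA_NUM_THREADS and the 16-wide CUDA_BLOCK_SIZE tile above; the block count is the usual ceiling division, sketched here as plain C++ (the constant names come from the file, the helper name is illustrative):

    // Ceiling division: enough blocks so blocks * threads_per_block >= n.
    inline int NumBlocks(int n, int threads_per_block) {
      return (n + threads_per_block - 1) / threads_per_block;
    }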
...@@ -18,8 +18,6 @@ limitations under the License. */ ...@@ -18,8 +18,6 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = phi::DenseTensor;
template <typename DeviceContext, typename T> template <typename DeviceContext, typename T>
class PriorBoxNPUKernel : public framework::OpKernel<T> { class PriorBoxNPUKernel : public framework::OpKernel<T> {
public: public:
...@@ -50,7 +48,7 @@ class PriorBoxNPUKernel : public framework::OpKernel<T> { ...@@ -50,7 +48,7 @@ class PriorBoxNPUKernel : public framework::OpKernel<T> {
auto place = ctx.GetPlace(); auto place = ctx.GetPlace();
Tensor out(input->type()); phi::DenseTensor out(input->type());
auto out_dims = phi::vectorize(boxes->dims()); auto out_dims = phi::vectorize(boxes->dims());
out_dims.insert(out_dims.begin(), 2); out_dims.insert(out_dims.begin(), 2);
out.Resize(phi::make_ddim(out_dims)); out.Resize(phi::make_ddim(out_dims));
...@@ -75,8 +73,8 @@ class PriorBoxNPUKernel : public framework::OpKernel<T> { ...@@ -75,8 +73,8 @@ class PriorBoxNPUKernel : public framework::OpKernel<T> {
runner.Run(stream); runner.Run(stream);
out.Resize(phi::make_ddim({out.numel()})); out.Resize(phi::make_ddim({out.numel()}));
Tensor out_boxes = out.Slice(0, boxes->numel()); phi::DenseTensor out_boxes = out.Slice(0, boxes->numel());
Tensor out_variances = out.Slice(boxes->numel(), out.numel()); phi::DenseTensor out_variances = out.Slice(boxes->numel(), out.numel());
out_boxes.Resize(boxes->dims()); out_boxes.Resize(boxes->dims());
out_variances.Resize(variances->dims()); out_variances.Resize(variances->dims());
......
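The NPU prior-box kernel writes boxes and variances into one buffer with a leading dimension of 2 and then splits it with two Slice calls. The equivalent split over a flat vector, assuming row-major layout; the function name is illustrative.

    #include <cassert>
    #include <vector>

    // Splits a [2, n] row-major buffer into its boxes half and its
    // variances half, as the two Slice calls above do without copying.
    void SplitBoxesAndVariances(const std::vector<float>& out,  // 2 * n values
                                std::vector<float>* boxes,
                                std::vector<float>* variances) {
      assert(out.size() % 2 == 0);
      const size_t n = out.size() / 2;
      boxes->assign(out.begin(), out.begin() + n);
      variances->assign(out.begin() + n, out.end());
    }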
...@@ -18,8 +18,6 @@ limitations under the License. */ ...@@ -18,8 +18,6 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = phi::DenseTensor;
class RetinanetDetectionOutputOp : public framework::OperatorWithKernel { class RetinanetDetectionOutputOp : public framework::OperatorWithKernel {
public: public:
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
...@@ -409,9 +407,9 @@ class RetinanetDetectionOutputKernel : public framework::OpKernel<T> { ...@@ -409,9 +407,9 @@ class RetinanetDetectionOutputKernel : public framework::OpKernel<T> {
} }
void RetinanetDetectionOutput(const framework::ExecutionContext& ctx, void RetinanetDetectionOutput(const framework::ExecutionContext& ctx,
const std::vector<Tensor>& scores, const std::vector<phi::DenseTensor>& scores,
const std::vector<Tensor>& bboxes, const std::vector<phi::DenseTensor>& bboxes,
const std::vector<Tensor>& anchors, const std::vector<phi::DenseTensor>& anchors,
const phi::DenseTensor& im_info, const phi::DenseTensor& im_info,
std::vector<std::vector<T>>* nmsed_out, std::vector<std::vector<T>>* nmsed_out,
int* num_nmsed_out) const { int* num_nmsed_out) const {
...@@ -425,11 +423,11 @@ class RetinanetDetectionOutputKernel : public framework::OpKernel<T> { ...@@ -425,11 +423,11 @@ class RetinanetDetectionOutputKernel : public framework::OpKernel<T> {
std::map<int, std::vector<std::vector<T>>> preds; std::map<int, std::vector<std::vector<T>>> preds;
for (size_t l = 0; l < scores.size(); ++l) { for (size_t l = 0; l < scores.size(); ++l) {
// Fetch per level score // Fetch per level score
Tensor scores_per_level = scores[l]; phi::DenseTensor scores_per_level = scores[l];
// Fetch per level bbox // Fetch per level bbox
Tensor bboxes_per_level = bboxes[l]; phi::DenseTensor bboxes_per_level = bboxes[l];
// Fetch per level anchor // Fetch per level anchor
Tensor anchors_per_level = anchors[l]; phi::DenseTensor anchors_per_level = anchors[l];
int64_t scores_num = scores_per_level.numel(); int64_t scores_num = scores_per_level.numel();
int64_t bboxes_num = bboxes_per_level.numel(); int64_t bboxes_num = bboxes_per_level.numel();
...@@ -492,9 +490,9 @@ class RetinanetDetectionOutputKernel : public framework::OpKernel<T> { ...@@ -492,9 +490,9 @@ class RetinanetDetectionOutputKernel : public framework::OpKernel<T> {
auto* im_info = ctx.Input<phi::DenseTensor>("ImInfo"); auto* im_info = ctx.Input<phi::DenseTensor>("ImInfo");
auto* outs = ctx.Output<phi::DenseTensor>("Out"); auto* outs = ctx.Output<phi::DenseTensor>("Out");
std::vector<Tensor> boxes_list(boxes.size()); std::vector<phi::DenseTensor> boxes_list(boxes.size());
std::vector<Tensor> scores_list(scores.size()); std::vector<phi::DenseTensor> scores_list(scores.size());
std::vector<Tensor> anchors_list(anchors.size()); std::vector<phi::DenseTensor> anchors_list(anchors.size());
for (size_t j = 0; j < boxes_list.size(); ++j) { for (size_t j = 0; j < boxes_list.size(); ++j) {
boxes_list[j] = *boxes[j]; boxes_list[j] = *boxes[j];
scores_list[j] = *scores[j]; scores_list[j] = *scores[j];
...@@ -512,8 +510,8 @@ class RetinanetDetectionOutputKernel : public framework::OpKernel<T> { ...@@ -512,8 +510,8 @@ class RetinanetDetectionOutputKernel : public framework::OpKernel<T> {
std::vector<size_t> batch_starts = {0}; std::vector<size_t> batch_starts = {0};
for (int i = 0; i < batch_size; ++i) { for (int i = 0; i < batch_size; ++i) {
int num_nmsed_out = 0; int num_nmsed_out = 0;
std::vector<Tensor> box_per_batch_list(boxes_list.size()); std::vector<phi::DenseTensor> box_per_batch_list(boxes_list.size());
std::vector<Tensor> score_per_batch_list(scores_list.size()); std::vector<phi::DenseTensor> score_per_batch_list(scores_list.size());
for (size_t j = 0; j < boxes_list.size(); ++j) { for (size_t j = 0; j < boxes_list.size(); ++j) {
const auto& score_dims = scores_list[j].dims(); const auto& score_dims = scores_list[j].dims();
score_per_batch_list[j] = scores_list[j].Slice(i, i + 1); score_per_batch_list[j] = scores_list[j].Slice(i, i + 1);
...@@ -521,7 +519,7 @@ class RetinanetDetectionOutputKernel : public framework::OpKernel<T> { ...@@ -521,7 +519,7 @@ class RetinanetDetectionOutputKernel : public framework::OpKernel<T> {
box_per_batch_list[j] = boxes_list[j].Slice(i, i + 1); box_per_batch_list[j] = boxes_list[j].Slice(i, i + 1);
box_per_batch_list[j].Resize({score_dims[1], box_dim}); box_per_batch_list[j].Resize({score_dims[1], box_dim});
} }
Tensor im_info_slice = im_info->Slice(i, i + 1); phi::DenseTensor im_info_slice = im_info->Slice(i, i + 1);
std::vector<std::vector<T>> nmsed_out; std::vector<std::vector<T>> nmsed_out;
RetinanetDetectionOutput(ctx, RetinanetDetectionOutput(ctx,
...@@ -544,7 +542,7 @@ class RetinanetDetectionOutputKernel : public framework::OpKernel<T> { ...@@ -544,7 +542,7 @@ class RetinanetDetectionOutputKernel : public framework::OpKernel<T> {
int64_t s = batch_starts[i]; int64_t s = batch_starts[i];
int64_t e = batch_starts[i + 1]; int64_t e = batch_starts[i + 1];
if (e > s) { if (e > s) {
Tensor out = outs->Slice(s, e); phi::DenseTensor out = outs->Slice(s, e);
MultiClassOutput(dev_ctx, all_nmsed_out[i], &out); MultiClassOutput(dev_ctx, all_nmsed_out[i], &out);
} }
} }
...@@ -563,7 +561,8 @@ class RetinanetDetectionOutputOpMaker ...@@ -563,7 +561,8 @@ class RetinanetDetectionOutputOpMaker
void Make() override { void Make() override {
AddInput("BBoxes", AddInput("BBoxes",
"(List) A list of tensors from multiple FPN levels. Each " "(List) A list of tensors from multiple FPN levels. Each "
"element is a 3-D Tensor with shape [N, Mi, 4] represents the " "element is a 3-D phi::DenseTensor with shape [N, Mi, 4] "
"represents the "
"predicted locations of Mi bounding boxes, N is the batch size. " "predicted locations of Mi bounding boxes, N is the batch size. "
"Mi is the number of bounding boxes from i-th FPN level. Each " "Mi is the number of bounding boxes from i-th FPN level. Each "
"bounding box has four coordinate values and the layout is " "bounding box has four coordinate values and the layout is "
...@@ -571,18 +570,20 @@ class RetinanetDetectionOutputOpMaker ...@@ -571,18 +570,20 @@ class RetinanetDetectionOutputOpMaker
.AsDuplicable(); .AsDuplicable();
AddInput("Scores", AddInput("Scores",
"(List) A list of tensors from multiple FPN levels. Each " "(List) A list of tensors from multiple FPN levels. Each "
"element is a 3-D Tensor with shape [N, Mi, C] represents the " "element is a 3-D phi::DenseTensor with shape [N, Mi, C] "
"represents the "
"predicted confidence from its FPN level. N is the batch size, " "predicted confidence from its FPN level. N is the batch size, "
"C is the class number (excluding background), Mi is the number " "C is the class number (excluding background), Mi is the number "
"of bounding boxes from i-th FPN level. For each bounding box, " "of bounding boxes from i-th FPN level. For each bounding box, "
"there are total C scores.") "there are total C scores.")
.AsDuplicable(); .AsDuplicable();
AddInput("Anchors", AddInput(
"(List) A list of tensors from multiple FPN levels. Each" "Anchors",
"element is a 2-D Tensor with shape [Mi, 4] represents the " "(List) A list of tensors from multiple FPN levels. Each"
"locations of Mi anchor boxes from i-th FPN level. Each " "element is a 2-D phi::DenseTensor with shape [Mi, 4] represents the "
"bounding box has four coordinate values and the layout is " "locations of Mi anchor boxes from i-th FPN level. Each "
"[xmin, ymin, xmax, ymax].") "bounding box has four coordinate values and the layout is "
"[xmin, ymin, xmax, ymax].")
.AsDuplicable(); .AsDuplicable();
AddInput("ImInfo", AddInput("ImInfo",
"(phi::DenseTensor) A 2-D phi::DenseTensor with shape [N, 3] " "(phi::DenseTensor) A 2-D phi::DenseTensor with shape [N, 3] "
......
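RetinanetDetectionOutput slices each FPN level's [N, Mi, C] tensor per image before decoding and NMS; Slice(i, i + 1) on the batch dimension is a zero-copy view, so the per-image data is just a pointer offset. A sketch under that row-major assumption (function name illustrative):

    #include <cstddef>

    // Row-major [N, M, C] buffer: the i-th image's [M, C] slice begins at
    // data + i * M * C. This is all that Slice(i, i + 1) plus Resize does
    // in the kernel above; no data is copied.
    template <typename T>
    const T* ImageSlice(const T* data, std::ptrdiff_t i, std::ptrdiff_t m,
                        std::ptrdiff_t c) {
      return data + i * m * c;
    }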
...@@ -22,8 +22,6 @@ limitations under the License. */ ...@@ -22,8 +22,6 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = phi::DenseTensor;
template <typename T> template <typename T>
bool GT_E(T a, T b) { bool GT_E(T a, T b) {
return (a > b) || fabs(a - b) < 1e-4; return (a > b) || fabs(a - b) < 1e-4;
...@@ -600,7 +598,7 @@ class ROIPerspectiveTransformOpMaker ...@@ -600,7 +598,7 @@ class ROIPerspectiveTransformOpMaker
public: public:
void Make() override { void Make() override {
AddInput("X", AddInput("X",
"(Tensor), " "(phi::DenseTensor), "
"the input of ROIPerspectiveTransformOp. " "the input of ROIPerspectiveTransformOp. "
"The format of input tensor is NCHW. Where N is batch size, " "The format of input tensor is NCHW. Where N is batch size, "
"C is the number of input channels, " "C is the number of input channels, "
...@@ -617,28 +615,28 @@ class ROIPerspectiveTransformOpMaker ...@@ -617,28 +615,28 @@ class ROIPerspectiveTransformOpMaker
"(x4, y4) is the bottom left coordinates."); "(x4, y4) is the bottom left coordinates.");
AddOutput( AddOutput(
"Out", "Out",
"(Tensor), " "(phi::DenseTensor), "
"The output of ROIPerspectiveTransformOp is a 4-D tensor with shape " "The output of ROIPerspectiveTransformOp is a 4-D tensor with shape "
"(num_rois, channels, transformed_h, transformed_w)."); "(num_rois, channels, transformed_h, transformed_w).");
AddOutput("Mask", AddOutput("Mask",
"(Tensor), " "(phi::DenseTensor), "
"The output mask of ROIPerspectiveTransformOp is a 4-D tensor " "The output mask of ROIPerspectiveTransformOp is a 4-D tensor "
"with shape " "with shape "
"(num_rois, 1, transformed_h, transformed_w)."); "(num_rois, 1, transformed_h, transformed_w).");
AddOutput("TransformMatrix", AddOutput("TransformMatrix",
"(Tensor), " "(phi::DenseTensor), "
"The output transform matrix of ROIPerspectiveTransformOp is a " "The output transform matrix of ROIPerspectiveTransformOp is a "
"1-D tensor with shape " "1-D tensor with shape "
"(num_rois, 9)."); "(num_rois, 9).");
AddOutput("Out2InIdx", AddOutput("Out2InIdx",
"(Tensor), " "(phi::DenseTensor), "
"An intermediate tensor used to map indexes of input feature map " "An intermediate tensor used to map indexes of input feature map "
"and indexes of output feature map." "and indexes of output feature map."
"The shape of the tensor is [out_size, 4] and out_size is the " "The shape of the tensor is [out_size, 4] and out_size is the "
"number of elements in output feature map.") "number of elements in output feature map.")
.AsIntermediate(); .AsIntermediate();
AddOutput("Out2InWeights", AddOutput("Out2InWeights",
"(Tensor), " "(phi::DenseTensor), "
"An intermediate tensor used to record the weights of bilinear " "An intermediate tensor used to record the weights of bilinear "
"interpolatein for each element in output. The shape of the " "interpolatein for each element in output. The shape of the "
"tensor is [out_size, 4] and out_size is the number of elements " "tensor is [out_size, 4] and out_size is the number of elements "
......
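GT_E above compares quad coordinates with a 1e-4 tolerance so boundary pixels are not dropped by exact floating-point comparison. A generic restatement of the same rule; the helper name and the default eps parameter are illustrative.

    #include <cmath>

    // Tolerant "greater than or equal": values within eps count as equal,
    // matching the 1e-4 tolerance used by GT_E in the operator above.
    template <typename T>
    bool GreaterEqualTol(T a, T b, T eps = static_cast<T>(1e-4)) {
      return (a > b) || std::fabs(a - b) < eps;
    }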
...@@ -21,7 +21,6 @@ limitations under the License. */ ...@@ -21,7 +21,6 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = phi::DenseTensor;
template <typename T, template <typename T,
int MajorType = Eigen::RowMajor, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex> typename IndexType = Eigen::DenseIndex>
...@@ -113,11 +112,12 @@ void AppendRpns(phi::DenseTensor* out, ...@@ -113,11 +112,12 @@ void AppendRpns(phi::DenseTensor* out,
} }
template <typename T> template <typename T>
std::vector<Tensor> FilterStraddleAnchor(const phi::CPUContext& context, std::vector<phi::DenseTensor> FilterStraddleAnchor(
const phi::DenseTensor* anchor, const phi::CPUContext& context,
const float rpn_straddle_thresh, const phi::DenseTensor* anchor,
T im_height, const float rpn_straddle_thresh,
T im_width) { T im_height,
T im_width) {
std::vector<int> inds_inside; std::vector<int> inds_inside;
int anchor_num = anchor->dims()[0]; int anchor_num = anchor->dims()[0];
auto* anchor_data = anchor->data<T>(); auto* anchor_data = anchor->data<T>();
...@@ -138,25 +138,25 @@ std::vector<Tensor> FilterStraddleAnchor(const phi::CPUContext& context, ...@@ -138,25 +138,25 @@ std::vector<Tensor> FilterStraddleAnchor(const phi::CPUContext& context,
} }
} }
int inside_num = inds_inside.size(); int inside_num = inds_inside.size();
Tensor inds_inside_t; phi::DenseTensor inds_inside_t;
int* inds_inside_data = int* inds_inside_data =
inds_inside_t.mutable_data<int>({inside_num}, context.GetPlace()); inds_inside_t.mutable_data<int>({inside_num}, context.GetPlace());
std::copy(inds_inside.begin(), inds_inside.end(), inds_inside_data); std::copy(inds_inside.begin(), inds_inside.end(), inds_inside_data);
Tensor inside_anchor_t; phi::DenseTensor inside_anchor_t;
T* inside_anchor_data = T* inside_anchor_data =
inside_anchor_t.mutable_data<T>({inside_num, 4}, context.GetPlace()); inside_anchor_t.mutable_data<T>({inside_num, 4}, context.GetPlace());
Gather<T>( Gather<T>(
anchor->data<T>(), 4, inds_inside_data, inside_num, inside_anchor_data); anchor->data<T>(), 4, inds_inside_data, inside_num, inside_anchor_data);
std::vector<Tensor> res; std::vector<phi::DenseTensor> res;
res.emplace_back(inds_inside_t); res.emplace_back(inds_inside_t);
res.emplace_back(inside_anchor_t); res.emplace_back(inside_anchor_t);
return res; return res;
} }
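FilterStraddleAnchor keeps the anchors that stay within rpn_straddle_thresh pixels of the image border; as the Retinanet kernel's call with -1 further down shows, a negative threshold keeps everything. A sketch of just that selection rule over corner-encoded anchors in std:: containers, with illustrative names:

    #include <array>
    #include <vector>

    // Indices of anchors inside the image, allowing each side to straddle
    // the border by up to straddle_thresh pixels; a negative threshold
    // keeps all anchors.
    std::vector<int> InsideAnchorIndices(
        const std::vector<std::array<float, 4>>& anchors,
        float straddle_thresh, float im_height, float im_width) {
      std::vector<int> inds;
      for (size_t i = 0; i < anchors.size(); ++i) {
        const auto& a = anchors[i];  // {xmin, ymin, xmax, ymax}
        if (straddle_thresh < 0 ||
            (a[0] >= -straddle_thresh && a[1] >= -straddle_thresh &&
             a[2] < im_width + straddle_thresh &&
             a[3] < im_height + straddle_thresh)) {
          inds.push_back(static_cast<int>(i));
        }
      }
      return inds;
    }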
template <typename T> template <typename T>
Tensor FilterCrowdGt(const phi::CPUContext& context, phi::DenseTensor FilterCrowdGt(const phi::CPUContext& context,
phi::DenseTensor* gt_boxes, phi::DenseTensor* gt_boxes,
phi::DenseTensor* is_crowd) { phi::DenseTensor* is_crowd) {
int gt_num = gt_boxes->dims()[0]; int gt_num = gt_boxes->dims()[0];
std::vector<int> not_crowd_inds; std::vector<int> not_crowd_inds;
auto* is_crowd_data = is_crowd->data<int>(); auto* is_crowd_data = is_crowd->data<int>();
...@@ -166,7 +166,7 @@ Tensor FilterCrowdGt(const phi::CPUContext& context, ...@@ -166,7 +166,7 @@ Tensor FilterCrowdGt(const phi::CPUContext& context,
} }
} }
int ncrowd_num = not_crowd_inds.size(); int ncrowd_num = not_crowd_inds.size();
Tensor ncrowd_gt_boxes; phi::DenseTensor ncrowd_gt_boxes;
T* ncrowd_gt_boxes_data = T* ncrowd_gt_boxes_data =
ncrowd_gt_boxes.mutable_data<T>({ncrowd_num, 4}, context.GetPlace()); ncrowd_gt_boxes.mutable_data<T>({ncrowd_num, 4}, context.GetPlace());
Gather<T>(gt_boxes->data<T>(), Gather<T>(gt_boxes->data<T>(),
...@@ -300,7 +300,7 @@ void ScoreAssign(const T* anchor_by_gt_overlap_data, ...@@ -300,7 +300,7 @@ void ScoreAssign(const T* anchor_by_gt_overlap_data,
} }
template <typename T> template <typename T>
std::vector<Tensor> SampleRpnFgBgGt( std::vector<phi::DenseTensor> SampleRpnFgBgGt(
const phi::CPUContext& ctx, const phi::CPUContext& ctx,
const phi::DenseTensor& anchor_by_gt_overlap, const phi::DenseTensor& anchor_by_gt_overlap,
const int rpn_batch_size_per_im, const int rpn_batch_size_per_im,
...@@ -322,7 +322,7 @@ std::vector<Tensor> SampleRpnFgBgGt( ...@@ -322,7 +322,7 @@ std::vector<Tensor> SampleRpnFgBgGt(
// Calculate the max IoU between anchors and gt boxes // Calculate the max IoU between anchors and gt boxes
// Map from anchor to gt box that has highest overlap // Map from anchor to gt box that has highest overlap
auto place = ctx.GetPlace(); auto place = ctx.GetPlace();
Tensor anchor_to_gt_max, anchor_to_gt_argmax, gt_to_anchor_max; phi::DenseTensor anchor_to_gt_max, anchor_to_gt_argmax, gt_to_anchor_max;
anchor_to_gt_max.mutable_data<T>({anchor_num}, place); anchor_to_gt_max.mutable_data<T>({anchor_num}, place);
int* argmax = anchor_to_gt_argmax.mutable_data<int>({anchor_num}, place); int* argmax = anchor_to_gt_argmax.mutable_data<int>({anchor_num}, place);
gt_to_anchor_max.mutable_data<T>({gt_num}, place); gt_to_anchor_max.mutable_data<T>({gt_num}, place);
...@@ -365,7 +365,8 @@ std::vector<Tensor> SampleRpnFgBgGt( ...@@ -365,7 +365,8 @@ std::vector<Tensor> SampleRpnFgBgGt(
for (int i = 0; i < fg_fake_num; ++i) { for (int i = 0; i < fg_fake_num; ++i) {
gt_inds.emplace_back(argmax[fg_fake[i]]); gt_inds.emplace_back(argmax[fg_fake[i]]);
} }
Tensor loc_index_t, score_index_t, tgt_lbl_t, gt_inds_t, bbox_inside_weight_t; phi::DenseTensor loc_index_t, score_index_t, tgt_lbl_t, gt_inds_t,
bbox_inside_weight_t;
int* loc_index_data = loc_index_t.mutable_data<int>({fg_fake_num}, place); int* loc_index_data = loc_index_t.mutable_data<int>({fg_fake_num}, place);
int* score_index_data = int* score_index_data =
score_index_t.mutable_data<int>({fg_num + bg_num}, place); score_index_t.mutable_data<int>({fg_num + bg_num}, place);
...@@ -381,7 +382,7 @@ std::vector<Tensor> SampleRpnFgBgGt( ...@@ -381,7 +382,7 @@ std::vector<Tensor> SampleRpnFgBgGt(
std::copy(bbox_inside_weight.begin(), std::copy(bbox_inside_weight.begin(),
bbox_inside_weight.end(), bbox_inside_weight.end(),
bbox_inside_weight_data); bbox_inside_weight_data);
std::vector<Tensor> loc_score_tgtlbl_gt; std::vector<phi::DenseTensor> loc_score_tgtlbl_gt;
loc_score_tgtlbl_gt.emplace_back(loc_index_t); loc_score_tgtlbl_gt.emplace_back(loc_index_t);
loc_score_tgtlbl_gt.emplace_back(score_index_t); loc_score_tgtlbl_gt.emplace_back(score_index_t);
loc_score_tgtlbl_gt.emplace_back(tgt_lbl_t); loc_score_tgtlbl_gt.emplace_back(tgt_lbl_t);
...@@ -455,30 +456,30 @@ class RpnTargetAssignKernel : public framework::OpKernel<T> { ...@@ -455,30 +456,30 @@ class RpnTargetAssignKernel : public framework::OpKernel<T> {
auto gt_boxes_lod = gt_boxes->lod().back(); auto gt_boxes_lod = gt_boxes->lod().back();
auto is_crowd_lod = is_crowd->lod().back(); auto is_crowd_lod = is_crowd->lod().back();
for (int i = 0; i < batch_num; ++i) { for (int i = 0; i < batch_num; ++i) {
Tensor gt_boxes_slice = phi::DenseTensor gt_boxes_slice =
gt_boxes->Slice(gt_boxes_lod[i], gt_boxes_lod[i + 1]); gt_boxes->Slice(gt_boxes_lod[i], gt_boxes_lod[i + 1]);
Tensor is_crowd_slice = phi::DenseTensor is_crowd_slice =
is_crowd->Slice(is_crowd_lod[i], is_crowd_lod[i + 1]); is_crowd->Slice(is_crowd_lod[i], is_crowd_lod[i + 1]);
Tensor im_info_slice = im_info->Slice(i, i + 1); phi::DenseTensor im_info_slice = im_info->Slice(i, i + 1);
auto* im_info_data = im_info_slice.data<T>(); auto* im_info_data = im_info_slice.data<T>();
auto im_height = im_info_data[0]; auto im_height = im_info_data[0];
auto im_width = im_info_data[1]; auto im_width = im_info_data[1];
auto im_scale = im_info_data[2]; auto im_scale = im_info_data[2];
// Filter straddle anchor // Filter straddle anchor
std::vector<Tensor> filter_output = FilterStraddleAnchor<T>( std::vector<phi::DenseTensor> filter_output = FilterStraddleAnchor<T>(
dev_ctx, anchor, rpn_straddle_thresh, im_height, im_width); dev_ctx, anchor, rpn_straddle_thresh, im_height, im_width);
Tensor inds_inside = filter_output[0]; phi::DenseTensor inds_inside = filter_output[0];
Tensor inside_anchor = filter_output[1]; phi::DenseTensor inside_anchor = filter_output[1];
// Filter crowd gt // Filter crowd gt
Tensor ncrowd_gt_boxes = phi::DenseTensor ncrowd_gt_boxes =
FilterCrowdGt<T>(dev_ctx, &gt_boxes_slice, &is_crowd_slice); FilterCrowdGt<T>(dev_ctx, &gt_boxes_slice, &is_crowd_slice);
auto ncrowd_gt_boxes_et = auto ncrowd_gt_boxes_et =
framework::EigenTensor<T, 2>::From(ncrowd_gt_boxes); framework::EigenTensor<T, 2>::From(ncrowd_gt_boxes);
ncrowd_gt_boxes_et = ncrowd_gt_boxes_et * im_scale; ncrowd_gt_boxes_et = ncrowd_gt_boxes_et * im_scale;
Tensor anchor_by_gt_overlap; phi::DenseTensor anchor_by_gt_overlap;
anchor_by_gt_overlap.mutable_data<T>( anchor_by_gt_overlap.mutable_data<T>(
{inside_anchor.dims()[0], ncrowd_gt_boxes.dims()[0]}, place); {inside_anchor.dims()[0], ncrowd_gt_boxes.dims()[0]}, place);
BboxOverlaps<T>(inside_anchor, ncrowd_gt_boxes, &anchor_by_gt_overlap); BboxOverlaps<T>(inside_anchor, ncrowd_gt_boxes, &anchor_by_gt_overlap);
...@@ -492,16 +493,16 @@ class RpnTargetAssignKernel : public framework::OpKernel<T> { ...@@ -492,16 +493,16 @@ class RpnTargetAssignKernel : public framework::OpKernel<T> {
engine, engine,
use_random); use_random);
Tensor sampled_loc_index = loc_score_tgtlbl_gt[0]; phi::DenseTensor sampled_loc_index = loc_score_tgtlbl_gt[0];
Tensor sampled_score_index = loc_score_tgtlbl_gt[1]; phi::DenseTensor sampled_score_index = loc_score_tgtlbl_gt[1];
Tensor sampled_tgtlbl = loc_score_tgtlbl_gt[2]; phi::DenseTensor sampled_tgtlbl = loc_score_tgtlbl_gt[2];
Tensor sampled_gt_index = loc_score_tgtlbl_gt[3]; phi::DenseTensor sampled_gt_index = loc_score_tgtlbl_gt[3];
Tensor sampled_bbox_inside_weight = loc_score_tgtlbl_gt[4]; phi::DenseTensor sampled_bbox_inside_weight = loc_score_tgtlbl_gt[4];
int loc_num = sampled_loc_index.dims()[0]; int loc_num = sampled_loc_index.dims()[0];
int score_num = sampled_score_index.dims()[0]; int score_num = sampled_score_index.dims()[0];
// unmap to all anchor // unmap to all anchor
Tensor sampled_loc_index_unmap, sampled_score_index_unmap; phi::DenseTensor sampled_loc_index_unmap, sampled_score_index_unmap;
sampled_loc_index_unmap.mutable_data<int>({loc_num}, place); sampled_loc_index_unmap.mutable_data<int>({loc_num}, place);
sampled_score_index_unmap.mutable_data<int>({score_num}, place); sampled_score_index_unmap.mutable_data<int>({score_num}, place);
Gather<int>(inds_inside.data<int>(), Gather<int>(inds_inside.data<int>(),
...@@ -516,7 +517,7 @@ class RpnTargetAssignKernel : public framework::OpKernel<T> { ...@@ -516,7 +517,7 @@ class RpnTargetAssignKernel : public framework::OpKernel<T> {
sampled_score_index_unmap.data<int>()); sampled_score_index_unmap.data<int>());
// get target bbox deltas // get target bbox deltas
Tensor sampled_anchor, sampled_gt, sampled_tgt_bbox; phi::DenseTensor sampled_anchor, sampled_gt, sampled_tgt_bbox;
auto* sampled_anchor_data = auto* sampled_anchor_data =
sampled_anchor.mutable_data<T>({loc_num, 4}, place); sampled_anchor.mutable_data<T>({loc_num, 4}, place);
auto* sampled_gt_data = sampled_gt.mutable_data<T>({loc_num, 4}, place); auto* sampled_gt_data = sampled_gt.mutable_data<T>({loc_num, 4}, place);
...@@ -859,10 +860,11 @@ class RetinanetTargetAssignOp : public framework::OperatorWithKernel { ...@@ -859,10 +860,11 @@ class RetinanetTargetAssignOp : public framework::OperatorWithKernel {
}; };
template <typename T> template <typename T>
std::vector<Tensor> FilterCrowdGtBoxLabel(const phi::CPUContext& context, std::vector<phi::DenseTensor> FilterCrowdGtBoxLabel(
phi::DenseTensor* gt_boxes, const phi::CPUContext& context,
phi::DenseTensor* gt_labels, phi::DenseTensor* gt_boxes,
phi::DenseTensor* is_crowd) { phi::DenseTensor* gt_labels,
phi::DenseTensor* is_crowd) {
int gt_num = gt_boxes->dims()[0]; int gt_num = gt_boxes->dims()[0];
std::vector<int> not_crowd_inds; std::vector<int> not_crowd_inds;
auto* is_crowd_data = is_crowd->data<int>(); auto* is_crowd_data = is_crowd->data<int>();
...@@ -872,7 +874,7 @@ std::vector<Tensor> FilterCrowdGtBoxLabel(const phi::CPUContext& context, ...@@ -872,7 +874,7 @@ std::vector<Tensor> FilterCrowdGtBoxLabel(const phi::CPUContext& context,
} }
} }
int ncrowd_num = not_crowd_inds.size(); int ncrowd_num = not_crowd_inds.size();
Tensor ncrowd_gt_boxes, ncrowd_gt_labels; phi::DenseTensor ncrowd_gt_boxes, ncrowd_gt_labels;
T* ncrowd_gt_boxes_data = T* ncrowd_gt_boxes_data =
ncrowd_gt_boxes.mutable_data<T>({ncrowd_num, 4}, context.GetPlace()); ncrowd_gt_boxes.mutable_data<T>({ncrowd_num, 4}, context.GetPlace());
int* ncrowd_gt_labels_data = int* ncrowd_gt_labels_data =
...@@ -887,19 +889,20 @@ std::vector<Tensor> FilterCrowdGtBoxLabel(const phi::CPUContext& context, ...@@ -887,19 +889,20 @@ std::vector<Tensor> FilterCrowdGtBoxLabel(const phi::CPUContext& context,
not_crowd_inds.data(), not_crowd_inds.data(),
ncrowd_num, ncrowd_num,
ncrowd_gt_labels_data); ncrowd_gt_labels_data);
std::vector<Tensor> res; std::vector<phi::DenseTensor> res;
res.emplace_back(ncrowd_gt_boxes); res.emplace_back(ncrowd_gt_boxes);
res.emplace_back(ncrowd_gt_labels); res.emplace_back(ncrowd_gt_labels);
return res; return res;
} }
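Both target-assign kernels reduce the anchor-by-gt overlap matrix into the anchor_to_gt_max / anchor_to_gt_argmax / gt_to_anchor_max triple before labeling foreground and background anchors. The same reduction as plain loops over a row-major matrix, with illustrative names:

    #include <vector>

    // From an anchor_num x gt_num overlap matrix (row-major), computes for
    // each anchor its best gt (max IoU and argmax) and for each gt its best
    // anchor IoU, as the kernels above do before fg/bg assignment.
    void BestOverlaps(const std::vector<float>& overlap, int anchor_num,
                      int gt_num, std::vector<float>* anchor_to_gt_max,
                      std::vector<int>* anchor_to_gt_argmax,
                      std::vector<float>* gt_to_anchor_max) {
      anchor_to_gt_max->assign(anchor_num, 0.f);
      anchor_to_gt_argmax->assign(anchor_num, -1);
      gt_to_anchor_max->assign(gt_num, 0.f);
      for (int i = 0; i < anchor_num; ++i) {
        for (int j = 0; j < gt_num; ++j) {
          float v = overlap[i * gt_num + j];
          if ((*anchor_to_gt_argmax)[i] < 0 || v > (*anchor_to_gt_max)[i]) {
            (*anchor_to_gt_max)[i] = v;
            (*anchor_to_gt_argmax)[i] = j;
          }
          if (v > (*gt_to_anchor_max)[j]) (*gt_to_anchor_max)[j] = v;
        }
      }
    }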
template <typename T> template <typename T>
std::vector<Tensor> GetAllFgBgGt(const phi::CPUContext& ctx, std::vector<phi::DenseTensor> GetAllFgBgGt(
const phi::DenseTensor& anchor_by_gt_overlap, const phi::CPUContext& ctx,
const phi::DenseTensor& ncrowd_gt_labels, const phi::DenseTensor& anchor_by_gt_overlap,
const float positive_overlap, const phi::DenseTensor& ncrowd_gt_labels,
const float negative_overlap, const float positive_overlap,
std::minstd_rand engine) { const float negative_overlap,
std::minstd_rand engine) {
auto* anchor_by_gt_overlap_data = anchor_by_gt_overlap.data<T>(); auto* anchor_by_gt_overlap_data = anchor_by_gt_overlap.data<T>();
int anchor_num = anchor_by_gt_overlap.dims()[0]; int anchor_num = anchor_by_gt_overlap.dims()[0];
int gt_num = anchor_by_gt_overlap.dims()[1]; int gt_num = anchor_by_gt_overlap.dims()[1];
...@@ -913,7 +916,7 @@ std::vector<Tensor> GetAllFgBgGt(const phi::CPUContext& ctx, ...@@ -913,7 +916,7 @@ std::vector<Tensor> GetAllFgBgGt(const phi::CPUContext& ctx,
// Calculate the max IoU between anchors and gt boxes // Calculate the max IoU between anchors and gt boxes
// Map from anchor to gt box that has highest overlap // Map from anchor to gt box that has highest overlap
auto place = ctx.GetPlace(); auto place = ctx.GetPlace();
Tensor anchor_to_gt_max, anchor_to_gt_argmax, gt_to_anchor_max; phi::DenseTensor anchor_to_gt_max, anchor_to_gt_argmax, gt_to_anchor_max;
anchor_to_gt_max.mutable_data<T>({anchor_num}, place); anchor_to_gt_max.mutable_data<T>({anchor_num}, place);
int* argmax = anchor_to_gt_argmax.mutable_data<int>({anchor_num}, place); int* argmax = anchor_to_gt_argmax.mutable_data<int>({anchor_num}, place);
gt_to_anchor_max.mutable_data<T>({gt_num}, place); gt_to_anchor_max.mutable_data<T>({gt_num}, place);
...@@ -961,8 +964,9 @@ std::vector<Tensor> GetAllFgBgGt(const phi::CPUContext& ctx, ...@@ -961,8 +964,9 @@ std::vector<Tensor> GetAllFgBgGt(const phi::CPUContext& ctx,
gt_inds.emplace_back(argmax[fg_fake[i]]); gt_inds.emplace_back(argmax[fg_fake[i]]);
} }
Tensor loc_index_t, score_index_t, tgt_lbl_t, gt_inds_t, bbox_inside_weight_t; phi::DenseTensor loc_index_t, score_index_t, tgt_lbl_t, gt_inds_t,
Tensor fg_num_t; bbox_inside_weight_t;
phi::DenseTensor fg_num_t;
int* loc_index_data = loc_index_t.mutable_data<int>({fg_fake_num}, place); int* loc_index_data = loc_index_t.mutable_data<int>({fg_fake_num}, place);
int* score_index_data = int* score_index_data =
score_index_t.mutable_data<int>({fg_num + bg_num}, place); score_index_t.mutable_data<int>({fg_num + bg_num}, place);
...@@ -980,7 +984,7 @@ std::vector<Tensor> GetAllFgBgGt(const phi::CPUContext& ctx, ...@@ -980,7 +984,7 @@ std::vector<Tensor> GetAllFgBgGt(const phi::CPUContext& ctx,
bbox_inside_weight.end(), bbox_inside_weight.end(),
bbox_inside_weight_data); bbox_inside_weight_data);
fg_num_data[0] = fg_fake.size() + 1; fg_num_data[0] = fg_fake.size() + 1;
std::vector<Tensor> loc_score_tgtlbl_gt; std::vector<phi::DenseTensor> loc_score_tgtlbl_gt;
loc_score_tgtlbl_gt.emplace_back(loc_index_t); loc_score_tgtlbl_gt.emplace_back(loc_index_t);
loc_score_tgtlbl_gt.emplace_back(score_index_t); loc_score_tgtlbl_gt.emplace_back(score_index_t);
loc_score_tgtlbl_gt.emplace_back(tgt_lbl_t); loc_score_tgtlbl_gt.emplace_back(tgt_lbl_t);
...@@ -1065,35 +1069,35 @@ class RetinanetTargetAssignKernel : public framework::OpKernel<T> { ...@@ -1065,35 +1069,35 @@ class RetinanetTargetAssignKernel : public framework::OpKernel<T> {
auto gt_labels_lod = gt_labels->lod().back(); auto gt_labels_lod = gt_labels->lod().back();
auto is_crowd_lod = is_crowd->lod().back(); auto is_crowd_lod = is_crowd->lod().back();
for (int i = 0; i < batch_num; ++i) { for (int i = 0; i < batch_num; ++i) {
Tensor gt_boxes_slice = phi::DenseTensor gt_boxes_slice =
gt_boxes->Slice(gt_boxes_lod[i], gt_boxes_lod[i + 1]); gt_boxes->Slice(gt_boxes_lod[i], gt_boxes_lod[i + 1]);
Tensor gt_labels_slice = phi::DenseTensor gt_labels_slice =
gt_labels->Slice(gt_labels_lod[i], gt_labels_lod[i + 1]); gt_labels->Slice(gt_labels_lod[i], gt_labels_lod[i + 1]);
Tensor is_crowd_slice = phi::DenseTensor is_crowd_slice =
is_crowd->Slice(is_crowd_lod[i], is_crowd_lod[i + 1]); is_crowd->Slice(is_crowd_lod[i], is_crowd_lod[i + 1]);
Tensor im_info_slice = im_info->Slice(i, i + 1); phi::DenseTensor im_info_slice = im_info->Slice(i, i + 1);
auto* im_info_data = im_info_slice.data<T>(); auto* im_info_data = im_info_slice.data<T>();
auto im_height = im_info_data[0]; auto im_height = im_info_data[0];
auto im_width = im_info_data[1]; auto im_width = im_info_data[1];
auto im_scale = im_info_data[2]; auto im_scale = im_info_data[2];
// Filter straddle anchor // Filter straddle anchor
std::vector<Tensor> filter_output = std::vector<phi::DenseTensor> filter_output =
FilterStraddleAnchor<T>(dev_ctx, anchor, -1, im_height, im_width); FilterStraddleAnchor<T>(dev_ctx, anchor, -1, im_height, im_width);
Tensor inds_inside = filter_output[0]; phi::DenseTensor inds_inside = filter_output[0];
Tensor inside_anchor = filter_output[1]; phi::DenseTensor inside_anchor = filter_output[1];
// Filter crowd gt // Filter crowd gt
std::vector<Tensor> ncrowd_output = FilterCrowdGtBoxLabel<T>( std::vector<phi::DenseTensor> ncrowd_output = FilterCrowdGtBoxLabel<T>(
dev_ctx, &gt_boxes_slice, &gt_labels_slice, &is_crowd_slice); dev_ctx, &gt_boxes_slice, &gt_labels_slice, &is_crowd_slice);
Tensor ncrowd_gt_boxes = ncrowd_output[0]; phi::DenseTensor ncrowd_gt_boxes = ncrowd_output[0];
Tensor ncrowd_gt_labels = ncrowd_output[1]; phi::DenseTensor ncrowd_gt_labels = ncrowd_output[1];
auto ncrowd_gt_boxes_et = auto ncrowd_gt_boxes_et =
framework::EigenTensor<T, 2>::From(ncrowd_gt_boxes); framework::EigenTensor<T, 2>::From(ncrowd_gt_boxes);
ncrowd_gt_boxes_et = ncrowd_gt_boxes_et * im_scale; ncrowd_gt_boxes_et = ncrowd_gt_boxes_et * im_scale;
Tensor anchor_by_gt_overlap; phi::DenseTensor anchor_by_gt_overlap;
anchor_by_gt_overlap.mutable_data<T>( anchor_by_gt_overlap.mutable_data<T>(
{inside_anchor.dims()[0], ncrowd_gt_boxes.dims()[0]}, place); {inside_anchor.dims()[0], ncrowd_gt_boxes.dims()[0]}, place);
BboxOverlaps<T>(inside_anchor, ncrowd_gt_boxes, &anchor_by_gt_overlap); BboxOverlaps<T>(inside_anchor, ncrowd_gt_boxes, &anchor_by_gt_overlap);
...@@ -1105,17 +1109,17 @@ class RetinanetTargetAssignKernel : public framework::OpKernel<T> { ...@@ -1105,17 +1109,17 @@ class RetinanetTargetAssignKernel : public framework::OpKernel<T> {
negative_overlap, negative_overlap,
engine); engine);
Tensor sampled_loc_index = loc_score_tgtlbl_gt[0]; phi::DenseTensor sampled_loc_index = loc_score_tgtlbl_gt[0];
Tensor sampled_score_index = loc_score_tgtlbl_gt[1]; phi::DenseTensor sampled_score_index = loc_score_tgtlbl_gt[1];
Tensor sampled_tgtlbl = loc_score_tgtlbl_gt[2]; phi::DenseTensor sampled_tgtlbl = loc_score_tgtlbl_gt[2];
Tensor sampled_gt_index = loc_score_tgtlbl_gt[3]; phi::DenseTensor sampled_gt_index = loc_score_tgtlbl_gt[3];
Tensor sampled_bbox_inside_weight = loc_score_tgtlbl_gt[4]; phi::DenseTensor sampled_bbox_inside_weight = loc_score_tgtlbl_gt[4];
Tensor sampled_fg_num = loc_score_tgtlbl_gt[5]; phi::DenseTensor sampled_fg_num = loc_score_tgtlbl_gt[5];
int loc_num = sampled_loc_index.dims()[0]; int loc_num = sampled_loc_index.dims()[0];
int score_num = sampled_score_index.dims()[0]; int score_num = sampled_score_index.dims()[0];
// unmap to all anchor // unmap to all anchor
Tensor sampled_loc_index_unmap, sampled_score_index_unmap; phi::DenseTensor sampled_loc_index_unmap, sampled_score_index_unmap;
sampled_loc_index_unmap.mutable_data<int>({loc_num}, place); sampled_loc_index_unmap.mutable_data<int>({loc_num}, place);
sampled_score_index_unmap.mutable_data<int>({score_num}, place); sampled_score_index_unmap.mutable_data<int>({score_num}, place);
Gather<int>(inds_inside.data<int>(), Gather<int>(inds_inside.data<int>(),
...@@ -1130,7 +1134,7 @@ class RetinanetTargetAssignKernel : public framework::OpKernel<T> { ...@@ -1130,7 +1134,7 @@ class RetinanetTargetAssignKernel : public framework::OpKernel<T> {
sampled_score_index_unmap.data<int>()); sampled_score_index_unmap.data<int>());
// get target bbox deltas // get target bbox deltas
Tensor sampled_anchor, sampled_gt, sampled_tgt_bbox; phi::DenseTensor sampled_anchor, sampled_gt, sampled_tgt_bbox;
auto* sampled_anchor_data = auto* sampled_anchor_data =
sampled_anchor.mutable_data<T>({loc_num, 4}, place); sampled_anchor.mutable_data<T>({loc_num, 4}, place);
auto* sampled_gt_data = sampled_gt.mutable_data<T>({loc_num, 4}, place); auto* sampled_gt_data = sampled_gt.mutable_data<T>({loc_num, 4}, place);
......
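The hunks above show the commit's single mechanical pattern: delete the per-file alias `using Tensor = phi::DenseTensor;` and spell the qualified type out at every use site, re-wrapping declarations that the longer name pushes past the column limit. A minimal standalone sketch of that refactor, using a toy stand-in class since the real `phi::DenseTensor` lives in Paddle's headers:

#include <vector>

namespace phi {
class DenseTensor {};  // toy stand-in for paddle/phi/core/dense_tensor.h
}  // namespace phi

// Before the patch:
//   using Tensor = phi::DenseTensor;
//   std::vector<Tensor> MakeOutputs();
// After the patch, the owning namespace is visible at each use site:
std::vector<phi::DenseTensor> MakeOutputs() {
  return {phi::DenseTensor{}, phi::DenseTensor{}};
}

int main() { return MakeOutputs().size() == 2 ? 0 : 1; }

Dropping the alias removes one indirection for readers and keeps every operator file consistent with the phi library's own naming.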
@@ -19,8 +19,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
-using Tensor = phi::DenseTensor;
-
 static constexpr int kNumCUDAThreads = 512;
 static constexpr int kNumMaxinumNumBlocks = 4096;
@@ -123,10 +121,10 @@ template <typename DeviceContext, typename T>
 class GPUSigmoidFocalLossKernel : public framework::OpKernel<T> {
  public:
  void Compute(const framework::ExecutionContext &context) const override {
-    const Tensor *X = context.Input<phi::DenseTensor>("X");
-    const Tensor *Labels = context.Input<phi::DenseTensor>("Label");
-    const Tensor *FgNum = context.Input<phi::DenseTensor>("FgNum");
-    Tensor *Out = context.Output<phi::DenseTensor>("Out");
+    const phi::DenseTensor *X = context.Input<phi::DenseTensor>("X");
+    const phi::DenseTensor *Labels = context.Input<phi::DenseTensor>("Label");
+    const phi::DenseTensor *FgNum = context.Input<phi::DenseTensor>("FgNum");
+    phi::DenseTensor *Out = context.Output<phi::DenseTensor>("Out");
     T gamma = static_cast<T>(context.Attr<float>("gamma"));
     T alpha = static_cast<T>(context.Attr<float>("alpha"));
     auto x_dims = X->dims();
@@ -154,12 +152,13 @@ template <typename DeviceContext, typename T>
 class GPUSigmoidFocalLossGradKernel : public framework::OpKernel<T> {
  public:
  void Compute(const framework::ExecutionContext &context) const override {
-    const Tensor *X = context.Input<phi::DenseTensor>("X");
-    const Tensor *Labels = context.Input<phi::DenseTensor>("Label");
-    const Tensor *FgNum = context.Input<phi::DenseTensor>("FgNum");
-    const Tensor *dOut =
+    const phi::DenseTensor *X = context.Input<phi::DenseTensor>("X");
+    const phi::DenseTensor *Labels = context.Input<phi::DenseTensor>("Label");
+    const phi::DenseTensor *FgNum = context.Input<phi::DenseTensor>("FgNum");
+    const phi::DenseTensor *dOut =
         context.Input<phi::DenseTensor>(framework::GradVarName("Out"));
-    Tensor *dX = context.Output<phi::DenseTensor>(framework::GradVarName("X"));
+    phi::DenseTensor *dX =
+        context.Output<phi::DenseTensor>(framework::GradVarName("X"));
     auto dx_data = dX->mutable_data<T>(context.GetPlace());
     T gamma = static_cast<T>(context.Attr<float>("gamma"));
     T alpha = static_cast<T>(context.Attr<float>("alpha"));
......
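For context on the `gamma` and `alpha` attributes these kernels read: sigmoid focal loss down-weights well-classified examples. Assuming the standard formulation from the RetinaNet paper (the kernels' additional normalization by the `FgNum` input sits in the elided lines):

\[
  \mathrm{FL}(p_t) = -\alpha_t \,(1 - p_t)^{\gamma}\,\log(p_t),
  \qquad
  p_t = \begin{cases} p, & \text{positive label} \\ 1 - p, & \text{otherwise,} \end{cases}
\]

so \(\gamma = 0\) recovers weighted cross entropy, and larger \(\gamma\) suppresses the loss contribution of easy examples.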
@@ -22,16 +22,14 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
-using Tensor = phi::DenseTensor;
-
 template <typename DeviceContext, typename T>
 class SigmoidFocalLossKernel : public framework::OpKernel<T> {
  public:
  void Compute(const framework::ExecutionContext &context) const override {
-    const Tensor *X = context.Input<phi::DenseTensor>("X");
-    const Tensor *Labels = context.Input<phi::DenseTensor>("Label");
-    const Tensor *FgNum = context.Input<phi::DenseTensor>("FgNum");
-    Tensor *Out = context.Output<phi::DenseTensor>("Out");
+    const phi::DenseTensor *X = context.Input<phi::DenseTensor>("X");
+    const phi::DenseTensor *Labels = context.Input<phi::DenseTensor>("Label");
+    const phi::DenseTensor *FgNum = context.Input<phi::DenseTensor>("FgNum");
+    phi::DenseTensor *Out = context.Output<phi::DenseTensor>("Out");
     T gamma = static_cast<T>(context.Attr<float>("gamma"));
     T alpha = static_cast<T>(context.Attr<float>("alpha"));
     auto out_data = Out->mutable_data<T>(context.GetPlace());
@@ -79,12 +77,13 @@ template <typename DeviceContext, typename T>
 class SigmoidFocalLossGradKernel : public framework::OpKernel<T> {
  public:
  void Compute(const framework::ExecutionContext &context) const override {
-    const Tensor *X = context.Input<phi::DenseTensor>("X");
-    const Tensor *Labels = context.Input<phi::DenseTensor>("Label");
-    const Tensor *FgNum = context.Input<phi::DenseTensor>("FgNum");
-    const Tensor *dOut =
+    const phi::DenseTensor *X = context.Input<phi::DenseTensor>("X");
+    const phi::DenseTensor *Labels = context.Input<phi::DenseTensor>("Label");
+    const phi::DenseTensor *FgNum = context.Input<phi::DenseTensor>("FgNum");
+    const phi::DenseTensor *dOut =
         context.Input<phi::DenseTensor>(framework::GradVarName("Out"));
-    Tensor *dX = context.Output<phi::DenseTensor>(framework::GradVarName("X"));
+    phi::DenseTensor *dX =
+        context.Output<phi::DenseTensor>(framework::GradVarName("X"));
     auto dx_data = dX->mutable_data<T>(context.GetPlace());
     T gamma = static_cast<T>(context.Attr<float>("gamma"));
     T alpha = static_cast<T>(context.Attr<float>("alpha"));
......
@@ -77,7 +77,7 @@ class YoloBoxMLUKernel : public framework::OpKernel<T> {
     MLUOpTensorDesc x_desc(*x, MLUOP_LAYOUT_ARRAY, ToMluOpDataType<T>());
     MLUOpTensorDesc img_size_desc(
         *img_size, MLUOP_LAYOUT_ARRAY, ToMluOpDataType<int32_t>());
-    Tensor anchors_temp(framework::TransToPhiDataType(VT::INT32));
+    phi::DenseTensor anchors_temp(framework::TransToPhiDataType(VT::INT32));
     anchors_temp.Resize({size});
     paddle::framework::TensorFromVector(
         anchors, ctx.device_context(), &anchors_temp);
......
@@ -19,8 +19,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
-using Tensor = phi::DenseTensor;
-
 class DetectionMAPOp : public framework::OperatorWithKernel {
  public:
  using framework::OperatorWithKernel::OperatorWithKernel;
......
@@ -21,8 +21,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
-using Tensor = phi::DenseTensor;
-
 template <typename DeviceContext, typename T>
 class DGCClipByNormKernel : public framework::OpKernel<T> {
  public:
......
@@ -18,8 +18,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
-using Tensor = phi::DenseTensor;
-
 template <typename T>
 class DropoutMLUKernel : public framework::OpKernel<T> {
  public:
@@ -106,8 +104,8 @@ class DropoutMLUKernel : public framework::OpKernel<T> {
     }
     // In downgrade_in_infer mode, need to multiply (1.0f - dropout_prob).
-    Tensor scale_tensor(x->dtype());
-    Tensor bias_tensor(x->dtype());
+    phi::DenseTensor scale_tensor(x->dtype());
+    phi::DenseTensor bias_tensor(x->dtype());
     scale_tensor.mutable_data<T>({1}, ctx.GetPlace());
     bias_tensor.mutable_data<T>({1}, ctx.GetPlace());
     MLUCnnlTensorDesc scale_desc(scale_tensor);
@@ -157,7 +155,7 @@ class DropoutGradMLUKernel : public framework::OpKernel<T> {
     }
     // cast mask from uint8 to float32/float16
-    Tensor cast_mask(grad_x->dtype());
+    phi::DenseTensor cast_mask(grad_x->dtype());
     cast_mask.Resize(mask->dims());
     cast_mask.mutable_data<T>(ctx.GetPlace());
......
@@ -23,8 +23,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
-using Tensor = phi::DenseTensor;
-
 template <typename DeviceContext, typename T>
 class DropoutNPUKernel : public framework::OpKernel<T> {
  public:
@@ -56,8 +54,8 @@ class DropoutNPUKernel : public framework::OpKernel<T> {
     // only achieve the default `upscale_in_train` method
     if (!is_test) {
-      Tensor tmp_x(x->dtype());
-      Tensor tmp_out(out->dtype());
+      phi::DenseTensor tmp_x(x->dtype());
+      phi::DenseTensor tmp_out(out->dtype());
       tmp_x.ShareDataWith(*x);
       tmp_out.ShareDataWith(*out);
       if (x->dims().size() == 1) {
@@ -80,7 +78,7 @@ class DropoutNPUKernel : public framework::OpKernel<T> {
         seed = ctx.Attr<bool>("fix_seed") ? ctx.Attr<int>("seed") : 0;
       }
-      Tensor keep_prob_tensor(x->dtype());
+      phi::DenseTensor keep_prob_tensor(x->dtype());
       keep_prob_tensor.mutable_data<T>({1}, ctx.GetPlace());
       FillNpuTensorWithConstant<T>(&keep_prob_tensor,
                                    static_cast<T>(keep_prob));
@@ -89,14 +87,14 @@ class DropoutNPUKernel : public framework::OpKernel<T> {
       // mask used in `DropOutGenMask` NPU OP is different from
       // the output `Mask`.
-      Tensor npu_mask(experimental::DataType::UINT8);
+      phi::DenseTensor npu_mask(experimental::DataType::UINT8);
       uint32_t length = (x->numel() + 128 - 1) / 128 * 128;
       npu_mask.Resize(phi::make_ddim({length / 8}));
       npu_mask.mutable_data<uint8_t>(ctx.GetPlace());
       // TODO(pangyoki): `keep_prob` used in `DropOutGenMask` NPU
       // OP must be a scalar with shape[0]. At present, the shape
-      // of the `prob` Tensor of this OP is forced to be set to 0
+      // of the `prob` phi::DenseTensor of this OP is forced to be set to 0
       // in `npu_op_runner.cc`, which needs to be optimized later.
       NpuOpRunner runner_gen_mask;
       runner_gen_mask.SetType("DropOutGenMask")
@@ -116,7 +114,7 @@ class DropoutNPUKernel : public framework::OpKernel<T> {
       runner_dropout.Run(stream);
       // cast `out` from float/float16 to bool
-      Tensor cast_mask(experimental::DataType::BOOL);
+      phi::DenseTensor cast_mask(experimental::DataType::BOOL);
       cast_mask.Resize(mask->dims());
       cast_mask.mutable_data<bool>(ctx.GetPlace());
       auto dst_dtype_bool =
@@ -176,7 +174,7 @@ class DropoutGradNPUKernel : public framework::OpKernel<T> {
     }
     // cast mask from uint8 to float32/float16
-    Tensor cast_mask(dx->dtype());
+    phi::DenseTensor cast_mask(dx->dtype());
     cast_mask.Resize(mask->dims());
     cast_mask.mutable_data<T>(ctx.GetPlace());
     auto dst_dtype =
......
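Both dropout backward kernels above end the same way: the saved mask is a `uint8` tensor, so it is cast to the gradient's element type before the elementwise multiply. A hedged host-side sketch of that math in the `upscale_in_train` mode the NPU kernel mentions, with plain loops standing in for the device Cast and Mul ops:

#include <cstdint>
#include <vector>

// dx = dout * mask / keep_prob, with the mask stored as uint8 (0 or 1).
// The MLU/NPU kernels express the same thing as a Cast op followed by
// a Mul op on device.
std::vector<float> DropoutGrad(const std::vector<float>& dout,
                               const std::vector<uint8_t>& mask,
                               float keep_prob) {
  std::vector<float> dx(dout.size());
  for (size_t i = 0; i < dout.size(); ++i) {
    dx[i] = dout[i] * static_cast<float>(mask[i]) / keep_prob;
  }
  return dx;
}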
@@ -16,7 +16,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
-using Tensor = phi::DenseTensor;
 template <typename T>
 class ElementwiseAddMLUKernel : public framework::OpKernel<T> {
......
@@ -21,7 +21,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
-using Tensor = phi::DenseTensor;
 template <typename T>
 class ElementwiseAddNPUKernel : public framework::OpKernel<T> {
@@ -53,7 +52,7 @@ class ElementwiseAddNPUKernel : public framework::OpKernel<T> {
       const auto& runner = NpuOpRunner("Add", {*x, *y}, {*out}, {});
       runner.Run(dev_ctx.stream());
     } else {
-      Tensor transformed_x, transformed_y;
+      phi::DenseTensor transformed_x, transformed_y;
       NpuElementWiseOpBroadcast<T>(
           dev_ctx, x, y, axis, &transformed_x, &transformed_y);
       const auto& runner =
@@ -96,7 +95,7 @@ class ElementwiseAddGradNPUKernel : public framework::OpKernel<T> {
         }
       }
       if (!reduce_axes.empty()) {
-        Tensor tmp;
+        phi::DenseTensor tmp;
         tmp.ShareDataWith(*dx);
         tmp.Resize(phi::make_ddim(dst_dims_vec));
         const auto& runner =
@@ -128,7 +127,7 @@ class ElementwiseAddGradNPUKernel : public framework::OpKernel<T> {
         }
       }
       if (!reduce_axes.empty()) {
-        Tensor tmp;
+        phi::DenseTensor tmp;
         tmp.ShareDataWith(*dy);
         tmp.Resize(phi::make_ddim(dst_dims_vec));
         const auto& runner =
......
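A side note on the grad kernel's `ShareDataWith` plus `Resize` pair: it hands the follow-up reduction an alias of `dx`/`dy` that carries the destination shape, so the reduced result lands directly in the output buffer. The axes being reduced follow the usual broadcasting rule: whatever axis was broadcast in the forward add must be summed out in the backward pass. A sketch of that axis selection (`ReduceAxesForBroadcastGrad` is a hypothetical helper for illustration, not Paddle code):

#include <cstdint>
#include <vector>

// For a forward broadcast of `src` against `dst` (src right-aligned, as
// with axis == -1), return the dst axes to sum when forming grad(src).
std::vector<int64_t> ReduceAxesForBroadcastGrad(
    const std::vector<int64_t>& dst, const std::vector<int64_t>& src) {
  std::vector<int64_t> axes;
  const int offset = static_cast<int>(dst.size() - src.size());
  for (int i = 0; i < static_cast<int>(dst.size()); ++i) {
    // Leading axes missing from src, and axes where src had extent 1
    // while dst was larger, were broadcast forward: sum them backward.
    if (i < offset || (src[i - offset] == 1 && dst[i] > 1)) {
      axes.push_back(i);
    }
  }
  return axes;
}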
@@ -24,7 +24,6 @@ namespace operators {
 class ElementwiseDivOpDoubleGrad : public framework::OperatorWithKernel {
  public:
  using framework::OperatorWithKernel::OperatorWithKernel;
-  using Tensor = phi::DenseTensor;
  void InferShape(framework::InferShapeContext* ctx) const override {
     auto y_grad_name = framework::GradVarName("Y");
......
@@ -21,8 +21,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
-using Tensor = phi::DenseTensor;
-
 template <typename T>
 class ElementwiseDivMLUKernel : public framework::OpKernel<T> {
  public:
@@ -66,7 +64,7 @@ class ElementwiseDivGradMLUKernel : public framework::OpKernel<T> {
         CNNL_OP_TENSOR_MUL, ToCnnlDataType<T>(), CNNL_NOT_PROPAGATE_NAN);
     // compute dout/y == 1/y * dout
-    Tensor dout_div_y(dout->dtype());
+    phi::DenseTensor dout_div_y(dout->dtype());
     dout_div_y.Resize(dout->dims());
     dout_div_y.mutable_data<T>(ctx.GetPlace());
     MLUBinary<DIV>(ctx,
@@ -110,7 +108,7 @@ class ElementwiseDivGradMLUKernel : public framework::OpKernel<T> {
     if (dy) {
       // compute dy = -out * (dout/y) = -out/y * dout
-      Tensor neg_out(out->type());
+      phi::DenseTensor neg_out(out->type());
       neg_out.mutable_data<T>(out->dims(), ctx.GetPlace());
       MLUCnnlTensorDesc out_desc(*out);
@@ -121,7 +119,7 @@ class ElementwiseDivGradMLUKernel : public framework::OpKernel<T> {
                 out_desc.get(),
                 GetBasePtr(&neg_out));
-      Tensor dy_temp(y->dtype());
+      phi::DenseTensor dy_temp(y->dtype());
       dy_temp.Resize(dout->dims());
       dy_temp.mutable_data<T>(ctx.GetPlace());
......
@@ -21,8 +21,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
-using Tensor = phi::DenseTensor;
-
 template <typename DeviceContext, typename T>
 class ElementwiseDivNPUKernel : public framework::OpKernel<T> {
  public:
@@ -66,38 +64,38 @@ class ElementwiseDivGradNPUKernel : public framework::OpKernel<T> {
     if (dx) {
       dx->mutable_data<T>(place);
-      Tensor tensor_one(y->type());
+      phi::DenseTensor tensor_one(y->type());
       tensor_one.mutable_data<float>({1}, place);
       FillNpuTensorWithConstant<float>(&tensor_one, static_cast<float>(1.0));
       // Use `Div` CANN OP to achieve `1/y` instead of `Power` CANN OP.
       // Because `Power` will cause precision overflow, that is, `float_status`
       // will be set to 1.
-      Tensor y_div(y->type());
+      phi::DenseTensor y_div(y->type());
       y_div.mutable_data<T>(y->dims(), place);
       const auto& runner_one_div_y =
           NpuOpRunner("Div", {tensor_one, *y}, {y_div}, {});
       runner_one_div_y.Run(stream);
-      Tensor tensor_zeros(x->type());
+      phi::DenseTensor tensor_zeros(x->type());
       tensor_zeros.mutable_data<T>(x->dims(), place);
       const auto& runner_tensor_zeros =
           NpuOpRunner("ZerosLike", {*x}, {tensor_zeros}, {});
       runner_tensor_zeros.Run(stream);
-      Tensor x_zero(experimental::DataType::BOOL);
+      phi::DenseTensor x_zero(experimental::DataType::BOOL);
       x_zero.mutable_data<bool>(x->dims(), place);
       const auto& runner_x_zero =
          NpuOpRunner("Equal", {*x, tensor_zeros}, {x_zero}, {});
       runner_x_zero.Run(stream);
-      Tensor x_nozero(experimental::DataType::BOOL);
+      phi::DenseTensor x_nozero(experimental::DataType::BOOL);
       x_nozero.mutable_data<bool>(x->dims(), place);
       const auto& runner_x_nonzero =
           NpuOpRunner("LogicalNot", {x_zero}, {x_nozero}, {});
       runner_x_nonzero.Run(stream);
-      Tensor x_nozero_f(x->type());
+      phi::DenseTensor x_nozero_f(x->type());
       x_nozero_f.mutable_data<T>(x->dims(), place);
       const auto& runner_x_nonzero_f =
           NpuOpRunner("Cast",
@@ -106,7 +104,7 @@ class ElementwiseDivGradNPUKernel : public framework::OpKernel<T> {
                       {{"dst_type", static_cast<int32_t>(0)}});
       runner_x_nonzero_f.Run(stream);
-      Tensor x_grad_w(x->type());
+      phi::DenseTensor x_grad_w(x->type());
       x_grad_w.mutable_data<T>(x->dims(), place);
       const auto& runner_x_grad_w =
          NpuOpRunner("Mul", {x_nozero_f, y_div}, {x_grad_w}, {});
@@ -120,19 +118,19 @@ class ElementwiseDivGradNPUKernel : public framework::OpKernel<T> {
     if (dy) {
       dy->mutable_data<T>(place);
-      Tensor neg_out(out->type());
+      phi::DenseTensor neg_out(out->type());
       neg_out.mutable_data<T>(out->dims(), place);
       const auto& runner_neg_out = NpuOpRunner("Neg", {*out}, {neg_out}, {});
       runner_neg_out.Run(stream);
-      Tensor tmp_mul(out->type());
+      phi::DenseTensor tmp_mul(out->type());
       tmp_mul.mutable_data<T>(out->dims(), place);
       const auto& runner_mul =
           NpuOpRunner("Mul", {neg_out, *dout}, {tmp_mul}, {});
       runner_mul.Run(stream);
       if (dy->dims() != dout->dims()) {
-        Tensor reduced_tmp_mul(y->type());
+        phi::DenseTensor reduced_tmp_mul(y->type());
         reduced_tmp_mul.mutable_data<T>(y->dims(), place);
         std::vector<int64_t> axes;
......
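The comments in the two div-grad kernels compress the calculus. Spelled out for out = x / y:

\[
  \frac{\partial L}{\partial x} = \frac{1}{y}\,dout,
  \qquad
  \frac{\partial L}{\partial y} = -\frac{x}{y^{2}}\,dout = -\frac{out}{y}\,dout,
\]

which is exactly the MLU kernel's staging of `dout_div_y` for dx and `neg_out` combined with `dout/y` for dy. The NPU kernel's extra `Equal`/`LogicalNot` round trip builds a 0/1 mask of the nonzero elements of x, so positions where x is zero contribute no gradient to dx.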
@@ -21,8 +21,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
-using Tensor = phi::DenseTensor;
-
 template <typename T>
 class ElementwiseFloorDivNPUKernel : public framework::OpKernel<T> {
  public:
......
@@ -18,8 +18,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
-using Tensor = phi::DenseTensor;
-
 template <typename DeviceContext, typename T>
 class ElementwiseMaxNPUKernel : public framework::OpKernel<T> {
  public:
@@ -51,7 +49,7 @@ class ElementwiseMaxNPUKernel : public framework::OpKernel<T> {
       const auto& runner = NpuOpRunner("Maximum", {*x, *y}, {*out}, {});
       runner.Run(stream);
     } else {
-      Tensor transformed_x, transformed_y;
+      phi::DenseTensor transformed_x, transformed_y;
       NpuElementWiseOpBroadcast<T>(
           dev_ctx, x, y, axis, &transformed_x, &transformed_y);
       const auto& runner =
@@ -85,7 +83,7 @@ class ElementwiseMaxGradNPUKernel : public framework::OpKernel<T> {
     auto x_dims = x->dims();
     auto y_dims = y->dims();
     axis = (axis == -1 ? std::abs(x_dims.size() - y_dims.size()) : axis);
-    Tensor transformed_x, transformed_y;
+    phi::DenseTensor transformed_x, transformed_y;
     NpuElementWiseOpBroadcast<T>(
         dev_ctx, x, y, axis, &transformed_x, &transformed_y);
@@ -99,9 +97,9 @@ class ElementwiseMaxGradNPUKernel : public framework::OpKernel<T> {
     if (dx && dy) {
       dx->mutable_data<T>(ctx.GetPlace());
       dy->mutable_data<T>(ctx.GetPlace());
-      Tensor tmp_dx;
+      phi::DenseTensor tmp_dx;
       tmp_dx.mutable_data<T>(dout_dims, ctx.GetPlace());
-      Tensor tmp_dy;
+      phi::DenseTensor tmp_dy;
       tmp_dy.mutable_data<T>(dout_dims, ctx.GetPlace());
       const auto& runner = NpuOpRunner("MaximumGrad",
@@ -153,12 +151,12 @@ class ElementwiseMaxGradNPUKernel : public framework::OpKernel<T> {
       }
     } else if (dx) {
-      Tensor zero_tensor(dout->type());
+      phi::DenseTensor zero_tensor(dout->type());
       zero_tensor.mutable_data<T>(dout_dims, ctx.GetPlace());
       FillNpuTensorWithConstant<T>(&zero_tensor, static_cast<T>(0));
       dx->mutable_data<T>(ctx.GetPlace());
-      Tensor tmp_dx;
+      phi::DenseTensor tmp_dx;
       tmp_dx.mutable_data<T>(dout_dims, ctx.GetPlace());
       const auto& runner = NpuOpRunner("MaximumGrad",
@@ -190,12 +188,12 @@ class ElementwiseMaxGradNPUKernel : public framework::OpKernel<T> {
       }
     } else if (dy) {
-      Tensor zero_tensor(dout->type());
+      phi::DenseTensor zero_tensor(dout->type());
       zero_tensor.mutable_data<T>(dout_dims, ctx.GetPlace());
       FillNpuTensorWithConstant<T>(&zero_tensor, static_cast<T>(0));
       dy->mutable_data<T>(ctx.GetPlace());
-      Tensor tmp_dy;
+      phi::DenseTensor tmp_dy;
       tmp_dy.mutable_data<T>(dout_dims, ctx.GetPlace());
       const auto& runner = NpuOpRunner("MaximumGrad",
......
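`MaximumGrad` routes each element of `dout` to whichever input won the forward max; when only one of `dx`/`dy` is requested, the kernel above still materializes a `zero_tensor`, apparently standing in for the gradient half that is not needed. A hedged sketch of the routing rule itself (tie-breaking on `x >= y` is this sketch's assumption, not something the diff states):

#include <cstddef>
#include <utility>
#include <vector>

// Elementwise max backward: the winning input receives the upstream
// gradient and the losing input receives zero at each position.
std::pair<std::vector<float>, std::vector<float>> MaximumGrad(
    const std::vector<float>& x,
    const std::vector<float>& y,
    const std::vector<float>& dout) {
  std::vector<float> dx(x.size(), 0.0f), dy(y.size(), 0.0f);
  for (size_t i = 0; i < dout.size(); ++i) {
    (x[i] >= y[i] ? dx[i] : dy[i]) = dout[i];
  }
  return {dx, dy};
}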
(21 more file diffs in this commit are collapsed and not shown: "This diff is collapsed.")