Unverified commit bb48b596, authored by Young-Flash, committed by GitHub

delete paddle/fluid/operators/*_mlu.* files (#52435)

Parent 0e3f7ab1
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/mlu/mlu_baseop.h"
namespace paddle {
namespace operators {
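// abs forward on MLU: out = |x|, computed with the cnnl Abs primitive.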
template <typename T>
class AbsMLUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* input = ctx.Input<phi::DenseTensor>("X");
auto* output = ctx.Output<phi::DenseTensor>("Out");
output->mutable_data<T>(ctx.GetPlace());
MLUCnnlTensorDesc input_desc(*input);
MLUCnnlTensorDesc output_desc(*output);
MLUCnnl::Abs(ctx,
input_desc.get(),
GetBasePtr(input),
output_desc.get(),
GetBasePtr(output));
}
};
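// abs backward on MLU: dx = sign(x) * dout. sign(x) is materialized into a
// temporary tensor and multiplied with dout via OpTensor(CNNL_OP_TENSOR_MUL).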
template <typename T>
class AbsGradMLUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* x = ctx.Input<phi::DenseTensor>("X");
auto* dout = ctx.Input<phi::DenseTensor>(framework::GradVarName("Out"));
auto* dx = ctx.Output<phi::DenseTensor>(framework::GradVarName("X"));
dx->mutable_data<T>(ctx.GetPlace());
MLUCnnlTensorDesc input_desc(*x);
MLUCnnlOpTensorDesc mul_op_desc(
CNNL_OP_TENSOR_MUL, ToCnnlDataType<T>(), CNNL_NOT_PROPAGATE_NAN);
phi::DenseTensor sign_x;
sign_x.mutable_data<T>(x->dims(), ctx.GetPlace());
MLUCnnl::Sign(ctx,
input_desc.get(),
GetBasePtr(x),
input_desc.get(),
GetBasePtr(&sign_x));
MLUCnnl::OpTensor(ctx,
mul_op_desc.get(),
input_desc.get(),
GetBasePtr(&sign_x),
input_desc.get(),
GetBasePtr(dout),
input_desc.get(),
GetBasePtr(dx),
ToCnnlDataType<T>());
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
namespace plat = paddle::platform;
REGISTER_OP_MLU_KERNEL(abs,
ops::AbsMLUKernel<float>,
ops::AbsMLUKernel<plat::float16>);
REGISTER_OP_MLU_KERNEL(abs_grad,
ops::AbsGradMLUKernel<float>,
ops::AbsGradMLUKernel<plat::float16>);
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/mlu/mlu_baseop.h"
namespace paddle {
namespace operators {
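// arg_max on MLU: implemented as a cnnl Reduce with CNNL_REDUCE_MAX and
// CNNL_REDUCE_ONLY_INDICES. The reduce emits int32 indices, so an int64
// output (dtype == 3) goes through an int32 buffer followed by a Cast.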
template <typename T>
class ArgMaxMLUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* x = ctx.Input<phi::DenseTensor>("X");
auto* out = ctx.Output<phi::DenseTensor>("Out");
auto axis = static_cast<int>(ctx.Attr<int64_t>("axis"));
auto dtype = ctx.Attr<int>("dtype");
const bool& flatten = ctx.Attr<bool>("flatten");
if (x->numel() == 0) return;
PADDLE_ENFORCE_EQ(
(dtype == 2 || dtype == 3),
true,
platform::errors::InvalidArgument(
"The attribute of dtype in argmax op must be [%s] or [%s], "
"but "
"received [%s]",
paddle::framework::DataTypeToString(
framework::proto::VarType::INT64),
paddle::framework::DataTypeToString(
framework::proto::VarType::INT32),
paddle::framework::DataTypeToString(
static_cast<framework::proto::VarType::Type>(dtype))));
if (axis < 0) {
framework::DDim x_dims;
x_dims = x->dims();
axis += x_dims.size();
}
phi::DenseTensor flatten_x(x->type());
flatten_x.ShareDataWith(*x);
if (flatten) {
flatten_x.Resize(phi::make_ddim({x->numel()}));
// if flatten is true, treat axis as 0
axis = 0;
}
std::vector<int> reduce_dims;
reduce_dims.push_back(axis);
auto out_dims = out->dims();
int out_count = out_dims[0];
for (int i = 1; i < out_dims.size(); i++) {
out_count = out_count * out_dims[i];
}
size_t indices_size_inbytes = out_count * sizeof(int32_t);
auto& dev_ctx = ctx.template device_context<MLUDeviceContext>();
phi::DenseTensor value_out =
ctx.AllocateTmpTensor<T, MLUDeviceContext>(out->dims(), dev_ctx);
MLUCnnlTensorDesc value_out_desc(value_out);
MLUCnnlTensorDesc input_desc(
flatten_x, CNNL_LAYOUT_ARRAY, ToCnnlDataType(flatten_x.dtype()));
MLUCnnlReduceDesc reduction_desc(reduce_dims,
CNNL_REDUCE_MAX,
ToCnnlDataType<T>(),
CNNL_NOT_PROPAGATE_NAN,
CNNL_REDUCE_ONLY_INDICES,
CNNL_32BIT_INDICES);
if (dtype == 2) {
out->template mutable_data<int32_t>(ctx.GetPlace());
MLUCnnl::Reduce(ctx,
true /*need_workspace*/,
reduction_desc.get(),
nullptr,
input_desc.get(),
GetBasePtr(&flatten_x),
indices_size_inbytes /*indices_size*/,
GetBasePtr(out),
nullptr,
value_out_desc.get(),
GetBasePtr(&value_out));
} else {
out->template mutable_data<int64_t>(ctx.GetPlace());
phi::DenseTensor out_int32 =
ctx.AllocateTmpTensor<int32_t, MLUDeviceContext>(out->dims(),
dev_ctx);
MLUCnnl::Reduce(ctx,
true /*need_workspace*/,
reduction_desc.get(),
nullptr,
input_desc.get(),
GetBasePtr(&flatten_x),
indices_size_inbytes /*indices_size*/,
GetBasePtr(&out_int32),
nullptr,
value_out_desc.get(),
GetBasePtr(&value_out));
// cast indices type to int64
MLUCnnlTensorDesc out_int32_desc(out_int32);
MLUCnnlTensorDesc cast_output_desc(*out);
cnnlCastDataType_t cast_type = GetCastDataType(VT::INT32, VT::INT64);
MLUCnnl::Cast(ctx,
cast_type,
out_int32_desc.get(),
GetBasePtr(&out_int32),
cast_output_desc.get(),
GetBasePtr(out));
}
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
namespace plat = paddle::platform;
REGISTER_OP_MLU_KERNEL(arg_max,
ops::ArgMaxMLUKernel<int>,
ops::ArgMaxMLUKernel<float>,
ops::ArgMaxMLUKernel<paddle::platform::float16>);
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/mlu/mlu_baseop.h"
namespace paddle {
namespace operators {
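// argsort on MLU: implemented as a full-length TopK (k equals the size of
// the sorted axis). Indices come back as int32 and are cast to int64.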
template <typename T>
class ArgsortMLUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* input = ctx.Input<phi::DenseTensor>("X");
auto* output = ctx.Output<phi::DenseTensor>("Out");
auto* indices = ctx.Output<phi::DenseTensor>("Indices");
const auto& place = ctx.GetPlace();
const auto& sorted = true;
const bool descending = ctx.Attr<bool>("descending");
// if axis < 0, calculate the real axis
int axis = static_cast<int>(ctx.Attr<int>("axis"));
if (axis < 0) {
const auto& in_dims = input->dims();
axis += in_dims.size();
}
auto in_dims = input->dims();
size_t k = in_dims[axis];
output->mutable_data<T>(place);
indices->mutable_data<int64_t>(place);
// cnnl only supports int32/int16 indices
phi::DenseTensor indices_int32(framework::TransToPhiDataType(VT::INT32));
indices_int32.Resize(indices->dims());
indices_int32.mutable_data<int32_t>(place);
MLUCnnlTensorDesc input_desc(*input);
MLUCnnlTensorDesc values_output_desc(*output);
MLUCnnlTensorDesc indices_int32_desc(indices_int32);
MLUCnnl::TopK(ctx,
k,
axis,
descending,
sorted,
input_desc.get(),
GetBasePtr(input),
values_output_desc.get(),
GetBasePtr(output),
indices_int32_desc.get(),
GetBasePtr(&indices_int32));
// cast indices type to int64
MLUCnnlTensorDesc cast_output_desc(*indices);
cnnlCastDataType_t cast_type = GetCastDataType(VT::INT32, VT::INT64);
MLUCnnl::Cast(ctx,
cast_type,
indices_int32_desc.get(),
GetBasePtr(&indices_int32),
cast_output_desc.get(),
GetBasePtr(indices));
}
};
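// argsort backward on MLU: scatter dout back to the pre-sort positions along
// `axis`, using the indices saved by the forward pass.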
template <typename T>
class ArgsortGradMLUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* indices = ctx.Input<phi::DenseTensor>("Indices");
auto* dx = ctx.Output<phi::DenseTensor>(framework::GradVarName("X"));
auto* dout = ctx.Input<phi::DenseTensor>(framework::GradVarName("Out"));
int axis = ctx.Attr<int>("axis");
dx->mutable_data<T>(ctx.GetPlace());
auto in_dims = indices->dims();
axis = (axis < 0) ? (in_dims.size() + axis) : axis;
if (dout->numel() == 0) return;
MLUCnnlTensorDesc dout_desc(*dout);
MLUCnnlTensorDesc indices_desc(*indices);
MLUCnnlTensorDesc dx_desc(*dx);
MLUCnnl::ScatterFunctor(ctx,
dx_desc.get(),
GetBasePtr(dx),
dout_desc.get(),
GetBasePtr(dout),
indices_desc.get(),
GetBasePtr(indices),
axis);
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_MLU_KERNEL(argsort,
ops::ArgsortMLUKernel<paddle::platform::float16>,
ops::ArgsortMLUKernel<float>,
ops::ArgsortMLUKernel<int8_t>,
ops::ArgsortMLUKernel<uint8_t>,
ops::ArgsortMLUKernel<int16_t>,
ops::ArgsortMLUKernel<int>);
REGISTER_OP_MLU_KERNEL(argsort_grad,
ops::ArgsortGradMLUKernel<paddle::platform::float16>,
ops::ArgsortGradMLUKernel<float>,
ops::ArgsortGradMLUKernel<int8_t>,
ops::ArgsortGradMLUKernel<uint8_t>,
ops::ArgsortGradMLUKernel<int16_t>,
ops::ArgsortGradMLUKernel<int>);
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <string>
#include "paddle/fluid/operators/assign_op.h"
#include "paddle/fluid/operators/mlu/mlu_baseop.h"
#include "paddle/fluid/platform/float16.h"
namespace paddle {
namespace operators {
template <typename T>
class AssignMLUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* x = ctx.Input<phi::DenseTensor>("X");
auto* out = ctx.Output<phi::DenseTensor>("Out");
out->mutable_data<T>(ctx.GetPlace());
MLUCnnlTensorDesc x_desc(*x);
MLUCnnlTensorDesc out_desc(*out);
MLUCnnl::Assign(
ctx, x_desc.get(), GetBasePtr(x), out_desc.get(), GetBasePtr(out));
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
namespace plat = paddle::platform;
REGISTER_OP_MLU_KERNEL(assign,
ops::AssignMLUKernel<int>,
ops::AssignMLUKernel<float>,
ops::AssignMLUKernel<plat::float16>,
ops::AssignMLUKernel<bool>)
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/assign_value_op.h"
namespace ops = paddle::operators;
REGISTER_OP_MLU_KERNEL(assign_value,
ops::AssignValueKernel<bool>,
ops::AssignValueKernel<int>,
ops::AssignValueKernel<int64_t>,
ops::AssignValueKernel<float>);
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/amp/fp16_type_traits.h"
#include "paddle/fluid/operators/batch_norm_op.h"
#include "paddle/fluid/operators/mlu/mlu_baseop.h"
namespace paddle {
namespace operators {
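// batch_norm forward on MLU works in NHWC: an NCHW input is first viewed as
// (N, C, H*W, 1) and transposed with perm {0, 2, 3, 1} into the
// (N, H*W, 1, C) working layout before FusedBatchNorm is called.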
template <typename T>
class MLUBatchNormOpKernel : public framework::OpKernel<T> {
using MPDType = typename details::MPTypeTrait<T>::Type;
public:
void Compute(const framework::ExecutionContext &ctx) const override {
const auto &place = ctx.GetPlace();
const float epsilon = ctx.Attr<float>("epsilon");
float momentum = ctx.Attr<float>("momentum");
const bool is_test = ctx.Attr<bool>("is_test");
const bool use_global_stats = ctx.Attr<bool>("use_global_stats");
const bool trainable_stats = ctx.Attr<bool>("trainable_statistics");
bool test_mode = is_test && (!trainable_stats);
bool global_stats = test_mode || use_global_stats;
const std::string data_layout_str = ctx.Attr<std::string>("data_layout");
DataLayout data_layout = phi::StringToDataLayout(data_layout_str);
const auto *x = ctx.Input<phi::DenseTensor>("X");
const auto &x_dims = x->dims();
PADDLE_ENFORCE_GE(
x_dims.size(),
2,
platform::errors::InvalidArgument(
"The size of input X's dimensions should be larger than 1."
"But received: the size of input X's dimensions is [%d]",
x_dims.size()));
PADDLE_ENFORCE_LE(
x_dims.size(),
5,
platform::errors::InvalidArgument(
"The size of input X's dimensions should be less than 6."
"But received: the size of input X's dimensions is [%d]",
x_dims.size()));
const int N = x_dims[0];
const int C =
(data_layout == DataLayout::kNCHW ? x_dims[1]
: x_dims[x_dims.size() - 1]);
const int sample_size = x->numel() / N / C;
const auto *running_mean = ctx.Input<phi::DenseTensor>("Mean");
const auto *running_var = ctx.Input<phi::DenseTensor>("Variance");
const auto *scale = ctx.Input<phi::DenseTensor>("Scale");
const auto *bias = ctx.Input<phi::DenseTensor>("Bias");
auto *y = ctx.Output<phi::DenseTensor>("Y");
auto *mean_out = ctx.Output<phi::DenseTensor>("MeanOut");
auto *variance_out = ctx.Output<phi::DenseTensor>("VarianceOut");
auto *saved_mean = ctx.Output<phi::DenseTensor>("SavedMean");
auto *saved_variance = ctx.Output<phi::DenseTensor>("SavedVariance");
// alloc memory
y->mutable_data<T>(place);
mean_out->mutable_data<MPDType>(place);
variance_out->mutable_data<MPDType>(place);
saved_mean->mutable_data<MPDType>(place);
saved_variance->mutable_data<MPDType>(place);
phi::DenseTensor transformed_x;
phi::DenseTensor transformed_y;
const int transformed_dim_size = 4;
const int transformed_shape[transformed_dim_size] = {N, sample_size, 1, C};
MLUCnnlTensorDesc transformed_desc(transformed_dim_size,
transformed_shape,
ToCnnlDataType<T>(),
CNNL_LAYOUT_NHWC);
MLUCnnlTensorDesc others_input_desc(*scale);
// If the input dimension is 2 and the format is NCHW, the input can be
// regarded as NHWC; no transpose is needed.
bool need_transpose =
(data_layout == DataLayout::kNCHW && x_dims.size() != 2);
if (need_transpose) {
auto &dev_ctx = ctx.template device_context<MLUDeviceContext>();
transformed_x = ctx.AllocateTmpTensor<T, MLUDeviceContext>(
framework::DDim(transformed_shape, transformed_dim_size), dev_ctx);
transformed_y = ctx.AllocateTmpTensor<T, MLUDeviceContext>(
framework::DDim(transformed_shape, transformed_dim_size), dev_ctx);
const int x_reshaped[] = {N, C, sample_size, 1};
MLUCnnlTensorDesc x_reshaped_desc(
transformed_dim_size, x_reshaped, ToCnnlDataType<T>());
const std::vector<int> perm = {0, 2, 3, 1};
MLUCnnl::Transpose(ctx,
perm,
transformed_dim_size,
x_reshaped_desc.get(),
GetBasePtr(x),
transformed_desc.get(),
GetBasePtr(&transformed_x));
} else {
transformed_x = *x;
transformed_y = *y;
}
if (ctx.HasInput("MomentumTensor")) {
const auto *mom_tensor = ctx.Input<phi::DenseTensor>("MomentumTensor");
phi::DenseTensor mom_cpu;
framework::TensorCopySync(*mom_tensor, platform::CPUPlace(), &mom_cpu);
momentum = mom_cpu.data<float>()[0];
}
MLUCnnl::FusedBatchNorm(ctx,
!global_stats,
transformed_desc.get(),
GetBasePtr(&transformed_x),
others_input_desc.get(),
GetBasePtr(scale),
GetBasePtr(bias),
GetBasePtr(running_mean),
GetBasePtr(running_var),
epsilon,
momentum,
transformed_desc.get(),
GetBasePtr(&transformed_y),
GetBasePtr(mean_out),
GetBasePtr(variance_out),
GetBasePtr(saved_mean),
GetBasePtr(saved_variance));
if (need_transpose) {
const int y_reshaped[] = {N, C, sample_size, 1};
MLUCnnlTensorDesc y_reshaped_desc(
transformed_dim_size, y_reshaped, ToCnnlDataType<T>());
const std::vector<int> perm = {0, 3, 1, 2};
MLUCnnl::Transpose(ctx,
perm,
transformed_y.dims().size(),
transformed_desc.get(),
GetBasePtr(&transformed_y),
y_reshaped_desc.get(),
GetBasePtr(y));
}
}
};
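// batch_norm backward on MLU: reuses the NCHW -> NHWC transposition of the
// forward kernel and calls FusedBatchNormGrad with is_training = false when
// global statistics (Mean/Variance) are used, and true otherwise.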
template <typename T>
class MLUBatchNormGradOpKernel : public framework::OpKernel<T> {
using MPDType = typename details::MPTypeTrait<T>::Type;
public:
void Compute(const framework::ExecutionContext &ctx) const override {
const auto *x = ctx.Input<phi::DenseTensor>("X");
const auto *d_y = ctx.Input<phi::DenseTensor>(framework::GradVarName("Y"));
const auto *scale = ctx.Input<phi::DenseTensor>("Scale");
const auto *bias = ctx.Input<phi::DenseTensor>("Bias");
const auto *saved_mean = ctx.Input<phi::DenseTensor>("SavedMean");
// SavedVariance has been inverted in the forward operator
const auto *saved_inv_variance =
ctx.Input<phi::DenseTensor>("SavedVariance");
const std::string data_layout_str = ctx.Attr<std::string>("data_layout");
bool use_global_stats = ctx.Attr<bool>("use_global_stats");
const bool is_test = ctx.Attr<bool>("is_test");
const float epsilon = ctx.Attr<float>("epsilon");
DataLayout data_layout = phi::StringToDataLayout(data_layout_str);
auto *d_x = ctx.Output<phi::DenseTensor>(framework::GradVarName("X"));
auto *d_scale =
ctx.Output<phi::DenseTensor>(framework::GradVarName("Scale"));
auto *d_bias = ctx.Output<phi::DenseTensor>(framework::GradVarName("Bias"));
auto &dev_ctx = ctx.template device_context<MLUDeviceContext>();
auto d_x_tmp =
ctx.AllocateTmpTensor<T, MLUDeviceContext>(x->dims(), dev_ctx);
auto scale_grad_tmp = ctx.AllocateTmpTensor<MPDType, MLUDeviceContext>(
scale->dims(), dev_ctx);
auto bias_grad_tmp =
ctx.AllocateTmpTensor<MPDType, MLUDeviceContext>(bias->dims(), dev_ctx);
if (d_x == nullptr) {
d_x = &d_x_tmp;
}
if (d_scale == nullptr) {
d_scale = &scale_grad_tmp;
}
if (d_bias == nullptr) {
d_bias = &bias_grad_tmp;
}
const auto &place = ctx.GetPlace();
d_x->mutable_data<T>(place);
d_scale->mutable_data<MPDType>(place);
d_bias->mutable_data<MPDType>(place);
use_global_stats = is_test || use_global_stats;
const auto &x_dims = x->dims();
PADDLE_ENFORCE_GE(
x_dims.size(),
2,
platform::errors::InvalidArgument(
"The size of input X's dimensions should be larger than 1."
"But received: the size of input X's dimensions is [%d]",
x_dims.size()));
PADDLE_ENFORCE_LE(
x_dims.size(),
5,
platform::errors::InvalidArgument(
"The size of input X's dimensions should be less than 6."
"But received: the size of input X's dimensions is [%d]",
x_dims.size()));
const int N = x_dims[0];
const int C =
(data_layout == DataLayout::kNCHW ? x_dims[1]
: x_dims[x_dims.size() - 1]);
const int sample_size = x->numel() / N / C;
phi::DenseTensor transformed_d_y;
phi::DenseTensor transformed_x;
phi::DenseTensor transformed_d_x;
const int transformed_dim_size = 4;
const int transformed_shape[transformed_dim_size] = {N, sample_size, 1, C};
MLUCnnlTensorDesc transformed_desc(transformed_dim_size,
transformed_shape,
ToCnnlDataType<T>(),
CNNL_LAYOUT_NHWC);
MLUCnnlTensorDesc others_input_desc(*scale);
bool need_transpose =
(data_layout == DataLayout::kNCHW && x_dims.size() != 2);
if (need_transpose) {
transformed_d_y = ctx.AllocateTmpTensor<T, MLUDeviceContext>(
framework::DDim(transformed_shape, transformed_dim_size), dev_ctx);
transformed_x = ctx.AllocateTmpTensor<T, MLUDeviceContext>(
framework::DDim(transformed_shape, transformed_dim_size), dev_ctx);
transformed_d_x = ctx.AllocateTmpTensor<T, MLUDeviceContext>(
framework::DDim(transformed_shape, transformed_dim_size), dev_ctx);
const int org_reshaped[] = {N, C, sample_size, 1};
MLUCnnlTensorDesc org_reshaped_desc(
transformed_dim_size, org_reshaped, ToCnnlDataType<T>());
const std::vector<int> perm = {0, 2, 3, 1};
MLUCnnl::Transpose(ctx,
perm,
transformed_dim_size,
org_reshaped_desc.get(),
GetBasePtr(d_y),
transformed_desc.get(),
GetBasePtr(&transformed_d_y));
MLUCnnl::Transpose(ctx,
perm,
transformed_dim_size,
org_reshaped_desc.get(),
GetBasePtr(x),
transformed_desc.get(),
GetBasePtr(&transformed_x));
} else {
transformed_d_y = *d_y;
transformed_x = *x;
transformed_d_x = *d_x;
}
if (use_global_stats) {
const auto *running_mean = ctx.Input<phi::DenseTensor>("Mean");
const auto *running_variance = ctx.Input<phi::DenseTensor>("Variance");
MLUCnnl::FusedBatchNormGrad(ctx,
false /*is_training*/,
transformed_desc.get(),
GetBasePtr(&transformed_d_y),
transformed_desc.get(),
GetBasePtr(&transformed_x),
others_input_desc.get(),
GetBasePtr(scale),
GetBasePtr(running_mean),
GetBasePtr(running_variance),
epsilon,
transformed_desc.get(),
GetBasePtr(&transformed_d_x),
GetBasePtr(d_scale),
GetBasePtr(d_bias));
} else {
MLUCnnl::FusedBatchNormGrad(ctx,
true /*is_training*/,
transformed_desc.get(),
GetBasePtr(&transformed_d_y),
transformed_desc.get(),
GetBasePtr(&transformed_x),
others_input_desc.get(),
GetBasePtr(scale),
GetBasePtr(saved_mean),
GetBasePtr(saved_inv_variance),
epsilon,
transformed_desc.get(),
GetBasePtr(&transformed_d_x),
GetBasePtr(d_scale),
GetBasePtr(d_bias));
}
if (need_transpose) {
const int d_x_reshaped[] = {N, C, sample_size, 1};
MLUCnnlTensorDesc d_x_reshaped_desc(
transformed_dim_size, d_x_reshaped, ToCnnlDataType<T>());
const std::vector<int> perm = {0, 3, 1, 2};
MLUCnnl::Transpose(ctx,
perm,
transformed_dim_size,
transformed_desc.get(),
GetBasePtr(&transformed_d_x),
d_x_reshaped_desc.get(),
GetBasePtr(d_x));
}
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
namespace plat = paddle::platform;
REGISTER_OP_MLU_KERNEL(batch_norm,
ops::MLUBatchNormOpKernel<float>,
ops::MLUBatchNormOpKernel<plat::float16>);
REGISTER_OP_MLU_KERNEL(batch_norm_grad,
ops::MLUBatchNormGradOpKernel<float>,
ops::MLUBatchNormGradOpKernel<plat::float16>);
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/mlu/mlu_baseop.h"
namespace paddle {
namespace operators {
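// bce_loss forward/backward on MLU: thin wrappers around cnnl BceLoss and
// BceLossBackward with reduction mode CNNL_BCE_LOSS_NONE.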
template <typename T>
class BCELossMLUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* x = ctx.Input<phi::DenseTensor>("X");
auto* labels = ctx.Input<phi::DenseTensor>("Label");
auto* out = ctx.Output<phi::DenseTensor>("Out");
out->mutable_data<T>(ctx.GetPlace());
MLUCnnlTensorDesc x_desc(*x);
MLUCnnlTensorDesc label_desc(*labels);
MLUCnnlTensorDesc out_desc(*out);
MLUCnnl::BceLoss(ctx,
CNNL_BCE_LOSS_NONE,
x_desc.get(),
GetBasePtr(x),
label_desc.get(),
GetBasePtr(labels),
nullptr,
nullptr,
out_desc.get(),
GetBasePtr(out));
}
};
template <typename T>
class BCELossGradMLUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* x = ctx.Input<phi::DenseTensor>("X");
auto* labels = ctx.Input<phi::DenseTensor>("Label");
auto* dout = ctx.Input<phi::DenseTensor>(framework::GradVarName("Out"));
auto* dx = ctx.Output<phi::DenseTensor>(framework::GradVarName("X"));
dx->mutable_data<T>(ctx.GetPlace());
MLUCnnlTensorDesc x_desc(*x);
MLUCnnlTensorDesc label_desc(*labels);
MLUCnnlTensorDesc dout_desc(*dout);
MLUCnnl::BceLossBackward(ctx,
CNNL_BCE_LOSS_NONE,
dout_desc.get(),
GetBasePtr(dout),
x_desc.get(),
GetBasePtr(x),
label_desc.get(),
GetBasePtr(labels),
nullptr,
nullptr,
x_desc.get(),
GetBasePtr(dx));
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
namespace plat = paddle::platform;
REGISTER_OP_MLU_KERNEL(bce_loss,
ops::BCELossMLUKernel<float>,
ops::BCELossMLUKernel<plat::float16>);
REGISTER_OP_MLU_KERNEL(bce_loss_grad,
ops::BCELossGradMLUKernel<float>,
ops::BCELossGradMLUKernel<plat::float16>);
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/mlu/mlu_baseop.h"
#include "paddle/fluid/platform/device/mlu/device_context.h"
namespace paddle {
namespace operators {
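// cast on MLU: when in_dtype equals out_dtype the kernel degenerates to a
// TensorCopy; otherwise it checks MLUSupportsCast and runs a cnnl Cast.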
template <typename T>
class CastMLUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* input = ctx.Input<phi::DenseTensor>("X");
auto* output = ctx.Output<phi::DenseTensor>("Out");
auto src_type = static_cast<VT::Type>(ctx.Attr<int>("in_dtype"));
auto dst_type = static_cast<VT::Type>(ctx.Attr<int>("out_dtype"));
auto place = ctx.GetPlace();
if (src_type == dst_type) {
auto& dev_ctx = ctx.template device_context<platform::MLUDeviceContext>();
output->mutable_data<T>(place);
framework::TensorCopy(*input, place, dev_ctx, output);
return;
}
PADDLE_ENFORCE_EQ(MLUSupportsCast(src_type, dst_type),
true,
platform::errors::InvalidArgument(
"MLU not support cast [%d] to [%d]",
framework::DataTypeToString(src_type),
framework::DataTypeToString(dst_type)));
output->mutable_data(place, framework::TransToPhiDataType(dst_type));
MLUCnnlTensorDesc input_desc(*input);
MLUCnnlTensorDesc output_desc(*output);
cnnlCastDataType_t cast_type = GetCastDataType(src_type, dst_type);
MLUCnnl::Cast(ctx,
cast_type,
input_desc.get(),
GetBasePtr(input),
output_desc.get(),
GetBasePtr(output));
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_MLU_KERNEL(cast,
ops::CastMLUKernel<float>,
ops::CastMLUKernel<int>,
ops::CastMLUKernel<int16_t>,
ops::CastMLUKernel<uint8_t>,
ops::CastMLUKernel<bool>,
ops::CastMLUKernel<int64_t>,
ops::CastMLUKernel<paddle::platform::float16>);
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/mlu/mlu_baseop.h"
namespace paddle {
namespace operators {
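// clip forward on MLU: min/max come from the attributes unless the optional
// Min/Max input tensors are present; tensor values living on the MLU are
// copied to the CPU before being read.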
template <typename T>
class ClipMLUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* x = ctx.Input<phi::DenseTensor>("X");
auto* out = ctx.Output<phi::DenseTensor>("Out");
auto min = static_cast<T>(ctx.Attr<float>("min"));
auto max = static_cast<T>(ctx.Attr<float>("max"));
if (ctx.HasInput("Min")) {
phi::DenseTensor min_cpu;
auto* min_tensor = ctx.Input<phi::DenseTensor>("Min");
auto* min_data = min_tensor->data<T>();
if (platform::is_mlu_place(min_tensor->place())) {
paddle::framework::TensorCopySync(
*min_tensor, platform::CPUPlace(), &min_cpu);
min_data = min_cpu.data<T>();
}
min = min_data[0];
}
if (ctx.HasInput("Max")) {
phi::DenseTensor max_cpu;
auto* max_tensor = ctx.Input<phi::DenseTensor>("Max");
auto* max_data = max_tensor->data<T>();
if (platform::is_mlu_place(max_tensor->place())) {
paddle::framework::TensorCopySync(
*max_tensor, platform::CPUPlace(), &max_cpu);
max_data = max_cpu.data<T>();
}
max = max_data[0];
}
out->mutable_data<T>(ctx.GetPlace());
MLUCnnlTensorDesc x_desc(*x);
MLUCnnlTensorDesc out_desc(*out);
MLUCnnl::Clip(ctx,
x_desc.get(),
GetBasePtr(x),
static_cast<const void*>(&min),
static_cast<const void*>(&max),
GetBasePtr(out));
}
};
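// clip backward on MLU: delegated to cnnl HardtanhBackward with
// [min_val, max_val] as the clipping bounds; the optional Min/Max input
// tensors override the attributes.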
template <typename T>
class ClipGradMLUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* x = ctx.Input<phi::DenseTensor>("X");
auto* dout = ctx.Input<phi::DenseTensor>(framework::GradVarName("Out"));
auto* dx = ctx.Output<phi::DenseTensor>(framework::GradVarName("X"));
dx->mutable_data<T>(ctx.GetPlace());
auto* min_tensor =
ctx.HasInput("Min") ? ctx.Input<phi::DenseTensor>("Min") : nullptr;
auto* max_tensor =
ctx.HasInput("Max") ? ctx.Input<phi::DenseTensor>("Max") : nullptr;
auto min_val = ctx.Attr<float>("min");
if (min_tensor) {
phi::DenseTensor min_data;
framework::TensorCopy(
*min_tensor,
platform::CPUPlace(),
ctx.template device_context<platform::DeviceContext>(),
&min_data);
ctx.template device_context<paddle::platform::MLUDeviceContext>().Wait();
min_val = static_cast<float>(min_data.data<T>()[0]);
}
auto max_val = ctx.Attr<float>("max");
if (max_tensor) {
phi::DenseTensor max_data;
framework::TensorCopy(
*max_tensor,
platform::CPUPlace(),
ctx.template device_context<platform::DeviceContext>(),
&max_data);
ctx.template device_context<paddle::platform::MLUDeviceContext>().Wait();
max_val = static_cast<float>(max_data.data<T>()[0]);
}
MLUCnnlTensorDesc x_desc(*x);
MLUCnnlTensorDesc dx_desc(*dx);
MLUCnnlTensorDesc dout_desc(*dout);
MLUCnnl::HardtanhBackward(ctx,
x_desc.get(),
GetBasePtr(x),
dout_desc.get(),
GetBasePtr(dout),
max_val,
min_val,
dx_desc.get(),
GetBasePtr(dx));
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
namespace plat = paddle::platform;
REGISTER_OP_MLU_KERNEL(clip,
ops::ClipMLUKernel<float>,
ops::ClipMLUKernel<plat::float16>);
REGISTER_OP_MLU_KERNEL(clip_grad,
ops::ClipGradMLUKernel<float>,
ops::ClipGradMLUKernel<plat::float16>);
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/concat_op.h"
#include "paddle/fluid/operators/mlu/mlu_baseop.h"
#include "paddle/phi/core/tensor_utils.h"
namespace paddle {
namespace operators {
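// concat on MLU: the axis may come from the optional AxisTensor input, in
// which case the output shape is recomputed before calling cnnl Concat.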
template <typename T>
class ConcatMLUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto ins = ctx.MultiInput<phi::DenseTensor>("X");
phi::DenseTensor* out = ctx.Output<phi::DenseTensor>("Out");
PADDLE_ENFORCE_NOT_NULL(ins[0],
platform::errors::NotFound(
"The first input tensor is not initalized."));
auto axis = ctx.Attr<int>("axis");
auto ins_size = ins.size();
bool need_resize_out_dims = false;
if (ctx.HasInput("AxisTensor")) {
auto* axis_tensor = ctx.Input<phi::DenseTensor>("AxisTensor");
axis = phi::GetVectorFromTensor<int>(axis_tensor)[0];
need_resize_out_dims = true;
}
axis = ComputeAxis(static_cast<int64_t>(axis),
static_cast<int64_t>(ins[0]->dims().size()));
if (need_resize_out_dims) {
const size_t n = ins.size();
std::vector<framework::DDim> ins_dims(n);
for (size_t i = 0; i < n; i++) {
ins_dims[i] = ins[i]->dims();
}
framework::DDim out_dims =
phi::funcs::ComputeAndCheckShape(true, ins_dims, axis);
out->Resize(out_dims);
}
const int axis_t = axis;
const int ins_size_t = ins_size;
auto place = ctx.GetPlace();
out->mutable_data<T>(place);
// prepare the input descriptors and data pointers for cnnl Concat
std::vector<const void*> inputs;
std::vector<MLUCnnlTensorDesc> input_descs;
std::vector<cnnlTensorDescriptor_t> desc_vector;
for (size_t i = 0; i < ins_size; i++) {
input_descs.emplace_back(MLUCnnlTensorDesc(
*ins[i], CNNL_LAYOUT_ARRAY, ToCnnlDataType(ins[i]->dtype())));
desc_vector.push_back(input_descs.back().get());
inputs.push_back(GetBasePtr(ins[i]));
}
// init out tensors
MLUCnnlTensorDesc output_desc(
*out, CNNL_LAYOUT_ARRAY, ToCnnlDataType(out->dtype()));
// launch the cnnl concat
MLUCnnl::Concat(ctx,
ins_size_t,
axis_t,
desc_vector.data(),
inputs.data(),
output_desc.get(),
GetBasePtr(out));
}
};
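// concat backward on MLU: implemented as a cnnl Split of dout along `axis`;
// outputs that are absent or empty get a temporary buffer so every split
// slice still has a destination.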
template <typename T>
class ConcatGradMLUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* out_grad = ctx.Input<phi::DenseTensor>(framework::GradVarName("Out"));
auto ins = ctx.MultiInput<phi::DenseTensor>("X");
auto out_var_names = ctx.OutputNames(framework::GradVarName("X"));
auto outs = ctx.MultiOutput<phi::DenseTensor>(framework::GradVarName("X"));
auto axis = ctx.Attr<int>("axis");
int split_num = ins.size();
PADDLE_ENFORCE_NOT_NULL(ins[0],
platform::errors::NotFound(
"The first input tensor is not initalized."));
if (ctx.HasInput("AxisTensor")) {
auto* axis_tensor = ctx.Input<phi::DenseTensor>("AxisTensor");
axis = phi::GetVectorFromTensor<int>(axis_tensor)[0];
}
axis = ComputeAxis(static_cast<int64_t>(axis),
static_cast<int64_t>(ins[0]->dims().size()));
PADDLE_ENFORCE_GE(axis,
0,
platform::errors::InvalidArgument(
"concat_grad: axis should be larger than or "
"equal to 0, but received axis is %d.",
axis));
PADDLE_ENFORCE_LT(
axis,
out_grad->dims().size(),
platform::errors::InvalidArgument(
"concat_grad: axis should be less than ins[0]->dims()!"
"But received axis is %d, while ins[0]->dims()"
"size is %d.",
axis,
out_grad->dims().size()));
// collect output tensors whose names are not kEmptyVarName
std::vector<void*> outputs_vec;
std::vector<phi::DenseTensor> tmp_outputs_vec;
std::vector<MLUCnnlTensorDesc> output_descs;
std::vector<cnnlTensorDescriptor_t> descs_vec;
for (size_t j = 0; j < outs.size(); ++j) {
if (out_var_names[j] != framework::kEmptyVarName &&
outs[j]->numel() != 0UL) {
outs[j]->mutable_data<T>(ctx.GetPlace());
output_descs.emplace_back(MLUCnnlTensorDesc(*outs[j]));
outputs_vec.push_back(GetBasePtr(outs[j]));
} else {
phi::DenseTensor tmp_tensor;
tmp_tensor.mutable_data<T>(ins[j]->dims(), ctx.GetPlace());
tmp_outputs_vec.push_back(tmp_tensor);
output_descs.emplace_back(MLUCnnlTensorDesc(*ins[j]));
outputs_vec.push_back(GetBasePtr(&(tmp_outputs_vec.back())));
}
descs_vec.push_back(output_descs.back().get());
}
MLUCnnlTensorDesc out_grad_desc(*out_grad);
MLUCnnl::Split(ctx,
static_cast<int>(split_num),
static_cast<int>(axis),
out_grad_desc.get(),
GetBasePtr(out_grad),
descs_vec.data(),
outputs_vec.data());
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_MLU_KERNEL(concat,
ops::ConcatMLUKernel<float>,
ops::ConcatMLUKernel<paddle::platform::float16>,
ops::ConcatMLUKernel<int64_t>,
ops::ConcatMLUKernel<bool>,
ops::ConcatMLUKernel<int>,
ops::ConcatMLUKernel<uint8_t>);
REGISTER_OP_MLU_KERNEL(concat_grad,
ops::ConcatGradMLUKernel<float>,
ops::ConcatGradMLUKernel<paddle::platform::float16>,
ops::ConcatGradMLUKernel<int64_t>,
ops::ConcatGradMLUKernel<bool>,
ops::ConcatGradMLUKernel<int>,
ops::ConcatGradMLUKernel<uint8_t>);
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/conv_transpose_op.h"
#include "paddle/fluid/operators/mlu/mlu_baseop.h"
#include "paddle/phi/kernels/cpu/conv_util.h"
namespace paddle {
namespace operators {
using DataLayout = phi::DataLayout;
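// conv2d_transpose forward on MLU: expressed as cnnl ConvBackpropInput (the
// data gradient of a regular convolution). The kernel works in NHWC, so NCHW
// tensors are transposed in and back out around the call.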
template <typename T>
class Conv2DTransposeMLUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
const phi::DenseTensor* input = ctx.Input<phi::DenseTensor>("Input");
const phi::DenseTensor* filter = ctx.Input<phi::DenseTensor>("Filter");
phi::DenseTensor* output = ctx.Output<phi::DenseTensor>("Output");
output->mutable_data<T>(ctx.GetPlace());
std::vector<int> output_padding =
ctx.Attr<std::vector<int>>("output_padding");
const std::vector<int> strides = ctx.Attr<std::vector<int>>("strides");
std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");
std::vector<int> dilations = ctx.Attr<std::vector<int>>("dilations");
const std::string data_format = ctx.Attr<std::string>("data_format");
int groups = ctx.Attr<int>("groups");
const std::string padding_algorithm =
ctx.Attr<std::string>("padding_algorithm");
// check dimension
const bool channel_last = data_format == "NHWC";
auto in_dims = input->dims();
auto filter_dims = filter->dims();
auto in_dims_size = in_dims.size();
framework::DDim in_data_dims;
framework::DDim filter_data_dims;
if (channel_last) {
in_data_dims = phi::slice_ddim(in_dims, 1, in_dims.size() - 1);
} else {
in_data_dims = phi::slice_ddim(in_dims, 2, in_dims.size());
}
filter_data_dims = phi::slice_ddim(filter_dims, 2, in_dims.size());
std::vector<int> ksize = phi::vectorize<int>(filter_data_dims);
phi::UpdatePaddingAndDilation(
&paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize);
phi::DenseTensor input_tensor(input->type());
phi::DenseTensor output_tensor(output->type());
input_tensor.set_layout(DataLayout::kNHWC);
output_tensor.set_layout(DataLayout::kNHWC);
const std::vector<int> perm_to_nhwc = {0, 2, 3, 1};
if (channel_last) {
input_tensor.ShareDataWith(*input);
output_tensor.ShareDataWith(*output);
} else {
// transpose input from NCHW to NHWC
TransposeFromMLUTensor<T>(ctx,
perm_to_nhwc,
input,
&input_tensor,
true /*need_reshape_or_alloc*/);
auto output_dims = output->dims();
output_tensor.mutable_data<T>(
{output_dims[0], output_dims[2], output_dims[3], output_dims[1]},
ctx.GetPlace());
}
// transpose filter from MCHW to MHWC
phi::DenseTensor trans_filter(filter->type());
TransposeFromMLUTensor<T>(ctx,
perm_to_nhwc,
filter,
&trans_filter,
true /*need_reshape_or_alloc*/);
// construct MLU attr
cnnlTensorLayout_t data_layout = CNNL_LAYOUT_NHWC;
MLUCnnlTensorDesc input_desc(
input_tensor, data_layout, ToCnnlDataType(input_tensor.dtype()));
MLUCnnlTensorDesc filter_desc(
trans_filter, data_layout, ToCnnlDataType(trans_filter.type()));
MLUCnnlTensorDesc output_desc(
output_tensor, data_layout, ToCnnlDataType(output_tensor.dtype()));
MLUCnnlConvolutionDesc conv_desc(in_dims_size,
paddings.data(),
strides.data(),
dilations.data(),
groups,
ToCnnlDataType<T>());
MLUCnnl::ConvBackpropInput(ctx,
conv_desc.get(),
filter_desc.get(),
GetBasePtr(&trans_filter),
input_desc.get(),
GetBasePtr(&input_tensor),
output_desc.get(),
GetBasePtr(&output_tensor));
if (!channel_last) {
// transpose output from NHWC to NCHW
const std::vector<int> perm_to_nchw = {0, 3, 1, 2};
TransposeFromMLUTensor<T>(ctx,
perm_to_nchw,
&output_tensor,
output,
false /*need_reshape_or_alloc*/);
}
}
};
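// conv2d_transpose backward on MLU: the filter gradient comes from cnnl
// ConvBackpropFilter and the input gradient from a plain ConvolutionForward,
// mirroring how a transposed convolution swaps the forward/backward roles.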
template <typename T>
class Conv2DTransposeGradMLUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
const phi::DenseTensor* input = ctx.Input<phi::DenseTensor>("Input");
const phi::DenseTensor* filter = ctx.Input<phi::DenseTensor>("Filter");
const phi::DenseTensor* output_grad =
ctx.Input<phi::DenseTensor>(framework::GradVarName("Output"));
phi::DenseTensor* input_grad =
ctx.Output<phi::DenseTensor>(framework::GradVarName("Input"));
phi::DenseTensor* filter_grad =
ctx.Output<phi::DenseTensor>(framework::GradVarName("Filter"));
if ((!input_grad) && (!filter_grad)) return;
std::vector<int> strides = ctx.Attr<std::vector<int>>("strides");
std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");
std::vector<int> dilations = ctx.Attr<std::vector<int>>("dilations");
const int groups = ctx.Attr<int>("groups");
std::string padding_algorithm = ctx.Attr<std::string>("padding_algorithm");
const std::string data_format = ctx.Attr<std::string>("data_format");
const phi::DataLayout data_layout = phi::StringToDataLayout(data_format);
auto in_dims = input->dims();
auto filter_dims = filter->dims();
auto in_dims_size = in_dims.size();
const bool channel_last = (data_layout == phi::DataLayout::kNHWC);
framework::DDim in_data_dims;
if (channel_last) {
in_data_dims = phi::slice_ddim(in_dims, 1, in_dims.size() - 1);
} else {
in_data_dims = phi::slice_ddim(in_dims, 2, in_dims.size());
}
framework::DDim filter_data_dims =
phi::slice_ddim(filter_dims, 2, filter_dims.size());
std::vector<int> ksize = phi::vectorize<int>(filter_data_dims);
phi::UpdatePaddingAndDilation(
&paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize);
phi::DenseTensor input_tensor(input->type());
phi::DenseTensor output_grad_tensor(output_grad->type());
output_grad_tensor.set_layout(DataLayout::kNHWC);
const std::vector<int> perm_to_nhwc = {0, 2, 3, 1};
if (channel_last) {
input_tensor.ShareDataWith(*input);
output_grad_tensor.ShareDataWith(*output_grad);
} else {
// transpose input from NCHW to NHWC
TransposeFromMLUTensor<T>(ctx,
perm_to_nhwc,
input,
&input_tensor,
true /*need_reshape_or_alloc*/);
TransposeFromMLUTensor<T>(ctx,
perm_to_nhwc,
output_grad,
&output_grad_tensor,
true /*need_reshape_or_alloc*/);
}
// transpose filter from MCHW to MHWC
phi::DenseTensor trans_filter(filter->type());
TransposeFromMLUTensor<T>(ctx,
perm_to_nhwc,
filter,
&trans_filter,
true /*need_reshape_or_alloc*/);
// MLU descs
cnnlTensorLayout_t data_layout_mlu = CNNL_LAYOUT_NHWC;
MLUCnnlTensorDesc input_desc(
input_tensor, data_layout_mlu, ToCnnlDataType(input_tensor.dtype()));
MLUCnnlTensorDesc trans_filter_desc(
trans_filter, data_layout_mlu, ToCnnlDataType(trans_filter.type()));
MLUCnnlTensorDesc output_grad_desc(
output_grad_tensor,
data_layout_mlu,
ToCnnlDataType(output_grad_tensor.dtype()));
MLUCnnlConvolutionDesc conv_desc(in_dims_size,
paddings.data(),
strides.data(),
dilations.data(),
groups,
ToCnnlDataType<T>());
if (filter_grad) {
filter_grad->mutable_data<T>(ctx.GetPlace());
phi::DenseTensor filter_grad_tensor(filter_grad->type());
// filter_grad always MCHW
// filter_grad_tensor always MHWC
auto filter_grad_dims = filter_grad->dims();
filter_grad_tensor.mutable_data<T>({filter_grad_dims[0],
filter_grad_dims[2],
filter_grad_dims[3],
filter_grad_dims[1]},
ctx.GetPlace());
filter_grad_tensor.set_layout(DataLayout::kNHWC);
MLUCnnlTensorDesc filter_grad_desc(
filter_grad_tensor,
data_layout_mlu,
ToCnnlDataType(filter_grad_tensor.dtype()));
MLUCnnl::ConvBackpropFilter(ctx,
conv_desc.get(),
output_grad_desc.get(),
GetBasePtr(output_grad),
input_desc.get(),
GetBasePtr(&input_tensor),
filter_grad_desc.get(),
GetBasePtr(&filter_grad_tensor));
// transpose output from MHWC to MCHW
const std::vector<int> perm_to_mchw = {0, 3, 1, 2};
TransposeFromMLUTensor<T>(ctx,
perm_to_mchw,
&filter_grad_tensor,
filter_grad,
false /*need_reshape_or_alloc*/);
}
if (input_grad) {
input_grad->mutable_data<T>(ctx.GetPlace());
phi::DenseTensor input_grad_tensor(input_grad->type());
input_tensor.set_layout(DataLayout::kNHWC);
if (channel_last) {
input_grad_tensor.ShareDataWith(*input_grad);
} else {
auto input_grad_dims = input_grad->dims();
input_grad_tensor.mutable_data<T>({input_grad_dims[0],
input_grad_dims[2],
input_grad_dims[3],
input_grad_dims[1]},
ctx.GetPlace());
}
MLUCnnlTensorDesc input_grad_desc(
input_grad_tensor,
data_layout_mlu,
ToCnnlDataType(input_grad_tensor.dtype()));
MLUCnnl::ConvolutionForward(ctx,
conv_desc.get(),
nullptr /*alpha*/,
nullptr /*beta*/,
nullptr /*bias_desc*/,
nullptr /*bias_ptr*/,
output_grad_desc.get(),
GetBasePtr(&output_grad_tensor),
trans_filter_desc.get(),
GetBasePtr(&trans_filter),
input_grad_desc.get(),
GetBasePtr(&input_grad_tensor));
if (!channel_last) {
// transpose output from NHWC to NCHW
const std::vector<int> perm_to_nchw = {0, 3, 1, 2};
TransposeFromMLUTensor<T>(ctx,
perm_to_nchw,
&input_grad_tensor,
input_grad,
false /*need_reshape_or_alloc*/);
}
}
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
namespace plat = paddle::platform;
REGISTER_OP_MLU_KERNEL(conv2d_transpose,
ops::Conv2DTransposeMLUKernel<float>,
ops::Conv2DTransposeMLUKernel<plat::float16>);
REGISTER_OP_MLU_KERNEL(conv2d_transpose_grad,
ops::Conv2DTransposeGradMLUKernel<float>,
ops::Conv2DTransposeGradMLUKernel<plat::float16>);
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/mlu/mlu_baseop.h"
namespace paddle {
namespace operators {
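// cumsum on MLU: with flatten = true the input is viewed as a 1-D tensor and
// axis must stay at its default -1; a negative axis is normalized before the
// cnnl Cumsum call.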
template <typename T>
class CumSumMLUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* x = ctx.Input<phi::DenseTensor>("X");
auto* out = ctx.Output<phi::DenseTensor>("Out");
int axis = ctx.Attr<int>("axis");
bool exclusive = ctx.Attr<bool>("exclusive");
bool reverse = ctx.Attr<bool>("reverse");
bool flatten = ctx.Attr<bool>("flatten");
out->mutable_data<T>(ctx.GetPlace());
phi::DenseTensor* input_ptr = const_cast<phi::DenseTensor*>(x);
phi::DenseTensor flat_x(x->type());
if (flatten) {
PADDLE_ENFORCE_EQ(
axis,
-1,
platform::errors::InvalidArgument(
"when flatten is true, attr axis must be default %d, but got %d",
-1,
axis));
flat_x.ShareDataWith(*x);
flat_x.Resize(phi::make_ddim({x->numel()}));
input_ptr = &flat_x;
}
const int true_axis = (axis < 0) ? input_ptr->dims().size() + axis : axis;
MLUCnnlTensorDesc input_desc(*input_ptr);
MLUCnnlTensorDesc out_desc(*out);
MLUCnnl::Cumsum(ctx,
true_axis,
exclusive,
reverse,
input_desc.get(),
GetBasePtr(input_ptr),
out_desc.get(),
GetBasePtr(out));
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
namespace plat = paddle::platform;
REGISTER_OP_MLU_KERNEL(cumsum,
ops::CumSumMLUKernel<int>,
ops::CumSumMLUKernel<float>,
ops::CumSumMLUKernel<plat::float16>);
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/mlu/mlu_baseop.h"
namespace paddle {
namespace operators {
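// deformable_conv on MLU: input, offset, mask and filter are transposed to
// NHWC before cnnl DCNForward; groups > 1 is rejected until cnnl supports it.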
template <typename T>
class DeformableConvMLUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* input = ctx.Input<phi::DenseTensor>("Input");
auto* offset = ctx.Input<phi::DenseTensor>("Offset");
auto* mask = ctx.Input<phi::DenseTensor>("Mask");
auto* filter = ctx.Input<phi::DenseTensor>("Filter");
auto* output = ctx.Output<phi::DenseTensor>("Output");
output->mutable_data<T>(ctx.GetPlace());
const int groups = ctx.Attr<int>("groups");
const int deformable_groups = ctx.Attr<int>("deformable_groups");
const int im2col_step = ctx.Attr<int>("im2col_step");
const std::vector<int> strides = ctx.Attr<std::vector<int>>("strides");
const std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");
const std::vector<int> dilations = ctx.Attr<std::vector<int>>("dilations");
// TODO(fwg): Remove this check once cnnl fixes the bug for groups > 1.
PADDLE_ENFORCE_EQ(
groups == 1,
true,
platform::errors::InvalidArgument(
"MLU deformable_conv kernel only support groups == 1, but get %d.",
groups));
// transform paddings from {h, w} to {top, bottom, left, right}.
const std::vector<int> trans_paddings{
paddings[0], paddings[0], paddings[1], paddings[1]};
MLUCnnlDCNDesc dcn_desc(input->dims().size(),
trans_paddings.data(),
strides.data(),
dilations.data(),
deformable_groups,
groups,
im2col_step);
const std::vector<int> perm_to_nhwc = {0, 2, 3, 1};
phi::DenseTensor trans_input(input->dtype());
TransposeFromMLUTensor<T>(
ctx, perm_to_nhwc, input, &trans_input, true /*need_reshape_or_alloc*/);
phi::DenseTensor trans_offset(offset->dtype());
TransposeFromMLUTensor<T>(ctx,
perm_to_nhwc,
offset,
&trans_offset,
true /*need_reshape_or_alloc*/);
phi::DenseTensor trans_mask(mask->dtype());
TransposeFromMLUTensor<T>(
ctx, perm_to_nhwc, mask, &trans_mask, true /*need_reshape_or_alloc*/);
phi::DenseTensor trans_filter(filter->dtype());
TransposeFromMLUTensor<T>(ctx,
perm_to_nhwc,
filter,
&trans_filter,
true /*need_reshape_or_alloc*/);
phi::DenseTensor tmp_output(output->dtype());
auto output_dims = output->dims();
tmp_output.mutable_data<T>(
{output_dims[0], output_dims[2], output_dims[3], output_dims[1]},
ctx.GetPlace());
cnnlTensorLayout_t data_layout = CNNL_LAYOUT_NHWC;
MLUCnnlTensorDesc input_desc(
trans_input, data_layout, ToCnnlDataType(trans_input.dtype()));
MLUCnnlTensorDesc offset_desc(
trans_offset, data_layout, ToCnnlDataType(trans_offset.dtype()));
MLUCnnlTensorDesc mask_desc(
trans_mask, data_layout, ToCnnlDataType(trans_mask.dtype()));
MLUCnnlTensorDesc filter_desc(
trans_filter, data_layout, ToCnnlDataType(trans_filter.dtype()));
MLUCnnlTensorDesc output_desc(
tmp_output, data_layout, ToCnnlDataType(tmp_output.dtype()));
MLUCnnl::DCNForward(ctx,
dcn_desc.get(),
input_desc.get(),
GetBasePtr(&trans_input),
offset_desc.get(),
GetBasePtr(&trans_offset),
mask_desc.get(),
GetBasePtr(&trans_mask),
filter_desc.get(),
GetBasePtr(&trans_filter),
nullptr,
nullptr,
output_desc.get(),
GetBasePtr(&tmp_output));
const std::vector<int> perm_to_nchw = {0, 3, 1, 2};
TransposeFromMLUTensor<T>(ctx,
perm_to_nchw,
&tmp_output,
output,
false /*need_reshape_or_alloc*/);
}
};
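// deformable_conv backward on MLU: DCNBackwardData produces the input,
// offset and mask gradients, DCNBackwardWeight produces the filter gradient,
// and each requested gradient is transposed back from NHWC to NCHW.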
template <typename T>
class DeformableConvGradMLUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
const phi::DenseTensor* output_grad =
ctx.Input<phi::DenseTensor>(framework::GradVarName("Output"));
auto* input_grad =
ctx.Output<phi::DenseTensor>(framework::GradVarName("Input"));
auto* filter_grad =
ctx.Output<phi::DenseTensor>(framework::GradVarName("Filter"));
auto* offset_grad =
ctx.Output<phi::DenseTensor>(framework::GradVarName("Offset"));
auto* mask_grad =
ctx.Output<phi::DenseTensor>(framework::GradVarName("Mask"));
const phi::DenseTensor* input = ctx.Input<phi::DenseTensor>("Input");
auto* offset = ctx.Input<phi::DenseTensor>("Offset");
auto* mask = ctx.Input<phi::DenseTensor>("Mask");
auto* filter = ctx.Input<phi::DenseTensor>("Filter");
int groups = ctx.Attr<int>("groups");
int deformable_groups = ctx.Attr<int>("deformable_groups");
int im2col_step = ctx.Attr<int>("im2col_step");
std::vector<int> strides = ctx.Attr<std::vector<int>>("strides");
std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");
std::vector<int> dilations = ctx.Attr<std::vector<int>>("dilations");
// TODO(fwg): Remove this check once cnnl fixes the bug for groups > 1.
PADDLE_ENFORCE_EQ(groups == 1,
true,
platform::errors::InvalidArgument(
"MLU deformable_conv_grad kernel only support groups "
"== 1, but get %d.",
groups));
// transform paddings from {h, w} to {top, bottom, left, right}.
const std::vector<int> trans_paddings{
paddings[0], paddings[0], paddings[1], paddings[1]};
MLUCnnlDCNDesc dcn_desc(input->dims().size(),
trans_paddings.data(),
strides.data(),
dilations.data(),
deformable_groups,
groups,
im2col_step);
phi::DenseTensor tmp_input_grad;
auto input_dims = input->dims();
tmp_input_grad.mutable_data<T>(
{input_dims[0], input_dims[2], input_dims[3], input_dims[1]},
ctx.GetPlace());
phi::DenseTensor tmp_filter_grad;
auto filter_dims = filter->dims();
tmp_filter_grad.mutable_data<T>(
{filter_dims[0], filter_dims[2], filter_dims[3], filter_dims[1]},
ctx.GetPlace());
phi::DenseTensor tmp_offset_grad;
auto offset_dims = offset->dims();
tmp_offset_grad.mutable_data<T>(
{offset_dims[0], offset_dims[2], offset_dims[3], offset_dims[1]},
ctx.GetPlace());
phi::DenseTensor tmp_mask_grad;
auto mask_dims = mask->dims();
tmp_mask_grad.mutable_data<T>(
{mask_dims[0], mask_dims[2], mask_dims[3], mask_dims[1]},
ctx.GetPlace());
const std::vector<int> perm_to_nhwc = {0, 2, 3, 1};
phi::DenseTensor trans_output_grad(output_grad->dtype());
TransposeFromMLUTensor<T>(ctx,
perm_to_nhwc,
output_grad,
&trans_output_grad,
true /*need_reshape_or_alloc*/);
phi::DenseTensor trans_input(input->dtype());
TransposeFromMLUTensor<T>(
ctx, perm_to_nhwc, input, &trans_input, true /*need_reshape_or_alloc*/);
phi::DenseTensor trans_offset(offset->dtype());
TransposeFromMLUTensor<T>(ctx,
perm_to_nhwc,
offset,
&trans_offset,
true /*need_reshape_or_alloc*/);
phi::DenseTensor trans_mask(mask->dtype());
TransposeFromMLUTensor<T>(
ctx, perm_to_nhwc, mask, &trans_mask, true /*need_reshape_or_alloc*/);
phi::DenseTensor trans_filter(filter->dtype());
TransposeFromMLUTensor<T>(ctx,
perm_to_nhwc,
filter,
&trans_filter,
true /*need_reshape_or_alloc*/);
cnnlTensorLayout_t data_layout = CNNL_LAYOUT_NHWC;
MLUCnnlTensorDesc output_grad_desc(
trans_output_grad,
data_layout,
ToCnnlDataType(trans_output_grad.dtype()));
MLUCnnlTensorDesc input_desc(
trans_input, data_layout, ToCnnlDataType(trans_input.dtype()));
MLUCnnlTensorDesc offset_desc(
trans_offset, data_layout, ToCnnlDataType(trans_offset.dtype()));
MLUCnnlTensorDesc mask_desc(
trans_mask, data_layout, ToCnnlDataType(trans_mask.dtype()));
MLUCnnlTensorDesc filter_desc(
trans_filter, data_layout, ToCnnlDataType(trans_filter.dtype()));
MLUCnnl::DCNBackwardData(ctx,
dcn_desc.get(),
input_desc.get(),
GetBasePtr(&trans_input),
offset_desc.get(),
GetBasePtr(&trans_offset),
mask_desc.get(),
GetBasePtr(&trans_mask),
filter_desc.get(),
GetBasePtr(&trans_filter),
output_grad_desc.get(),
GetBasePtr(&trans_output_grad),
input_desc.get(),
GetBasePtr(&tmp_input_grad),
offset_desc.get(),
GetBasePtr(&tmp_offset_grad),
mask_desc.get(),
GetBasePtr(&tmp_mask_grad));
MLUCnnl::DCNBackwardWeight(ctx,
dcn_desc.get(),
input_desc.get(),
GetBasePtr(&trans_input),
offset_desc.get(),
GetBasePtr(&trans_offset),
mask_desc.get(),
GetBasePtr(&trans_mask),
output_grad_desc.get(),
GetBasePtr(&trans_output_grad),
filter_desc.get(),
GetBasePtr(&tmp_filter_grad),
nullptr,
nullptr);
const std::vector<int> perm_to_nchw = {0, 3, 1, 2};
if (input_grad) {
input_grad->mutable_data<T>(ctx.GetPlace());
TransposeFromMLUTensor<T>(ctx,
perm_to_nchw,
&tmp_input_grad,
input_grad,
false /*need_reshape_or_alloc*/);
}
if (filter_grad) {
filter_grad->mutable_data<T>(ctx.GetPlace());
TransposeFromMLUTensor<T>(ctx,
perm_to_nchw,
&tmp_filter_grad,
filter_grad,
false /*need_reshape_or_alloc*/);
}
if (offset_grad) {
offset_grad->mutable_data<T>(ctx.GetPlace());
TransposeFromMLUTensor<T>(ctx,
perm_to_nchw,
&tmp_offset_grad,
offset_grad,
false /*need_reshape_or_alloc*/);
}
if (mask_grad) {
mask_grad->mutable_data<T>(ctx.GetPlace());
TransposeFromMLUTensor<T>(ctx,
perm_to_nchw,
&tmp_mask_grad,
mask_grad,
false /*need_reshape_or_alloc*/);
}
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
namespace plat = paddle::platform;
REGISTER_OP_MLU_KERNEL(deformable_conv, ops::DeformableConvMLUKernel<float>);
REGISTER_OP_MLU_KERNEL(deformable_conv_grad,
ops::DeformableConvGradMLUKernel<float>);
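// Illustrative sketch (not part of the original file): the grad kernel above
// runs every cnnl DCN call in NHWC layout and converts the results back to
// NCHW with the permutation {0, 3, 1, 2}. The standalone helper below shows
// the same index permutation on a flat host buffer using only the standard
// library; the function name and float element type are assumptions made for
// this example.
#include <array>
#include <vector>

// Permute a contiguous 4-D tensor `src` with extents `dims` so that output
// axis i takes its extent and coordinate from input axis perm[i]
// (e.g. perm = {0, 2, 3, 1} maps NCHW to NHWC).
inline std::vector<float> Permute4D(const std::vector<float>& src,
                                    const std::array<int, 4>& dims,
                                    const std::array<int, 4>& perm) {
  std::array<int, 4> out_dims;
  for (int i = 0; i < 4; ++i) out_dims[i] = dims[perm[i]];
  const std::array<int, 4> in_strides = {dims[1] * dims[2] * dims[3],
                                         dims[2] * dims[3], dims[3], 1};
  const std::array<int, 4> out_strides = {
      out_dims[1] * out_dims[2] * out_dims[3], out_dims[2] * out_dims[3],
      out_dims[3], 1};
  std::vector<float> dst(src.size());
  std::array<int, 4> idx;  // coordinate in the *input* layout
  for (idx[0] = 0; idx[0] < dims[0]; ++idx[0])
    for (idx[1] = 0; idx[1] < dims[1]; ++idx[1])
      for (idx[2] = 0; idx[2] < dims[2]; ++idx[2])
        for (idx[3] = 0; idx[3] < dims[3]; ++idx[3]) {
          int src_off = 0, dst_off = 0;
          for (int i = 0; i < 4; ++i) {
            src_off += idx[i] * in_strides[i];
            dst_off += idx[perm[i]] * out_strides[i];
          }
          dst[dst_off] = src[src_off];
        }
  return dst;
}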
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/mlu/mlu_baseop.h"
namespace paddle {
namespace operators {
template <typename T>
class DropoutMLUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* x = ctx.Input<phi::DenseTensor>("X");
auto* out = ctx.Output<phi::DenseTensor>("Out");
auto dropout_prob = ctx.Attr<float>("dropout_prob");
auto is_test = ctx.Attr<bool>("is_test");
auto* seed_tensor =
ctx.HasInput("Seed") ? ctx.Input<phi::DenseTensor>("Seed") : nullptr;
auto dropout_implementation =
ctx.Attr<std::string>("dropout_implementation");
const bool is_upscale = (dropout_implementation == "upscale_in_train");
out->mutable_data<T>(ctx.GetPlace());
MLUCnnlTensorDesc x_desc(*x);
MLUCnnlTensorDesc out_desc(*out);
if (is_test && is_upscale) {
// dropout op for inference: out = input.
framework::TensorCopy(
*x,
ctx.GetPlace(),
ctx.template device_context<platform::MLUDeviceContext>(),
out);
return;
} else if (!is_test) {
// dropout op for training: out = input * mask / ( 1.0 - dropout_prob ) or
// out = input * mask.
int seed_data = 0;
if (seed_tensor) {
if (platform::is_mlu_place(seed_tensor->place())) {
memory::Copy(platform::CPUPlace(),
&seed_data,
seed_tensor->place(),
seed_tensor->data<int>(),
sizeof(int));
} else {
seed_data = *(seed_tensor->data<int>());
}
} else {
seed_data = ctx.Attr<bool>("fix_seed") ? ctx.Attr<int>("seed") : 0;
}
auto* mask = ctx.Output<phi::DenseTensor>("Mask");
mask->mutable_data<uint8_t>(ctx.GetPlace());
MLUCnnlTensorDesc mask_desc(*mask);
// Special case when dropout_prob is 1.0
if (dropout_prob == 1.0f) {
auto value_t = static_cast<T>(0.0f);
MLUCnnl::Fill(ctx,
CNNL_POINTER_MODE_HOST,
&value_t,
out_desc.get(),
GetBasePtr(out));
MLUCnnl::Fill(ctx,
CNNL_POINTER_MODE_HOST,
&value_t,
mask_desc.get(),
GetBasePtr(mask));
return;
}
// create mlu random generator
const int device_id = ctx.GetPlace().GetDeviceId();
auto mlu_gen_random = GetMLURandomGenerator(ctx, device_id, seed_data);
// compute out = input * mask / ( 1.0 - dropout_prob )
MLUCnnl::FusedDropout(ctx,
mlu_gen_random->get(),
x_desc.get(),
GetBasePtr(x),
dropout_prob,
GetBasePtr(&(mlu_gen_random->get_state())),
mask_desc.get(),
GetBasePtr(mask),
out_desc.get(),
GetBasePtr(out));
if (is_upscale) {
return;
}
}
// In downgrade_in_infer mode, need to multiply (1.0f - dropout_prob).
phi::DenseTensor scale_tensor(x->dtype());
phi::DenseTensor bias_tensor(x->dtype());
scale_tensor.mutable_data<T>({1}, ctx.GetPlace());
bias_tensor.mutable_data<T>({1}, ctx.GetPlace());
MLUCnnlTensorDesc scale_desc(scale_tensor);
MLUCnnlTensorDesc bias_desc(bias_tensor);
FillMLUTensorWithHostValue(
ctx, static_cast<T>(1.0f - dropout_prob), &scale_tensor);
FillMLUTensorWithHostValue(ctx, static_cast<T>(0.0f), &bias_tensor);
MLUCnnl::Scale(ctx,
0,
is_test ? x_desc.get() : out_desc.get(),
is_test ? GetBasePtr(x) : GetBasePtr(out),
scale_desc.get(),
GetBasePtr(&scale_tensor),
bias_desc.get(),
GetBasePtr(&bias_tensor),
out_desc.get(),
GetBasePtr(out));
}
};
template <typename T>
class DropoutGradMLUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
PADDLE_ENFORCE_EQ(!ctx.Attr<bool>("is_test"),
true,
platform::errors::InvalidArgument(
"GradOp is only callable when is_test is false"));
auto* grad_x = ctx.Output<phi::DenseTensor>(framework::GradVarName("X"));
auto* grad_out = ctx.Input<phi::DenseTensor>(framework::GradVarName("Out"));
auto* mask = ctx.Input<phi::DenseTensor>("Mask");
auto dropout_prob = ctx.Attr<float>("dropout_prob");
auto dropout_impl = ctx.Attr<std::string>("dropout_implementation");
grad_x->mutable_data<T>(ctx.GetPlace());
MLUCnnlTensorDesc grad_x_desc(*grad_x);
if (dropout_prob == 1.) {
auto value_t = static_cast<T>(0.0f);
MLUCnnl::Fill(ctx,
CNNL_POINTER_MODE_HOST,
&value_t,
grad_x_desc.get(),
GetBasePtr(grad_x));
return;
}
// cast mask from uint8 to float32/float16
phi::DenseTensor cast_mask(grad_x->dtype());
cast_mask.Resize(mask->dims());
cast_mask.mutable_data<T>(ctx.GetPlace());
MLUCnnlTensorDesc mask_desc(*mask);
MLUCnnlTensorDesc cast_mask_desc(cast_mask);
cnnlCastDataType_t cast_type =
GetCastDataType(framework::TransToProtoVarType(mask->dtype()),
framework::TransToProtoVarType(cast_mask.dtype()));
MLUCnnl::Cast(ctx,
cast_type,
mask_desc.get(),
GetBasePtr(mask),
cast_mask_desc.get(),
GetBasePtr(&cast_mask));
const bool is_upscale = (dropout_impl == "upscale_in_train");
const float scale = is_upscale ? (1.0f / (1.0f - dropout_prob)) : (1.0f);
auto data_type = ToCnnlDataType<T>();
MLUCnnlTensorDesc grad_out_desc(*grad_out);
MLUCnnlOpTensorDesc op_tensor_desc(
CNNL_OP_TENSOR_MUL, data_type, CNNL_NOT_PROPAGATE_NAN);
MLUCnnl::OpTensor(ctx,
op_tensor_desc.get(),
cast_mask_desc.get(),
GetBasePtr(&cast_mask),
grad_out_desc.get(),
GetBasePtr(grad_out),
grad_x_desc.get(),
GetBasePtr(grad_x),
data_type,
scale);
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
namespace plat = paddle::platform;
REGISTER_OP_MLU_KERNEL(dropout,
ops::DropoutMLUKernel<float>,
ops::DropoutMLUKernel<plat::float16>);
REGISTER_OP_MLU_KERNEL(dropout_grad,
ops::DropoutGradMLUKernel<float>,
ops::DropoutGradMLUKernel<plat::float16>);
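// Illustrative sketch (not part of the original file): the two
// dropout_implementation modes handled above differ only in where the
// (1 - dropout_prob) factor lands. The host-side reference below spells out
// both modes for an already-sampled mask; the function name is an assumption
// made for this example.
#include <cstdint>
#include <vector>

inline std::vector<float> DropoutReference(const std::vector<float>& x,
                                           const std::vector<uint8_t>& mask,
                                           float dropout_prob,
                                           bool upscale_in_train,
                                           bool is_test) {
  std::vector<float> out(x.size());
  for (size_t i = 0; i < x.size(); ++i) {
    if (is_test) {
      // Inference: upscale_in_train is a pass-through, downgrade_in_infer
      // scales by the keep probability (the Scale call at the end above).
      out[i] = upscale_in_train ? x[i] : x[i] * (1.0f - dropout_prob);
    } else {
      float kept = mask[i] ? x[i] : 0.0f;
      // Training: upscale_in_train rescales kept values by 1 / (1 - p) so the
      // expectation matches inference; downgrade_in_infer leaves them as-is.
      out[i] = upscale_in_train ? kept / (1.0f - dropout_prob) : kept;
    }
  }
  return out;
}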
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/operators/expand_as_v2_op.h"
#include "paddle/fluid/operators/mlu/mlu_baseop.h"
namespace paddle {
namespace operators {
template <typename T>
class ExpandAsV2MLUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto rank = context.Input<phi::DenseTensor>("X")->dims().size();
auto target_shape = context.Attr<std::vector<int>>("target_shape");
auto target_rank = target_shape.size();
PADDLE_ENFORCE_GE(target_rank,
rank,
platform::errors::InvalidArgument(
"The rank (%d) of the input 'target_tensor' for "
"expand_as_v2 op must be greater than or equal to "
"the rank (%d) of the input 'x'.",
target_rank,
rank));
PADDLE_ENFORCE_GE(
rank,
1,
platform::errors::InvalidArgument("The rank (%d) of the input 'x' for "
"expand_as_v2 op must be positive.",
rank));
PADDLE_ENFORCE_LE(target_rank,
MAX_RANK_SUPPORTED,
platform::errors::InvalidArgument(
"The rank (%d) of the input 'target_tensor' for "
"expand_as_v2 op must be less than or equal to %d.",
target_rank,
MAX_RANK_SUPPORTED));
ExpandAs(context);
}
protected:
void ExpandAs(const framework::ExecutionContext& context) const {
auto* in0 = context.Input<phi::DenseTensor>("X");
auto in_dims = in0->dims();
auto target_shape = context.Attr<std::vector<int>>("target_shape");
auto vec_in_dims = phi::vectorize<int>(in_dims);
auto diff = target_shape.size() - vec_in_dims.size();
vec_in_dims.insert(vec_in_dims.begin(), diff, 1);
for (size_t i = 0; i < vec_in_dims.size(); ++i) {
PADDLE_ENFORCE_NE(target_shape[i],
0,
platform::errors::InvalidArgument(
"The value of target shape cannot be zero."));
if (vec_in_dims[i] != 1) {
PADDLE_ENFORCE_EQ(
vec_in_dims[i],
target_shape[i],
platform::errors::InvalidArgument(
"The value (%d) of the non-singleton dimension does not match"
" the corresponding value (%d) in "
"target tensor for expand_as_v2 op.",
vec_in_dims[i],
target_shape[i]));
}
}
auto* out0 = context.Output<phi::DenseTensor>("Out");
framework::DDim out_dims = phi::make_ddim(target_shape);
out0->Resize(out_dims);
out0->mutable_data<T>(context.GetPlace());
MLUCnnlTensorDesc x_desc(*in0);
MLUCnnlTensorDesc out_desc(*out0);
MLUCnnl::BroadcastTo(context,
x_desc.get(),
GetBasePtr(in0),
out_desc.get(),
GetBasePtr(out0));
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_MLU_KERNEL(expand_as_v2,
ops::ExpandAsV2MLUKernel<float>,
ops::ExpandAsV2MLUKernel<int>,
ops::ExpandAsV2MLUKernel<int64_t>,
ops::ExpandAsV2MLUKernel<int8_t>,
ops::ExpandAsV2MLUKernel<uint8_t>,
ops::ExpandAsV2MLUKernel<bool>,
ops::ExpandAsV2MLUKernel<paddle::platform::float16>);
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_MLU
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/expand_v2_op.h"
#include "paddle/fluid/operators/mlu/mlu_baseop.h"
namespace paddle {
namespace operators {
template <typename T>
class ExpandV2MLUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* X = ctx.Input<phi::DenseTensor>("X");
auto* Out = ctx.Output<phi::DenseTensor>("Out");
auto in_dims = X->dims();
auto expand_shape = get_expand_shape(ctx);
auto vec_in_dims = phi::vectorize<int>(in_dims);
auto diff = expand_shape.size() - vec_in_dims.size();
vec_in_dims.insert(vec_in_dims.begin(), diff, 1);
std::vector<int> final_expand_shape(vec_in_dims.size());
for (size_t i = 0; i < vec_in_dims.size(); ++i) {
PADDLE_ENFORCE_NE(expand_shape[i],
0,
platform::errors::InvalidArgument(
"The expanded size cannot be zero."));
if (i < diff) { // expand_shape = [3,4,-1,-1], X = [10,2] -->
// final_expand_shape = [3,4,10,2]
PADDLE_ENFORCE_GT(
expand_shape[i],
0,
platform::errors::InvalidArgument(
"The expanded size (%d) for non-existing dimensions must be "
"positive for expand_v2 op.",
expand_shape[i]));
final_expand_shape[i] = expand_shape[i];
} else if (expand_shape[i] > 0) { // expand_shape = [3,4,10,4], X =
// [10,1] --> final_expand_shape =
// [3,4,10,4]
if (vec_in_dims[i] != 1) {
PADDLE_ENFORCE_EQ(
vec_in_dims[i],
expand_shape[i],
platform::errors::InvalidArgument(
"The value (%d) of the non-singleton dimension does not match"
" the corresponding value (%d) in shape for expand_v2 op.",
vec_in_dims[i],
expand_shape[i]));
final_expand_shape[i] = expand_shape[i];
} else {
final_expand_shape[i] = expand_shape[i];
}
} else { // expand_shape = [3,4,-1,-1], X = [10,2] --> final_expand_shape
// = [3,4,10,2]
PADDLE_ENFORCE_EQ(
expand_shape[i],
-1,
platform::errors::InvalidArgument(
"When the value in shape is negative for expand_v2 op, "
"only -1 is supported, but the value received is %d.",
expand_shape[i]));
final_expand_shape[i] = vec_in_dims[i];
}
}
auto rank = X->dims().size();
PADDLE_ENFORCE_GE(
rank,
1,
platform::errors::InvalidArgument(
"The rank of the input 'X' for expand_v2_mlu op must be positive, "
"but the value received is %d.",
rank));
auto shape_size = final_expand_shape.size();
PADDLE_ENFORCE_GE(
shape_size,
rank,
platform::errors::InvalidArgument(
"The number (%d) of elements of 'shape' for expand_v2_mlu op must "
"be "
"greater than or equal to the rank (%d) of the input 'X'.",
shape_size,
rank));
framework::DDim out_dims = phi::make_ddim(final_expand_shape);
Out->Resize(out_dims);
auto place = ctx.GetPlace();
Out->mutable_data<T>(place);
MLUCnnlTensorDesc x_desc(*X);
MLUCnnlTensorDesc out_desc(*Out);
MLUCnnl::BroadcastTo(
ctx, x_desc.get(), GetBasePtr(X), out_desc.get(), GetBasePtr(Out));
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_MLU_KERNEL(expand_v2,
ops::ExpandV2MLUKernel<float>,
ops::ExpandV2MLUKernel<paddle::platform::float16>,
ops::ExpandV2MLUKernel<bool>,
ops::ExpandV2MLUKernel<int>,
ops::ExpandV2MLUKernel<int64_t>);
#endif
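// Illustrative sketch (not part of the original file): a host-side version of
// the shape resolution done in ExpandV2MLUKernel above, where the input is
// right-aligned against `expand_shape`, -1 means "keep the input extent", and
// prepended dimensions must be given explicitly. The function name and the
// use of exceptions instead of PADDLE_ENFORCE are assumptions for this sketch.
#include <stdexcept>
#include <vector>

inline std::vector<int> ResolveExpandShape(
    std::vector<int> in_dims, const std::vector<int>& expand_shape) {
  const size_t diff = expand_shape.size() - in_dims.size();
  in_dims.insert(in_dims.begin(), diff, 1);  // e.g. [10, 2] -> [1, 1, 10, 2]
  std::vector<int> out(in_dims.size());
  for (size_t i = 0; i < in_dims.size(); ++i) {
    if (expand_shape[i] > 0) {
      if (in_dims[i] != 1 && in_dims[i] != expand_shape[i])
        throw std::invalid_argument("non-singleton dimension mismatch");
      out[i] = expand_shape[i];
    } else if (expand_shape[i] == -1 && i >= diff) {
      out[i] = in_dims[i];  // keep the original extent
    } else {
      throw std::invalid_argument("expanded size must be positive or -1");
    }
  }
  return out;  // expand_shape = {3, 4, -1, -1}, X = {10, 2} -> {3, 4, 10, 2}
}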
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/mlu/mlu_baseop.h"
namespace paddle {
namespace operators {
template <typename T>
class FillAnyLikeMLUKernel : public framework::OpKernel<T> {
public:
using CommonType = typename std::common_type<
float,
typename std::conditional<std::is_same<T, platform::float16>::value,
float,
T>::type>::type;
void Compute(const framework::ExecutionContext& ctx) const override {
auto* out = ctx.Output<phi::DenseTensor>("Out");
out->mutable_data<T>(ctx.GetPlace());
float value = ctx.Attr<float>("value");
auto common_type_value = static_cast<CommonType>(value);
PADDLE_ENFORCE_EQ(
(common_type_value >=
static_cast<CommonType>(std::numeric_limits<T>::lowest())) &&
(common_type_value <=
static_cast<CommonType>(std::numeric_limits<T>::max())),
true,
platform::errors::InvalidArgument(
"The filled value is out of range for target type, "
"current kernel type is %s, the range should between %f "
"and %f, but now value is %f.",
typeid(T).name(),
static_cast<CommonType>(std::numeric_limits<T>::lowest()),
static_cast<CommonType>(std::numeric_limits<T>::max()),
value));
PADDLE_ENFORCE_EQ(
std::isnan(value),
false,
platform::errors::InvalidArgument("The filled value is NaN."));
auto value_t = static_cast<T>(value);
MLUCnnlTensorDesc out_desc(*out, CNNL_LAYOUT_ARRAY, ToCnnlDataType<T>());
MLUCnnl::Fill(
ctx, CNNL_POINTER_MODE_HOST, &value_t, out_desc.get(), GetBasePtr(out));
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
namespace plat = paddle::platform;
REGISTER_OP_MLU_KERNEL(fill_any_like,
ops::FillAnyLikeMLUKernel<int>,
ops::FillAnyLikeMLUKernel<int64_t>,
ops::FillAnyLikeMLUKernel<float>,
ops::FillAnyLikeMLUKernel<plat::float16>);
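// Illustrative sketch (not part of the original file): the guard in
// FillAnyLikeMLUKernel promotes the fill value and the target type's limits to
// a common type before comparing (float16 goes through float). A standalone
// equivalent using only the standard library; the function name is an
// assumption for this sketch.
#include <cmath>
#include <limits>
#include <stdexcept>
#include <type_traits>

template <typename T>
T CheckedFillValue(float value) {
  using Common = typename std::common_type<float, T>::type;
  const auto v = static_cast<Common>(value);
  if (v < static_cast<Common>(std::numeric_limits<T>::lowest()) ||
      v > static_cast<Common>(std::numeric_limits<T>::max()))
    throw std::out_of_range("filled value is out of range for the target type");
  if (std::isnan(value)) throw std::invalid_argument("filled value is NaN");
  return static_cast<T>(value);
}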
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/mlu/mlu_baseop.h"
#include "paddle/fluid/operators/utils.h"
#include "paddle/phi/kernels/funcs/math_function.h"
namespace paddle {
namespace operators {
template <typename T>
class FillConstantBatchSizeLikeOpMLUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
auto data_type =
static_cast<framework::proto::VarType::Type>(ctx.Attr<int>("dtype"));
auto float_value = ctx.Attr<float>("value");
auto str_value = ctx.Attr<std::string>("str_value");
auto force_cpu = ctx.Attr<bool>("force_cpu");
auto *out = ctx.Output<phi::DenseTensor>("Out");
auto *in = ctx.Input<phi::DenseTensor>("Input");
if (in->lod().size() && ctx.Attr<int>("input_dim_idx") == 0) {
// set the correct batch size for the phi::DenseTensor.
auto odims = out->dims();
int output_dim_idx = ctx.Attr<int>("output_dim_idx");
odims[output_dim_idx] = static_cast<int>(in->lod().back().size()) - 1;
out->mutable_data<T>(odims, ctx.GetPlace());
}
T value;
if (str_value.empty()) {
value = static_cast<T>(float_value);
} else {
// handle NaN/Inf first, which cannot be read from stream.
if (str_value == "inf") {
value = static_cast<T>(std::numeric_limits<double>::infinity());
} else if (str_value == "-inf") {
value = static_cast<T>(-std::numeric_limits<double>::infinity());
} else if (str_value == "nan") {
value = static_cast<T>(std::numeric_limits<double>::quiet_NaN());
} else {
std::stringstream convert_stream(str_value);
if (std::is_same<int64_t, T>::value) {
int64_t tmp_value;
convert_stream >> tmp_value;
value = static_cast<T>(tmp_value);
} else {
double tmp_value;
convert_stream >> tmp_value;
value = static_cast<T>(tmp_value);
}
}
}
platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
bool cpu_place = force_cpu || ctx.GetPlace() == platform::CPUPlace();
if (cpu_place) {
auto &dev_ctx = *pool.Get(platform::CPUPlace());
phi::funcs::SetConstant<phi::CPUContext, T> functor;
out->mutable_data(platform::CPUPlace(),
framework::TransToPhiDataType(data_type));
functor(reinterpret_cast<const phi::CPUContext &>(dev_ctx),
out,
static_cast<T>(value));
} else {
out->mutable_data(ctx.GetPlace(),
framework::TransToPhiDataType(data_type));
const T *value_data = &value;
cnnlPointerMode_t pointer_mode = CNNL_POINTER_MODE_HOST;
MLUCnnlTensorDesc output_desc(*out);
MLUCnnl::Fill(
ctx, pointer_mode, value_data, output_desc.get(), GetBasePtr(out));
}
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
namespace plat = paddle::platform;
REGISTER_OP_MLU_KERNEL(
fill_constant_batch_size_like,
ops::FillConstantBatchSizeLikeOpMLUKernel<int>,
ops::FillConstantBatchSizeLikeOpMLUKernel<float>,
ops::FillConstantBatchSizeLikeOpMLUKernel<plat::float16>);
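// Illustrative sketch (not part of the original file): the fill kernels in
// this diff parse the "str_value" attribute the same way, special-casing
// "inf", "-inf" and "nan" because those cannot be read back through a
// stringstream, and routing integral targets through int64_t instead of
// going via double. The function name is an assumption for this sketch.
#include <cstdint>
#include <limits>
#include <sstream>
#include <string>
#include <type_traits>

template <typename T>
T ParseFillValue(const std::string& str_value, float float_value) {
  if (str_value.empty()) return static_cast<T>(float_value);
  if (str_value == "inf")
    return static_cast<T>(std::numeric_limits<double>::infinity());
  if (str_value == "-inf")
    return static_cast<T>(-std::numeric_limits<double>::infinity());
  if (str_value == "nan")
    return static_cast<T>(std::numeric_limits<double>::quiet_NaN());
  std::stringstream convert_stream(str_value);
  if (std::is_same<int64_t, T>::value) {
    int64_t tmp_value;
    convert_stream >> tmp_value;
    return static_cast<T>(tmp_value);
  }
  double tmp_value;
  convert_stream >> tmp_value;
  return static_cast<T>(tmp_value);
}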
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/mlu/mlu_baseop.h"
#include "paddle/fluid/operators/utils.h"
namespace paddle {
namespace operators {
template <typename T>
class FillConstantMLUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
auto str_value = ctx.Attr<std::string>("str_value");
auto float_value = ctx.Attr<float>("value");
auto *out_var = ctx.Output<phi::DenseTensor>("Out");
T value;
if (str_value.empty()) {
value = static_cast<T>(float_value);
} else {
// handle NaN/Inf first, which cannot be read from stream.
if (str_value == "inf") {
value = static_cast<T>(std::numeric_limits<double>::infinity());
} else if (str_value == "-inf") {
value = static_cast<T>(-std::numeric_limits<double>::infinity());
} else if (str_value == "nan") {
value = static_cast<T>(std::numeric_limits<double>::quiet_NaN());
} else {
std::stringstream convert_stream(str_value);
if (std::is_same<int64_t, T>::value) {
int64_t tmp_value;
convert_stream >> tmp_value;
value = static_cast<T>(tmp_value);
} else {
double tmp_value;
convert_stream >> tmp_value;
value = static_cast<T>(tmp_value);
}
}
}
const T *value_data = &value;
cnnlPointerMode_t pointer_mode = CNNL_POINTER_MODE_HOST;
if (ctx.HasInput("ValueTensor")) {
auto *value_tensor = ctx.Input<phi::DenseTensor>("ValueTensor");
PADDLE_ENFORCE_EQ(
value_tensor->numel(),
1,
platform::errors::InvalidArgument(
"When use phi::DenseTensor as value to set phi::DenseTensor "
"value in fill_cosntant, "
"value input(ValueTensor) size must be 1, but get %d",
value_tensor->numel()));
value_data = value_tensor->data<T>();
auto tmp_place = value_tensor->place();
if (platform::is_mlu_place(tmp_place)) {
pointer_mode = CNNL_POINTER_MODE_DEVICE;
}
}
auto shape = GetShape(ctx);
out_var->mutable_data<T>(shape, ctx.GetPlace());
MLUCnnlTensorDesc output_desc(*out_var);
MLUCnnl::Fill(
ctx, pointer_mode, value_data, output_desc.get(), GetBasePtr(out_var));
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_MLU_KERNEL(
fill_constant,
paddle::operators::FillConstantMLUKernel<float>,
paddle::operators::FillConstantMLUKernel<bool>,
paddle::operators::FillConstantMLUKernel<int>,
paddle::operators::FillConstantMLUKernel<uint8_t>,
paddle::operators::FillConstantMLUKernel<int16_t>,
paddle::operators::FillConstantMLUKernel<int64_t>,
paddle::operators::FillConstantMLUKernel<paddle::platform::float16>);
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/flatten_op.h"
namespace paddle {
namespace operators {
template <typename DeviceContext, typename T>
class FlattenMLUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &context) const override {
auto *in = context.Input<phi::DenseTensor>("X");
auto *out = context.Output<phi::DenseTensor>("Out");
auto &axes = context.Attr<int>("axis");
auto x_dims = in->dims();
auto out_dims = phi::make_ddim(GetOutputShape(axes, x_dims));
out->mutable_data(context.GetPlace(), in->type());
framework::TensorCopy(
*in,
context.GetPlace(),
context.template device_context<platform::DeviceContext>(),
out);
out->Resize(out_dims);
}
static std::vector<int32_t> GetOutputShape(const int axis,
const framework::DDim &in_dims) {
int64_t outer = 1, inner = 1;
for (int i = 0; i < in_dims.size(); ++i) {
if (i < axis) {
outer *= in_dims[i];
} else {
inner *= in_dims[i];
}
}
std::vector<int32_t> out_shape(2);
out_shape[0] = outer;
out_shape[1] = inner;
return out_shape;
}
};
template <typename DeviceContext, typename T>
class FlattenGradMLUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
auto *d_x = ctx.Output<phi::DenseTensor>(framework::GradVarName("X"));
auto *d_out = ctx.Input<phi::DenseTensor>(framework::GradVarName("Out"));
auto in_dims = ctx.Input<phi::DenseTensor>("X")->dims();
d_x->mutable_data(ctx.GetPlace(), d_out->type());
framework::TensorCopy(
*d_out,
ctx.GetPlace(),
ctx.template device_context<platform::MLUDeviceContext>(),
d_x);
d_x->Resize(in_dims);
}
};
template <typename DeviceContext, typename T>
class Flatten2MLUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &context) const override {
auto &axes = context.Attr<int>("axis");
auto *in = context.Input<phi::DenseTensor>("X");
auto x_dims = in->dims();
auto *out = context.Output<phi::DenseTensor>("Out");
auto out_dims = phi::make_ddim(
FlattenMLUKernel<DeviceContext, T>::GetOutputShape(axes, x_dims));
out->mutable_data(context.GetPlace(), in->type());
framework::TensorCopy(
*in,
context.GetPlace(),
context.template device_context<platform::DeviceContext>(),
out);
out->Resize(out_dims);
}
};
template <typename DeviceContext, typename T>
class Flatten2GradMLUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
auto *d_x = ctx.Output<phi::DenseTensor>(framework::GradVarName("X"));
auto *d_out = ctx.Input<phi::DenseTensor>(framework::GradVarName("Out"));
auto xshape_dims = ctx.Input<phi::DenseTensor>("XShape")->dims();
auto x_dims = phi::slice_ddim(xshape_dims, 1, xshape_dims.size());
d_x->mutable_data(ctx.GetPlace(), d_out->type());
framework::TensorCopy(
*d_out,
ctx.GetPlace(),
ctx.template device_context<platform::DeviceContext>(),
d_x);
d_x->Resize(x_dims);
}
};
template <typename DeviceContext, typename T>
class FlattenContiguousRangeMLUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &context) const override {
auto *in = context.Input<phi::DenseTensor>("X");
auto *out = context.Output<phi::DenseTensor>("Out");
out->mutable_data(context.GetPlace(), in->type());
auto &start_axis = context.Attr<int>("start_axis");
auto &stop_axis = context.Attr<int>("stop_axis");
// make out dims
auto in_dims = in->dims();
auto out_dims =
phi::make_ddim(GetOutputShape(start_axis, stop_axis, in_dims));
framework::TensorCopy(
*in,
context.GetPlace(),
context.template device_context<platform::DeviceContext>(),
out);
out->Resize(out_dims);
}
static std::vector<int32_t> GetOutputShape(const int start_axis,
const int stop_axis,
const framework::DDim &in_dims) {
int64_t outer = 1;
std::vector<int32_t> out_shape;
int in_dims_size = in_dims.size();
out_shape.reserve(in_dims_size - stop_axis + start_axis);
int real_start_axis = start_axis, real_stop_axis = stop_axis;
if (start_axis < 0) {
real_start_axis = start_axis + in_dims_size;
}
if (stop_axis < 0) {
real_stop_axis = stop_axis + in_dims_size;
}
for (int i = 0; i < real_start_axis; ++i) {
out_shape.push_back(in_dims[i]);
}
for (int i = real_start_axis; i <= real_stop_axis; i++) {
if (in_dims[i] == -1 || outer == -1) {
outer = -1;
} else {
outer *= in_dims[i];
}
}
out_shape.push_back(outer);
for (int i = real_stop_axis + 1; i < in_dims_size; i++) {
out_shape.push_back(in_dims[i]);
}
return out_shape;
}
};
template <typename DeviceContext, typename T>
class FlattenContiguousRangeGradMLUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
auto *d_x = ctx.Output<phi::DenseTensor>(framework::GradVarName("X"));
auto *d_out = ctx.Input<phi::DenseTensor>(framework::GradVarName("Out"));
auto xshape_dims = ctx.Input<phi::DenseTensor>("XShape")->dims();
auto x_dims = phi::slice_ddim(xshape_dims, 1, xshape_dims.size());
d_x->mutable_data(ctx.GetPlace(), d_out->type());
framework::TensorCopy(
*d_out,
ctx.GetPlace(),
ctx.template device_context<paddle::platform::MLUDeviceContext>(),
d_x);
d_x->Resize(x_dims);
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_MLU_KERNEL(
flatten,
ops::FlattenMLUKernel<paddle::platform::MLUDeviceContext, float>,
ops::FlattenMLUKernel<paddle::platform::MLUDeviceContext, double>,
ops::FlattenMLUKernel<paddle::platform::MLUDeviceContext, uint8_t>,
ops::FlattenMLUKernel<paddle::platform::MLUDeviceContext, int>,
ops::FlattenMLUKernel<paddle::platform::MLUDeviceContext, int8_t>,
ops::FlattenMLUKernel<paddle::platform::MLUDeviceContext, int64_t>);
REGISTER_OP_MLU_KERNEL(
flatten_grad,
ops::FlattenGradMLUKernel<paddle::platform::MLUDeviceContext, float>,
ops::FlattenGradMLUKernel<paddle::platform::MLUDeviceContext, double>,
ops::FlattenGradMLUKernel<paddle::platform::MLUDeviceContext, uint8_t>,
ops::FlattenGradMLUKernel<paddle::platform::MLUDeviceContext, int>,
ops::FlattenGradMLUKernel<paddle::platform::MLUDeviceContext, int8_t>,
ops::FlattenGradMLUKernel<paddle::platform::MLUDeviceContext, int64_t>);
REGISTER_OP_MLU_KERNEL(
flatten2,
ops::Flatten2MLUKernel<paddle::platform::MLUDeviceContext, float>,
ops::Flatten2MLUKernel<paddle::platform::MLUDeviceContext, double>,
ops::Flatten2MLUKernel<paddle::platform::MLUDeviceContext, uint8_t>,
ops::Flatten2MLUKernel<paddle::platform::MLUDeviceContext, int>,
ops::Flatten2MLUKernel<paddle::platform::MLUDeviceContext, int8_t>,
ops::Flatten2MLUKernel<paddle::platform::MLUDeviceContext, int64_t>);
REGISTER_OP_MLU_KERNEL(
flatten2_grad,
ops::Flatten2GradMLUKernel<paddle::platform::MLUDeviceContext, float>,
ops::Flatten2GradMLUKernel<paddle::platform::MLUDeviceContext, double>,
ops::Flatten2GradMLUKernel<paddle::platform::MLUDeviceContext, uint8_t>,
ops::Flatten2GradMLUKernel<paddle::platform::MLUDeviceContext, int>,
ops::Flatten2GradMLUKernel<paddle::platform::MLUDeviceContext, int8_t>,
ops::Flatten2GradMLUKernel<paddle::platform::MLUDeviceContext, int64_t>);
REGISTER_OP_MLU_KERNEL(
flatten_contiguous_range,
ops::FlattenContiguousRangeMLUKernel<paddle::platform::MLUDeviceContext,
float>,
ops::FlattenContiguousRangeMLUKernel<paddle::platform::MLUDeviceContext,
double>,
ops::FlattenContiguousRangeMLUKernel<paddle::platform::MLUDeviceContext,
uint8_t>,
ops::FlattenContiguousRangeMLUKernel<paddle::platform::MLUDeviceContext,
int>,
ops::FlattenContiguousRangeMLUKernel<paddle::platform::MLUDeviceContext,
int8_t>,
ops::FlattenContiguousRangeMLUKernel<paddle::platform::MLUDeviceContext,
int64_t>);
REGISTER_OP_MLU_KERNEL(
flatten_contiguous_range_grad,
ops::FlattenContiguousRangeGradMLUKernel<paddle::platform::MLUDeviceContext,
float>,
ops::FlattenContiguousRangeGradMLUKernel<paddle::platform::MLUDeviceContext,
double>,
ops::FlattenContiguousRangeGradMLUKernel<paddle::platform::MLUDeviceContext,
uint8_t>,
ops::FlattenContiguousRangeGradMLUKernel<paddle::platform::MLUDeviceContext,
int>,
ops::FlattenContiguousRangeGradMLUKernel<paddle::platform::MLUDeviceContext,
int8_t>,
ops::FlattenContiguousRangeGradMLUKernel<paddle::platform::MLUDeviceContext,
int64_t>);
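// Illustrative sketch (not part of the original file): a host-side rerun of
// FlattenContiguousRangeMLUKernel::GetOutputShape, showing how negative
// start/stop axes are normalised and how the covered extents fold into a
// single dimension (with -1 propagated as "unknown"). The function name is an
// assumption for this sketch.
#include <vector>

inline std::vector<int> FlattenRangeShape(const std::vector<int>& in_dims,
                                          int start_axis, int stop_axis) {
  const int rank = static_cast<int>(in_dims.size());
  if (start_axis < 0) start_axis += rank;  // e.g. -1 becomes rank - 1
  if (stop_axis < 0) stop_axis += rank;
  std::vector<int> out;
  for (int i = 0; i < start_axis; ++i) out.push_back(in_dims[i]);
  int folded = 1;
  for (int i = start_axis; i <= stop_axis; ++i)
    folded = (in_dims[i] == -1 || folded == -1) ? -1 : folded * in_dims[i];
  out.push_back(folded);
  for (int i = stop_axis + 1; i < rank; ++i) out.push_back(in_dims[i]);
  return out;  // e.g. {2, 3, 4, 5} with [start, stop] = [1, 2] -> {2, 12, 5}
}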
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/operators/mlu/mlu_baseop.h"
#include "paddle/fluid/platform/device_context.h"
namespace paddle {
namespace operators {
template <typename T>
class GatherNdMLUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
auto *x = ctx.Input<phi::DenseTensor>("X");
auto *index = ctx.Input<phi::DenseTensor>("Index");
auto *out = ctx.Output<phi::DenseTensor>("Out");
auto place = ctx.GetPlace();
out->template mutable_data<T>(place);
if (x->numel() == 0) return;
if (index->numel() == 0) {
auto &dev_ctx = ctx.template device_context<platform::MLUDeviceContext>();
framework::TensorCopy(*x, place, dev_ctx, out);
return;
}
const auto &index_type = framework::TransToProtoVarType(index->dtype());
bool index_type_match = index_type == framework::proto::VarType::INT32 ||
index_type == framework::proto::VarType::INT64;
PADDLE_ENFORCE_EQ(index_type_match,
true,
platform::errors::InvalidArgument(
"Index holds the wrong type, it holds [%s],"
"but desires to be [%s] or [%s]",
paddle::framework::DataTypeToString(index_type),
paddle::framework::DataTypeToString(
framework::proto::VarType::INT32),
paddle::framework::DataTypeToString(
framework::proto::VarType::INT64)));
MLUCnnlTensorDesc x_desc(*x);
MLUCnnlTensorDesc index_desc(*index);
MLUCnnlTensorDesc out_desc(*out);
MLUCnnl::GatherNd(ctx,
x_desc.get(),
GetBasePtr(x),
index_desc.get(),
GetBasePtr(index),
out_desc.get(),
GetBasePtr(out));
}
};
template <typename T>
class GatherNdGradMLUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
auto *index = ctx.Input<phi::DenseTensor>("Index");
auto *dout = ctx.Input<phi::DenseTensor>(framework::GradVarName("Out"));
auto *dx = ctx.Output<phi::DenseTensor>(framework::GradVarName("X"));
auto *x = ctx.Input<phi::DenseTensor>("X");
if (dx->numel() == 0) return;
if (index->numel() == 0) {
auto &dev_ctx = ctx.template device_context<platform::MLUDeviceContext>();
framework::TensorCopy(*dout, ctx.GetPlace(), dev_ctx, dx);
return;
}
phi::DenseTensor tmp_tensor(index->type());
phi::DenseTensor tmp_tensor2(dout->type());
const auto index_dims = index->dims();
if (index_dims.size() == 1) {
tmp_tensor.ShareDataWith(*index);
std::vector<int64_t> new_dim = {1, index_dims[0]};
tmp_tensor.Resize(phi::make_ddim(new_dim));
index = &tmp_tensor;
tmp_tensor2.ShareDataWith(*dout);
std::vector<int64_t> new_dim2{1};
for (int i = index->numel(); i < x->dims().size(); i++) {
new_dim2.push_back(x->dims()[i]);
}
tmp_tensor2.Resize(phi::make_ddim(new_dim2));
dout = &tmp_tensor2;
}
dx->mutable_data<T>(ctx.GetPlace());
MLUCnnlTensorDesc dx_desc(*dx);
auto value = static_cast<T>(0);
MLUCnnl::Fill(
ctx, CNNL_POINTER_MODE_HOST, &value, dx_desc.get(), GetBasePtr(dx));
MLUCnnlTensorDesc index_desc(*index);
MLUCnnlTensorDesc dout_desc(*dout);
const cnnlScatterNdMode_t mode = CNNL_SCATTERND_ADD;
MLUCnnl::ScatterNd(ctx,
mode,
index_desc.get(),
GetBasePtr(index),
dout_desc.get(),
GetBasePtr(dout),
dx_desc.get(),
GetBasePtr(dx),
dx_desc.get(),
GetBasePtr(dx));
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_MLU_KERNEL(gather_nd,
ops::GatherNdMLUKernel<float>,
ops::GatherNdMLUKernel<paddle::platform::float16>);
REGISTER_OP_MLU_KERNEL(gather_nd_grad,
ops::GatherNdGradMLUKernel<paddle::platform::float16>,
ops::GatherNdGradMLUKernel<float>);
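// Illustrative sketch (not part of the original file): the grad kernel above
// scatters `dout` back into a zero-filled `dx` with CNNL_SCATTERND_ADD, i.e.
// the inverse of the forward gather. A minimal host-side reference for the
// forward gather_nd on a 2-D input with an index of shape {num, 2}; the
// function name and pair-based index are assumptions for this sketch.
#include <cassert>
#include <utility>
#include <vector>

inline std::vector<float> GatherNd2D(
    const std::vector<float>& x, int rows, int cols,
    const std::vector<std::pair<int, int>>& index) {
  std::vector<float> out;
  out.reserve(index.size());
  for (const auto& ij : index) {
    assert(ij.first < rows && ij.second < cols);
    out.push_back(x[ij.first * cols + ij.second]);  // out[k] = x[i_k][j_k]
  }
  return out;
}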
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/operators/mlu/mlu_baseop.h"
namespace paddle {
namespace operators {
template <typename T>
class GatherOpMLUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
auto *x = ctx.Input<phi::DenseTensor>("X");
auto *index = ctx.Input<phi::DenseTensor>("Index");
auto axis = ctx.Attr<int>("axis");
const auto index_dims = index->dims();
if (index_dims.size() == 2) {
PADDLE_ENFORCE_EQ(
index_dims[1],
1,
platform::errors::InvalidArgument(
"The last dim of index should be 1 when it is 2D, but we get %d",
index_dims[1]));
} else {
PADDLE_ENFORCE_EQ(
index_dims.size(),
1,
platform::errors::InvalidArgument(
"The index should be 1D, when it is not 2D, but we get %d",
index_dims.size()));
}
auto *out = ctx.Output<phi::DenseTensor>("Out");
out->mutable_data<T>(ctx.GetPlace());
MLUCnnlTensorDesc x_desc(*x);
int index_shape_1d[1] = {static_cast<int>(index_dims[0])};
MLUCnnlTensorDesc index_desc(
1, index_shape_1d, ToCnnlDataType(index->dtype()));
MLUCnnlTensorDesc out_desc(*out);
MLUCnnl::GatherFunctor(ctx,
axis,
0 /*batch_dims*/,
x_desc.get(),
GetBasePtr(x),
index_desc.get(),
GetBasePtr(index),
out_desc.get(),
GetBasePtr(out));
}
};
template <typename T>
class GatherGradOpMLUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
auto *index = ctx.Input<phi::DenseTensor>("Index");
auto *dout = ctx.Input<phi::DenseTensor>(framework::GradVarName("Out"));
auto *dx = ctx.Output<phi::DenseTensor>(framework::GradVarName("X"));
const auto index_dims = index->dims();
if (index_dims.size() == 2) {
PADDLE_ENFORCE_EQ(
index_dims[1],
1,
platform::errors::InvalidArgument(
"The last dim of index should be 1 when it is 2D, but we get %d",
index_dims[1]));
} else {
PADDLE_ENFORCE_EQ(
index_dims.size(),
1,
platform::errors::InvalidArgument(
"The index should be 1D, when it is not 2D, but we get %d",
index_dims.size()));
}
dx->mutable_data<T>(ctx.GetPlace());
MLUCnnlTensorDesc dx_desc(*dx);
auto value = static_cast<T>(0);
MLUCnnl::Fill(
ctx, CNNL_POINTER_MODE_HOST, &value, dx_desc.get(), GetBasePtr(dx));
int index_shape_1d[1] = {static_cast<int>(index_dims[0])};
MLUCnnlTensorDesc index_desc(
1, index_shape_1d, ToCnnlDataType(index->dtype()));
MLUCnnlTensorDesc dout_desc(*dout);
const cnnlScatterRefMode_t mode = CNNL_SCATTERREF_UPDATE;
MLUCnnl::ScatterRefFunctor(ctx,
dx_desc.get(),
GetBasePtr(dx),
dout_desc.get(),
GetBasePtr(dout),
index_desc.get(),
GetBasePtr(index),
mode);
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_MLU_KERNEL(gather,
ops::GatherOpMLUKernel<float>,
ops::GatherOpMLUKernel<paddle::platform::float16>,
ops::GatherOpMLUKernel<int>);
REGISTER_OP_MLU_KERNEL(gather_grad,
ops::GatherGradOpMLUKernel<float>,
ops::GatherGradOpMLUKernel<paddle::platform::float16>,
ops::GatherGradOpMLUKernel<int>);
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <random>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/phi/core/generator.h"
namespace paddle {
namespace operators {
template <typename T>
class MLUGaussianRandomKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
float mean = context.Attr<float>("mean");
float std = context.Attr<float>("std");
auto* tensor = context.Output<phi::DenseTensor>("Out");
tensor->mutable_data<T>(context.GetPlace());
phi::DenseTensor cpu_tensor(tensor->type());
cpu_tensor.Resize(tensor->dims());
T* cpu_data = cpu_tensor.mutable_data<T>(platform::CPUPlace());
std::normal_distribution<T> dist(mean, std);
int64_t size = tensor->numel();
unsigned int seed = static_cast<unsigned int>(context.Attr<int>("seed"));
auto engine = phi::GetCPURandomEngine(seed);
for (int64_t i = 0; i < size; ++i) {
cpu_data[i] = dist(*engine);
}
auto& dev_ctx =
context.template device_context<paddle::platform::MLUDeviceContext>();
framework::TensorCopy(cpu_tensor, context.GetPlace(), dev_ctx, tensor);
dev_ctx.Wait();
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_MLU_KERNEL(gaussian_random, ops::MLUGaussianRandomKernel<float>);
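// Illustrative sketch (not part of the original file): the kernel above draws
// every sample on the host with std::normal_distribution and then copies the
// buffer to the device. The host-side part in isolation; the mt19937_64
// engine stands in for phi::GetCPURandomEngine and the function name is an
// assumption for this sketch.
#include <cstdint>
#include <random>
#include <vector>

inline std::vector<float> SampleGaussian(int64_t size, float mean,
                                         float std_dev, unsigned int seed) {
  std::mt19937_64 engine(seed);
  std::normal_distribution<float> dist(mean, std_dev);
  std::vector<float> cpu_data(static_cast<size_t>(size));
  for (auto& v : cpu_data) v = dist(engine);  // fill on CPU, copy to MLU later
  return cpu_data;
}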
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/mlu/mlu_baseop.h"
namespace paddle {
namespace operators {
template <typename T>
class GridSamplerMLUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
PADDLE_ENFORCE_EQ(
platform::is_mlu_place(ctx.GetPlace()),
true,
platform::errors::Unavailable("This kernel only runs on MLU."));
// input and output data
const phi::DenseTensor* input = ctx.Input<phi::DenseTensor>("X");
const phi::DenseTensor* grid = ctx.Input<phi::DenseTensor>("Grid");
phi::DenseTensor* output = ctx.Output<phi::DenseTensor>("Output");
int n = input->dims()[0];
int c = input->dims()[1];
int out_h = grid->dims()[1];
int out_w = grid->dims()[2];
output->mutable_data<T>({n, c, out_h, out_w}, ctx.GetPlace());
// attrs
// paddle.nn.functional.grid_sample(x, grid, mode='bilinear',
// padding_mode='zeros', align_corners=True, name=None)
const std::string mode = ctx.Attr<std::string>("mode");
const std::string padding_mode = ctx.Attr<std::string>("padding_mode");
bool align_corners = ctx.Attr<bool>("align_corners");
const std::string data_format = phi::DataLayoutToString(input->layout());
PADDLE_ENFORCE_EQ(
mode == "bilinear",
true,
platform::errors::Unavailable(
"Only support bilinear mode in mlu grid_sample kernel."));
PADDLE_ENFORCE_EQ(
padding_mode == "zeros",
true,
platform::errors::Unavailable(
"Only support zeros padding_mode in mlu grid_sample kernel."));
phi::DenseTensor trans_input(input->dtype());
// transpose input from NCHW to NHWC
const std::vector<int> perm_to_nhwc = {0, 2, 3, 1};
TransposeFromMLUTensor<T>(
ctx, perm_to_nhwc, input, &trans_input, true /*need_reshape_or_alloc*/);
phi::DenseTensor tmp_output(output->dtype());
tmp_output.mutable_data<T>({n, out_h, out_w, c}, ctx.GetPlace());
MLUCnnlGridSampleDesc grid_sample_desc(mode, padding_mode, align_corners);
MLUCnnlTensorDesc input_desc(
trans_input, CNNL_LAYOUT_NHWC, ToCnnlDataType<T>());
MLUCnnlTensorDesc grid_desc(*grid, CNNL_LAYOUT_NHWC, ToCnnlDataType<T>());
MLUCnnlTensorDesc tmp_output_desc(
tmp_output, CNNL_LAYOUT_NHWC, ToCnnlDataType<T>());
MLUCnnl::GridSample(ctx,
grid_sample_desc.get(),
input_desc.get(),
GetBasePtr(&trans_input),
grid_desc.get(),
GetBasePtr(grid),
tmp_output_desc.get(),
GetBasePtr(&tmp_output));
// transpose output from NHWC to NCHW
const std::vector<int> perm_to_nchw = {
0,
3,
1,
2,
};
TransposeFromMLUTensor<T>(ctx,
perm_to_nchw,
&tmp_output,
output,
false /*need_reshape_or_alloc*/);
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
namespace plat = paddle::platform;
REGISTER_OP_MLU_KERNEL(grid_sampler,
ops::GridSamplerMLUKernel<float>,
ops::GridSamplerMLUKernel<plat::float16>);
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/mlu/mlu_baseop.h"
namespace paddle {
namespace operators {
template <typename T>
class HuberLossMLUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto& dev_ctx = GetDevCtxFromCTX(ctx);
auto* x = ctx.Input<phi::DenseTensor>("X");
auto* y = ctx.Input<phi::DenseTensor>("Y");
auto* residual = ctx.Output<phi::DenseTensor>("Residual");
auto* out = ctx.Output<phi::DenseTensor>("Out");
auto delta = ctx.Attr<float>("delta");
auto place = ctx.GetPlace();
// compute y-x
cnnlDataType_t data_type = ToCnnlDataType<T>();
residual->mutable_data<T>(x->dims(), place);
MLUCnnlTensorDesc x_desc(*x);
MLUCnnlOpTensorDesc sub_op_desc(
CNNL_OP_TENSOR_SUB, data_type, CNNL_NOT_PROPAGATE_NAN);
MLUCnnl::OpTensor(ctx,
sub_op_desc.get(),
x_desc.get(),
GetBasePtr(y),
x_desc.get(),
GetBasePtr(x),
x_desc.get(),
GetBasePtr(residual),
data_type);
// compute smoothl1loss
out->mutable_data<T>(x->dims(), place);
cnnlSmoothL1LossAlgorithm_t smoothl1_algo =
CNNL_SMOOTHL1LOSS_REDUCTION_NONE; // defines whether to do reduction
// here
MLUCnnl::SmoothL1LossForward(ctx,
x_desc.get(),
GetBasePtr(x),
x_desc.get(), /* target has same shape as x */
GetBasePtr(y),
static_cast<float>(delta),
smoothl1_algo,
x_desc.get(), /* out has same shape as x */
GetBasePtr(out));
// compute multiply by delta
phi::DenseTensor scale_tensor, bias_tensor;
scale_tensor = ctx.AllocateTmpTensor<T, MLUDeviceContext>({1}, dev_ctx);
bias_tensor = ctx.AllocateTmpTensor<T, MLUDeviceContext>({1}, dev_ctx);
FillMLUTensorWithHostValue(ctx, static_cast<T>(delta), &scale_tensor);
FillMLUTensorWithHostValue(ctx, static_cast<T>(0.f), &bias_tensor);
const int axis = std::max(out->dims().size() - 1, 0);
MLUCnnlTensorDesc scale_desc(scale_tensor);
MLUCnnlTensorDesc bias_desc(bias_tensor);
MLUCnnlTensorDesc out_desc(*out);
MLUCnnl::Scale(ctx,
axis,
out_desc.get(),
GetBasePtr(out),
scale_desc.get(),
GetBasePtr(&scale_tensor),
bias_desc.get(),
GetBasePtr(&bias_tensor),
out_desc.get(),
GetBasePtr(out));
}
};
template <typename T>
class HuberLossGradMLUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto& dev_ctx = GetDevCtxFromCTX(ctx);
auto* residual = ctx.Input<phi::DenseTensor>("Residual");
auto* dout = ctx.Input<phi::DenseTensor>(framework::GradVarName("Out"));
auto* dx = ctx.Output<phi::DenseTensor>(framework::GradVarName("X"));
auto* dy = ctx.Output<phi::DenseTensor>(framework::GradVarName("Y"));
auto delta = ctx.Attr<float>("delta");
auto place = ctx.GetPlace();
phi::DenseTensor t_grad_rd;
t_grad_rd =
ctx.AllocateTmpTensor<T, MLUDeviceContext>(residual->dims(), dev_ctx);
MLUCnnlTensorDesc t_grad_rd_desc(t_grad_rd);
if (dx || dy) {
phi::DenseTensor t_zero;
t_zero =
ctx.AllocateTmpTensor<T, MLUDeviceContext>(residual->dims(), dev_ctx);
FillMLUTensorWithHostValue(ctx, static_cast<T>(0.f), &t_zero);
MLUCnnlTensorDesc residual_desc(*residual);
MLUCnnlTensorDesc dout_desc(*dout);
cnnlSmoothL1LossAlgorithm_t smoothl1_algo =
CNNL_SMOOTHL1LOSS_REDUCTION_NONE; // defines whether to do reduction
// here
MLUCnnl::SmoothL1LossBackward(ctx,
residual_desc.get(),
GetBasePtr(residual),
residual_desc.get(),
GetBasePtr(&t_zero),
dout_desc.get(),
GetBasePtr(dout),
static_cast<float>(delta),
smoothl1_algo,
t_grad_rd_desc.get(),
GetBasePtr(&t_grad_rd));
}
// compute multiply by delta
phi::DenseTensor scale_tensor, bias_tensor;
scale_tensor = ctx.AllocateTmpTensor<T, MLUDeviceContext>({1}, dev_ctx);
bias_tensor = ctx.AllocateTmpTensor<T, MLUDeviceContext>({1}, dev_ctx);
FillMLUTensorWithHostValue(ctx, static_cast<T>(0.f), &bias_tensor);
const int axis = std::max(t_grad_rd.dims().size() - 1, 0);
MLUCnnlTensorDesc scale_desc(scale_tensor);
MLUCnnlTensorDesc bias_desc(bias_tensor);
if (dx) {
dx->mutable_data<T>(place);
FillMLUTensorWithHostValue(ctx, static_cast<T>(-delta), &scale_tensor);
MLUCnnlTensorDesc out_desc(*dx);
MLUCnnl::Scale(ctx,
axis,
t_grad_rd_desc.get(),
GetBasePtr(&t_grad_rd),
scale_desc.get(),
GetBasePtr(&scale_tensor),
bias_desc.get(),
GetBasePtr(&bias_tensor),
out_desc.get(),
GetBasePtr(dx));
}
if (dy) {
dy->mutable_data<T>(place);
FillMLUTensorWithHostValue(ctx, static_cast<T>(delta), &scale_tensor);
MLUCnnlTensorDesc out_desc(*dy);
MLUCnnl::Scale(ctx,
axis,
t_grad_rd_desc.get(),
GetBasePtr(&t_grad_rd),
scale_desc.get(),
GetBasePtr(&scale_tensor),
bias_desc.get(),
GetBasePtr(&bias_tensor),
out_desc.get(),
GetBasePtr(dy));
}
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
namespace plat = paddle::platform;
REGISTER_OP_MLU_KERNEL(huber_loss,
ops::HuberLossMLUKernel<float>,
ops::HuberLossMLUKernel<plat::float16>);
REGISTER_OP_MLU_KERNEL(huber_loss_grad,
ops::HuberLossGradMLUKernel<float>,
ops::HuberLossGradMLUKernel<plat::float16>);
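// Illustrative sketch (not part of the original file): the forward kernel
// above composes residual = y - x, an unreduced smooth-L1 with threshold
// delta, and a final scale by delta. Assuming the usual smooth-L1 convention
// for the cnnl call, that composition is the standard Huber loss, spelled out
// per element below; the function name is an assumption for this sketch.
#include <cmath>

inline float HuberLossReference(float x, float y, float delta) {
  const float r = y - x;  // the "Residual" output written by the kernel
  const float abs_r = std::fabs(r);
  // smooth-L1 with beta = delta: quadratic inside the threshold, linear outside
  const float smooth_l1 =
      abs_r < delta ? 0.5f * r * r / delta : abs_r - 0.5f * delta;
  // the trailing Scale-by-delta step yields the Huber loss:
  // 0.5 * r^2 if |r| < delta, otherwise delta * (|r| - 0.5 * delta)
  return delta * smooth_l1;
}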
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/mlu/mlu_baseop.h"
namespace paddle {
namespace operators {
template <typename T>
class LabelSmoothMLUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* in_t = ctx.Input<phi::DenseTensor>("X");
auto* dist_t = ctx.Input<phi::DenseTensor>("PriorDist");
auto* out_t = ctx.Output<phi::DenseTensor>("Out");
auto epsilon = ctx.Attr<float>("epsilon");
auto epsilon_gt = 1.0f - epsilon;
if (in_t->numel() == 0) return;
out_t->mutable_data<T>(ctx.GetPlace());
auto label_dim = in_t->dims()[in_t->dims().size() - 1];
MLUCnnlTensorDesc x_desc(*in_t);
MLUCnnlTensorDesc out_desc(*out_t);
auto data_type = ToCnnlDataType<T>();
MLUCnnlOpTensorDesc op_tensor_desc(
CNNL_OP_TENSOR_ADD, data_type, CNNL_NOT_PROPAGATE_NAN);
if (ctx.HasInput("PriorDist")) {
MLUCnnlTensorDesc dist_desc(*dist_t);
MLUCnnl::OpTensor(ctx,
op_tensor_desc.get(),
x_desc.get(),
GetBasePtr(in_t),
dist_desc.get(),
GetBasePtr(dist_t),
out_desc.get(),
GetBasePtr(out_t),
data_type,
epsilon_gt,
epsilon);
} else {
auto& dev_ctx = ctx.template device_context<MLUDeviceContext>();
phi::DenseTensor dist_tensor =
ctx.AllocateTmpTensor<T, MLUDeviceContext>({1, label_dim}, dev_ctx);
MLUCnnlTensorDesc dist_desc(dist_tensor);
auto value = static_cast<T>(1.0f / label_dim);
MLUCnnl::Fill(ctx,
CNNL_POINTER_MODE_HOST,
&value,
dist_desc.get(),
GetBasePtr(&dist_tensor));
MLUCnnl::OpTensor(ctx,
op_tensor_desc.get(),
x_desc.get(),
GetBasePtr(in_t),
dist_desc.get(),
GetBasePtr(&dist_tensor),
out_desc.get(),
GetBasePtr(out_t),
data_type,
epsilon_gt,
epsilon);
}
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
namespace plat = paddle::platform;
REGISTER_OP_MLU_KERNEL(label_smooth,
ops::LabelSmoothMLUKernel<float>,
ops::LabelSmoothMLUKernel<plat::float16>);
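// Illustrative sketch (not part of the original file): the OpTensor call above
// is an alpha/beta blend, out = (1 - epsilon) * x + epsilon * dist, where dist
// defaults to a uniform 1 / label_dim when "PriorDist" is absent. A host-side
// reference for the default case; the function name is an assumption for this
// sketch.
#include <vector>

inline std::vector<float> LabelSmoothReference(const std::vector<float>& labels,
                                               float epsilon, int label_dim) {
  std::vector<float> out(labels.size());
  for (size_t i = 0; i < labels.size(); ++i)
    out[i] = (1.0f - epsilon) * labels[i] + epsilon / label_dim;
  return out;
}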
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/mlu/mlu_baseop.h"
namespace paddle {
namespace operators {
template <typename T>
class LookupTableV2MLUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
auto *ids_t = ctx.Input<phi::DenseTensor>("Ids"); // int tensor
auto *output_t = ctx.Output<phi::DenseTensor>("Out"); // float tensor
auto *table_t = ctx.Input<phi::DenseTensor>("W");
int padding_idx = static_cast<int>(ctx.Attr<int64_t>("padding_idx"));
auto *table_var = ctx.InputVar("W");
PADDLE_ENFORCE_EQ(
table_var->IsType<phi::DenseTensor>(),
true,
platform::errors::InvalidArgument("mlu only accept phi::DenseTensor"));
output_t->mutable_data<T>(ctx.GetPlace());
MLUCnnlTensorDesc ids_desc(*ids_t);
MLUCnnlTensorDesc table_desc(*table_t);
MLUCnnlTensorDesc output_desc(*output_t);
MLUCnnl::EmbeddingForward(ctx,
padding_idx,
table_desc.get(),
GetBasePtr(table_t),
ids_desc.get(),
static_cast<const int *>(GetBasePtr(ids_t)),
output_desc.get(),
GetBasePtr(output_t));
}
};
template <typename T>
class LookupTableV2GradMLUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
auto *table_var = ctx.InputVar("W");
PADDLE_ENFORCE_EQ(
table_var->IsType<phi::DenseTensor>(),
true,
platform::errors::PermissionDenied(
"Unsupported Variable Type , idx in "
"LookupTableV2GradMLUKernel should be phi::DenseTensor."));
bool is_sparse = ctx.Attr<bool>("is_sparse");
PADDLE_ENFORCE_EQ(
is_sparse,
false,
platform::errors::InvalidArgument(
"LookupTableV2GradMLUKernel dose NOT support is_sparse = True."));
auto *ids_t = ctx.Input<phi::DenseTensor>("Ids");
auto *output_grad_t =
ctx.Input<phi::DenseTensor>(framework::GradVarName("Out"));
auto *table_grad_t =
ctx.Output<phi::DenseTensor>(framework::GradVarName("W"));
table_grad_t->mutable_data<T>(ctx.GetPlace());
int padding_idx = static_cast<int>(ctx.Attr<int64_t>("padding_idx"));
int64_t ids_numel = ids_t->numel();
PADDLE_ENFORCE_EQ(
ids_numel <= std::numeric_limits<int32_t>::max(),
true,
platform::errors::OutOfRange(
"Number of ids greater than int32_t::max , please check "
"number of ids in LookupTableV2GradMLUKernel."));
phi::DenseTensor ids_int32(ids_t->dtype());
if (ids_t->dtype() != DataType::INT32) {
ids_int32.mutable_data<int>(ids_t->dims(), ctx.GetPlace());
MLUCnnlTensorDesc ids_desc(*ids_t);
MLUCnnlTensorDesc ids_int32_desc(ids_int32);
auto cast_type = GetCastDataType(ids_t->dtype(), DataType::INT32);
MLUCnnl::Cast(ctx,
cast_type,
ids_desc.get(),
GetBasePtr(ids_t),
ids_int32_desc.get(),
GetBasePtr(&ids_int32));
} else {
ids_int32 = *ids_t;
}
MLUCnnlTensorDesc ids_int32_desc(ids_int32);
MLUCnnlTensorDesc output_grad_desc(*output_grad_t);
MLUCnnlTensorDesc table_grad_desc(*table_grad_t);
MLUCnnl::EmbeddingBackward(ctx,
padding_idx,
false,
ids_int32_desc.get(),
GetBasePtr(&ids_int32),
output_grad_desc.get(),
GetBasePtr(output_grad_t),
table_grad_desc.get(),
GetBasePtr(table_grad_t));
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
namespace plat = paddle::platform;
REGISTER_OP_MLU_KERNEL(lookup_table_v2,
ops::LookupTableV2MLUKernel<float>,
ops::LookupTableV2MLUKernel<int>,
ops::LookupTableV2MLUKernel<plat::float16>);
REGISTER_OP_MLU_KERNEL(lookup_table_v2_grad,
ops::LookupTableV2GradMLUKernel<float>,
ops::LookupTableV2GradMLUKernel<plat::float16>);
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/mlu/mlu_baseop.h"
namespace paddle {
namespace operators {
template <typename T>
class MeshgridMLUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto ins = ctx.MultiInput<phi::DenseTensor>("X");
auto outs = ctx.MultiOutput<phi::DenseTensor>("Out");
PADDLE_ENFORCE_EQ((ins.size() > 1) && (ins.size() < 7),
true,
platform::errors::InvalidArgument(
"Excepted phi::DenseTensor numbers between 2 and 6, "
"but only received d% .",
ins.size()));
int64_t size = ins.size();
std::vector<int64_t> shape(size);
for (int64_t i = 0; i < size; i++) {
switch (ins[i]->dims().size()) {
case 0:
shape[i] = 1;
break;
case 1:
shape[i] = ins[i]->dims()[0];
break;
default:
PADDLE_THROW(platform::errors::InvalidArgument(
"Expected scalar or 1D tensor in the tensor list but got tensor "
"%d: ",
i));
}
}
MLUCnnlTensorDesc out_desc(size, shape.data(), ToCnnlDataType<T>());
framework::DDim out_dims = phi::make_ddim(shape);
for (int64_t i = 0; i < size; i++) {
std::vector<int64_t> view_shape(size, 1);
view_shape[i] = shape[i];
outs[i]->Resize(out_dims);
outs[i]->mutable_data<T>(ctx.GetPlace());
MLUCnnlTensorDesc in_desc(size, view_shape.data(), ToCnnlDataType<T>());
MLUCnnl::BroadcastTo(ctx,
in_desc.get(),
GetBasePtr(ins[i]),
out_desc.get(),
GetBasePtr(outs[i]));
}
}
};
} // namespace operators
} // namespace paddle
REGISTER_OP_MLU_KERNEL(
meshgrid,
paddle::operators::MeshgridMLUKernel<int>,
paddle::operators::MeshgridMLUKernel<float>,
paddle::operators::MeshgridMLUKernel<int64_t>,
paddle::operators::MeshgridMLUKernel<paddle::platform::float16>);
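// Illustrative sketch (not part of the original file): for input i the kernel
// above views the 1-D tensor as shape [1, ..., len_i, ..., 1] and broadcasts
// it to [len_0, ..., len_{n-1}]. A host-side reference for the 2-D case; the
// function name is an assumption for this sketch.
#include <vector>

inline void Meshgrid2D(const std::vector<float>& a, const std::vector<float>& b,
                       std::vector<float>* out_a, std::vector<float>* out_b) {
  const size_t rows = a.size(), cols = b.size();
  out_a->assign(rows * cols, 0.0f);
  out_b->assign(rows * cols, 0.0f);
  for (size_t i = 0; i < rows; ++i)
    for (size_t j = 0; j < cols; ++j) {
      (*out_a)[i * cols + j] = a[i];  // a viewed as {rows, 1}, broadcast over j
      (*out_b)[i * cols + j] = b[j];  // b viewed as {1, cols}, broadcast over i
    }
}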