Unverified commit c4a5c960, authored by Qi Li, committed by GitHub

[X86] add new kernel of relu6 and reduce_mean, test=develop (#4431)

Parent: ea4fc0bc
......@@ -88,3 +88,14 @@ REGISTER_LITE_KERNEL(sigmoid,
.BindInput("X", {LiteType::GetTensorTy(TARGET(kX86))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86))})
.Finalize();
// float
REGISTER_LITE_KERNEL(relu6,
kX86,
kFloat,
kNCHW,
paddle::lite::kernels::x86::Relu6Compute<float>,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kX86))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86))})
.Finalize();
......@@ -248,6 +248,42 @@ class SoftsignCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
virtual ~SoftsignCompute() = default;
};
// relu6(x) = min(max(0, x), 6)
template <typename T>
struct Relu6Functor {
float threshold;
explicit Relu6Functor(float threshold_) : threshold(threshold_) {}
template <typename Device, typename X, typename Out>
void operator()(Device d, X x, Out out) const {
out.device(d) =
x.cwiseMax(static_cast<T>(0)).cwiseMin(static_cast<T>(threshold));
}
};
template <typename T>
class Relu6Compute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
public:
using param_t = operators::ActivationParam;
void Run() override {
auto& param = *param_.get_mutable<operators::ActivationParam>();
param.Out->template mutable_data<T>();
auto X = param.X;
auto Out = param.Out;
auto place = lite::fluid::EigenDeviceType<TARGET(kX86)>();
CHECK(X);
CHECK(Out);
auto x = lite::fluid::EigenVector<T>::Flatten(*X);
auto out = lite::fluid::EigenVector<T>::Flatten(*Out);
Relu6Functor<T> functor(param.threshold);
functor(place, x, out);
}
virtual ~Relu6Compute() = default;
};
} // namespace x86
} // namespace kernels
} // namespace lite
......
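For readers skimming the diff: Relu6Functor above simply clamps every element to [0, threshold]. The following standalone sketch shows the same math in plain C++ (illustrative only; relu6_ref and the hard-coded default of 6.0f are not part of this patch, and the kernel itself evaluates the expression through Eigen rather than a raw loop):

#include <algorithm>
#include <vector>

// Reference for relu6(x) = min(max(0, x), threshold); mirrors Relu6Functor,
// but is not the Lite kernel itself.
std::vector<float> relu6_ref(const std::vector<float>& x, float threshold = 6.0f) {
  std::vector<float> out(x.size());
  for (size_t i = 0; i < x.size(); ++i) {
    out[i] = std::min(std::max(x[i], 0.0f), threshold);
  }
  return out;
}
// e.g. relu6_ref({-1.f, 3.f, 9.f}) -> {0.f, 3.f, 6.f}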
......@@ -23,3 +23,13 @@ REGISTER_LITE_KERNEL(reduce_sum,
.BindInput("X", {LiteType::GetTensorTy(TARGET(kX86))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86))})
.Finalize();
REGISTER_LITE_KERNEL(reduce_mean,
kX86,
kFloat,
kNCHW,
paddle::lite::kernels::x86::ReduceMeanCompute<float>,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kX86))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86))})
.Finalize();
......@@ -31,11 +31,18 @@ struct SumFunctor {
}
};
#define HANDLE_DIM(NDIM, RDIM) \
if (ndim == NDIM && rdim == RDIM) { \
paddle::lite::kernels::x86:: \
ReduceFunctor<lite::TargetType::kX86, T, NDIM, RDIM, SumFunctor>( \
*input, output, dims, keep_dim); \
struct MeanFunctor {
template <typename X, typename Y, typename Dim>
void operator()(X* x, Y* y, const Dim& dim) {
y->device(lite::fluid::EigenDeviceType<TARGET(kX86)>()) = x->mean(dim);
}
};
#define HANDLE_DIM(NDIM, RDIM, FUNCTOR) \
if (ndim == NDIM && rdim == RDIM) { \
paddle::lite::kernels::x86:: \
ReduceFunctor<lite::TargetType::kX86, T, NDIM, RDIM, FUNCTOR>( \
*input, output, dims, keep_dim); \
}
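// Illustrative note (not generated code): with the extra FUNCTOR argument,
// HANDLE_DIM(4, 2, MeanFunctor) roughly expands to
//   if (ndim == 4 && rdim == 2) {
//     paddle::lite::kernels::x86::
//         ReduceFunctor<lite::TargetType::kX86, T, 4, 2, MeanFunctor>(
//             *input, output, dims, keep_dim);
//   }
// so the same (ndim, rdim) dispatch table can serve both reduce_sum and
// reduce_mean.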
template <typename T>
......@@ -64,19 +71,58 @@ class ReduceSumCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
} else {
int ndim = input->dims().size();
int rdim = dims.size();
HANDLE_DIM(4, 3);
HANDLE_DIM(4, 2);
HANDLE_DIM(4, 1);
HANDLE_DIM(3, 2);
HANDLE_DIM(3, 1);
HANDLE_DIM(2, 1);
HANDLE_DIM(1, 1);
HANDLE_DIM(4, 3, SumFunctor);
HANDLE_DIM(4, 2, SumFunctor);
HANDLE_DIM(4, 1, SumFunctor);
HANDLE_DIM(3, 2, SumFunctor);
HANDLE_DIM(3, 1, SumFunctor);
HANDLE_DIM(2, 1, SumFunctor);
HANDLE_DIM(1, 1, SumFunctor);
}
}
virtual ~ReduceSumCompute() = default;
};
template <typename T>
class ReduceMeanCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
public:
using param_t = operators::ReduceParam;
void Run() override {
auto& param = *param_.get_mutable<operators::ReduceParam>();
// auto& context = ctx_->As<X86Context>();
auto* input = param.x;
auto* output = param.output;
param.output->template mutable_data<T>();
const auto& dims = param.dim;
bool keep_dim = param.keep_dim;
if (dims.size() == 0) {
// Flatten and reduce 1-D tensor
auto x = lite::fluid::EigenVector<T>::Flatten(*input);
auto out = lite::fluid::EigenScalar<T>::From(output);
// auto& place = *platform::CPUDeviceContext().eigen_device();
auto reduce_dim = Eigen::array<int, 1>({{0}});
MeanFunctor functor;
functor(&x, &out, reduce_dim);
} else {
int ndim = input->dims().size();
int rdim = dims.size();
HANDLE_DIM(4, 3, MeanFunctor);
HANDLE_DIM(4, 2, MeanFunctor);
HANDLE_DIM(4, 1, MeanFunctor);
HANDLE_DIM(3, 2, MeanFunctor);
HANDLE_DIM(3, 1, MeanFunctor);
HANDLE_DIM(2, 1, MeanFunctor);
HANDLE_DIM(1, 1, MeanFunctor);
}
}
virtual ~ReduceMeanCompute() = default;
};
} // namespace x86
} // namespace kernels
} // namespace lite
......
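As a rough illustration of what the new ReduceMeanCompute produces when a single dimension is reduced: averaging a rows x cols matrix over its second axis yields one mean per row. The sketch below is a standalone reference under assumed names (reduce_mean_axis1 and the fixed 2-D row-major layout are not part of the patch or the Lite API):

#include <cstddef>
#include <vector>

// Illustrative only: mean-reduce a row-major (rows x cols) matrix over axis 1.
std::vector<float> reduce_mean_axis1(const std::vector<float>& x,
                                     size_t rows, size_t cols) {
  std::vector<float> out(rows, 0.0f);
  for (size_t r = 0; r < rows; ++r) {
    for (size_t c = 0; c < cols; ++c) {
      out[r] += x[r * cols + c];
    }
    out[r] /= static_cast<float>(cols);
  }
  return out;
}
// e.g. reduce_mean_axis1({1, 2, 3, 4}, 2, 2) -> {1.5f, 3.5f}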
......@@ -89,6 +89,9 @@ bool ActivationOp::AttachImpl(const cpp::OpDesc& opdesc, lite::Scope* scope) {
} else if (opdesc.Type() == "elu") {
param_.active_type = lite_api::ActivationType::kElu;
param_.Elu_alpha = opdesc.GetAttr<float>("alpha");
} else if (opdesc.Type() == "relu6") {
param_.active_type = lite_api::ActivationType::kRelu6;
param_.threshold = opdesc.GetAttr<float>("threshold");
}
VLOG(4) << "opdesc.Type():" << opdesc.Type();
......
......@@ -403,6 +403,8 @@ struct ActivationParam : ParamBase {
float relu_threshold{1.0f};
// elu
float Elu_alpha{1.0f};
// relu6
float threshold{6.0f};
///////////////////////////////////////////////////////////////////////////////////
// get a vector of input tensors
......
......@@ -58,6 +58,7 @@ class ActivationComputeTester : public arena::TestCase {
float hard_swish_offset = 3.0;
float relu_threshold_ = 1.0;
float elu_alpha_ = 1.0;
float threshold_ = 6.0;
DDim dims_{{1}};
std::string type_ = "";
activation_type_test act_type_ = RELU;
......@@ -170,7 +171,8 @@ class ActivationComputeTester : public arena::TestCase {
case RELU6: {
for (int i = 0; i < dims_.production(); i++) {
output_data[i] = x_data[i] > 0.f ? x_data[i] : 0.f;
output_data[i] = output_data[i] < 6.0 ? output_data[i] : 6.0;
output_data[i] =
output_data[i] < threshold_ ? output_data[i] : threshold_;
}
break;
}
......@@ -273,6 +275,9 @@ class ActivationComputeTester : public arena::TestCase {
if (act_type_ == ELU) {
op_desc->SetAttr("alpha", elu_alpha_);
}
if (act_type_ == RELU6) {
op_desc->SetAttr("threshold", threshold_);
}
}
void PrepareData() override {
......@@ -510,6 +515,8 @@ TEST(Activation_relu6, precision) {
#elif defined(LITE_WITH_HUAWEI_ASCEND_NPU)
place = TARGET(kHuaweiAscendNPU);
abs_error = 1e-2; // precision_mode default is force_fp16
#elif defined(LITE_WITH_X86)
place = TARGET(kX86);
#else
return;
#endif
......
......@@ -333,9 +333,10 @@ void test_reduce_mean(Place place) {
}
TEST(ReduceMean, precision) {
// #ifdef LITE_WITH_X86
// Place place(TARGET(kX86));
// #endif
#ifdef LITE_WITH_X86
Place place(TARGET(kX86));
test_reduce_mean(place);
#endif
#ifdef LITE_WITH_ARM
Place place(TARGET(kARM));
test_reduce_mean(place);
......