diff --git a/lite/api/paddle_place.h b/lite/api/paddle_place.h
index e48686b913cc5b07f87db0a503ce7081bbe7d058..c9e45978399632739360a9e312de889648ca1f2d 100644
--- a/lite/api/paddle_place.h
+++ b/lite/api/paddle_place.h
@@ -100,7 +100,9 @@
   kSwish = 7,
   kExp = 8,
   kAbs = 9,
-  NUM = 10,
+  kHardSwish = 10,
+  kReciprocal = 11,
+  NUM = 12,
 };
 
 static size_t PrecisionTypeLength(PrecisionType type) {
diff --git a/lite/backends/arm/math/activation.cc b/lite/backends/arm/math/activation.cc
index 9f478eab60538eeca38415afea4e0989eff5a04e..26e63e23f6acb761b61b397bb881d425e3442468 100644
--- a/lite/backends/arm/math/activation.cc
+++ b/lite/backends/arm/math/activation.cc
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #include "lite/backends/arm/math/activation.h"
+#include <algorithm>
 #include <string>
 #include "lite/backends/arm/math/funcs.h"
@@ -711,6 +712,38 @@ void act_square(const float* din, float* dout, int size, int threads) {
   }
 }
 
+template <>
+void act_hard_swish(const float* din,
+                    float* dout,
+                    int size,
+                    float threshold,
+                    float scale,
+                    float offset,
+                    int threads) {
+  const float* ptr_in = din;
+  float* ptr_out = dout;
+  for (int i = 0; i < size; ++i) {
+    ptr_out[0] = std::min(std::max(0.f, ptr_in[0] + offset), threshold) *
+                 ptr_in[0] / scale;
+    ptr_in++;
+    ptr_out++;
+  }
+}
+
+template <>
+void act_reciprocal(const float* din,
+                    float* dout,
+                    int size,
+                    int threads) {
+  const float* ptr_in = din;
+  float* ptr_out = dout;
+  for (int i = 0; i < size; ++i) {
+    ptr_out[0] = 1.0 / ptr_in[0];
+    ptr_in++;
+    ptr_out++;
+  }
+}
+
 #ifdef LITE_WITH_TRAIN
 template <>
 void act_square_grad(const float* din,
diff --git a/lite/backends/arm/math/activation.h b/lite/backends/arm/math/activation.h
index 63f4418d70db25f98dea2a405de1f4bb6b0b9111..ca6b146442a3ec324a9bd244ee4ce6ad0601d4d7 100644
--- a/lite/backends/arm/math/activation.h
+++ b/lite/backends/arm/math/activation.h
@@ -72,6 +72,17 @@
 void act_rsqrt(const T* din, T* dout, int size, int threads);
 
 template <typename T>
 void act_square(const T* din, T* dout, int size, int threads);
 
+template <typename T>
+void act_hard_swish(const T* din,
+                    T* dout,
+                    int size,
+                    float threshold,
+                    float scale,
+                    float offset,
+                    int threads);
+
+template <typename T>
+void act_reciprocal(const T* din, T* dout, int size, int threads);
+
 #ifdef LITE_WITH_TRAIN
 template <typename T>
 void act_square_grad(
diff --git a/lite/core/op_lite.cc b/lite/core/op_lite.cc
index a9ccd1b9ae9a5d45f8d0e5638b3aab1d73d1903c..f8a706179374a0c86e28cf9a3638f5df2c932540 100644
--- a/lite/core/op_lite.cc
+++ b/lite/core/op_lite.cc
@@ -157,5 +157,33 @@ Tensor *OpLite::GetMutableTensor(lite::Scope *scope,
   return var->GetMutable<lite::Tensor>();
 }
 
+void OpLite::AttachInput(const cpp::OpDesc &op_desc,
+                         lite::Scope *scope,
+                         const std::string &input_name,
+                         bool is_dispensable,
+                         lite::Tensor **input_var) {
+  bool is_have_input =
+      op_desc.HasInput(input_name) && op_desc.Input(input_name).size() > 0;
+  CHECK(is_dispensable || is_have_input);
+  if (is_have_input) {
+    std::string input_var_name = op_desc.Input(input_name).front();
+    *input_var = scope->FindVar(input_var_name)->GetMutable<lite::Tensor>();
+  }
+}
+
+void OpLite::AttachOutput(const cpp::OpDesc &op_desc,
+                          lite::Scope *scope,
+                          const std::string &output_name,
+                          bool is_dispensable,
+                          lite::Tensor **output_var) {
+  bool is_have_output =
+      op_desc.HasOutput(output_name) && op_desc.Output(output_name).size() > 0;
+  CHECK(is_dispensable || is_have_output);
+  if (is_have_output) {
+    std::string output_var_name = op_desc.Output(output_name).front();
+    *output_var = scope->FindVar(output_var_name)->GetMutable<lite::Tensor>();
+  }
+}
+
 }  // namespace lite
 }  // namespace paddle
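Note: AttachInput/AttachOutput fold the "optional slot" boilerplate that ops previously open-coded in AttachImpl. A minimal sketch of the intended call pattern (hypothetical op and param names; the real call sites are in ctc_align_op.cc later in this patch):

    bool MyOp::AttachImpl(const cpp::OpDesc &op_desc, lite::Scope *scope) {
      // Required slot: the CHECK inside AttachInput fires if "X" is missing.
      AttachInput(op_desc, scope, "X", /*is_dispensable=*/false, &param_.x);
      // Dispensable slot: silently left null if "Bias" is absent.
      AttachInput(op_desc, scope, "Bias", /*is_dispensable=*/true, &param_.bias);
      AttachOutput(op_desc, scope, "Out", /*is_dispensable=*/false, &param_.out);
      return true;
    }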
diff --git a/lite/core/op_lite.h b/lite/core/op_lite.h
index 1cdc33825cb4ffb758b46ac4b9bee968b3fca055..428b188c468ded790e74c9cc4f5da5c7efe2fd00 100644
--- a/lite/core/op_lite.h
+++ b/lite/core/op_lite.h
@@ -105,6 +105,20 @@ class OpLite : public Registry {
     return kernel_.get();
   }
 
+  // Attach input variable from scope by op_desc and input name
+  void AttachInput(const cpp::OpDesc &op_desc,
+                   lite::Scope *scope,
+                   const std::string &input_name,
+                   bool is_dispensable,
+                   lite::Tensor **input_var);
+
+  // Attach output variable from scope by op_desc and output name
+  void AttachOutput(const cpp::OpDesc &op_desc,
+                    lite::Scope *scope,
+                    const std::string &output_name,
+                    bool is_dispensable,
+                    lite::Tensor **output_var);
+
   virtual ~OpLite() = default;
 
  protected:
diff --git a/lite/core/op_registry.cc b/lite/core/op_registry.cc
index fe1dff3c99c1d2413888e78c89c999caea0ab030..84f54b57b86c012ac72e367d657263b156e6c301 100644
--- a/lite/core/op_registry.cc
+++ b/lite/core/op_registry.cc
@@ -152,6 +152,8 @@ KernelRegistry::KernelRegistry()
   INIT_FOR(kMLU, kInt16, kNCHW);
 
   INIT_FOR(kHost, kFloat, kNCHW);
+  INIT_FOR(kHost, kInt32, kNCHW);
+  INIT_FOR(kHost, kInt64, kNCHW);
   INIT_FOR(kHost, kAny, kNCHW);
   INIT_FOR(kHost, kFloat, kNHWC);
   INIT_FOR(kHost, kFloat, kAny);
diff --git a/lite/core/op_registry.h b/lite/core/op_registry.h
index 3c41c1fd8af240401c3edf0343433f8d8d9c85db..96c9fc2358199594cf9590385c2efdaf1c671425 100644
--- a/lite/core/op_registry.h
+++ b/lite/core/op_registry.h
@@ -135,6 +135,12 @@ class KernelRegistry final {
                  KernelRegistryForTarget<TARGET(kHost),
                                          PRECISION(kFloat),
                                          DATALAYOUT(kNCHW)> *,  //
+                 KernelRegistryForTarget<TARGET(kHost),
+                                         PRECISION(kInt32),
+                                         DATALAYOUT(kNCHW)> *,  //
+                 KernelRegistryForTarget<TARGET(kHost),
+                                         PRECISION(kInt64),
+                                         DATALAYOUT(kNCHW)> *,  //
                  KernelRegistryForTarget<TARGET(kHost),
                                          PRECISION(kAny),
                                          DATALAYOUT(kNCHW)> *,  //
diff --git a/lite/kernels/arm/activation_compute.cc b/lite/kernels/arm/activation_compute.cc
index d609716ee53ec584b8340e9b72498ed95afd5820..ea60cf528ea71f0bc0ba0a162063bd76899622f9 100644
--- a/lite/kernels/arm/activation_compute.cc
+++ b/lite/kernels/arm/activation_compute.cc
@@ -179,6 +179,34 @@ void SquareCompute::Run() {
       x_data, output_data, x_dims.production(), ctx.threads());
 }
 
+void HardSwishCompute::Run() {
+  auto& param = this->Param<param_t>();
+  auto& ctx = this->ctx_->template As<ARMContext>();
+  auto x_dims = param.X->dims();
+  auto x_data = param.X->data<float>();
+  auto output_data = param.Out->mutable_data<float>();
+  float threshold = param.hard_swish_threshold;
+  float scale = param.hard_swish_scale;
+  float offset = param.hard_swish_offset;
+  lite::arm::math::act_hard_swish(x_data,
+                                  output_data,
+                                  x_dims.production(),
+                                  threshold,
+                                  scale,
+                                  offset,
+                                  ctx.threads());
+}
+
+void ReciprocalCompute::Run() {
+  auto& param = this->Param<param_t>();
+  auto& ctx = this->ctx_->template As<ARMContext>();
+  auto x_dims = param.X->dims();
+  auto x_data = param.X->data<float>();
+  auto output_data = param.Out->mutable_data<float>();
+  lite::arm::math::act_reciprocal(
+      x_data, output_data, x_dims.production(), ctx.threads());
+}
+
 }  // namespace arm
 }  // namespace kernels
 }  // namespace lite
@@ -275,3 +303,21 @@ REGISTER_LITE_KERNEL(
     .BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))})
     .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))})
     .Finalize();
+REGISTER_LITE_KERNEL(hard_swish,
+                     kARM,
+                     kFloat,
+                     kNCHW,
+                     paddle::lite::kernels::arm::HardSwishCompute,
+                     def)
+    .BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))})
+    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))})
+    .Finalize();
+REGISTER_LITE_KERNEL(reciprocal,
+                     kARM,
+                     kFloat,
+                     kNCHW,
+                     paddle::lite::kernels::arm::ReciprocalCompute,
+                     def)
+    .BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))})
+    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))})
+    .Finalize();
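Note: elementwise, the kernel computes out = x * min(max(0, x + offset), threshold) / scale. With the defaults added to ActivationParam below (threshold = 6, scale = 6, offset = 3) this is the standard hard-swish x * relu6(x + 3) / 6; e.g. x = 1 gives min(max(0, 4), 6) * 1 / 6 ≈ 0.667, and any x <= -3 gives 0. Reciprocal (out = 1 / x) is unbounded near x = 0, so the 2e-5 absolute tolerance used in the tests below assumes inputs bounded away from zero.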
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))}) + .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))}) + .Finalize(); diff --git a/lite/kernels/arm/activation_compute.h b/lite/kernels/arm/activation_compute.h index 476d7bb0a32db193d9afb1451507699d0af71736..2e8deda786a1ea9af70499c7b33c8aa1c6e19370 100644 --- a/lite/kernels/arm/activation_compute.h +++ b/lite/kernels/arm/activation_compute.h @@ -148,6 +148,24 @@ class SquareCompute : public KernelLite { virtual ~SquareCompute() = default; }; +class HardSwishCompute : public KernelLite { + public: + using param_t = operators::ActivationParam; + + void Run() override; + + virtual ~HardSwishCompute() = default; +}; + +class ReciprocalCompute : public KernelLite { + public: + using param_t = operators::ActivationParam; + + void Run() override; + + virtual ~ReciprocalCompute() = default; +}; + } // namespace arm } // namespace kernels } // namespace lite diff --git a/lite/kernels/host/CMakeLists.txt b/lite/kernels/host/CMakeLists.txt index a52428aa097099150139de82627d5770c9b9071c..94fe384d0414d87f38fb0d1ab3e8ac1033423702 100644 --- a/lite/kernels/host/CMakeLists.txt +++ b/lite/kernels/host/CMakeLists.txt @@ -5,3 +5,4 @@ add_kernel(fetch_compute_host Host basic SRCS fetch_compute.cc DEPS ${lite_kerne add_kernel(reshape_compute_host Host basic SRCS reshape_compute.cc DEPS ${lite_kernel_deps} reshape_op) add_kernel(multiclass_nms_compute_host Host basic SRCS multiclass_nms_compute.cc DEPS ${lite_kernel_deps}) add_kernel(crf_decoding_compute_host Host extra SRCS crf_decoding_compute.cc DEPS ${lite_kernel_deps}) +add_kernel(ctc_align_compute_host Host extra SRCS ctc_align_compute.cc DEPS ${lite_kernel_deps}) diff --git a/lite/kernels/host/ctc_align_compute.cc b/lite/kernels/host/ctc_align_compute.cc new file mode 100644 index 0000000000000000000000000000000000000000..a62c2ee15ac2752d5d3349fbaaeb18f31ac4c5a0 --- /dev/null +++ b/lite/kernels/host/ctc_align_compute.cc @@ -0,0 +1,172 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "lite/kernels/host/ctc_align_compute.h" +#include +#include +#include +#include +#include + +namespace paddle { +namespace lite { +namespace kernels { +namespace host { + +LoD ToAbs(const LoD& in) { + if (in.empty()) return in; + LoD result; + for (auto& src : in) { + std::vector dest(src.size() + 1, 0); + for (int i = 0; i < src.size(); i++) { + dest[i + 1] = dest[i] + src[i]; + } + result.emplace_back(dest); + } + return result; +} + +LoD ToNorm(const LoD& in) { + if (in.empty()) return in; + LoD result; + for (auto& src : in) { + std::vector dest(src.size() - 1, 0); + for (int i = 0; i < dest.size(); i++) { + dest[i] = src[i + 1] - src[i]; + } + result.emplace_back(dest); + } + return result; +} + +LoD ToAbsOffset(const LoD& in) { + // the lowest level stores relative offsets + if (in.empty() || in.size() == 1) return in; + LoD result = in; + for (auto level = static_cast(in.size() - 2); level >= 0; level--) { + for (size_t i = 0; i < in[level].size(); ++i) { + size_t index = in[level][i]; + result[level][i] = result[level + 1][index]; + } + } + return result; +} + +template +void CtcAlignCompute::Run() { + auto& param = this->template Param(); + auto* input = param.input; + auto* output = param.output; + size_t blank = static_cast(param.blank); + bool merge_repeated = param.merge_repeated; + size_t padding_value = static_cast(param.padding_value); + + const auto* input_data = input->template data(); + auto input_dims = input->dims(); + auto* output_data = output->template mutable_data(); + + if (input->lod().empty()) { + auto* input_length = param.input_length; + auto* output_length = param.output_length; + CHECK(input_length != nullptr); + CHECK(output_length != nullptr); + const auto* input_length_data = input_length->template data(); + auto* output_length_data = output_length->template mutable_data(); + + for (size_t batch_id = 0; batch_id < (unsigned)input_dims[0]; batch_id++) { + T prev_token = -1; + size_t output_idx = 0; + for (size_t i = 0; i < (unsigned)input_length_data[batch_id]; i++) { + size_t input_ind = batch_id * input_dims[1] + i; + if ((unsigned)input_data[input_ind] != blank && + !(merge_repeated && input_data[input_ind] == prev_token)) { + output_data[batch_id * input_dims[1] + output_idx] = + input_data[input_ind]; + ++output_idx; + } + prev_token = input_data[input_ind]; + } + output_length_data[batch_id] = output_idx; + for (size_t j = output_idx; j < (unsigned)input_dims[1]; j++) + output_data[batch_id * input_dims[1] + j] = padding_value; + } + } else { + const size_t level = 0; + + auto input_lod = input->lod(); + input_lod = ToAbs(input->lod()); + input_lod = ToAbsOffset(input_lod); + CHECK_EQ(input_dims[0], static_cast(input_lod[level].back())); + + const size_t num_sequences = input_lod[level].size() - 1; + // merge repeated tokens and delete blank + size_t output_idx = 0; + std::vector output_lod0(1, 0); + for (size_t seq_idx = 0; seq_idx < num_sequences; ++seq_idx) { + T prev_token = -1; + for (size_t i = input_lod[level][seq_idx]; + i < input_lod[level][seq_idx + 1]; + ++i) { + if ((unsigned)input_data[i] != blank && + !(merge_repeated && input_data[i] == prev_token)) { + output_data[output_idx] = input_data[i]; + ++output_idx; + } + prev_token = input_data[i]; + } + output_lod0.push_back(static_cast(output_idx)); + } + + LoD output_lod; + output_lod.push_back(output_lod0); + output_lod = ToNorm(output_lod); + output->set_lod(output_lod); + output->Resize({static_cast(output_lod0.back()), 1}); + if (output_lod0.back() == 0) { + 
+using ctc_align_int64 =
+    paddle::lite::kernels::host::CtcAlignCompute<int64_t, PRECISION(kInt64)>;
+REGISTER_LITE_KERNEL(ctc_align, kHost, kInt64, kNCHW, ctc_align_int64, def)
+    .BindInput("Input",
+               {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kInt64))})
+    .BindInput("InputLength",
+               {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kInt64))})
+    .BindOutput("Output",
+                {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kInt64))})
+    .BindOutput("OutputLength",
+                {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kInt64))})
+    .Finalize();
+
+using ctc_align_int32 =
+    paddle::lite::kernels::host::CtcAlignCompute<int32_t, PRECISION(kInt32)>;
+REGISTER_LITE_KERNEL(ctc_align, kHost, kInt32, kNCHW, ctc_align_int32, def)
+    .BindInput("Input",
+               {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kInt32))})
+    .BindInput("InputLength",
+               {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kInt32))})
+    .BindOutput("Output",
+                {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kInt32))})
+    .BindOutput("OutputLength",
+                {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kInt32))})
+    .Finalize();
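Note: the one templated kernel is registered twice, once per integer precision, so label tensors can stay in whichever dtype the exporting model used. Which instantiation runs is chosen through the usual Place mechanism; the tests below construct Place(TARGET(kHost), PRECISION(kInt32)) to pick the int32 variant.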
diff --git a/lite/kernels/host/ctc_align_compute.h b/lite/kernels/host/ctc_align_compute.h
new file mode 100644
index 0000000000000000000000000000000000000000..737fb3be6c96d91a3cde4a8f9053c6f7b9c7ec69
--- /dev/null
+++ b/lite/kernels/host/ctc_align_compute.h
@@ -0,0 +1,36 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include "lite/core/kernel.h"
+#include "lite/core/op_registry.h"
+#include "lite/core/tensor.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace host {
+
+template <typename T, PrecisionType PType>
+class CtcAlignCompute : public KernelLite<TARGET(kHost), PType> {
+ public:
+  void Run() override;
+
+  virtual ~CtcAlignCompute() = default;
+};
+
+}  // namespace host
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
diff --git a/lite/kernels/x86/CMakeLists.txt b/lite/kernels/x86/CMakeLists.txt
index 98f41dd27618c4785fdbfe7216bb15b430f3bcd0..2036a343d722d5c01a4b9dcd0d4cdf682a92d218 100644
--- a/lite/kernels/x86/CMakeLists.txt
+++ b/lite/kernels/x86/CMakeLists.txt
@@ -2,7 +2,7 @@ if(NOT LITE_WITH_X86)
   return()
 endif()
 
-add_kernel(activation_compute_x86 X86 basic SRCS activation_compute.cc DEPS ${lite_kernel_deps} activation_ops math_function)
+add_kernel(activation_compute_x86 X86 basic SRCS activation_compute.cc DEPS ${lite_kernel_deps} math_function)
 # lite_cc_library(mean_compute_x86 SRCS mean_compute.cc DEPS ${lite_kernel_deps})
 # lite_cc_library(fill_constant_compute_x86 SRCS fill_constant_compute.cc DEPS ${lite_kernel_deps})
 # lite_cc_library(sgd_compute_x86 SRCS sgd_compute.cc DEPS ${lite_kernel_deps})
diff --git a/lite/kernels/x86/activation_compute.h b/lite/kernels/x86/activation_compute.h
index d41de08d7ceccceb0c0ea77c306459118b0eb309..65d270e02fab902a1dfa92ddf27de040ef43a1b9 100644
--- a/lite/kernels/x86/activation_compute.h
+++ b/lite/kernels/x86/activation_compute.h
@@ -21,7 +21,7 @@
 #include "lite/core/op_lite.h"
 #include "lite/core/op_registry.h"
 #include "lite/fluid/eigen.h"
-#include "lite/operators/activation_ops.h"
+#include "lite/operators/op_params.h"
 
 namespace paddle {
 namespace lite {
diff --git a/lite/operators/CMakeLists.txt b/lite/operators/CMakeLists.txt
index a3f419a1963bd9066d0d879add56a9d1948dc414..c7fa674bff745df29b271e10c8c4d99687a889ed 100644
--- a/lite/operators/CMakeLists.txt
+++ b/lite/operators/CMakeLists.txt
@@ -14,7 +14,7 @@ add_operator(reshape_op basic SRCS reshape_op.cc DEPS ${op_DEPS} )
 add_operator(batch_norm_op basic SRCS batch_norm_op.cc DEPS ${op_DEPS})
 add_operator(feed_op basic SRCS feed_op.cc DEPS ${op_DEPS})
 add_operator(fetch_op basic SRCS fetch_op.cc DEPS ${op_DEPS})
-add_operator(activation_ops basic SRCS activation_ops.cc DEPS ${op_DEPS})
+add_operator(activation_basic_ops basic SRCS activation_ops.cc DEPS ${op_DEPS})
 add_operator(elementwise_ops basic SRCS elementwise_ops.cc DEPS ${op_DEPS})
 add_operator(box_coder_op_lite basic SRCS box_coder_op.cc DEPS ${op_DEPS})
 add_operator(multiclass_nms_op_lite basic SRCS multiclass_nms_op.cc DEPS ${op_DEPS})
@@ -60,6 +60,7 @@ add_operator(power_op extra SRCS power_op.cc DEPS ${op_DEPS})
 add_operator(norm_op extra SRCS norm_op.cc DEPS ${op_DEPS})
 
 # 3.extra ops
+add_operator(activation_extra_ops extra SRCS activation_extra_ops.cc DEPS ${op_DEPS})
 add_operator(search_group_padding extra SRCS search_group_padding_op.cc DEPS ${op_DEPS})
 add_operator(lrn_op_lite extra SRCS lrn_op.cc DEPS ${op_DEPS})
 add_operator(decode_bboxes_op_lite extra SRCS decode_bboxes_op.cc DEPS ${op_DEPS})
@@ -106,6 +107,7 @@ add_operator(conditional_block_op_lite extra SRCS conditional_block_op.cc DEPS ${op_DEPS})
 add_operator(collect_fpn_proposals_op_lite extra SRCS collect_fpn_proposals_op.cc DEPS ${op_DEPS})
 add_operator(distribute_fpn_proposals_op_lite extra SRCS distribute_fpn_proposals_op.cc DEPS ${op_DEPS})
 add_operator(crf_decoding_op_lite extra SRCS crf_decoding_op.cc DEPS ${op_DEPS})
+add_operator(ctc_align_op_lite extra SRCS ctc_align_op.cc DEPS ${op_DEPS})
 
 # for OCR specific
 add_operator(while_op extra SRCS while_op.cc DEPS ${op_DEPS})
diff --git a/lite/operators/activation_extra_ops.cc b/lite/operators/activation_extra_ops.cc
new file mode 100644
index 0000000000000000000000000000000000000000..4c773b4327abd48532a1bc9283963bd0dad19da6
--- /dev/null
+++ b/lite/operators/activation_extra_ops.cc
@@ -0,0 +1,32 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/core/op_registry.h"
+#include "lite/operators/activation_ops.h"
+
+// Extra activation ops
+REGISTER_LITE_OP(square, paddle::lite::operators::ActivationOp);
+REGISTER_LITE_OP(relu_clipped, paddle::lite::operators::ActivationOp);
+REGISTER_LITE_OP(swish, paddle::lite::operators::ActivationOp);
+REGISTER_LITE_OP(log, paddle::lite::operators::ActivationOp);
+REGISTER_LITE_OP(exp, paddle::lite::operators::ActivationOp);
+REGISTER_LITE_OP(abs, paddle::lite::operators::ActivationOp);
+REGISTER_LITE_OP(floor, paddle::lite::operators::ActivationOp);
+REGISTER_LITE_OP(hard_sigmoid, paddle::lite::operators::ActivationOp);
+REGISTER_LITE_OP(sqrt, paddle::lite::operators::ActivationOp);
+REGISTER_LITE_OP(rsqrt, paddle::lite::operators::ActivationOp);
+REGISTER_LITE_OP(softsign, paddle::lite::operators::ActivationOp);
+REGISTER_LITE_OP(gelu, paddle::lite::operators::ActivationOp);
+REGISTER_LITE_OP(hard_swish, paddle::lite::operators::ActivationOp);
+REGISTER_LITE_OP(reciprocal, paddle::lite::operators::ActivationOp);
diff --git a/lite/operators/activation_ops.cc b/lite/operators/activation_ops.cc
index 13abe0c53e95363e7f54c56819eaac26ef720072..a3d9895955d99b96609a8c35e2493b17a11b9181 100644
--- a/lite/operators/activation_ops.cc
+++ b/lite/operators/activation_ops.cc
@@ -74,6 +74,14 @@ bool ActivationOp::AttachImpl(const cpp::OpDesc& opdesc, lite::Scope* scope) {
   } else if (opdesc.Type() == "abs") {
     // abs
     param_.active_type = lite_api::ActivationType::kAbs;
+  } else if (opdesc.Type() == "hard_swish") {
+    // hard_swish
+    param_.active_type = lite_api::ActivationType::kHardSwish;
+    param_.hard_swish_threshold = opdesc.GetAttr<float>("threshold");
+    param_.hard_swish_scale = opdesc.GetAttr<float>("scale");
+    param_.hard_swish_offset = opdesc.GetAttr<float>("offset");
+  } else if (opdesc.Type() == "reciprocal") {
+    param_.active_type = lite_api::ActivationType::kReciprocal;
   }
 
   VLOG(4) << "opdesc.Type():" << opdesc.Type();
@@ -84,21 +92,11 @@ bool ActivationOp::AttachImpl(const cpp::OpDesc& opdesc, lite::Scope* scope) {
 }
 
 }  // namespace operators
 }  // namespace lite
 }  // namespace paddle
-REGISTER_LITE_OP(square, paddle::lite::operators::ActivationOp);
-REGISTER_LITE_OP(relu, paddle::lite::operators::ActivationOp);
-REGISTER_LITE_OP(leaky_relu, paddle::lite::operators::ActivationOp);
-REGISTER_LITE_OP(relu_clipped, paddle::lite::operators::ActivationOp);
-REGISTER_LITE_OP(prelu, paddle::lite::operators::ActivationOp);
+
+// Basic activation ops
 REGISTER_LITE_OP(sigmoid, paddle::lite::operators::ActivationOp);
 REGISTER_LITE_OP(tanh, paddle::lite::operators::ActivationOp);
-REGISTER_LITE_OP(swish, paddle::lite::operators::ActivationOp);
+REGISTER_LITE_OP(relu, paddle::lite::operators::ActivationOp);
+REGISTER_LITE_OP(leaky_relu, paddle::lite::operators::ActivationOp);
 REGISTER_LITE_OP(relu6, paddle::lite::operators::ActivationOp);
-REGISTER_LITE_OP(log, paddle::lite::operators::ActivationOp);
-REGISTER_LITE_OP(exp, paddle::lite::operators::ActivationOp);
-REGISTER_LITE_OP(abs, paddle::lite::operators::ActivationOp);
-REGISTER_LITE_OP(floor, paddle::lite::operators::ActivationOp);
-REGISTER_LITE_OP(hard_sigmoid, paddle::lite::operators::ActivationOp);
-REGISTER_LITE_OP(sqrt, paddle::lite::operators::ActivationOp);
-REGISTER_LITE_OP(rsqrt, paddle::lite::operators::ActivationOp);
-REGISTER_LITE_OP(softsign, paddle::lite::operators::ActivationOp);
-REGISTER_LITE_OP(gelu, paddle::lite::operators::ActivationOp);
+REGISTER_LITE_OP(prelu, paddle::lite::operators::ActivationOp);
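Note: hard_swish reads its three attributes unconditionally, so op descs converted from Paddle are assumed to carry threshold, scale, and offset (GetAttr<float> is expected to fail a CHECK on a missing attribute). The defaults in ActivationParam (6, 6, 3) only matter when the param struct is built programmatically, e.g. by fusion passes.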
diff --git a/lite/operators/ctc_align_op.cc b/lite/operators/ctc_align_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..ea8e0c27059258a4e7c857c80ab64eb381446035
--- /dev/null
+++ b/lite/operators/ctc_align_op.cc
@@ -0,0 +1,61 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/operators/ctc_align_op.h"
+#include <vector>
+#include "lite/core/op_lite.h"
+#include "lite/core/op_registry.h"
+
+namespace paddle {
+namespace lite {
+namespace operators {
+
+bool CtcAlignOpLite::CheckShape() const {
+  CHECK_OR_FALSE(param_.input != nullptr);
+  CHECK_OR_FALSE(param_.output != nullptr);
+
+  auto* input = param_.input;
+  auto* input_length = param_.input_length;
+  auto input_lod = input->lod();
+  CHECK_OR_FALSE(!input_lod.empty() || input_length != nullptr);
+  return true;
+}
+
+bool CtcAlignOpLite::InferShapeImpl() const {
+  auto input_dims = param_.input->dims();
+  // The decoded length is data-dependent, so the input dims are only a
+  // placeholder here; the kernel resizes the output tensor in Run().
+  param_.output->Resize(input_dims);
+  if (param_.input_length != nullptr && param_.output_length != nullptr) {
+    param_.output_length->Resize({input_dims[0], 1});
+  }
+  return true;
+}
+
+bool CtcAlignOpLite::AttachImpl(const cpp::OpDesc& op_desc,
+                                lite::Scope* scope) {
+  AttachInput(op_desc, scope, "Input", false, &param_.input);
+  AttachInput(op_desc, scope, "InputLength", true, &param_.input_length);
+  AttachOutput(op_desc, scope, "Output", false, &param_.output);
+  AttachOutput(op_desc, scope, "OutputLength", true, &param_.output_length);
+  param_.blank = op_desc.GetAttr<int>("blank");
+  param_.merge_repeated = op_desc.GetAttr<bool>("merge_repeated");
+  param_.padding_value = op_desc.GetAttr<int>("padding_value");
+  return true;
+}
+
+}  // namespace operators
+}  // namespace lite
+}  // namespace paddle
+
+REGISTER_LITE_OP(ctc_align, paddle::lite::operators::CtcAlignOpLite);
diff --git a/lite/operators/ctc_align_op.h b/lite/operators/ctc_align_op.h
new file mode 100644
index 0000000000000000000000000000000000000000..7593860e06c3d0104ca1f7ea7281d23149408923
--- /dev/null
+++ b/lite/operators/ctc_align_op.h
@@ -0,0 +1,48 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include <string>
+#include "lite/core/op_lite.h"
+#include "lite/core/scope.h"
+#include "lite/operators/op_params.h"
+#include "lite/utils/all.h"
+
+namespace paddle {
+namespace lite {
+namespace operators {
+
+class CtcAlignOpLite : public OpLite {
+ public:
+  CtcAlignOpLite() {}
+
+  explicit CtcAlignOpLite(const std::string &op_type) : OpLite(op_type) {}
+
+  bool CheckShape() const override;
+
+  bool InferShapeImpl() const override;
+
+  bool AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) override;
+
+  void AttachKernel(KernelBase *kernel) override { kernel->SetParam(param_); }
+
+  std::string DebugString() const override { return "ctc_align"; }
+
+ private:
+  mutable CtcAlignParam param_;
+};
+
+}  // namespace operators
+}  // namespace lite
+}  // namespace paddle
diff --git a/lite/operators/op_params.h b/lite/operators/op_params.h
index 5ed02a6eb61423c959e0940bce763622b29b6a33..466de112fb2983e325b2bec17e90018984d7e233 100644
--- a/lite/operators/op_params.h
+++ b/lite/operators/op_params.h
@@ -336,17 +336,22 @@
 /// ----------------------- activation operators ----------------------
 struct ActivationParam : ParamBase {
   const lite::Tensor* X{};
+  lite::Tensor* Out{};
+  lite_api::ActivationType active_type;
+  bool has_active{false};
   float Leaky_relu_alpha{0};   // leaky_relu param
   float Relu_clipped_coef{6};  // relu_clipped param
   std::string Prelu_mode{
       "channel"};  // prelu param, can be "all", "channel" or "element"
   lite::Tensor* Prelu_alpha{};  // prelu param
   float Swish_beta;             // swish param
+  // hard_sigmoid param
   float hard_sigmoid_slope{0.2};
   float hard_sigmoid_offset{0.5};
-  lite::Tensor* Out{};
-  bool has_active{false};
-  lite_api::ActivationType active_type;
+  // hard_swish param
+  float hard_swish_threshold{6.0};
+  float hard_swish_scale{6.0};
+  float hard_swish_offset{3.0};
 };
 
 struct ActivationGradParam : ParamBase {
@@ -1444,6 +1449,16 @@ struct CrfDecodingParam : ParamBase {
   lite::Tensor* viterbi_path{};
 };
 
+struct CtcAlignParam : ParamBase {
+  lite::Tensor* input{};
+  lite::Tensor* input_length{};
+  lite::Tensor* output{};
+  lite::Tensor* output_length{};
+  int blank{0};
+  bool merge_repeated{true};
+  int padding_value{0};
+};
+
 struct XPUResNet50Param : ParamBase {
   lite::Tensor* input{};
   std::vector<lite::Tensor*> filter;
diff --git a/lite/tests/kernels/CMakeLists.txt b/lite/tests/kernels/CMakeLists.txt
index e108e35af76c6b5f2c5719b650b06d849a2f3887..60e5a928330c2bc132a35f26f81ea09887d9c313 100644
--- a/lite/tests/kernels/CMakeLists.txt
+++ b/lite/tests/kernels/CMakeLists.txt
@@ -61,6 +61,7 @@ if(LITE_BUILD_EXTRA)
   lite_cc_test(test_kernel_lookup_table_compute SRCS lookup_table_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
   lite_cc_test(test_kernel_lookup_table_dequant_compute SRCS lookup_table_dequant_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
   lite_cc_test(test_kernel_gather_compute SRCS gather_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
+  lite_cc_test(test_kernel_ctc_align_compute SRCS ctc_align_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
 
   # for training kernel
   if (LITE_WITH_TRAIN)
diff --git a/lite/tests/kernels/activation_compute_test.cc b/lite/tests/kernels/activation_compute_test.cc
index 5a0b033b1b8c4d8f28aa05c3f2fcac40f2569bf4..c71eac8d4532eefd5569421807c85128746c6c8b 100644
--- a/lite/tests/kernels/activation_compute_test.cc
+++ b/lite/tests/kernels/activation_compute_test.cc
@@ -36,7 +36,9 @@ enum activation_type_test {
   FLOOR,
   RSQRT,
   GELU,
-  SQUARE
+  SQUARE,
+  HARD_SWISH,
+  RECIPROCAL
 };
 
 class ActivationComputeTester : public arena::TestCase {
@@ -49,6 +51,9 @@
   float relu_clipped_coef_ = 6.;
   std::string prelu_mode_ = "";
   float swish_beta_ = 0.;
+  float hard_swish_threshold = 6.0;
+  float hard_swish_scale = 6.0;
+  float hard_swish_offset = 3.0;
   DDim dims_{{1}};
   std::string type_ = "";
   activation_type_test act_type_ = RELU;
@@ -199,6 +204,20 @@
       }
       break;
     }
+    case HARD_SWISH: {
+      for (int i = 0; i < dims_.production(); i++) {
+        float max_value = std::max(0.f, x_data[i] + hard_swish_offset);
+        float min_value = std::min(max_value, hard_swish_threshold);
+        output_data[i] = min_value * x_data[i] / hard_swish_scale;
+      }
+      break;
+    }
+    case RECIPROCAL: {
+      for (int i = 0; i < dims_.production(); i++) {
+        output_data[i] = 1.0 / x_data[i];
+      }
+      break;
+    }
     default:
       LOG(INFO) << "the type of activation is unknow.";
   }
@@ -221,6 +240,11 @@
     if (act_type_ == SWISH) {
       op_desc->SetAttr("beta", swish_beta_);
     }
+    if (act_type_ == HARD_SWISH) {
+      op_desc->SetAttr("threshold", hard_swish_threshold);
+      op_desc->SetAttr("scale", hard_swish_scale);
+      op_desc->SetAttr("offset", hard_swish_offset);
+    }
   }
 
   void PrepareData() override {
@@ -552,5 +576,61 @@ TEST(Activation_gelu, precision) {
   }
 }
 
+TEST(activation_hard_swish, precision) {
+  LOG(INFO) << "test hard_swish op";
+  Place place;
+  float abs_error = 2e-5;
+
+#if defined(LITE_WITH_ARM)
+  place = TARGET(kARM);
+#else
+  return;
+#endif
+
+  for (auto dims : std::vector<std::vector<int64_t>>{
+           {1, 3, 2, 4}, {2, 3, 4}, {5, 4}, {8}}) {
+    std::unique_ptr<arena::TestCase> tester(
+        new ActivationComputeTester(place,
+                                    "def",
+                                    0.01,
+                                    6.,
+                                    "all",
+                                    0.,
+                                    DDim(dims),
+                                    "hard_swish",
+                                    HARD_SWISH));
+    arena::Arena arena(std::move(tester), place, abs_error);
+    arena.TestPrecision();
+  }
+}
+
+TEST(activation_reciprocal, precision) {
+  LOG(INFO) << "test reciprocal op";
+  Place place;
+  float abs_error = 2e-5;
+
+#if defined(LITE_WITH_ARM)
+  place = TARGET(kARM);
+#else
+  return;
+#endif
+
+  for (auto dims : std::vector<std::vector<int64_t>>{
+           {1, 3, 2, 4}, {2, 3, 4}, {5, 4}, {8}}) {
+    std::unique_ptr<arena::TestCase> tester(
+        new ActivationComputeTester(place,
+                                    "def",
+                                    0.01,
+                                    6.,
+                                    "all",
+                                    0.,
+                                    DDim(dims),
+                                    "reciprocal",
+                                    RECIPROCAL));
+    arena::Arena arena(std::move(tester), place, abs_error);
+    arena.TestPrecision();
+  }
+}
+
 }  // namespace lite
 }  // namespace paddle
diff --git a/lite/tests/kernels/ctc_align_compute_test.cc b/lite/tests/kernels/ctc_align_compute_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..e32012549cab42858938388857c65e14f65be099
--- /dev/null
+++ b/lite/tests/kernels/ctc_align_compute_test.cc
@@ -0,0 +1,254 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <gtest/gtest.h>
+#include "lite/api/paddle_use_kernels.h"
+#include "lite/api/paddle_use_ops.h"
+#include "lite/core/arena/framework.h"
+
+namespace paddle {
+namespace lite {
+
+class CtcAlignComputeTester : public arena::TestCase {
+ protected:
+  // common attributes for this op.
+  std::string input_ = "input";
+  std::string input_length_ = "input_length";
+  std::string output_ = "output";
+  std::string output_length_ = "output_length";
+  std::vector<int32_t> input_data_;
+  std::vector<int64_t> input_shape_;
+  std::vector<std::vector<uint64_t>> input_lod_;
+  std::vector<int32_t> input_length_data_;
+  std::vector<int64_t> input_length_shape_;
+  std::vector<int32_t> output_data_;
+  std::vector<int64_t> output_shape_;
+  std::vector<std::vector<uint64_t>> output_lod_;
+  std::vector<int32_t> output_length_data_;
+  std::vector<int64_t> output_length_shape_;
+  int blank_;
+  bool merge_repeated_;
+  int padding_value_;
+
+ public:
+  CtcAlignComputeTester(const Place& place,
+                        const std::string& alias,
+                        const std::vector<int32_t>& input_data,
+                        const std::vector<int64_t> input_shape,
+                        const std::vector<std::vector<uint64_t>>& input_lod,
+                        const std::vector<int32_t>& input_length_data,
+                        const std::vector<int64_t> input_length_shape,
+                        const int blank,
+                        const bool merge_repeated,
+                        const int padding_value,
+                        const std::vector<int32_t>& output_data,
+                        const std::vector<int64_t>& output_shape,
+                        const std::vector<std::vector<uint64_t>>& output_lod,
+                        const std::vector<int32_t>& output_length_data,
+                        const std::vector<int64_t>& output_length_shape)
+      : TestCase(place, alias) {
+    input_data_ = input_data;
+    input_shape_ = input_shape;
+    input_lod_ = input_lod;
+    input_length_data_ = input_length_data;
+    input_length_shape_ = input_length_shape;
+    blank_ = blank;
+    merge_repeated_ = merge_repeated;
+    padding_value_ = padding_value;
+    output_data_ = output_data;
+    output_shape_ = output_shape;
+    output_lod_ = output_lod;
+    output_length_data_ = output_length_data;
+    output_length_shape_ = output_length_shape;
+  }
+
+  void RunBaseline(Scope* scope) override {
+    auto* output_tensor = scope->NewTensor(output_);
+    output_tensor->Resize(output_shape_);
+    if (!output_lod_.empty()) {
+      output_tensor->set_lod(output_lod_);
+    }
+    auto* output_data = output_tensor->mutable_data<int32_t>();
+    int64_t output_num = 1;
+    for (auto e : output_shape_) {
+      output_num *= e;
+    }
+    for (int i = 0; i < output_num; i++) {
+      output_data[i] = output_data_[i];
+    }
+
+    if (!input_length_data_.empty() && !output_length_data_.empty()) {
+      auto* output_length_tensor = scope->NewTensor(output_length_);
+      output_length_tensor->Resize(output_length_shape_);
+      auto* output_length_data = output_length_tensor->mutable_data<int32_t>();
+      int64_t num = 1;
+      for (auto e : output_length_shape_) {
+        num *= e;
+      }
+      for (int i = 0; i < num; i++) {
+        output_length_data[i] = output_length_data_[i];
+      }
+    }
+  }
+
+  void PrepareOpDesc(cpp::OpDesc* op_desc) {
+    op_desc->SetType("ctc_align");
+    op_desc->SetInput("Input", {input_});
+    op_desc->SetOutput("Output", {output_});
+    if (!input_length_data_.empty()) {
+      op_desc->SetInput("InputLength", {input_length_});
+      op_desc->SetOutput("OutputLength", {output_length_});
+    }
+    op_desc->SetAttr("blank", blank_);
+    op_desc->SetAttr("merge_repeated", merge_repeated_);
+    op_desc->SetAttr("padding_value", padding_value_);
+  }
+
+  void PrepareData() override {
+    SetCommonTensor(input_, DDim(input_shape_), input_data_.data(), input_lod_);
+    if (!input_length_data_.empty()) {
+      SetCommonTensor(
+          input_length_, DDim(input_length_shape_), input_length_data_.data());
+    }
+  }
+};
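Note: the three cases below cover both execution modes. CtcAlign1 drives the LoD path; CtcAlign2 and CtcAlign3 drive the padded (LoD-free) path, where for instance the first row {0, 1, 2, 2, 0, 4} of length 6 with blank = 0 and merge_repeated = true decodes to {1, 2, 4}, is padded back to width 6 with padding_value = 0, and OutputLength records 3.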
+
+TEST(CtcAlign1, precision) {
+  LOG(INFO) << "test ctc_align op";
+#ifdef LITE_WITH_ARM
+  // Define variable
+  const std::vector<int32_t>& input_data = {
+      0, 1, 2, 2, 0, 4, 0, 4, 5, 0, 6, 6, 0, 0, 7, 7, 7, 0};
+  const std::vector<int64_t> input_shape = {18, 1};
+  const std::vector<std::vector<uint64_t>> input_lod = {{11, 7}};
+  const std::vector<int32_t> input_length_data = {};
+  const std::vector<int64_t> input_length_shape = {};
+  const int blank = 0;
+  const bool merge_repeated = false;
+  const int padding_value = 0;
+  const std::vector<int32_t> output_data = {1, 2, 2, 4, 4, 5, 6, 6, 7, 7, 7};
+  const std::vector<int64_t> output_shape = {11, 1};
+  const std::vector<std::vector<uint64_t>> output_lod = {{7, 4}};
+  const std::vector<int32_t> output_length_data = {};
+  const std::vector<int64_t> output_length_shape = {};
+
+  // Test
+  Place place(TARGET(kHost), PRECISION(kInt32));
+  std::unique_ptr<arena::TestCase> tester(
+      new CtcAlignComputeTester(place,
+                                "def",
+                                input_data,
+                                input_shape,
+                                input_lod,
+                                input_length_data,
+                                input_length_shape,
+                                blank,
+                                merge_repeated,
+                                padding_value,
+                                output_data,
+                                output_shape,
+                                output_lod,
+                                output_length_data,
+                                output_length_shape));
+  arena::Arena arena(std::move(tester), place, 2e-5);
+  arena.TestPrecision();
+#endif
+}
+
+TEST(CtcAlign2, precision) {
+  LOG(INFO) << "test ctc_align op";
+#ifdef LITE_WITH_ARM
+  // Define variable
+  const std::vector<int32_t>& input_data = {
+      0, 1, 2, 2, 0, 4, 0, 4, 5, 0, 6, 0, 0, 7, 7, 7, 0, 0};
+  const std::vector<int64_t> input_shape = {3, 6};
+  const std::vector<std::vector<uint64_t>> input_lod = {};
+  const std::vector<int32_t> input_length_data = {6, 5, 4};
+  const std::vector<int64_t> input_length_shape = {3, 1};
+  const int blank = 0;
+  const bool merge_repeated = true;
+  const int padding_value = 0;
+  const std::vector<int32_t> output_data = {
+      1, 2, 4, 0, 0, 0, 4, 5, 6, 0, 0, 0, 7, 0, 0, 0, 0, 0};
+  const std::vector<int64_t> output_shape = {3, 6};
+  const std::vector<std::vector<uint64_t>> output_lod = {};
+  const std::vector<int32_t> output_length_data = {3, 3, 1};
+  const std::vector<int64_t> output_length_shape = {3, 1};
+
+  // Test
+  Place place(TARGET(kHost), PRECISION(kInt32));
+  std::unique_ptr<arena::TestCase> tester(
+      new CtcAlignComputeTester(place,
+                                "def",
+                                input_data,
+                                input_shape,
+                                input_lod,
+                                input_length_data,
+                                input_length_shape,
+                                blank,
+                                merge_repeated,
+                                padding_value,
+                                output_data,
+                                output_shape,
+                                output_lod,
+                                output_length_data,
+                                output_length_shape));
+  arena::Arena arena(std::move(tester), place, 2e-5);
+  arena.TestPrecision();
+#endif
+}
+
+TEST(CtcAlign3, precision) {
+  LOG(INFO) << "test ctc_align op";
+#ifdef LITE_WITH_ARM
+  // Define variable
+  const std::vector<int32_t>& input_data = {
+      0, 1, 2, 2, 0, 4, 0, 4, 5, 0, 6, 0, 0, 7, 7, 7, 0, 0};
+  const std::vector<int64_t> input_shape = {3, 6};
+  const std::vector<std::vector<uint64_t>> input_lod = {};
+  const std::vector<int32_t> input_length_data = {6, 5, 4};
+  const std::vector<int64_t> input_length_shape = {3, 1};
+  const int blank = 0;
+  const bool merge_repeated = false;
+  const int padding_value = 0;
+  const std::vector<int32_t> output_data = {
+      1, 2, 2, 4, 0, 0, 4, 5, 6, 0, 0, 0, 7, 7, 7, 0, 0, 0};
+  const std::vector<int64_t> output_shape = {3, 6};
+  const std::vector<std::vector<uint64_t>> output_lod = {};
+  const std::vector<int32_t> output_length_data = {4, 3, 3};
+  const std::vector<int64_t> output_length_shape = {3, 1};
+
+  // Test
+  Place place(TARGET(kHost), PRECISION(kInt32));
+  std::unique_ptr<arena::TestCase> tester(
+      new CtcAlignComputeTester(place,
+                                "def",
+                                input_data,
+                                input_shape,
+                                input_lod,
+                                input_length_data,
+                                input_length_shape,
+                                blank,
+                                merge_repeated,
+                                padding_value,
+                                output_data,
+                                output_shape,
+                                output_lod,
+                                output_length_data,
+                                output_length_shape));
+  arena::Arena arena(std::move(tester), place, 2e-5);
+  arena.TestPrecision();
+#endif
+}
+
+}  // namespace lite
+}  // namespace paddle