diff --git a/lite/backends/arm/math/conv_impl.cc b/lite/backends/arm/math/conv_impl.cc index 96d0893bc0f0a1c145f4e58dd2caecfba78786ab..9412fc43f1eba87f5685256eeb6435bfad4438eb 100644 --- a/lite/backends/arm/math/conv_impl.cc +++ b/lite/backends/arm/math/conv_impl.cc @@ -573,6 +573,22 @@ template void conv_im2col_gemm_int8(const int8_t* i_data, ARMContext* ctx, const float* scale); +template void im2col(const float* data_im, + int channels, + int height, + int width, + int kernel_h, + int kernel_w, + int pad_top, + int pad_bottom, + int pad_left, + int pad_right, + int stride_h, + int stride_w, + int dilation_h, + int dilation_w, + float* data_col); + void conv_depthwise_3x3_fp32(const void* din, void* dout, int num, diff --git a/lite/backends/arm/math/conv_impl.h b/lite/backends/arm/math/conv_impl.h index 60f74b7feecc91a2fe8262a1fea4dce26430031d..28a2fb7e2a42a27e9ecd3d42b25f9942b481004e 100644 --- a/lite/backends/arm/math/conv_impl.h +++ b/lite/backends/arm/math/conv_impl.h @@ -359,6 +359,24 @@ void conv_compute_2x2_3x3_small(const float* input, const float* bias, const operators::ConvParam& param, ARMContext* ctx); + +template +void im2col(const Dtype* data_im, + int channels, + int height, + int width, + int kernel_h, + int kernel_w, + int pad_top, + int pad_bottom, + int pad_left, + int pad_right, + int stride_h, + int stride_w, + int dilation_h, + int dilation_w, + Dtype* data_col); + } // namespace math } // namespace arm } // namespace lite diff --git a/lite/kernels/arm/CMakeLists.txt b/lite/kernels/arm/CMakeLists.txt index f16cbe7f663a56cb41908e5dc360c349aea58842..c24e769ad10055a905de6aefc4879ed04e8b962e 100644 --- a/lite/kernels/arm/CMakeLists.txt +++ b/lite/kernels/arm/CMakeLists.txt @@ -68,6 +68,7 @@ add_kernel(reduce_max_compute_arm ARM extra SRCS reduce_max_compute.cc DEPS ${li add_kernel(sequence_expand_compute_arm ARM extra SRCS sequence_expand_compute.cc DEPS ${lite_kernel_deps} math_arm) add_kernel(im2sequence_compute_arm ARM extra SRCS im2sequence_compute.cc DEPS ${lite_kernel_deps} math_arm) add_kernel(sequence_pool_compute_arm ARM extra SRCS sequence_pool_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(sequence_conv_compute_arm ARM extra SRCS sequence_conv_compute.cc DEPS ${lite_kernel_deps} math_arm) add_kernel(layer_norm_compute_arm ARM extra SRCS layer_norm_compute.cc DEPS ${lite_kernel_deps} math_arm) add_kernel(gather_compute_arm ARM extra SRCS gather_compute.cc DEPS ${lite_kernel_deps} math_arm) add_kernel(reduce_prod_compute_arm ARM extra SRCS reduce_prod_compute.cc DEPS ${lite_kernel_deps} math_arm) @@ -88,6 +89,7 @@ add_kernel(gru_unit_compute_arm ARM extra SRCS gru_unit_compute.cc DEPS ${lite_k add_kernel(gru_compute_arm ARM extra SRCS gru_compute.cc DEPS ${lite_kernel_deps} math_arm) add_kernel(beam_search_decode_compute_arm ARM extra SRCS beam_search_decode_compute.cc DEPS ${lite_kernel_deps} math_arm) add_kernel(lookup_table_compute_arm ARM extra SRCS lookup_table_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(lookup_table_dequant_compute_arm ARM extra SRCS lookup_table_dequant_compute.cc DEPS ${lite_kernel_deps} math_arm) add_kernel(logical_compute_arm ARM extra SRCS logical_compute.cc DEPS ${lite_kernel_deps} math_arm) add_kernel(sequence_softmax_compute_arm ARM extra SRCS sequence_softmax_compute.cc DEPS ${lite_kernel_deps} math_arm) add_kernel(less_than_arm ARM extra SRCS compare_compute.cc DEPS ${lite_kernel_deps} math_arm) diff --git a/lite/kernels/arm/lookup_table_dequant_compute.cc b/lite/kernels/arm/lookup_table_dequant_compute.cc new file mode 100644 index 0000000000000000000000000000000000000000..6127e4279b14c40af1cde14b267581426f9ffaa1 --- /dev/null +++ b/lite/kernels/arm/lookup_table_dequant_compute.cc @@ -0,0 +1,100 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "lite/kernels/arm/lookup_table_dequant_compute.h" +#include +#include +#include "lite/api/paddle_place.h" +#include "lite/backends/arm/math/funcs.h" +#include "lite/core/op_registry.h" +#include "lite/core/tensor.h" +#include "lite/core/type_system.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace arm { + +void dequant(const unsigned char *in, + float *out, + float min, + float max, + int emb_size, + int pow_2_bits) { + float scale = (max - min) / pow_2_bits; + for (int i = 0; i < emb_size; ++i) { + float x = scale * static_cast(in[i]) + min; + out[i] = x; + } +} + +void LookupTableDequantCompute::Run() { + auto ¶m = this->Param(); + // inputs + auto w = param.W; + auto ids = param.Ids; + // outputs + auto out = param.Out; + + auto table_dim = w->dims(); + int64_t ids_numel = ids->numel(); + auto ids_data = ids->data(); + + int64_t row_number = table_dim[0]; + int64_t quant_number = table_dim[1]; + int64_t row_width = (quant_number - 2) * 4; + + auto table_data = w->data(); + auto dout = out->mutable_data(); + int pow_2_bits = static_cast(pow(2, 8)); + + for (int64_t i = 0; i < ids_numel; ++i) { + int ids_int = ids_data[i]; + if (param.padding_idx != -1 && ids_data[i] == param.padding_idx) { + memset(dout + i * row_width, 0, row_width * sizeof(float)); + } else { + CHECK_LT(ids_data[i], row_number) + << "look uptable ids[i] < row_number check failed"; + CHECK_GE(ids_data[i], 0) << "lookuptable ids[i] >= 0 check failed"; + float min = *(table_data + ids_data[i] * quant_number); + float max = *(table_data + ids_data[i] * quant_number + 1); + int offset = ids_data[i] * quant_number + 2; + const unsigned char *tensor_buf = + reinterpret_cast(table_data + offset); + dequant( + tensor_buf, dout + i * row_width, min, max, row_width, pow_2_bits); + + // memcpy(dout + i * row_width, + // table_data + ids_int * row_width, + // row_width * sizeof(float)); + } + } + *(out->mutable_lod()) = ids->lod(); +} + +} // namespace arm +} // namespace kernels +} // namespace lite +} // namespace paddle + +REGISTER_LITE_KERNEL(lookup_table_dequant, + kARM, + kAny, + kNCHW, + paddle::lite::kernels::arm::LookupTableDequantCompute, + def) + .BindInput("W", {LiteType::GetTensorTy(TARGET(kARM))}) + .BindInput("Ids", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt64))}) + .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))}) + .Finalize(); diff --git a/lite/kernels/arm/lookup_table_dequant_compute.h b/lite/kernels/arm/lookup_table_dequant_compute.h new file mode 100644 index 0000000000000000000000000000000000000000..e1a41dcdf20fb287d3cc2022832cbb9a25e93eb4 --- /dev/null +++ b/lite/kernels/arm/lookup_table_dequant_compute.h @@ -0,0 +1,39 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include +#include "lite/core/kernel.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace arm { + +class LookupTableDequantCompute + : public KernelLite { + public: + using param_t = operators::LookupTableDequantParam; + + LookupTableDequantCompute() = default; + + void Run() override; + + virtual ~LookupTableDequantCompute() = default; +}; + +} // namespace arm +} // namespace kernels +} // namespace lite +} // namespace paddle diff --git a/lite/kernels/arm/sequence_conv_compute.cc b/lite/kernels/arm/sequence_conv_compute.cc new file mode 100644 index 0000000000000000000000000000000000000000..a70b6717097ec0ffdaa24ba257bfdf8dbd536f3f --- /dev/null +++ b/lite/kernels/arm/sequence_conv_compute.cc @@ -0,0 +1,150 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "lite/kernels/arm/sequence_conv_compute.h" +#include +#include +#include +#include +#include "lite/backends/arm/math/conv_impl.h" +#include "lite/backends/arm/math/sgemm.h" +#include "lite/core/op_registry.h" +#include "lite/core/tensor.h" +#include "lite/core/type_system.h" +#include "lite/operators/op_params.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace arm { + +template +void local_naive_transpose(const Dtype* din, Dtype* dout, int m, int n) { + int k = 0; + for (int i = 0; i < n; ++i) { + for (int j = 0; j < m; ++j) { + dout[k++] = din[j * n + i]; + } + } +} + +void SequenceConvCompute::PrepareForRun() {} + +void SequenceConvCompute::Run() { + // param.X is in shape: [sequence_len, hidden_dim]; + // param.Filter is in shape: [kernel_size * hidden_dim, kernel_num] + // param.contextLength : kernel_size + // param.contextStart: for padding idx + // param.Out is in shape [new_sequence_len, kernel_num] + auto& param = this->Param(); + auto& ctx = this->ctx_->template As(); + const auto* in_data = param.X->data(); + const auto* filter_data = param.Filter->data(); + float* out_data = param.Out->mutable_data(); + int pad_start = param.contextStart; + int kernel_size = param.contextLength; + int kernel_num = param.Filter->dims()[1]; + int up_pad = std::max(0, -pad_start); + int down_pad = std::max(0, pad_start + kernel_size - 1); + auto hidden_dim = static_cast(param.X->dims()[1]); + auto sequence_len = static_cast(param.X->dims()[0]); + auto lod = param.X->lod(); + + // Im2Col + lite::Tensor col; + lite::Tensor tmp; + col.Resize({sequence_len, kernel_size * hidden_dim}); + auto* col_data = col.mutable_data(); + auto lod_level_0 = lod[0]; + int input_row_begin, input_row_end; + for (int i = 0; i < static_cast(lod_level_0.size()) - 1; i++) { + if (lod_level_0[i] == lod_level_0[i + 1]) continue; + input_row_begin = (pad_start > 0) + ? static_cast(lod_level_0[i]) + pad_start + : static_cast(lod_level_0[i]); + input_row_end = static_cast(lod_level_0[i + 1]); + + if (input_row_begin < input_row_end) { + // do im2col + auto* sub_in_data = in_data + input_row_begin * hidden_dim; + auto* sub_col_data = + col_data + input_row_begin * kernel_size * hidden_dim; + tmp.Resize({kernel_size * hidden_dim, input_row_end - input_row_begin}); + auto* tmp_data = tmp.mutable_data(); + // Image Col: [input_channels, filter_height, filter_width, output_height, + // output_width] + // sequence Col: [1, kernel_size, hidden_dim, sequence_len, 1] + paddle::lite::arm::math::im2col( + sub_in_data, + 1, + sequence_len, + hidden_dim, // C H W -> 1, seq_len, hidden_dim + kernel_size, + hidden_dim, // kernel_h, kernel_w + up_pad, + down_pad, + 0, + 0, // pad_top, pad_bottom, pad_left, pad_right + 1, + 1, + 1, + 1, // stride_h, stride_w, dilation_h, dilation_w + tmp_data); + local_naive_transpose(tmp_data, + sub_col_data, + kernel_size * hidden_dim, + input_row_end - input_row_begin); + } + } + + // SGDMM C := alpha * A * B + beta * C + // matmul: col * filter_data + // [sequence_len, kernel_size * hidden_dim] * [kernel_size * hidden_dim, + // kernel_num] + // = [sequence_len, kernel_num] + paddle::lite::operators::ActivationParam act_param; + paddle::lite::arm::math::sgemm(false, + false, // is_transB, + sequence_len, // M + kernel_num, // N + kernel_size * hidden_dim, // K + 1.0f, // alpha + col_data, // A + kernel_size * hidden_dim, // lda: k + filter_data, // B + kernel_num, // ldb: n + 0.f, // beta + out_data, // C + kernel_num, // ldc: n + NULL, // bias + false, // is_bias + act_param, // act_param + &ctx); // ctx +} + +} // namespace arm +} // namespace kernels +} // namespace lite +} // namespace paddle + +REGISTER_LITE_KERNEL(sequence_conv, + kARM, + kFloat, + kNCHW, + paddle::lite::kernels::arm::SequenceConvCompute, + def) + .BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))}) + .BindInput("Filter", {LiteType::GetTensorTy(TARGET(kARM))}) + .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))}) + .Finalize(); diff --git a/lite/kernels/arm/sequence_conv_compute.h b/lite/kernels/arm/sequence_conv_compute.h new file mode 100644 index 0000000000000000000000000000000000000000..d63b72b006460577ad37c0b68808e02852142e52 --- /dev/null +++ b/lite/kernels/arm/sequence_conv_compute.h @@ -0,0 +1,39 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include +#include "lite/core/kernel.h" +#include "lite/core/op_registry.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace arm { + +class SequenceConvCompute : public KernelLite { + public: + void PrepareForRun() override; + + void Run() override; + + virtual ~SequenceConvCompute() = default; + + private: +}; + +} // namespace arm +} // namespace kernels +} // namespace lite +} // namespace paddle diff --git a/lite/operators/CMakeLists.txt b/lite/operators/CMakeLists.txt index eb39924e715ea09c5c7bd5f4bec6a1128eace814..2f6f5a6d73770c09692e897492238e7c8018e14a 100644 --- a/lite/operators/CMakeLists.txt +++ b/lite/operators/CMakeLists.txt @@ -90,6 +90,7 @@ add_operator(reduce_prod_op_lite extra SRCS reduce_prod_op.cc DEPS ${op_DEPS}) add_operator(sequence_reshape_op_lite extra SRCS sequence_reshape_op.cc DEPS ${op_DEPS}) add_operator(sequence_reverse_op_lite extra SRCS sequence_reverse_op.cc DEPS ${op_DEPS}) add_operator(sequence_pool extra SRCS sequence_pool_op.cc DEPS ${op_DEPS}) +add_operator(sequence_conv extra SRCS sequence_conv_op.cc DEPS ${op_DEPS}) add_operator(sequence_pool_concat extra SRCS sequence_pool_concat_op.cc DEPS ${op_DEPS}) add_operator(reduce_sum_op_lite extra SRCS reduce_ops.cc DEPS ${op_DEPS}) add_operator(match_matrix_tensor_op_lite extra SRCS match_matrix_tensor_op.cc DEPS ${op_DEPS}) @@ -107,6 +108,7 @@ add_operator(distribute_fpn_proposals_op_lite extra SRCS distribute_fpn_proposal # for OCR specific add_operator(while_op extra SRCS while_op.cc DEPS ${op_DEPS}) add_operator(lookup_table_op extra SRCS lookup_table_op.cc DEPS ${op_DEPS}) +add_operator(lookup_table_dequant_op extra SRCS lookup_table_dequant_op.cc DEPS ${op_DEPS}) add_operator(lookup_table_v2_op extra SRCS lookup_table_v2_op.cc DEPS ${op_DEPS}) add_operator(beam_search_decode_op extra SRCS beam_search_decode_op.cc DEPS ${op_DEPS}) add_operator(logical_xor extra SRCS logical_op.cc DEPS ${op_DEPS}) diff --git a/lite/operators/lookup_table_dequant_op.cc b/lite/operators/lookup_table_dequant_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..b81043bfbfeed356e3d67065686057adfadcb25f --- /dev/null +++ b/lite/operators/lookup_table_dequant_op.cc @@ -0,0 +1,72 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "lite/operators/lookup_table_dequant_op.h" +#include "lite/core/op_lite.h" +#include "lite/core/op_registry.h" + +namespace paddle { +namespace lite { +namespace operators { + +bool LookupTableDequantOpLite::CheckShape() const { + CHECK_OR_FALSE(param_.W) + CHECK_OR_FALSE(param_.Ids) + CHECK_OR_FALSE(param_.Out) + + const auto& table_dims = param_.W->dims(); + const auto& ids_dims = param_.Ids->dims(); + + int ids_rank = ids_dims.size(); + + CHECK_EQ_OR_FALSE(table_dims.size(), 2); + CHECK_EQ_OR_FALSE(ids_dims[ids_rank - 1], 1); + CHECK_GT_OR_FALSE(table_dims[1], 2); + return true; +} + +bool LookupTableDequantOpLite::InferShape() const { + const auto& table_dims = param_.W->dims(); + const auto& ids_dims = param_.Ids->dims(); + + auto out_dims = ids_dims; + int ids_rank = ids_dims.size(); + out_dims[ids_rank - 1] = (table_dims[1] - 2) * 4; + + param_.Out->Resize(out_dims); + param_.Out->set_lod(param_.Ids->lod()); + return true; +} + +bool LookupTableDequantOpLite::AttachImpl(const cpp::OpDesc& op_desc, + lite::Scope* scope) { + auto input = op_desc.Input("W").front(); + auto ids = op_desc.Input("Ids").front(); + auto out = op_desc.Output("Out").front(); + + param_.W = scope->FindVar(input)->GetMutable(); + param_.Ids = scope->FindVar(ids)->GetMutable(); + param_.Out = scope->FindVar(out)->GetMutable(); + + param_.padding_idx = op_desc.GetAttr("padding_idx"); + + return true; +} + +} // namespace operators +} // namespace lite +} // namespace paddle + +REGISTER_LITE_OP(lookup_table_dequant, + paddle::lite::operators::LookupTableDequantOpLite) diff --git a/lite/operators/lookup_table_dequant_op.h b/lite/operators/lookup_table_dequant_op.h new file mode 100644 index 0000000000000000000000000000000000000000..3a9683d5ca0d87365cb240b91dccab07cf26ca71 --- /dev/null +++ b/lite/operators/lookup_table_dequant_op.h @@ -0,0 +1,47 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include +#include +#include "lite/core/op_lite.h" +#include "lite/core/scope.h" +#include "lite/utils/all.h" + +namespace paddle { +namespace lite { +namespace operators { + +class LookupTableDequantOpLite : public OpLite { + public: + LookupTableDequantOpLite() {} + explicit LookupTableDequantOpLite(const std::string &op_type) + : OpLite(op_type) {} + + bool CheckShape() const override; + + bool InferShape() const override; + + bool AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) override; + + void AttachKernel(KernelBase *kernel) override { kernel->SetParam(param_); } + std::string DebugString() const override { return "LookupTableDequant"; } + + private: + mutable LookupTableDequantParam param_; +}; + +} // namespace operators +} // namespace lite +} // namespace paddle diff --git a/lite/operators/op_params.h b/lite/operators/op_params.h index bdb885be37ed1b9e543a3e318f2a4e5cc371a626..d3edbe2a0299688d7069e1e1d92a95e89566762b 100644 --- a/lite/operators/op_params.h +++ b/lite/operators/op_params.h @@ -668,6 +668,13 @@ struct LookupTableParam { int64_t padding_idx{-1}; }; +struct LookupTableDequantParam { + lite::Tensor* W{nullptr}; + lite::Tensor* Ids{nullptr}; + lite::Tensor* Out{nullptr}; + int64_t padding_idx{-1}; +}; + struct Im2SequenceParam { const lite::Tensor* X{}; const lite::Tensor* Y{}; @@ -772,6 +779,15 @@ struct SequencePoolParam { #endif }; +struct SequenceConvParam { + const lite::Tensor* X{}; + const lite::Tensor* Filter{}; + lite::Tensor* Out{}; + int contextStart{0}; + int contextStride{1}; + int contextLength; +}; + struct SequencePoolConcatParam { std::vector X{}; lite::Tensor* Out{}; diff --git a/lite/operators/sequence_conv_op.cc b/lite/operators/sequence_conv_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..89596a22c616b45d0e72cc14501e4f6c148ad86c --- /dev/null +++ b/lite/operators/sequence_conv_op.cc @@ -0,0 +1,94 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "lite/operators/sequence_conv_op.h" +#include "lite/core/op_registry.h" + +namespace paddle { +namespace lite { +namespace operators { + +bool SequenceConvOp::CheckShape() const { + CHECK_OR_FALSE(param_.X); + CHECK_OR_FALSE(param_.Filter); + CHECK_OR_FALSE(param_.Out); + + // currently we only support the case that + // the contextStride is equal to 1 + int context_length = param_.contextLength; + int context_start = param_.contextStart; + CHECK_EQ_OR_FALSE(param_.contextStride, 1UL); + CHECK_GT_OR_FALSE(context_start, -context_length); + CHECK_GE_OR_FALSE(0, context_start); + + const auto *filter = param_.Filter; + auto lod = param_.X->lod(); + auto filter_dims = filter->dims(); + auto in_dims = param_.X->dims(); + CHECK_EQ_OR_FALSE(in_dims.size(), 2UL); + CHECK_EQ_OR_FALSE(filter_dims.size(), 2UL); + CHECK_EQ_OR_FALSE(lod.size(), 1UL); + CHECK_EQ_OR_FALSE(filter_dims[0], context_length * in_dims[1]); + CHECK_GE_OR_FALSE(in_dims[0], (static_cast(lod[0].size()) - 1)); + return true; +} + +bool SequenceConvOp::InferShape() const { + const auto *input = param_.X; + const auto *filter = param_.Filter; + auto in_dims = input->dims(); + auto filter_dims = filter->dims(); + auto out_dims = in_dims; + out_dims[1] = filter_dims[1]; + param_.Out->Resize(out_dims); + param_.Out->set_lod(param_.X->lod()); + return true; +} + +bool SequenceConvOp::AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) { + // required params + param_.X = const_cast( + &scope->FindVar(opdesc.Input("X").front())->Get()); + param_.Filter = const_cast( + &scope->FindVar(opdesc.Input("Filter").front())->Get()); + param_.Out = + scope->FindVar(opdesc.Output("Out").front())->GetMutable(); + param_.contextStart = opdesc.GetAttr("contextStart"); + param_.contextStride = opdesc.GetAttr("contextStride"); + param_.contextLength = opdesc.GetAttr("contextLength"); + + // PaddingData is not supported for now + std::vector input_arg_names = opdesc.InputArgumentNames(); + if (std::find(input_arg_names.begin(), + input_arg_names.end(), + "PaddingData") != input_arg_names.end()) { + auto padding_data_arguments = opdesc.Input("PaddingData"); + CHECK_EQ_OR_FALSE(padding_data_arguments.size(), 0); + } + + // paddingTrainable == True is not supported for now. + if (opdesc.HasAttr("paddingTrainable")) { + CHECK_OR_FALSE(!opdesc.GetAttr("paddingTrainable")); + } + CHECK(param_.X); + CHECK(param_.Filter); + CHECK(param_.Out); + return true; +} + +} // namespace operators +} // namespace lite +} // namespace paddle + +REGISTER_LITE_OP(sequence_conv, paddle::lite::operators::SequenceConvOp); diff --git a/lite/operators/sequence_conv_op.h b/lite/operators/sequence_conv_op.h new file mode 100644 index 0000000000000000000000000000000000000000..34d65d3cc9324aea7b50a1d939a594b817889896 --- /dev/null +++ b/lite/operators/sequence_conv_op.h @@ -0,0 +1,43 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include +#include +#include "lite/core/op_lite.h" +#include "lite/core/scope.h" +#include "lite/utils/all.h" + +namespace paddle { +namespace lite { +namespace operators { + +class SequenceConvOp : public OpLite { + public: + SequenceConvOp() {} + explicit SequenceConvOp(const std::string &op_type) : OpLite(op_type) {} + bool CheckShape() const override; + bool InferShape() const override; + bool AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) override; + + void AttachKernel(KernelBase *kernel) override { kernel->SetParam(param_); } + std::string DebugString() const override { return "sequence_conv"; } + + private: + mutable SequenceConvParam param_; +}; + +} // namespace operators +} // namespace lite +} // namespace paddle diff --git a/lite/tests/kernels/CMakeLists.txt b/lite/tests/kernels/CMakeLists.txt index 3d6eb9eb8ace0dde1fba92af88ab3af20a87a2ed..09a1ce63c6348f173827311e8efcea773a6fb1a3 100644 --- a/lite/tests/kernels/CMakeLists.txt +++ b/lite/tests/kernels/CMakeLists.txt @@ -38,6 +38,7 @@ if((NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA AND NOT LITE_WITH_BM) AND (LITE_ if(LITE_BUILD_EXTRA) lite_cc_test(test_gru_unit SRCS gru_unit_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_sequence_pool_compute SRCS sequence_pool_compute_test.cc DEPS ${bm_kernels} arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_sequence_conv_compute SRCS sequence_conv_compute_test.cc DEPS ${bm_kernels} arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_reduce_max_compute SRCS reduce_max_compute_test.cc DEPS arena_framework ${bm_kernels} ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_unsqueeze_compute SRCS unsqueeze_compute_test.cc DEPS arena_framework ${bm_kernels} ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_assign_compute SRCS assign_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) @@ -54,6 +55,7 @@ if(LITE_BUILD_EXTRA) lite_cc_test(test_kernel_search_aligned_mat_mul_compute SRCS search_aligned_mat_mul_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_search_seq_fc_compute SRCS search_seq_fc_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_lookup_table_compute SRCS lookup_table_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_lookup_table_dequant_compute SRCS lookup_table_dequant_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_gather_compute SRCS gather_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) endif() lite_cc_test(test_kernel_pad2d_compute SRCS pad2d_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) diff --git a/lite/tests/kernels/lookup_table_dequant_compute_test.cc b/lite/tests/kernels/lookup_table_dequant_compute_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..30f551e8b7e29ef373145bdb3496c1885907531a --- /dev/null +++ b/lite/tests/kernels/lookup_table_dequant_compute_test.cc @@ -0,0 +1,150 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include "lite/api/paddle_use_kernels.h" +#include "lite/api/paddle_use_ops.h" +#include "lite/core/arena/framework.h" +#include "lite/tests/utils/fill_data.h" + +namespace paddle { +namespace lite { + +void dequant(const unsigned char* in, + float* out, + float min, + float max, + int emb_size, + int pow_2_bits) { + float scale = (max - min) / pow_2_bits; + for (int i = 0; i < emb_size; ++i) { + float x = scale * static_cast(in[i]) + min; + out[i] = x; + } +} + +class LookupTableDequantComputeTest : public arena::TestCase { + protected: + // common attributes for this op. + std::string op_type_ = "lookup_table_dequant"; + std::string ids_ = "ids"; + std::string w_ = "w"; + std::string out_ = "out"; + DDim ids_dims_{{2, 1}}; + DDim w_dims_{{8, 4}}; + int64_t padding_idx_ = -1; + + public: + LookupTableDequantComputeTest(const Place& place, + const std::string& alias, + const DDim& ids_dims, + const DDim& w_dims, + int64_t padding_idx) + : TestCase(place, alias), + ids_dims_(ids_dims), + w_dims_(w_dims), + padding_idx_(padding_idx) {} + + void RunBaseline(Scope* scope) override { + auto ids = scope->FindTensor(ids_); + auto w = scope->FindTensor(w_); + auto ids_dims = ids->dims(); + auto w_dims = w->dims(); + + auto out = scope->NewTensor(out_); + CHECK(out); + + int ids_rank = ids_dims.size(); + CHECK_EQ(ids_dims[ids_rank - 1], 1); + CHECK_EQ(w_dims.size(), 2); + + std::vector out_dims; + for (int i = 0; i < ids_rank - 1; ++i) { + out_dims.push_back(ids_dims[i]); + } + out_dims.push_back((w_dims[1] - 2) * 4); + out->Resize(out_dims); + out->set_lod(ids->lod()); + + auto ids_data = ids->data(); + auto ids_size = ids_dims.production(); + auto w_data = w->data(); + auto w_rows = w_dims[0]; + auto quant_number = w_dims[1]; + auto w_cols = (quant_number - 2) * 4; + auto out_data = out->mutable_data(); + int pow_2_bits = static_cast(pow(2, 8)); + + for (int64_t i = 0; i < ids_size; i++) { + auto id = ids_data[i]; + if (padding_idx_ != -1 && id == padding_idx_) { + memset(out_data + i * w_cols, 0, w_cols * sizeof(float)); + } else { + CHECK_LT(id, w_rows) << "lookup_table ids[i] expected < " << w_rows + << " but got " << id; + CHECK_GE(id, 0) << "lookup_table ids[i] expected >= 0 but got " << id; + float min = *(w_data + ids_data[i] * quant_number); + float max = *(w_data + ids_data[i] * quant_number + 1); + int offset = ids_data[i] * quant_number + 2; + const unsigned char* tensor_buf = + reinterpret_cast(w_data + offset); + dequant( + tensor_buf, out_data + i * w_cols, min, max, w_cols, pow_2_bits); + } + } + } + + void PrepareOpDesc(cpp::OpDesc* op_desc) { + op_desc->SetType(op_type_); + op_desc->SetInput("Ids", {ids_}); + op_desc->SetInput("W", {w_}); + op_desc->SetOutput("Out", {out_}); + op_desc->SetAttr("padding_idx", padding_idx_); + } + + void PrepareData() override { + std::vector ids(ids_dims_.production()); + fill_data_rand( + ids.data(), 0, w_dims_[0] - 1, ids_dims_.production()); + + std::vector w(w_dims_.production()); + fill_data_rand(w.data(), -1.f, 1.f, w_dims_.production()); + + SetCommonTensor(ids_, ids_dims_, ids.data()); + SetCommonTensor(w_, w_dims_, w.data()); + } +}; + +TEST(LookupTableDequant, precision) { +#ifdef LITE_WITH_ARM + float abs_error = 2e-5; + Place place = {TARGET(kARM), PRECISION(kAny)}; + for (auto ids_dims : + std::vector>{{5, 2, 3, 1}, {2, 3, 1}, {3, 1}}) { + for (auto w_dims : + std::vector>{{4, 3}, {6, 8}, {12, 15}}) { + for (auto padding_idx : std::vector{-1}) { + std::unique_ptr tester( + new LookupTableDequantComputeTest( + place, "def", DDim(ids_dims), DDim(w_dims), padding_idx)); + arena::Arena arena(std::move(tester), place, abs_error); + arena.TestPrecision(); + } + } + } +#endif +} + +} // namespace lite +} // namespace paddle diff --git a/lite/tests/kernels/sequence_conv_compute_test.cc b/lite/tests/kernels/sequence_conv_compute_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..342e5664f33dba334c7bc934af09fc75b2435a85 --- /dev/null +++ b/lite/tests/kernels/sequence_conv_compute_test.cc @@ -0,0 +1,149 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include "lite/api/paddle_use_kernels.h" +#include "lite/api/paddle_use_ops.h" +#include "lite/core/arena/framework.h" + +namespace paddle { +namespace lite { + +class SequenceConvComputeTester : public arena::TestCase { + public: + SequenceConvComputeTester(const Place& place, + const std::string& alias, + LoD lod, + DDim dims, + const int& contextStart, + const int& contextStride, + const int& contextLength, + const int& kernel_num) + : TestCase(place, alias), + lod_(lod), + dims_(dims), + contextStart_(contextStart), + contextStride_(contextStride), + contextLength_(contextLength), + kernel_num_(kernel_num) {} + + void PrepareOpDesc(cpp::OpDesc* op_desc) { + op_desc->SetType("sequence_conv"); + op_desc->SetInput("X", {input_name_}); + op_desc->SetInput("Filter", {filter_name_}); + op_desc->SetOutput("Out", {output_name_}); + op_desc->SetAttr("contextStart", contextStart_); + op_desc->SetAttr("contextStride", contextStride_); + op_desc->SetAttr("contextLength", contextLength_); + } + + void PrepareData() override { + DDim filter_dims( + std::vector{contextLength_ * dims_[1], kernel_num_}); + + std::vector din(dims_.production()); + for (int i = 0; i < dims_[0]; i++) { + for (int j = 0; j < dims_[1]; j++) { + din[i * dims_[1] + j] = + (2.0 * i + 3.0 * j) / (2.0 * dims_[0] + 3.0 * dims_[1]) - 0.5; + } + } + SetCommonTensor(input_name_, dims_, din.data(), lod_); + + std::vector dfilter(filter_dims.production()); + for (int i = 0; i < filter_dims[0]; i++) { + for (int j = 0; j < filter_dims[1]; j++) { + dfilter[i * filter_dims[1] + j] = + (1.5 * i + 2.0 * j) / + (1.5 * filter_dims[0] + 2.0 * filter_dims[1]) - + 0.5; + } + } + SetCommonTensor(filter_name_, filter_dims, dfilter.data(), lod_); + } + + void RunBaseline(Scope* scope) override { + // calculate res the output in this scope + // to compare with the Paddle-Lite calculated one + + auto* output = scope->NewTensor(output_name_); + CHECK(output); + std::vector output_shape({4, 3}); + output->Resize(DDim(output_shape)); + auto output_dims = output->dims(); + auto output_data = output->mutable_data(); + std::vector> res; + if (contextStart_ == -2) { + res = {{-0.08867277, -0.17257819, -0.2564836}, + {0.194508, 0.05720823, -0.08009153}, + {0.73512584, 0.5749428, 0.41475973}, + {0.5635012, 0.49485126, 0.42620137}}; + } else if (contextStart_ == -1) { + res = {{0.194508, 0.05720823, -0.08009153}, + {0.73512584, 0.5749428, 0.41475973}, + {0.5635012, 0.49485126, 0.42620137}, + {0.2517162, 0.23646072, 0.22120519}}; + } else if (contextStart_ == 0) { + res = {{0.73512584, 0.5749428, 0.41475973}, + {0.5635012, 0.49485126, 0.42620137}, + {0.2517162, 0.23646072, 0.22120519}, + {0.02574372, 0.03337148, 0.04099924}}; + } else { + fprintf(stderr, "not supported contextStart_\n"); + exit(-1); + } + for (int i = 0; i < output_shape[0]; i++) { + for (int j = 0; j < output_shape[1]; j++) { + output_data[i * output_shape[1] + j] = res[i][j]; + } + } + (output->mutable_lod())->push_back(lod_[0]); + } + + protected: + std::string input_name_ = "x"; + std::string filter_name_ = "filter"; + std::string output_name_ = "out"; + LoD lod_; + DDim dims_; + int contextStart_; + int contextStride_; + int contextLength_; + int kernel_num_; +}; + +void TestNormalCase(Place place, float abs_error = 2e-5) { + std::vector> lod{{0, 4}}; + std::vector dims{4, 5}; + std::vector candidate_pad_idx{-2, -1, 0}; + for (int pad_idx : candidate_pad_idx) { + std::unique_ptr tester(new SequenceConvComputeTester( + place, "def", lod, DDim(dims), pad_idx, 1, 3, 3)); + arena::Arena arena(std::move(tester), place, abs_error); + arena.TestPrecision(); + } +} + +TEST(sequence_conv, precision) { +#ifdef LITE_WITH_ARM + float abs_error = 2e-5; + Place place(TARGET(kARM)); + + TestNormalCase(place, abs_error); +#endif +} + +} // namespace lite +} // namespace paddle