From 2148bf49ab108cffec7da5ff428bc2b364e0f52f Mon Sep 17 00:00:00 2001 From: hong19860320 <9973393+hong19860320@users.noreply.github.com> Date: Sat, 16 Nov 2019 16:21:21 +0800 Subject: [PATCH] [LITE][X86] Add search_aligned_mat_mul and search_seq_fc op for X86 (#2428) --- lite/kernels/x86/CMakeLists.txt | 4 + .../x86/search_aligned_mat_mul_compute.cc | 27 +++ .../x86/search_aligned_mat_mul_compute.h | 83 +++++++ lite/kernels/x86/search_seq_fc_compute.cc | 27 +++ lite/kernels/x86/search_seq_fc_compute.h | 73 ++++++ lite/operators/CMakeLists.txt | 3 + lite/operators/op_params.h | 8 + lite/operators/search_aligned_mat_mul_op.cc | 101 ++++++++ lite/operators/search_aligned_mat_mul_op.h | 47 ++++ lite/operators/search_seq_fc_op.cc | 80 +++++++ lite/operators/search_seq_fc_op.h | 47 ++++ lite/tests/kernels/CMakeLists.txt | 2 + .../search_aligned_mat_mul_compute_test.cc | 220 ++++++++++++++++++ .../kernels/search_seq_fc_compute_test.cc | 177 ++++++++++++++ 14 files changed, 899 insertions(+) create mode 100644 lite/kernels/x86/search_aligned_mat_mul_compute.cc create mode 100644 lite/kernels/x86/search_aligned_mat_mul_compute.h create mode 100644 lite/kernels/x86/search_seq_fc_compute.cc create mode 100644 lite/kernels/x86/search_seq_fc_compute.h create mode 100644 lite/operators/search_aligned_mat_mul_op.cc create mode 100644 lite/operators/search_aligned_mat_mul_op.h create mode 100644 lite/operators/search_seq_fc_op.cc create mode 100644 lite/operators/search_seq_fc_op.h create mode 100644 lite/tests/kernels/search_aligned_mat_mul_compute_test.cc create mode 100644 lite/tests/kernels/search_seq_fc_compute_test.cc diff --git a/lite/kernels/x86/CMakeLists.txt b/lite/kernels/x86/CMakeLists.txt index 8bdc2a17c1..552e7ff109 100644 --- a/lite/kernels/x86/CMakeLists.txt +++ b/lite/kernels/x86/CMakeLists.txt @@ -47,6 +47,10 @@ add_kernel(search_grnn_compute_x86 X86 basic SRCS search_grnn_compute.cc DEPS ${ add_kernel(sequence_concat_compute_x86 X86 basic SRCS 
sequence_concat_compute.cc DEPS ${lite_kernel_deps}) add_kernel(var_conv_2d_compute_x86 X86 basic SRCS var_conv_2d_compute.cc DEPS ${lite_kernel_deps} blas fluid_data_type) +# for content-dnn specific +add_kernel(search_aligned_mat_mul_compute_x86 X86 extra SRCS search_aligned_mat_mul_compute.cc DEPS ${lite_kernel_deps} blas) +add_kernel(search_seq_fc_compute_x86 X86 extra SRCS search_seq_fc_compute.cc DEPS ${lite_kernel_deps} blas) + if(NOT LITE_WITH_X86) return() endif() diff --git a/lite/kernels/x86/search_aligned_mat_mul_compute.cc b/lite/kernels/x86/search_aligned_mat_mul_compute.cc new file mode 100644 index 0000000000..df88ca6867 --- /dev/null +++ b/lite/kernels/x86/search_aligned_mat_mul_compute.cc @@ -0,0 +1,27 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "lite/kernels/x86/search_aligned_mat_mul_compute.h" + +REGISTER_LITE_KERNEL( + search_aligned_mat_mul, + kX86, + kFloat, + kNCHW, + paddle::lite::kernels::x86::SearchAlignedMatMulCompute, + def) + .BindInput("X", {LiteType::GetTensorTy(TARGET(kX86))}) + .BindInput("Y", {LiteType::GetTensorTy(TARGET(kX86))}) + .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86))}) + .Finalize(); diff --git a/lite/kernels/x86/search_aligned_mat_mul_compute.h b/lite/kernels/x86/search_aligned_mat_mul_compute.h new file mode 100644 index 0000000000..ea6b546c2c --- /dev/null +++ b/lite/kernels/x86/search_aligned_mat_mul_compute.h @@ -0,0 +1,83 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#pragma once + +#include "lite/backends/x86/math/blas.h" +#include "lite/core/kernel.h" +#include "lite/core/op_registry.h" +#include "lite/core/types.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace x86 { + +template +class SearchAlignedMatMulCompute + : public KernelLite { + public: + using param_t = operators::MatMulParam; + + void Run() override { + auto& context = ctx_->As(); + auto& param = *param_.get_mutable(); + + auto x = param.X; + auto y = param.Y; + auto out = param.Out; + bool x_transpose = param.transpose_X; + bool y_transpose = param.transpose_Y; + float alpha = param.alpha; + const auto x_dims = x->dims(); + const auto y_dims = y->dims(); + const auto& x_lod = x->lod(); + const auto& y_lod = y->lod(); + const auto& x_lod_0 = x_lod[0]; + const auto& y_lod_0 = y_lod[0]; + + int seq_num = x_lod_0.size() - 1; + int x_inner_size = x_dims[1]; + int y_inner_size = y_dims[1]; + int x_batch_size = x_lod_0[1]; + int y_batch_size = y_lod_0[1]; + int M = x_transpose ? x_inner_size : x_batch_size; + int N = y_transpose ? y_batch_size : y_inner_size; + int X_K = x_transpose ? x_batch_size : x_inner_size; + int Y_K = y_transpose ? 
y_inner_size : y_batch_size; + CHECK_EQ(X_K, Y_K) << "K of Input(X) and Input(Y) is not equal"; + int K = X_K; + + lite::x86::math::MatDescriptor mat_dim_a; + mat_dim_a.height_ = M; + mat_dim_a.width_ = K; + mat_dim_a.stride_ = x_batch_size * x_inner_size; + mat_dim_a.batch_size_ = seq_num; + mat_dim_a.trans_ = x_transpose; + lite::x86::math::MatDescriptor mat_dim_b; + mat_dim_b.height_ = K; + mat_dim_b.width_ = N; + mat_dim_b.stride_ = y_batch_size * y_inner_size; + mat_dim_b.batch_size_ = seq_num; + mat_dim_b.trans_ = y_transpose; + auto blas = lite::x86::math::GetBlas(context); + blas.MatMul(*x, mat_dim_a, *y, mat_dim_b, static_cast(alpha), out, T(0)); + } + + virtual ~SearchAlignedMatMulCompute() = default; +}; + +} // namespace x86 +} // namespace kernels +} // namespace lite +} // namespace paddle diff --git a/lite/kernels/x86/search_seq_fc_compute.cc b/lite/kernels/x86/search_seq_fc_compute.cc new file mode 100644 index 0000000000..e0845bd74c --- /dev/null +++ b/lite/kernels/x86/search_seq_fc_compute.cc @@ -0,0 +1,27 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "lite/kernels/x86/search_seq_fc_compute.h" + +REGISTER_LITE_KERNEL(search_seq_fc, + kX86, + kFloat, + kNCHW, + paddle::lite::kernels::x86::SearchSeqFcCompute, + def) + .BindInput("X", {LiteType::GetTensorTy(TARGET(kX86))}) + .BindInput("W", {LiteType::GetTensorTy(TARGET(kX86))}) + .BindInput("b", {LiteType::GetTensorTy(TARGET(kX86))}) + .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86))}) + .Finalize(); diff --git a/lite/kernels/x86/search_seq_fc_compute.h b/lite/kernels/x86/search_seq_fc_compute.h new file mode 100644 index 0000000000..80ef54b30b --- /dev/null +++ b/lite/kernels/x86/search_seq_fc_compute.h @@ -0,0 +1,73 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#pragma once + +#include "lite/backends/x86/math/blas.h" +#include "lite/core/kernel.h" +#include "lite/core/op_registry.h" +#include "lite/core/types.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace x86 { + +template +class SearchSeqFcCompute : public KernelLite { + public: + using param_t = operators::SearchSeqFcParam; + + void Run() override { + auto& context = ctx_->As(); + auto& param = *param_.get_mutable(); + + auto x = param.x; + auto w = param.w; + auto b = param.b; + auto out = param.out; + auto out_size = param.out_size; + const auto x_dims = x->dims(); + const auto w_dims = w->dims(); + const auto out_dims = out->dims(); + CHECK_EQ(x_dims.size(), 2) << "The Input(X) should be 2-D tensor."; + CHECK_EQ(w_dims.size(), 2) << "W should be 2-D tensor."; + CHECK_EQ(out_dims.size(), 2) << "The Output(Out) should be 2-D tensor."; + CHECK_EQ(x_dims[1], w_dims[1]) << "Wrong shape: x_dims[1] != w_dims[1]"; + CHECK_EQ(w_dims[0], out_size) << "Wrong shape: w_dims[0] != out_size"; + CHECK_EQ(out_dims[0], x_dims[0]) << "Wrong shape: out_dims[0] != x_dims[0]"; + CHECK_EQ(out_dims[1], out_size) << "Wrong shape: out_dims[1] != out_size"; + + auto blas = lite::x86::math::GetBlas(context); + blas.MatMul(*x, false, *w, true, out); + + if (b != nullptr) { + auto b_dims = b->dims(); + CHECK_EQ(b_dims.size(), 1) << "b should be 1-D tensor."; + CHECK_EQ(b_dims[0], w_dims[0]) << "Wrong shape: b_dims[0] != w_dims[0]"; + int M = x_dims[0]; + int N = w_dims[0]; + for (int i = 0; i < M; i++) { + blas.AXPY( + N, static_cast(1), b->data(), out->mutable_data() + i * N); + } + } + } + + virtual ~SearchSeqFcCompute() = default; +}; + +} // namespace x86 +} // namespace kernels +} // namespace lite +} // namespace paddle diff --git a/lite/operators/CMakeLists.txt b/lite/operators/CMakeLists.txt index 39eac353af..fff000eb7a 100644 --- a/lite/operators/CMakeLists.txt +++ b/lite/operators/CMakeLists.txt @@ -114,6 +114,9 @@ add_operator(increment_op extra SRCS 
increment_op.cc DEPS ${op_DEPS}) add_operator(layer_norm_op extra SRCS layer_norm_op.cc DEPS ${op_DEPS}) add_operator(sequence_softmax_op extra SRCS sequence_softmax_op.cc DEPS ${op_DEPS}) +# for content-dnn specific +add_operator(search_aligned_mat_mul_op extra SRCS search_aligned_mat_mul_op.cc DEPS ${op_DEPS}) +add_operator(search_seq_fc_op extra SRCS search_seq_fc_op.cc DEPS ${op_DEPS}) if (NOT LITE_WITH_X86) lite_cc_test(test_fc_op SRCS fc_op_test.cc diff --git a/lite/operators/op_params.h b/lite/operators/op_params.h index 360bcbecaf..f809700350 100644 --- a/lite/operators/op_params.h +++ b/lite/operators/op_params.h @@ -89,6 +89,14 @@ struct FcParam { WITH_INT8_CONFIG }; +struct SearchSeqFcParam { // parameters of the search_seq_fc op + lite::Tensor* x{nullptr}; + lite::Tensor* w{nullptr}; + lite::Tensor* b{nullptr}; // bias; stays null when the op has no bias + lite::Tensor* out{nullptr}; + int out_size{0}; // "out_size" attribute; zero until AttachImpl sets it +}; + // For Interpolate Op struct InterpolateParam { lite::Tensor* X{}; diff --git a/lite/operators/search_aligned_mat_mul_op.cc b/lite/operators/search_aligned_mat_mul_op.cc new file mode 100644 index 0000000000..43a276e3c7 --- /dev/null +++ b/lite/operators/search_aligned_mat_mul_op.cc @@ -0,0 +1,101 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "lite/operators/search_aligned_mat_mul_op.h" +#include "lite/core/op_registry.h" + +namespace paddle { +namespace lite { +namespace operators { + +// Requires the X, Y and Out tensors to be attached. +bool SearchAlignedMatMulOpLite::CheckShape() const { + CHECK_OR_FALSE(param_.X); + CHECK_OR_FALSE(param_.Y); + CHECK_OR_FALSE(param_.Out); + + return true; +} + +// Validates the lod and shape of X and Y, then sets the lod and dims of Out: +// one block of M rows per sequence and N columns, with M and N chosen by the +// transpose flags. +bool SearchAlignedMatMulOpLite::InferShape() const { + const auto x_dims = param_.X->dims(); + const auto y_dims = param_.Y->dims(); + const auto& x_lod = param_.X->lod(); + const auto& y_lod = param_.Y->lod(); + bool x_transpose = param_.transpose_X; + bool y_transpose = param_.transpose_Y; + + CHECK_EQ(x_dims.size(), 2) << "X should be 2-D tensor"; + CHECK_EQ(y_dims.size(), 2) << "Y should be 2-D tensor"; + CHECK(!x_lod.empty()) << "The Input(X) must hold lod info."; + CHECK(!y_lod.empty()) << "The Input(Y) must hold lod info."; + + const auto& x_lod_0 = x_lod[0]; + const auto& y_lod_0 = y_lod[0]; + CHECK_GE(x_lod_0.size(), 2) << "The Input(X)'s lod info is corrupted."; + CHECK_GE(y_lod_0.size(), 2) << "The Input(Y)'s lod info is corrupted."; + CHECK_EQ(x_dims[0], static_cast<int64_t>(x_lod_0.back())) + << "The Input(X)'s lod info mismatches the actual tensor shape."; + CHECK_EQ(y_dims[0], static_cast<int64_t>(y_lod_0.back())) + << "The Input(Y)'s lod info mismatches the actual tensor shape."; + CHECK_EQ(x_lod_0.size(), y_lod_0.size()) + << "The Length of X and Y must be equal."; + // The per-sequence row count is taken from lod[0][1] only, so every sequence + // must have exactly that many rows; with a ragged lod the Out shape computed + // below would not match the data. + for (size_t i = 1; i < x_lod_0.size(); i++) { + CHECK_EQ(x_lod_0[i] - x_lod_0[i - 1], x_lod_0[1]) + << "The sequences of Input(X) must all have the same length."; + CHECK_EQ(y_lod_0[i] - y_lod_0[i - 1], y_lod_0[1]) + << "The sequences of Input(Y) must all have the same length."; + } + + int seq_num = x_lod_0.size() - 1; + int x_inner_size = x_dims[1]; + int y_inner_size = y_dims[1]; + int x_batch_size = x_lod_0[1]; + int y_batch_size = y_lod_0[1]; + int M = x_transpose ? x_inner_size : x_batch_size; + int N = y_transpose ? y_batch_size : y_inner_size; + int X_K = x_transpose ? x_batch_size : x_inner_size; + int Y_K = y_transpose ? 
y_inner_size : y_batch_size; + CHECK_EQ(X_K, Y_K) << "K of Input(X) and Input(Y) is not equal"; + + LoD out_lod; + std::vector<uint64_t> out_lod_0(seq_num + 1); + out_lod_0[0] = 0; + for (int i = 0; i < seq_num; i++) { + out_lod_0[i + 1] = out_lod_0[i] + M; + } + out_lod.push_back(out_lod_0); + DDim out_dims( + {static_cast<int64_t>(out_lod_0.back()), static_cast<int64_t>(N)}); + param_.Out->set_lod(out_lod); + param_.Out->Resize(out_dims); + return true; +} + +// Binds X, Y and Out from the scope and reads the transpose_X, transpose_Y and +// alpha attributes. +bool SearchAlignedMatMulOpLite::AttachImpl(const cpp::OpDesc& op_desc, + lite::Scope* scope) { + CHECK(!op_desc.Input("X").empty()); + CHECK(!op_desc.Input("Y").empty()); + CHECK(!op_desc.Output("Out").empty()); + auto X = op_desc.Input("X").front(); + auto Y = op_desc.Input("Y").front(); + auto Out = op_desc.Output("Out").front(); + param_.X = GetVar<lite::Tensor>(scope, X); + param_.Y = GetVar<lite::Tensor>(scope, Y); + param_.Out = GetMutableVar<lite::Tensor>(scope, Out); + param_.transpose_X = op_desc.GetAttr<bool>("transpose_X"); + param_.transpose_Y = op_desc.GetAttr<bool>("transpose_Y"); + param_.alpha = op_desc.GetAttr<float>("alpha"); + return true; +} + +} // namespace operators +} // namespace lite +} // namespace paddle + +REGISTER_LITE_OP(search_aligned_mat_mul, + paddle::lite::operators::SearchAlignedMatMulOpLite); diff --git a/lite/operators/search_aligned_mat_mul_op.h b/lite/operators/search_aligned_mat_mul_op.h new file mode 100644 index 0000000000..7321b7e9d1 --- /dev/null +++ b/lite/operators/search_aligned_mat_mul_op.h @@ -0,0 +1,47 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include +#include +#include "lite/core/op_lite.h" +#include "lite/core/scope.h" +#include "lite/utils/all.h" + +namespace paddle { +namespace lite { +namespace operators { + +class SearchAlignedMatMulOpLite : public OpLite { + public: + SearchAlignedMatMulOpLite() {} + + explicit SearchAlignedMatMulOpLite(const std::string &type) : OpLite(type) {} + + bool CheckShape() const override; + + bool InferShape() const override; + + void AttachKernel(KernelBase *kernel) override { kernel->SetParam(param_); } + + bool AttachImpl(const cpp::OpDesc &op_desc, lite::Scope *scope) override; + std::string DebugString() const override { return "search_aligned_mat_mul"; } + + private: + mutable MatMulParam param_; +}; + +} // namespace operators +} // namespace lite +} // namespace paddle diff --git a/lite/operators/search_seq_fc_op.cc b/lite/operators/search_seq_fc_op.cc new file mode 100644 index 0000000000..c5cca5331a --- /dev/null +++ b/lite/operators/search_seq_fc_op.cc @@ -0,0 +1,80 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "lite/operators/search_seq_fc_op.h" +#include "lite/core/op_registry.h" + +namespace paddle { +namespace lite { +namespace operators { + +// Requires the x, w and out tensors to be attached. +bool SearchSeqFcOpLite::CheckShape() const { + CHECK_OR_FALSE(param_.x); + CHECK_OR_FALSE(param_.w); + CHECK_OR_FALSE(param_.out); + return true; +} + +// Validates X (2-D, with lod), W and, when present, b; then gives Out the lod +// of X and the shape [x_dims[0], w_dims[0]]. +bool SearchSeqFcOpLite::InferShape() const { + const auto x_dims = param_.x->dims(); + const auto w_dims = param_.w->dims(); + const auto& x_lod = param_.x->lod(); + auto out_size = param_.out_size; + CHECK_EQ(x_dims.size(), 2) << "The Input(X) should be 2-D tensor."; + CHECK(!x_lod.empty()) << "The Input(X) must hold lod info."; + const auto& x_lod_0 = x_lod[0]; + CHECK_GE(x_lod_0.size(), 2) << "The Input(X)'s lod info is corrupted."; + CHECK_EQ(x_dims[0], static_cast<int64_t>(x_lod_0.back())) + << "The Input(X)'s lod info mismatches the actual tensor shape."; + CHECK_EQ(w_dims.size(), 2) << "W should be 2-D tensor."; + CHECK_EQ(x_dims[1], w_dims[1]) << "Wrong shape: x_dims[1] != w_dims[1]"; + CHECK_EQ(w_dims[0], out_size) << "Wrong shape: w_dims[0] != out_size"; + + if (param_.b != nullptr) { + const auto b_dims = param_.b->dims(); + CHECK_EQ(b_dims.size(), 1) << "b should be 1-D tensor."; + CHECK_EQ(b_dims[0], w_dims[0]) << "Wrong shape: b_dims[0] != w_dims[0]"; + } + + param_.out->set_lod(x_lod); + param_.out->Resize({x_dims[0], w_dims[0]}); + return true; +} + +// Binds X, W and Out (plus b when has_bias is set) from the scope and reads +// the out_size attribute. +bool SearchSeqFcOpLite::AttachImpl(const cpp::OpDesc& op_desc, + lite::Scope* scope) { + CHECK(!op_desc.Input("X").empty()); + CHECK(!op_desc.Input("W").empty()); + CHECK(!op_desc.Output("Out").empty()); + auto x = op_desc.Input("X").front(); + auto w = op_desc.Input("W").front(); + auto out = op_desc.Output("Out").front(); + // Use the lookup helper that search_aligned_mat_mul_op.cc uses instead of + // dereferencing scope->FindVar() unchecked. + // NOTE(review): assumes GetMutableVar rejects a missing variable - confirm. + param_.x = GetMutableVar<lite::Tensor>(scope, x); + param_.w = GetMutableVar<lite::Tensor>(scope, w); + param_.out = GetMutableVar<lite::Tensor>(scope, out); + param_.out_size = op_desc.GetAttr<int>("out_size"); + bool has_bias = op_desc.GetAttr<bool>("has_bias"); + // InferShape and the kernel both test b against nullptr, so drop any bias + // left over from an earlier attach before deciding whether to bind one. + param_.b = nullptr; + if (has_bias) { + CHECK(!op_desc.Input("b").empty()); + 
auto b = op_desc.Input("b").front(); + param_.b = GetMutableVar<lite::Tensor>(scope, b); + } + return true; +} + +} // namespace operators +} // namespace lite +} // namespace paddle + +REGISTER_LITE_OP(search_seq_fc, paddle::lite::operators::SearchSeqFcOpLite); diff --git a/lite/operators/search_seq_fc_op.h b/lite/operators/search_seq_fc_op.h new file mode 100644 index 0000000000..3c4f7d82bf --- /dev/null +++ b/lite/operators/search_seq_fc_op.h @@ -0,0 +1,47 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once +#include +#include +#include "lite/core/op_lite.h" +#include "lite/core/scope.h" +#include "lite/utils/all.h" + +namespace paddle { +namespace lite { +namespace operators { + +class SearchSeqFcOpLite : public OpLite { + public: + SearchSeqFcOpLite() {} + + explicit SearchSeqFcOpLite(const std::string &type) : OpLite(type) {} + + bool CheckShape() const override; + + bool InferShape() const override; + + void AttachKernel(KernelBase *kernel) override { kernel->SetParam(param_); } + + bool AttachImpl(const cpp::OpDesc &op_desc, lite::Scope *scope) override; + std::string DebugString() const override { return "search_seq_fc"; } + + private: + mutable SearchSeqFcParam param_; +}; + +} // namespace operators +} // namespace lite +} // namespace paddle diff --git a/lite/tests/kernels/CMakeLists.txt b/lite/tests/kernels/CMakeLists.txt index 02d40ce6cc..549fabab5a 100644 --- a/lite/tests/kernels/CMakeLists.txt +++ b/lite/tests/kernels/CMakeLists.txt @@ -39,6 +39,8 @@ if(LITE_BUILD_EXTRA) lite_cc_test(test_kernel_anchor_generator_compute SRCS anchor_generator_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) #lite_cc_test(test_kernel_generate_proposals_compute SRCS generate_proposals_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) #lite_cc_test(test_kernel_roi_align_compute SRCS roi_align_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_search_aligned_mat_mul_compute SRCS search_aligned_mat_mul_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_search_seq_fc_compute SRCS search_seq_fc_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) endif() lite_cc_test(test_kernel_pad2d_compute SRCS pad2d_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} 
${host_kernels}) lite_cc_test(test_kernel_prior_box_compute SRCS prior_box_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) diff --git a/lite/tests/kernels/search_aligned_mat_mul_compute_test.cc b/lite/tests/kernels/search_aligned_mat_mul_compute_test.cc new file mode 100644 index 0000000000..cb824931ae --- /dev/null +++ b/lite/tests/kernels/search_aligned_mat_mul_compute_test.cc @@ -0,0 +1,220 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include "lite/api/paddle_use_kernels.h" +#include "lite/api/paddle_use_ops.h" +#include "lite/core/arena/framework.h" +#include "lite/tests/utils/fill_data.h" +#include "lite/tests/utils/naive_math_impl.h" + +namespace paddle { +namespace lite { + +class SearchAlignedMatMulComputeTester : public arena::TestCase { + protected: + // common attributes for this op. 
+ std::string x_ = "X"; + std::string y_ = "Y"; + bool x_transpose_; + bool y_transpose_; + float alpha_; + std::string out_ = "Out"; + DDim x_dims_; + DDim y_dims_; + LoD x_lod_; + LoD y_lod_; + + public: + SearchAlignedMatMulComputeTester(const Place& place, + const std::string& alias, + bool x_transpose, + bool y_transpose, + float alpha, + const DDim& x_dims, + const DDim& y_dims, + const LoD& x_lod, + const LoD& y_lod) + : TestCase(place, alias), + x_transpose_(x_transpose), + y_transpose_(y_transpose), + alpha_(alpha), + x_dims_(x_dims), + y_dims_(y_dims), + x_lod_(x_lod), + y_lod_(y_lod) {} + + void RunBaseline(Scope* scope) override { + auto x = scope->FindTensor(x_); + auto y = scope->FindTensor(y_); + CHECK(x); + CHECK(y); + const auto x_data = x->data(); + const auto y_data = y->data(); + auto out = scope->NewTensor(out_); + CHECK(out); + + const auto x_dims = x->dims(); + const auto y_dims = y->dims(); + const auto& x_lod = x->lod(); + const auto& y_lod = y->lod(); + const auto& x_lod_0 = x_lod[0]; + const auto& y_lod_0 = y_lod[0]; + + int seq_num = x_lod_0.size() - 1; + int x_inner_size = x_dims[1]; + int y_inner_size = y_dims[1]; + int x_batch_size = x_lod_0[1]; + int y_batch_size = y_lod_0[1]; + int M = x_transpose_ ? x_inner_size : x_batch_size; + int N = y_transpose_ ? y_batch_size : y_inner_size; + int X_K = x_transpose_ ? x_batch_size : x_inner_size; + int Y_K = y_transpose_ ? y_inner_size : y_batch_size; + CHECK_EQ(X_K, Y_K) << "K of Input(X) and Input(Y) is not equal"; + int K = X_K; + int x_stride = x_batch_size * x_inner_size; + int y_stride = y_batch_size * y_inner_size; + int out_stride = M * N; + int lda = x_transpose_ ? M : K; + int ldb = y_transpose_ ? 
K : N; + int ldc = N; + + LoD out_lod; + std::vector out_lod_0(seq_num + 1); + out_lod_0[0] = 0; + for (int i = 0; i < seq_num; i++) { + out_lod_0[i + 1] = out_lod_0[i] + M; + } + out_lod.push_back(out_lod_0); + DDim out_dims( + {static_cast(out_lod_0.back()), static_cast(N)}); + out->set_lod(out_lod); + out->Resize(out_dims); + + auto out_data = out->mutable_data(); + for (int i = 0; i < seq_num; i++) { + basic_gemm(x_transpose_, + y_transpose_, + M, + N, + K, + alpha_, + x_data + i * x_stride, + lda, + y_data + i * y_stride, + ldb, + 0, + out_data + i * out_stride, + ldc, + nullptr, + false, + false); + } + } + + void PrepareOpDesc(cpp::OpDesc* op_desc) { + op_desc->SetType("search_aligned_mat_mul"); + op_desc->SetInput("X", {x_}); + op_desc->SetInput("Y", {y_}); + op_desc->SetOutput("Out", {out_}); + op_desc->SetAttr("transpose_X", x_transpose_); + op_desc->SetAttr("transpose_Y", y_transpose_); + op_desc->SetAttr("alpha", alpha_); + } + + void PrepareData() override { + std::vector x_data(x_dims_.production()); + std::vector y_data(y_dims_.production()); + fill_data_rand(x_data.data(), -1.f, 1.f, x_dims_.production()); + fill_data_rand(y_data.data(), -1.f, 1.f, y_dims_.production()); + SetCommonTensor(x_, x_dims_, x_data.data(), x_lod_); + SetCommonTensor(y_, y_dims_, y_data.data(), y_lod_); + } +}; + +void test_search_aligned_mat_mul(Place place) { + for (int seq_num : {1, 2}) { + for (int x_batch_size : {1, 3}) { + for (int x_inner_size : {1, 5}) { + for (int out_inner_size : {1, 4}) { + for (bool x_transpose : {true, false}) { + for (bool y_transpose : {true, false}) { + for (float alpha : {1., 2.}) { + // infer x_dims and y_dims + int y_batch_size; + int y_inner_size; + if (x_transpose) { + if (y_transpose) { + y_batch_size = out_inner_size; + y_inner_size = x_batch_size; + } else { + y_batch_size = x_batch_size; + y_inner_size = out_inner_size; + } + } else { + if (y_transpose) { + y_batch_size = out_inner_size; + y_inner_size = x_inner_size; + } else { + 
y_batch_size = x_inner_size; + y_inner_size = out_inner_size; + } + } + std::vector x_lod_0(seq_num + 1); + std::vector y_lod_0(seq_num + 1); + x_lod_0[0] = 0; + y_lod_0[0] = 0; + for (int i = 0; i < seq_num; i++) { + x_lod_0[i + 1] = x_lod_0[i] + x_batch_size; + y_lod_0[i + 1] = y_lod_0[i] + y_batch_size; + } + LoD x_lod; + LoD y_lod; + x_lod.push_back(x_lod_0); + y_lod.push_back(y_lod_0); + DDim x_dims({static_cast(x_lod_0.back()), + static_cast(x_inner_size)}); + DDim y_dims({static_cast(y_lod_0.back()), + static_cast(y_inner_size)}); + + std::unique_ptr tester( + new SearchAlignedMatMulComputeTester(place, + "def", + x_transpose, + y_transpose, + alpha, + x_dims, + y_dims, + x_lod, + y_lod)); + arena::Arena arena(std::move(tester), place, 5e-4); + arena.TestPrecision(); + } + } + } + } + } + } + } +} + +TEST(SearchAlignedMatMul, precision) { +#ifdef LITE_WITH_X86 + Place place(TARGET(kX86)); + test_search_aligned_mat_mul(place); +#endif +} + +} // namespace lite +} // namespace paddle diff --git a/lite/tests/kernels/search_seq_fc_compute_test.cc b/lite/tests/kernels/search_seq_fc_compute_test.cc new file mode 100644 index 0000000000..988d3a27cc --- /dev/null +++ b/lite/tests/kernels/search_seq_fc_compute_test.cc @@ -0,0 +1,177 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include "lite/api/paddle_use_kernels.h" +#include "lite/api/paddle_use_ops.h" +#include "lite/core/arena/framework.h" +#include "lite/tests/utils/fill_data.h" +#include "lite/tests/utils/naive_math_impl.h" + +namespace paddle { +namespace lite { + +class SearchSeqFcOPTest : public arena::TestCase { + protected: + // common attributes for this op. + std::string x_ = "x"; + std::string w_ = "w"; + std::string b_ = "b"; + std::string out_ = "out"; + DDim x_dims_; + DDim w_dims_; + DDim b_dims_; + LoD x_lod_; + bool has_bias_; + int out_size_; + + public: + SearchSeqFcOPTest(const Place& place, + const std::string& alias, + DDim x_dims, + DDim w_dims, + DDim b_dims, + LoD x_lod, + bool has_bias, + int out_size) + : TestCase(place, alias), + x_dims_(x_dims), + w_dims_(w_dims), + b_dims_(b_dims), + x_lod_(x_lod), + has_bias_(has_bias), + out_size_(out_size) {} + + void RunBaseline(Scope* scope) override { + auto x = scope->FindTensor(x_); + auto w = scope->FindTensor(w_); + CHECK(x); + CHECK(w); + auto out = scope->NewTensor(out_); + CHECK(out); + + const auto x_data = x->data(); + const auto w_data = w->data(); + const auto x_dims = x->dims(); + const auto w_dims = w->dims(); + const auto& x_lod = x->lod(); + CHECK_EQ(x_dims.size(), 2) << "The Input(X) should be 2-D tensor."; + CHECK(!x_lod.empty()) << "The Input(X) must hold lod info."; + const auto& x_lod_0 = x_lod[0]; + CHECK_GE(x_lod_0.size(), 2) << "The Input(X)'s lod info is corrupted."; + CHECK_EQ(x_dims[0], static_cast(x_lod_0.back())) + << "The Input(X)'s lod info mismatches the actual tensor shape."; + CHECK_EQ(w_dims.size(), 2) << "W should be 2-D tensor."; + CHECK_EQ(x_dims[1], w_dims[1]) << "Wrong shape: x_dims[1] != w_dims[1]"; + CHECK_EQ(w_dims[0], out_size_) << "Wrong shape: w_dims[0] != out_size"; + + const float* b_data = nullptr; + if (has_bias_) { + auto b = scope->FindTensor(b_); + CHECK(b); + auto b_dims = b->dims(); + CHECK_EQ(b_dims.size(), 1) << "b should be 1-D tensor."; + 
CHECK_EQ(b_dims[0], w_dims[0]) << "Wrong shape: b_dims[0] != w_dims[0]"; + b_data = b->data(); + } + + out->set_lod(x_lod); + out->Resize({x_dims[0], w_dims[0]}); + + int M = x_dims[0]; + int K = x_dims[1]; + int N = w_dims[0]; + auto out_data = out->mutable_data(); + basic_gemm(false, + true, + M, + N, + K, + 1.f, + x_data, + K, + w_data, + K, + 0, + out_data, + N, + nullptr, + false, + false); + if (b_data != nullptr) { + for (int i = 0; i < M; i++) { + for (int j = 0; j < N; j++) { + out_data[i * N + j] += b_data[j]; + } + } + } + } + + void PrepareOpDesc(cpp::OpDesc* op_desc) { + op_desc->SetType("search_seq_fc"); + op_desc->SetInput("X", {x_}); + op_desc->SetInput("W", {w_}); + if (has_bias_) { + op_desc->SetInput("b", {b_}); + } + op_desc->SetAttr("has_bias", has_bias_); + op_desc->SetAttr("out_size", out_size_); + op_desc->SetOutput("Out", {out_}); + } + + void PrepareData() override { + std::vector x_data(x_dims_.production()); + std::vector w_data(w_dims_.production()); + fill_data_rand(x_data.data(), -1.f, 1.f, x_dims_.production()); + fill_data_rand(w_data.data(), -1.f, 1.f, w_dims_.production()); + SetCommonTensor(x_, x_dims_, x_data.data(), x_lod_); + SetCommonTensor(w_, w_dims_, w_data.data()); + if (has_bias_) { + std::vector b_data(b_dims_.production()); + fill_data_rand(b_data.data(), -1.f, 1.f, b_dims_.production()); + SetCommonTensor(b_, b_dims_, b_data.data()); + } + } +}; + +void test_search_seq_fc(Place place) { + for (auto x_lod_0 : {std::vector({0, 1, 3}), + std::vector({0, 3, 4, 5})}) { + for (auto feature_size : {2, 9}) { + for (auto out_size : {3, 5}) { + for (auto has_bias : {true, false}) { + DDim x_dims({static_cast(x_lod_0.back()), feature_size}); + DDim w_dims({out_size, feature_size}); + DDim b_dims({has_bias ? 
out_size : 0}); + LoD x_lod; + x_lod.push_back(x_lod_0); + std::unique_ptr tester(new SearchSeqFcOPTest( + place, "def", x_dims, w_dims, b_dims, x_lod, has_bias, out_size)); + arena::Arena arena(std::move(tester), place, 6e-5); + arena.TestPrecision(); + } + } + } + } +} + +TEST(SearchSeqFcOP, precision) { +#ifdef LITE_WITH_X86 + Place place(TARGET(kX86)); + test_search_seq_fc(place); +#endif +} + +} // namespace lite +} // namespace paddle -- GitLab