diff --git a/paddle/fluid/lite/kernels/arm/fc_compute.cc b/paddle/fluid/lite/kernels/arm/fc_compute.cc
index b26551e0533a5ae68c930cc1b9512ba0ca13253a..dcf3e4d81e6e0889b82b4238693fcf53dcf81bc7 100644
--- a/paddle/fluid/lite/kernels/arm/fc_compute.cc
+++ b/paddle/fluid/lite/kernels/arm/fc_compute.cc
@@ -22,6 +22,10 @@ namespace lite {
 namespace kernels {
 namespace arm {
 
+void FcCompute::PrepareForRun() {
+  // TODO(TJ): transpose weight
+}
+
 void FcCompute::Run() {
   auto& param = this->Param<param_t>();
   auto x_dims = param.input->dims();
@@ -54,9 +58,8 @@ void FcCompute::Run() {
       lite::arm::math::fill_bias_fc(o_data, b_data, x_h, n);
     }
   } else {
-    // use sgemmv
-    // sgemv((const float*)weights, (const float*)din, (float*)dout,
-    //       false, n, x_w, _param->_flag_bias, (float*)bias, false);
+    lite::arm::math::sgemv(w_data, i_data, o_data, false, n, x_w,
+                           b_data != nullptr, b_data, false);
   }
 }
diff --git a/paddle/fluid/lite/kernels/arm/fc_compute.h b/paddle/fluid/lite/kernels/arm/fc_compute.h
index 414517843354f638ed37f54ef596dc6db53193ce..b72b24b4844cab777a7703c2a2ad2577d2c90e19 100644
--- a/paddle/fluid/lite/kernels/arm/fc_compute.h
+++ b/paddle/fluid/lite/kernels/arm/fc_compute.h
@@ -25,6 +25,8 @@ class FcCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
  public:
   using param_t = operators::FcParam;
 
+  void PrepareForRun() override;
+
   void Run() override;
 
   TargetType target() const override;
diff --git a/paddle/fluid/lite/kernels/arm/mul_compute.h b/paddle/fluid/lite/kernels/arm/mul_compute.h
new file mode 100644
index 0000000000000000000000000000000000000000..4d1abba94c277f56065ca61c617eefffbf173785
--- /dev/null
+++ b/paddle/fluid/lite/kernels/arm/mul_compute.h
@@ -0,0 +1,77 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include "paddle/fluid/lite/core/kernel.h"
+#include "paddle/fluid/lite/core/op_registry.h"
+#include "paddle/fluid/lite/core/types.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace arm {
+
+template <typename T>
+void mul_compute_eigen(const T* x, int x_h, int x_w, const T* y, int y_h,
+                       int y_w, T* out) {
+  using matrix_t =
+      Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
+
+  Eigen::Map<const matrix_t> X(x, x_h, x_w);
+  Eigen::Map<const matrix_t> Y(y, y_h, y_w);
+  Eigen::Map<matrix_t> Out(out, x_h, y_w);
+
+  Out = X * Y;
+}
+
+class MulCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
+ public:
+  using param_t = operators::MulParam;
+
+  void Run() override {
+    auto& param = Param<param_t>();
+    core::dim2 x_shape(
+        {static_cast<int>(
+             param.x->dims().Slice(0, param.x_num_col_dims).production()),
+         static_cast<int>(
+             param.x->dims()
+                 .Slice(param.x_num_col_dims, param.x->dims().size())
+                 .production())});
+    core::dim2 y_shape(
+        {static_cast<int>(
+             param.y->dims().Slice(0, param.y_num_col_dims).production()),
+         static_cast<int>(
+             param.y->dims()
+                 .Slice(param.y_num_col_dims, param.y->dims().size())
+                 .production())});
+
+    mul_compute_eigen(param.x->data<float>(), x_shape.x, x_shape.y,  //
+                      param.y->data<float>(), y_shape.x, y_shape.y,  //
+                      param.output->mutable_data<float>());
+  }
+
+  virtual ~MulCompute() = default;
+};
+
+}  // namespace arm
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+REGISTER_LITE_KERNEL(mul, kARM, kFloat, kNCHW,
+                     paddle::lite::kernels::arm::MulCompute, def)
+    .BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))})
+    .BindInput("Y", {LiteType::GetTensorTy(TARGET(kARM))})
+    .BindOutput("Out",
+                {LiteType::GetTensorTy(TARGET(kARM))})
+    .Finalize();
diff --git a/paddle/fluid/lite/kernels/arm/mul_compute_test.cc b/paddle/fluid/lite/kernels/arm/mul_compute_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..ee7c1b655faa4b16795b7cd9a2a1fcb13f86ecb9
--- /dev/null
+++ b/paddle/fluid/lite/kernels/arm/mul_compute_test.cc
@@ -0,0 +1,149 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <gtest/gtest.h>
+#include <memory>
+#include "paddle/fluid/lite/arm/math/funcs.h"
+#include "paddle/fluid/lite/core/op_registry.h"
+#include "paddle/fluid/lite/kernels/arm/fc_compute.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace arm {
+
+TEST(fc_arm, retrive_op) {
+  auto fc =
+      KernelRegistry::Global().Create<TARGET(kARM), PRECISION(kFloat)>("fc");
+  ASSERT_FALSE(fc.empty());
+  ASSERT_TRUE(fc.front());
+}
+
+TEST(fc_arm, init) {
+  FcCompute fc;
+  ASSERT_EQ(fc.precision(), PRECISION(kFloat));
+  ASSERT_EQ(fc.target(), TARGET(kARM));
+}
+
+TEST(fc_arm, compare_test) {
+  lite::Tensor x, w, b, out, ref;
+  constexpr int batch_size = 2;
+  x.Resize({batch_size, 3});
+  w.Resize({3, 4});
+  b.Resize({1, 4});
+  out.Resize({batch_size, 4});
+  ref.Resize({batch_size, 4});
+
+  auto x_data = x.mutable_data<float>();
+  auto w_data = w.mutable_data<float>();
+  auto b_data = b.mutable_data<float>();
+  auto out_data = out.mutable_data<float>();
+  auto ref_data = ref.mutable_data<float>();
+
+  for (int64_t i = 0; i < x.dims().product(); i++) {
+    x_data[i] = static_cast<float>(i);
+  }
+  for (int64_t i = 0; i < w.dims().product(); i++) {
+    w_data[i] = static_cast<float>(i);
+  }
+  for (int64_t i = 0; i < b.dims().product(); i++) {
+    b_data[i] = static_cast<float>(i);
+  }
+
+  lite::arm::math::fc_compute_eigen(x_data, batch_size, 3,  //
+                                    w_data, 3, 4,           //
+                                    b_data, ref_data);
+
+  // fc compute kernel
+  FcCompute fc;
+  operators::FcParam param;
+
+  param.in_num_col_dims = 1;
+  param.input = &x;
+  param.w = &w;
+  param.bias = &b;
+  param.output = &out;
+  param.in_mat_dims = x.dims();
+
+  DeviceInfo::Init();
+  std::unique_ptr<KernelContext> ctx(new KernelContext);
+  ctx->As<ARMContext>();
+  fc.SetParam(param);
+  fc.SetContext(std::move(ctx));
+  fc.Run();
+
+  VLOG(3) << "output vs ref";
+  for (int i = 0; i < out.dims().product(); i++) {
+    VLOG(3) << out_data[i] << " vs " << ref_data[i];
+  }
+
+  for (int i = 0; i < out.dims().product(); ++i) {
+    EXPECT_NEAR(out_data[i], ref_data[i], 1e-5);
+  }
+}
+
+TEST(fc_arm, num_col_dims) {
+  FcCompute fc;
+  operators::FcParam param;
+
+  lite::Tensor x;
+  lite::Tensor w;
+  lite::Tensor bias;
+  lite::Tensor output;
+
+  x.Resize({1, 2, 3});
+  w.Resize({3, 4});
+  bias.Resize({1, 4});
+  output.Resize({2, 4});
+
+  auto* x_data = x.mutable_data<float>();
+  auto* w_data = w.mutable_data<float>();
+  auto* bias_data = bias.mutable_data<float>();
+  auto* output_data = output.mutable_data<float>();
+
+  for (int64_t i = 0; i < x.dims().product(); i++) {
+    x_data[i] = static_cast<float>(i);
+  }
+  for (int64_t i = 0; i < w.dims().product(); i++) {
+    w_data[i] = static_cast<float>(i);
+  }
+  for (int64_t i = 0; i < bias.dims().product(); i++) {
+    bias_data[i] = static_cast<float>(i);
+  }
+  for (int64_t i = 0; i < output.dims().product(); i++) {
+    output_data[i] = static_cast<float>(i);
+  }
+
+  param.in_num_col_dims = 2;
+  param.input = &x;
+  param.w = &w;
+  param.bias = &bias;
+  param.output = &output;
+  param.in_mat_dims = x.dims();
+
+  std::unique_ptr<KernelContext> ctx(new KernelContext);
+  ctx->As<ARMContext>();
+  DeviceInfo::Init();
+
+  fc.SetParam(param);
+  fc.SetContext(std::move(ctx));
+  fc.Run();
+}
+
+}  // namespace arm
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+USE_LITE_KERNEL(fc, kARM, kFloat, kNCHW, def);
diff --git a/paddle/fluid/lite/operators/op_params.h b/paddle/fluid/lite/operators/op_params.h
index cd87a9d2d39e7a09392baa59b5a6eb19e8414015..528160c1fb07b1c455c39d20195a3940408a9af2 100644
--- a/paddle/fluid/lite/operators/op_params.h
+++ b/paddle/fluid/lite/operators/op_params.h
@@ -57,6 +57,7 @@ struct FcParam {
   lite::Tensor* output{};
   lite::DDim in_mat_dims;
   int in_num_col_dims{1};
+  bool weight_transposed{false};
 };
 
 struct ReluParam {