提交 c341bc41 编写于 作者: N nhzlx

Merge branch 'incubate/lite' of http://10.87.145.36/inference/paddlelite into xzl/incubate/lite

...@@ -23,10 +23,6 @@ namespace kernels { ...@@ -23,10 +23,6 @@ namespace kernels {
namespace arm { namespace arm {
void FcCompute::PrepareForRun() { void FcCompute::PrepareForRun() {
// TODO(TJ): transpose weight
}
void FcCompute::Run() {
auto& param = this->Param<operators::FcParam>(); auto& param = this->Param<operators::FcParam>();
auto x_dims = param.input->dims(); auto x_dims = param.input->dims();
auto w_dims = param.w->dims(); auto w_dims = param.w->dims();
...@@ -35,29 +31,52 @@ void FcCompute::Run() { ...@@ -35,29 +31,52 @@ void FcCompute::Run() {
CHECK_EQ(w_dims.size(), 2UL); CHECK_EQ(w_dims.size(), 2UL);
CHECK_EQ(param.output->dims().size(), 2UL); CHECK_EQ(param.output->dims().size(), 2UL);
m_ = x_dims.Slice(0, param.in_num_col_dims).production();
k_ = x_dims.Slice(param.in_num_col_dims, x_dims.size()).production();
n_ = w_dims[1];
CHECK_EQ(k_, static_cast<int>(w_dims[0]));
if (m_ == 1) {
if (!transed_weight_) {
transed_weight_ = new Tensor;
}
transed_weight_->Resize({n_, k_});
const auto* w_data = param.w->data<float>();
auto* t_data = transed_weight_->mutable_data<float>();
int i = 0;
for (int nn = 0; nn < n_; ++nn) {
for (int kk = 0; kk < k_; ++kk) {
t_data[i++] = w_data[kk * n_ + nn];
}
}
}
}
void FcCompute::Run() {
auto& param = this->Param<operators::FcParam>();
const auto* i_data = param.input->data<float>(); const auto* i_data = param.input->data<float>();
const auto* w_data = param.w->data<float>(); const auto* w_data = param.w->data<float>();
const auto* b_data = param.bias ? param.bias->data<float>() : nullptr; const auto* b_data = param.bias ? param.bias->data<float>() : nullptr;
auto* o_data = param.output->mutable_data<float>(); auto* o_data = param.output->mutable_data<float>();
int x_h = x_dims.Slice(0, param.in_num_col_dims).production();
int x_w = x_dims.Slice(param.in_num_col_dims, x_dims.size()).production();
int n = w_dims[1];
CHECK_EQ(x_w, static_cast<int>(w_dims[0]));
auto& ctx = this->ctx_->template As<ARMContext>(); auto& ctx = this->ctx_->template As<ARMContext>();
if (x_h > 1) { if (m_ > 1) {
float* packed_in = static_cast<float*>(ctx.workspace_data<float>()) + float* packed_in = static_cast<float*>(ctx.workspace_data<float>()) +
ctx.l2_cache_size() / sizeof(float); ctx.l2_cache_size() / sizeof(float);
lite::arm::math::prepackA(packed_in, i_data, x_w, 0, x_h, 0, x_w, false, lite::arm::math::prepackA(packed_in, i_data, k_, 0, m_, 0, k_, false, &ctx);
&ctx); lite::arm::math::sgemm_prepack(packed_in, w_data, b_data, o_data, m_, n_,
lite::arm::math::sgemm_prepack(packed_in, w_data, b_data, o_data, x_h, n, k_, false, false, false, &ctx);
x_w, false, false, false, &ctx);
if (param.bias) { if (param.bias) {
CHECK_EQ(param.bias->numel(), n); CHECK_EQ(param.bias->numel(), n_);
lite::arm::math::fill_bias_fc(o_data, b_data, x_h, n); lite::arm::math::fill_bias_fc(o_data, b_data, m_, n_);
} }
} else { } else {
lite::arm::math::sgemv(w_data, i_data, o_data, false, n, x_w, CHECK(transed_weight_);
const auto* t_data = transed_weight_->data<float>();
lite::arm::math::sgemv(t_data, i_data, o_data, false, n_, k_,
b_data != nullptr, b_data, false); b_data != nullptr, b_data, false);
} }
} }
......
...@@ -29,7 +29,15 @@ class FcCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> { ...@@ -29,7 +29,15 @@ class FcCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
void Run() override; void Run() override;
virtual ~FcCompute() = default; ~FcCompute() override {
if (transed_weight_) {
delete transed_weight_;
}
};
private:
lite::Tensor* transed_weight_{nullptr};
int m_, n_, k_;
}; };
} // namespace arm } // namespace arm
......
...@@ -14,6 +14,11 @@ ...@@ -14,6 +14,11 @@
#include "paddle/fluid/lite/kernels/arm/fc_compute.h" #include "paddle/fluid/lite/kernels/arm/fc_compute.h"
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include <algorithm>
#include <iostream>
#include <memory>
#include <random>
#include <utility>
#include <vector> #include <vector>
#include "paddle/fluid/lite/arm/math/funcs.h" #include "paddle/fluid/lite/arm/math/funcs.h"
#include "paddle/fluid/lite/core/op_registry.h" #include "paddle/fluid/lite/core/op_registry.h"
...@@ -23,6 +28,17 @@ namespace lite { ...@@ -23,6 +28,17 @@ namespace lite {
namespace kernels { namespace kernels {
namespace arm { namespace arm {
// Fills the first `n` elements of `a` with pseudo-random values spread
// uniformly between `lower` and `upper`.
//
// Every call builds a fresh std::mt19937 seeded from a function-local static
// counter that starts at 100 and is bumped once per call, so consecutive
// calls yield different data while the seed sequence itself is fixed. Being a
// function-local static of a template, the counter is separate for each T.
// A non-positive `n` writes nothing (the counter is still advanced).
template <typename T>
void FillData(T* a, const int n, const T lower = static_cast<T>(-2.f),
              const T upper = static_cast<T>(2.f)) {
  static unsigned int next_seed = 100;
  std::mt19937 engine(next_seed++);
  std::uniform_real_distribution<double> unit_draw(0, 1);
  // `auto` keeps the (possibly promoted) type of the subtraction, exactly as
  // it appears inside the arithmetic expression below.
  const auto span = upper - lower;
  // Sample from the (0, 1) distribution, then scale and shift into range.
  std::generate_n(a, n, [&]() {
    return static_cast<T>(unit_draw(engine) * span + lower);
  });
}
TEST(fc_arm, retrive_op) { TEST(fc_arm, retrive_op) {
auto fc = auto fc =
KernelRegistry::Global().Create<TARGET(kARM), PRECISION(kFloat)>("fc"); KernelRegistry::Global().Create<TARGET(kARM), PRECISION(kFloat)>("fc");
...@@ -37,108 +53,117 @@ TEST(fc_arm, init) { ...@@ -37,108 +53,117 @@ TEST(fc_arm, init) {
} }
TEST(fc_arm, compare_test) { TEST(fc_arm, compare_test) {
lite::Tensor x, w, b, out, ref; using T = float;
constexpr int batch_size = 2;
x.Resize({batch_size, 3}); for (int m : {1, 2, 3, 4}) {
w.Resize({3, 4}); for (int n : {1, 2, 3, 4}) {
b.Resize({1, 4}); for (int k : {1, 2, 3, 4}) {
out.Resize({batch_size, 4}); for (bool with_bias : {true, false}) {
ref.Resize({batch_size, 4}); VLOG(3) << "m: " << m << ", n: " << n << ", k: " << k
<< (with_bias ? ", with bias" : "");
auto x_data = x.mutable_data<float>(); lite::Tensor x, w, b, out, ref;
auto w_data = w.mutable_data<float>();
auto b_data = b.mutable_data<float>(); x.Resize({m, k});
auto out_data = out.mutable_data<float>(); w.Resize({k, n});
auto ref_data = ref.mutable_data<float>(); b.Resize({1, n});
out.Resize({m, n});
for (int64_t i = 0; i < x.dims().product(); i++) { ref.Resize({m, n});
x_data[i] = static_cast<float>(i);
} auto* x_data = x.mutable_data<T>();
for (int64_t i = 0; i < w.dims().product(); i++) { auto* w_data = w.mutable_data<T>();
w_data[i] = static_cast<float>(i); auto* b_data = with_bias ? b.mutable_data<T>() : nullptr;
}
for (int64_t i = 0; i < b.dims().product(); i++) { auto* out_data = out.mutable_data<T>();
b_data[i] = static_cast<float>(i); auto* ref_data = ref.mutable_data<T>();
}
FillData<T>(x_data, x.dims().production());
lite::arm::math::fc_compute_eigen(x_data, batch_size, 3, // FillData<T>(w_data, w.dims().production());
w_data, 3, 4, // FillData<T>(out_data, out.dims().production(), 0, 0);
b_data, ref_data); FillData<T>(ref_data, ref.dims().production(), 0, 0);
// fc compute kernel if (with_bias) {
FcCompute fc; FillData<T>(b_data, b.dims().production());
operators::FcParam param; }
param.in_num_col_dims = 1; FcCompute fc;
param.input = &x; operators::FcParam param;
param.w = &w;
param.bias = &b; param.input = &x;
param.output = &out; param.w = &w;
param.in_mat_dims = x.dims(); param.bias = with_bias ? &b : nullptr;
param.output = &out;
DeviceInfo::Init(); param.in_num_col_dims = 1;
std::unique_ptr<KernelContext> ctx(new KernelContext); param.in_mat_dims = x.dims();
ctx->As<ARMContext>();
fc.SetParam(param); DeviceInfo::Init();
fc.SetContext(std::move(ctx)); std::unique_ptr<KernelContext> ctx(new KernelContext);
fc.Run(); ctx->As<ARMContext>();
fc.SetParam(param);
VLOG(3) << "output vs ref"; fc.SetContext(std::move(ctx));
for (int i = 0; i < out.dims().product(); i++) { fc.PrepareForRun();
VLOG(3) << out_data[i] << " vs " << ref_data[i]; fc.Run();
}
lite::arm::math::fc_compute_eigen(x_data, m, k, w_data, k, n, b_data,
for (int i = 0; i < out.dims().product(); ++i) { ref_data);
EXPECT_NEAR(out_data[i], ref_data[i], 1e-5); for (int i = 0; i < out.dims().production(); i++) {
EXPECT_NEAR(out_data[i], ref_data[i], 1e-3);
}
}
}
}
} }
} }
TEST(fc_arm, num_col_dims) { TEST(fc_arm, num_col_dims) {
FcCompute fc; using T = float;
operators::FcParam param;
for (bool with_bias : {true, false}) {
lite::Tensor x; lite::Tensor x, w, b, out, ref;
lite::Tensor w;
lite::Tensor bias; x.Resize({1, 2, 3});
lite::Tensor output; w.Resize({3, 4});
b.Resize({1, 4});
x.Resize({1, 2, 3}); out.Resize({2, 4});
w.Resize({3, 4}); ref.Resize({2, 4});
bias.Resize({1, 4});
output.Resize({2, 4}); auto* x_data = x.mutable_data<float>();
auto* w_data = w.mutable_data<float>();
auto* x_data = x.mutable_data<float>(); auto* b_data = with_bias ? b.mutable_data<T>() : nullptr;
auto* w_data = w.mutable_data<float>();
auto* bias_data = bias.mutable_data<float>(); auto* out_data = out.mutable_data<T>();
auto* output_data = output.mutable_data<float>(); auto* ref_data = ref.mutable_data<T>();
for (int64_t i = 0; i < x.dims().product(); i++) { FillData<T>(x_data, x.dims().production());
x_data[i] = static_cast<float>(i); FillData<T>(w_data, w.dims().production());
} FillData<T>(out_data, out.dims().production(), 0, 0);
for (int64_t i = 0; i < w.dims().product(); i++) { FillData<T>(ref_data, ref.dims().production(), 0, 0);
w_data[i] = static_cast<float>(i); if (with_bias) {
FillData<T>(b_data, b.dims().production());
}
FcCompute fc;
operators::FcParam param;
param.input = &x;
param.w = &w;
param.bias = with_bias ? &b : nullptr;
param.output = &out;
param.in_num_col_dims = 2;
param.in_mat_dims = x.dims();
std::unique_ptr<KernelContext> ctx(new KernelContext);
ctx->As<ARMContext>();
DeviceInfo::Init();
fc.SetParam(param);
fc.SetContext(std::move(ctx));
fc.PrepareForRun();
fc.Run();
lite::arm::math::fc_compute_eigen(x_data, 2, 3, w_data, 3, 4, b_data,
ref_data);
for (int i = 0; i < out.dims().production(); i++) {
EXPECT_NEAR(out_data[i], ref_data[i], 1e-3);
}
} }
for (int64_t i = 0; i < bias.dims().product(); i++) {
bias_data[i] = static_cast<float>(i);
}
for (int64_t i = 0; i < output.dims().product(); i++) {
output_data[i] = static_cast<float>(i);
}
param.in_num_col_dims = 2;
param.input = &x;
param.w = &w;
param.bias = &bias;
param.output = &output;
param.in_mat_dims = x.dims();
std::unique_ptr<KernelContext> ctx(new KernelContext);
ctx->As<ARMContext>();
DeviceInfo::Init();
fc.SetParam(param);
fc.SetContext(std::move(ctx));
fc.Run();
} }
} // namespace arm } // namespace arm
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册