提交 299be048 编写于 作者: T tensor-tang

fix mul kernel test

上级 24e4be6a
...@@ -124,6 +124,20 @@ TEST(conv_arm, init) { ...@@ -124,6 +124,20 @@ TEST(conv_arm, init) {
TEST(conv_arm, compute) { TEST(conv_arm, compute) {
DeviceInfo::Init(); DeviceInfo::Init();
#if 1
for (auto n : {2}) {
for (auto ic : {6}) {
for (auto oc : {6}) {
for (auto ih : {9}) {
for (auto iw : {9}) {
for (auto flag_bias : {false, true}) {
for (auto flag_relu : {false, true}) {
for (auto depthwise : {false, true}) {
for (auto dilation : {1}) {
for (auto stride : {1, 2}) {
for (auto padding : {0, 1, 2}) {
for (auto ks : {1, 3, 5}) {
#else
for (auto n : {1, 2}) { for (auto n : {1, 2}) {
for (auto ic : {6, 32 /*, 128*/}) { for (auto ic : {6, 32 /*, 128*/}) {
for (auto oc : {6, 32 /*, 128*/}) { for (auto oc : {6, 32 /*, 128*/}) {
...@@ -136,6 +150,7 @@ TEST(conv_arm, compute) { ...@@ -136,6 +150,7 @@ TEST(conv_arm, compute) {
for (auto stride : {1, 2}) { for (auto stride : {1, 2}) {
for (auto padding : {0, 1, 2}) { for (auto padding : {0, 1, 2}) {
for (auto ks : {1, 3, 5}) { for (auto ks : {1, 3, 5}) {
#endif
int group = 1; int group = 1;
if (depthwise) { // depthwise convolution ? if (depthwise) { // depthwise convolution ?
group = oc = ic; group = oc = ic;
......
...@@ -14,7 +14,10 @@ ...@@ -14,7 +14,10 @@
#include "paddle/fluid/lite/kernels/arm/mul_compute.h" #include "paddle/fluid/lite/kernels/arm/mul_compute.h"
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include <algorithm>
#include <iostream>
#include <memory> #include <memory>
#include <random>
#include <utility> #include <utility>
#include <vector> #include <vector>
#include "paddle/fluid/lite/arm/math/funcs.h" #include "paddle/fluid/lite/arm/math/funcs.h"
...@@ -25,6 +28,17 @@ namespace lite { ...@@ -25,6 +28,17 @@ namespace lite {
namespace kernels { namespace kernels {
namespace arm { namespace arm {
// Writes pseudo-random values into the first `n` elements of `a`.
//
// Each value is a draw from a uniform distribution over [0, 1), rescaled to
// the span between `lower` and `upper` and converted to T.
//
// The Mersenne Twister engine is re-created on every call from a function-local
// static seed that starts at 100 and is bumped by one per call, so successive
// calls (for the same T) produce different, but reproducible, sequences.
//
//   a      destination buffer; must hold at least `n` elements
//   n      number of elements to write; nothing is written when n <= 0
//   lower  value produced by a draw of 0 (defaults to -2)
//   upper  bound approached as the draw nears 1 (defaults to 2)
template <typename T>
void FillData(T* a, const int n, const T lower = static_cast<T>(-2.f),
              const T upper = static_cast<T>(2.f)) {
  static unsigned int seed = 100;
  std::mt19937 engine(seed);
  ++seed;  // the next call starts from a different seed

  std::uniform_real_distribution<double> unit_interval(0, 1);
  // Keep the (possibly promoted) type of the subtraction so the arithmetic
  // below is carried out exactly as `draw * (upper - lower) + lower`.
  const auto width = upper - lower;

  int idx = 0;
  while (idx < n) {
    const double draw = unit_interval(engine);
    a[idx] = static_cast<T>(draw * width + lower);
    ++idx;
  }
}
TEST(mul_arm, retrive_op) { TEST(mul_arm, retrive_op) {
auto mul = auto mul =
KernelRegistry::Global().Create<TARGET(kARM), PRECISION(kFloat)>("mul"); KernelRegistry::Global().Create<TARGET(kARM), PRECISION(kFloat)>("mul");
...@@ -33,114 +47,100 @@ TEST(mul_arm, retrive_op) { ...@@ -33,114 +47,100 @@ TEST(mul_arm, retrive_op) {
} }
TEST(mul_arm, init) { TEST(mul_arm, init) {
FcCompute mul; MulCompute mul;
ASSERT_EQ(mul.precision(), PRECISION(kFloat)); ASSERT_EQ(mul.precision(), PRECISION(kFloat));
ASSERT_EQ(mul.target(), TARGET(kARM)); ASSERT_EQ(mul.target(), TARGET(kARM));
} }
TEST(mul_arm, compare_test) { TEST(mul_arm, compare_test) {
lite::Tensor x, w, b, out, ref; using T = float;
constexpr int batch_size = 2;
x.Resize({batch_size, 3}); for (int m : {1, 2, 3, 4}) {
w.Resize({3, 4}); for (int n : {1, 2, 3, 4}) {
b.Resize({1, 4}); for (int k : {1, 2, 3, 4}) {
out.Resize({batch_size, 4}); lite::Tensor x, y, out, ref;
ref.Resize({batch_size, 4}); x.Resize({m, k});
y.Resize({k, n});
auto x_data = x.mutable_data<float>(); out.Resize({m, n});
auto w_data = w.mutable_data<float>(); ref.Resize({m, n});
auto b_data = b.mutable_data<float>();
auto out_data = out.mutable_data<float>(); auto* x_data = x.mutable_data<T>();
auto ref_data = ref.mutable_data<float>(); auto* y_data = y.mutable_data<T>();
auto* out_data = out.mutable_data<T>();
for (int64_t i = 0; i < x.dims().product(); i++) { auto* ref_data = ref.mutable_data<T>();
x_data[i] = static_cast<float>(i);
} FillData<T>(x_data, x.dims().production());
for (int64_t i = 0; i < w.dims().product(); i++) { FillData<T>(y_data, y.dims().production());
w_data[i] = static_cast<float>(i); FillData<T>(out_data, out.dims().production());
} FillData<T>(ref_data, out.dims().production());
for (int64_t i = 0; i < b.dims().product(); i++) {
b_data[i] = static_cast<float>(i); MulCompute mul;
} operators::MulParam param;
lite::arm::math::fc_compute_eigen(x_data, batch_size, 3, // param.x = &x;
w_data, 3, 4, // param.y = &y;
b_data, ref_data);
// mul compute kernel
FcCompute mul;
operators::FcParam param;
param.in_num_col_dims = 1;
param.input = &x;
param.w = &w;
param.bias = &b;
param.output = &out; param.output = &out;
param.in_mat_dims = x.dims();
DeviceInfo::Init(); DeviceInfo::Init();
std::unique_ptr<KernelContext> ctx(new KernelContext); std::unique_ptr<KernelContext> ctx(new KernelContext);
ctx->As<ARMContext>(); ctx->As<ARMContext>();
mul.SetParam(param); mul.SetParam(param);
mul.SetContext(std::move(ctx)); mul.SetContext(std::move(ctx));
mul.PrepareForRun();
mul.Run(); mul.Run();
VLOG(3) << "output vs ref"; lite::arm::math::mul_compute_eigen(x_data, m, k, y_data, k, n,
for (int i = 0; i < out.dims().product(); i++) { ref_data);
VLOG(3) << out_data[i] << " vs " << ref_data[i]; for (int i = 0; i < out.dims().production(); i++) {
EXPECT_NEAR(out_data[i], ref_data[i], 1e-3);
}
}
} }
for (int i = 0; i < out.dims().product(); ++i) {
EXPECT_NEAR(out_data[i], ref_data[i], 1e-5);
} }
} }
TEST(mul_arm, num_col_dims) { TEST(mul_arm, num_col_dims) {
FcCompute mul; using T = float;
operators::FcParam param;
lite::Tensor x, y, out, ref;
lite::Tensor x; x.Resize({2, 3, 4});
lite::Tensor w; y.Resize({3, 4, 5});
lite::Tensor bias; out.Resize({2, 5});
lite::Tensor output; ref.Resize({2, 5});
x.Resize({1, 2, 3}); auto* x_data = x.mutable_data<T>();
w.Resize({3, 4}); auto* y_data = y.mutable_data<T>();
bias.Resize({1, 4}); auto* out_data = out.mutable_data<T>();
output.Resize({2, 4}); auto* ref_data = ref.mutable_data<T>();
auto* x_data = x.mutable_data<float>(); FillData<T>(x_data, x.dims().production());
auto* w_data = w.mutable_data<float>(); FillData<T>(y_data, y.dims().production());
auto* bias_data = bias.mutable_data<float>(); FillData<T>(out_data, out.dims().production());
auto* output_data = output.mutable_data<float>(); FillData<T>(ref_data, out.dims().production());
for (int64_t i = 0; i < x.dims().product(); i++) { MulCompute mul;
x_data[i] = static_cast<float>(i); operators::MulParam param;
}
for (int64_t i = 0; i < w.dims().product(); i++) {
w_data[i] = static_cast<float>(i);
}
for (int64_t i = 0; i < bias.dims().product(); i++) {
bias_data[i] = static_cast<float>(i);
}
for (int64_t i = 0; i < output.dims().product(); i++) {
output_data[i] = static_cast<float>(i);
}
param.in_num_col_dims = 2; param.x = &x;
param.input = &x; param.y = &y;
param.w = &w; param.output = &out;
param.bias = &bias; param.x_num_col_dims = 1;
param.output = &output; param.y_num_col_dims = 2;
param.in_mat_dims = x.dims();
DeviceInfo::Init();
std::unique_ptr<KernelContext> ctx(new KernelContext); std::unique_ptr<KernelContext> ctx(new KernelContext);
ctx->As<ARMContext>(); ctx->As<ARMContext>();
DeviceInfo::Init();
mul.SetParam(param); mul.SetParam(param);
mul.SetContext(std::move(ctx)); mul.SetContext(std::move(ctx));
mul.PrepareForRun();
mul.Run(); mul.Run();
lite::arm::math::mul_compute_eigen(x_data, 2, 12, y_data, 12, 5, ref_data);
for (int i = 0; i < out.dims().production(); i++) {
EXPECT_NEAR(out_data[i], ref_data[i], 1e-3);
}
} }
} // namespace arm } // namespace arm
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册