提交 37f93e09 编写于 作者: L Liangliang He

Merge branch 'conv2d-neon' into 'master'

Move test and benchmark of batch_norm and conv2d3x3r1 to ops directory.

See merge request !37
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
namespace mace { namespace mace {
namespace kernels { namespace kernels {
static const int REGISTER_SIZE = 4; static const int kRegisterSize = 4;
void Conv2dNeonK3x3S1(const float* input, // NCHW void Conv2dNeonK3x3S1(const float* input, // NCHW
const index_t* input_shape, const index_t* input_shape,
...@@ -44,7 +44,7 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW ...@@ -44,7 +44,7 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW
float32x4_t filter3 = vld1q_f32(filter_ptr+3); float32x4_t filter3 = vld1q_f32(filter_ptr+3);
float32x4_t filter6 = vld1q_f32(filter_ptr+6); float32x4_t filter6 = vld1q_f32(filter_ptr+6);
const float* row[REGISTER_SIZE] = { const float* row[kRegisterSize] = {
input_ptr, input_ptr + input_width, input_ptr, input_ptr + input_width,
input_ptr + 2 * input_width, input_ptr + 3 * input_width input_ptr + 2 * input_width, input_ptr + 3 * input_width
}; };
...@@ -61,7 +61,7 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW ...@@ -61,7 +61,7 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW
float32x4_t sum0 = vdupq_n_f32(.0f); float32x4_t sum0 = vdupq_n_f32(.0f);
float32x4_t sum1 = vdupq_n_f32(.0f); float32x4_t sum1 = vdupq_n_f32(.0f);
float32x4_t row0_ext_0 = vld1q_f32(row[0]); //0123 float32x4_t row0_ext_0 = vld1q_f32(row[0]); //0123
float32x4_t row0_latter = vld1q_f32(row[0] + REGISTER_SIZE); //4567 float32x4_t row0_latter = vld1q_f32(row[0] + kRegisterSize); //4567
float32x4_t row0_ext_1 = vextq_f32(row0_ext_0, row0_latter, 1); //1234 float32x4_t row0_ext_1 = vextq_f32(row0_ext_0, row0_latter, 1); //1234
float32x4_t row0_ext_2 = vextq_f32(row0_ext_0, row0_latter, 2); //2345 float32x4_t row0_ext_2 = vextq_f32(row0_ext_0, row0_latter, 2); //2345
...@@ -70,7 +70,7 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW ...@@ -70,7 +70,7 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW
sum0 = vfmaq_laneq_f32(sum0, row0_ext_2, filter0, 2); sum0 = vfmaq_laneq_f32(sum0, row0_ext_2, filter0, 2);
float32x4_t row1_ext_0 = vld1q_f32(row[1]); //0123 float32x4_t row1_ext_0 = vld1q_f32(row[1]); //0123
float32x4_t row1_latter = vld1q_f32(row[1] + REGISTER_SIZE); //4567 float32x4_t row1_latter = vld1q_f32(row[1] + kRegisterSize); //4567
float32x4_t row1_ext_1 = vextq_f32(row1_ext_0, row1_latter, 1); //1234 float32x4_t row1_ext_1 = vextq_f32(row1_ext_0, row1_latter, 1); //1234
float32x4_t row1_ext_2 = vextq_f32(row1_ext_0, row1_latter, 2); //2345 float32x4_t row1_ext_2 = vextq_f32(row1_ext_0, row1_latter, 2); //2345
...@@ -79,7 +79,7 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW ...@@ -79,7 +79,7 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW
sum0 = vfmaq_laneq_f32(sum0, row1_ext_2, filter3, 2); sum0 = vfmaq_laneq_f32(sum0, row1_ext_2, filter3, 2);
row0_ext_0 = vld1q_f32(row[2]); //0123 row0_ext_0 = vld1q_f32(row[2]); //0123
row0_latter = vld1q_f32(row[2] + REGISTER_SIZE); //4567 row0_latter = vld1q_f32(row[2] + kRegisterSize); //4567
row0_ext_1 = vextq_f32(row0_ext_0, row0_latter, 1); //1234 row0_ext_1 = vextq_f32(row0_ext_0, row0_latter, 1); //1234
row0_ext_2 = vextq_f32(row0_ext_0, row0_latter, 2); //2345 row0_ext_2 = vextq_f32(row0_ext_0, row0_latter, 2); //2345
...@@ -97,7 +97,7 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW ...@@ -97,7 +97,7 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW
sum1 = vfmaq_laneq_f32(sum1, row0_ext_2, filter3, 2); sum1 = vfmaq_laneq_f32(sum1, row0_ext_2, filter3, 2);
row1_ext_0 = vld1q_f32(row[3]); //0123 row1_ext_0 = vld1q_f32(row[3]); //0123
row1_latter = vld1q_f32(row[3] + REGISTER_SIZE); //4567 row1_latter = vld1q_f32(row[3] + kRegisterSize); //4567
row1_ext_1 = vextq_f32(row1_ext_0, row1_latter, 1); //1234 row1_ext_1 = vextq_f32(row1_ext_0, row1_latter, 1); //1234
row1_ext_2 = vextq_f32(row1_ext_0, row1_latter, 2); //2345 row1_ext_2 = vextq_f32(row1_ext_0, row1_latter, 2); //2345
...@@ -112,10 +112,10 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW ...@@ -112,10 +112,10 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW
vst1q_f32(output_ptr1, output_row0); vst1q_f32(output_ptr1, output_row0);
vst1q_f32(output_ptr2, output_row1); vst1q_f32(output_ptr2, output_row1);
output_ptr1 += REGISTER_SIZE; output_ptr1 += kRegisterSize;
output_ptr2 += REGISTER_SIZE; output_ptr2 += kRegisterSize;
for(int i = 0; i < REGISTER_SIZE; ++i) { for(int i = 0; i < kRegisterSize; ++i) {
row[i] += REGISTER_SIZE; row[i] += kRegisterSize;
} }
} }
for (; remain_count > 0; --remain_count) { for (; remain_count > 0; --remain_count) {
...@@ -138,13 +138,13 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW ...@@ -138,13 +138,13 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW
++output_ptr1; ++output_ptr1;
++output_ptr2; ++output_ptr2;
for(int i = 0; i < REGISTER_SIZE; ++i) { for(int i = 0; i < kRegisterSize; ++i) {
row[i] += 1; row[i] += 1;
} }
} }
output_ptr1 += width; output_ptr1 += width;
output_ptr2 += width; output_ptr2 += width;
for(int i = 0; i < REGISTER_SIZE; ++i) { for(int i = 0; i < kRegisterSize; ++i) {
row[i] += 2 + input_width; row[i] += 2 + input_width;
} }
} }
...@@ -155,7 +155,7 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW ...@@ -155,7 +155,7 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW
for(; count > 0; --count) { for(; count > 0; --count) {
float32x4_t sum0 = vdupq_n_f32(.0f); float32x4_t sum0 = vdupq_n_f32(.0f);
float32x4_t row0_ext_0 = vld1q_f32(row[0]); //0123 float32x4_t row0_ext_0 = vld1q_f32(row[0]); //0123
float32x4_t row0_latter = vld1q_f32(row[0] + REGISTER_SIZE); //4567 float32x4_t row0_latter = vld1q_f32(row[0] + kRegisterSize); //4567
float32x4_t row0_ext_1 = vextq_f32(row0_ext_0, row0_latter, 1); //1234 float32x4_t row0_ext_1 = vextq_f32(row0_ext_0, row0_latter, 1); //1234
float32x4_t row0_ext_2 = vextq_f32(row0_ext_0, row0_latter, 2); //2345 float32x4_t row0_ext_2 = vextq_f32(row0_ext_0, row0_latter, 2); //2345
...@@ -164,7 +164,7 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW ...@@ -164,7 +164,7 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW
sum0 = vfmaq_laneq_f32(sum0, row0_ext_2, filter0, 2); sum0 = vfmaq_laneq_f32(sum0, row0_ext_2, filter0, 2);
float32x4_t row1_ext_0 = vld1q_f32(row[1]); //0123 float32x4_t row1_ext_0 = vld1q_f32(row[1]); //0123
float32x4_t row1_latter = vld1q_f32(row[1] + REGISTER_SIZE); //4567 float32x4_t row1_latter = vld1q_f32(row[1] + kRegisterSize); //4567
float32x4_t row1_ext_1 = vextq_f32(row1_ext_0, row1_latter, 1); //1234 float32x4_t row1_ext_1 = vextq_f32(row1_ext_0, row1_latter, 1); //1234
float32x4_t row1_ext_2 = vextq_f32(row1_ext_0, row1_latter, 2); //2345 float32x4_t row1_ext_2 = vextq_f32(row1_ext_0, row1_latter, 2); //2345
...@@ -173,7 +173,7 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW ...@@ -173,7 +173,7 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW
sum0 = vfmaq_laneq_f32(sum0, row1_ext_2, filter3, 2); sum0 = vfmaq_laneq_f32(sum0, row1_ext_2, filter3, 2);
row0_ext_0 = vld1q_f32(row[2]); //0123 row0_ext_0 = vld1q_f32(row[2]); //0123
row0_latter = vld1q_f32(row[2] + REGISTER_SIZE); //4567 row0_latter = vld1q_f32(row[2] + kRegisterSize); //4567
row0_ext_1 = vextq_f32(row0_ext_0, row0_latter, 1); //1234 row0_ext_1 = vextq_f32(row0_ext_0, row0_latter, 1); //1234
row0_ext_2 = vextq_f32(row0_ext_0, row0_latter, 2); //2345 row0_ext_2 = vextq_f32(row0_ext_0, row0_latter, 2); //2345
...@@ -184,9 +184,9 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW ...@@ -184,9 +184,9 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW
float32x4_t output_row0 = vld1q_f32(output_ptr1); float32x4_t output_row0 = vld1q_f32(output_ptr1);
output_row0 = vaddq_f32(output_row0, sum0); output_row0 = vaddq_f32(output_row0, sum0);
vst1q_f32(output_ptr1, output_row0); vst1q_f32(output_ptr1, output_row0);
output_ptr1 += REGISTER_SIZE; output_ptr1 += kRegisterSize;
for(int i = 0; i < 3; ++i) { for(int i = 0; i < 3; ++i) {
row[i] += REGISTER_SIZE; row[i] += kRegisterSize;
} }
} }
for (; remain_count > 0; --remain_count) { for (; remain_count > 0; --remain_count) {
......
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include <random>
#include "gtest/gtest.h"
#include "mace/kernels/batch_norm.h"
namespace mace {
// Runs BatchNormFunctor for DeviceType::CPU and DeviceType::NEON on the same
// randomly shaped, randomly filled input and expects element-wise equal
// outputs (EXPECT_FLOAT_EQ).
TEST(BatchNormNeonTest, Simple) {
  std::random_device rd;
  std::mt19937 gen(rd());
  std::normal_distribution<float> nd(0, 1);

  // Draws a dimension uniformly from [low, high] using the engine above, so
  // the whole test depends on a single random source instead of also
  // seeding and calling the C rand() generator.
  auto random_dim = [&gen](int low, int high) {
    return std::uniform_int_distribution<int>(low, high)(gen);
  };

  // generate random input
  const index_t batch = random_dim(1, 128);
  const index_t channels = 3;
  const index_t height = random_dim(10, 109);
  const index_t width = random_dim(10, 109);

  const index_t input_size = batch * channels * height * width;
  std::vector<float> input(input_size, 0.0);
  std::vector<float> scale(channels, 0.0);
  std::vector<float> offset(channels, 0.0);
  std::vector<float> mean(channels, 0.0);
  std::vector<float> var(channels, 0.0);

  for (float &value : input) {
    value = nd(gen);
  }
  for (index_t c = 0; c < channels; ++c) {
    scale[c] = nd(gen);
    offset[c] = nd(gen);
    mean[c] = nd(gen);
    // Variance samples are made non-negative.
    var[c] = std::abs(nd(gen));
  }

  // declare output
  std::unique_ptr<float[]> output(new float[input_size]);
  std::unique_ptr<float[]> output_neon(new float[input_size]);

  // NOTE(review): 1e-5 is presumably the variance epsilon -- confirm against
  // BatchNormFunctor's constructor.
  kernels::BatchNormFunctor<DeviceType::CPU, float>(1e-5)(
      input.data(),
      scale.data(),
      offset.data(),
      mean.data(),
      var.data(),
      batch,
      channels,
      height * width,
      output.get()
  );
  kernels::BatchNormFunctor<DeviceType::NEON, float>(1e-5)(
      input.data(),
      scale.data(),
      offset.data(),
      mean.data(),
      var.data(),
      batch,
      channels,
      height * width,
      output_neon.get()
  );

  // The shape is random, so report it together with the failing index;
  // otherwise a failure cannot be traced back to the input that caused it.
  for (index_t i = 0; i < input_size; ++i) {
    EXPECT_FLOAT_EQ(output[i], output_neon[i])
        << "index = " << i << ", input shape = [" << batch << ", "
        << channels << ", " << height << ", " << width << "]";
  }
}
} // namespace mace
\ No newline at end of file
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "gtest/gtest.h"
#include "mace/kernels/conv_2d.h"
#include "mace/kernels/conv_pool_2d_util.h"
namespace mace {
// Runs Conv2dFunctor for DeviceType::CPU and DeviceType::NEON on the same
// random input with a 3x3 filter, stride 1, dilation 1 and VALID padding, and
// expects the two outputs to agree within 1e-3 per element.
TEST(Conv2dNeon3X3Test, Correctness) {
  std::random_device rd;
  std::mt19937 gen(rd());
  std::normal_distribution<float> nd(0, 1);

  // Draws a dimension uniformly from [low, high] using the engine above, so
  // the whole test depends on a single random source instead of also
  // seeding and calling the C rand() generator.
  auto random_dim = [&gen](int low, int high) {
    return std::uniform_int_distribution<int>(low, high)(gen);
  };

  // generate random input
  const index_t batch = random_dim(1, 16);
  const index_t channels = random_dim(3, 102);
  const index_t height = random_dim(10, 109);
  const index_t width = random_dim(10, 109);
  const index_t output_channels = random_dim(3, 102);

  const index_t input_size = batch * channels * height * width;
  const index_t filter_size = output_channels * channels * 3 * 3;
  std::vector<float> input(input_size, 0.0);
  const index_t input_shape[] = {batch, channels, height, width};
  std::vector<float> filter(filter_size, 0.0);
  const index_t filter_shape[] = {output_channels, channels, 3, 3};
  std::vector<float> bias(output_channels, 0.0);

  const int dilations[] = {1, 1};
  const int strides[] = {1, 1};

  // declare output
  std::vector<index_t> output_shape;
  std::vector<int> padding_size;
  kernels::CalcPaddingAndOutputSize(input_shape, filter_shape, dilations,
                                    strides, VALID,
                                    &output_shape, &padding_size);
  const index_t output_size =
      output_shape[0] * output_shape[1] * output_shape[2] * output_shape[3];
  std::unique_ptr<float[]> output(new float[output_size]);
  std::unique_ptr<float[]> output_neon(new float[output_size]);

  for (float &value : input) {
    value = nd(gen);
  }
  for (float &value : filter) {
    value = nd(gen);
  }
  for (float &value : bias) {
    value = nd(gen);
  }

  kernels::Conv2dFunctor<DeviceType::CPU, float>(
      strides, padding_size.data(), dilations)(
      input.data(),
      input_shape,
      filter.data(),
      filter_shape,
      bias.data(),
      output.get(),
      output_shape.data()
  );
  kernels::Conv2dFunctor<DeviceType::NEON, float>(
      strides, padding_size.data(), dilations)(
      input.data(),
      input_shape,
      filter.data(),
      filter_shape,
      bias.data(),
      output_neon.get(),
      output_shape.data()
  );

  // The shapes are random, so report them together with the failing index;
  // otherwise a failure cannot be traced back to the input that caused it.
  for (index_t i = 0; i < output_size; ++i) {
    EXPECT_NEAR(output[i], output_neon[i], 1e-3)
        << "index = " << i << ", input shape = [" << batch << ", "
        << channels << ", " << height << ", " << width
        << "], output channels = " << output_channels;
  }
}
} // namespace mace
\ No newline at end of file
...@@ -2,75 +2,67 @@ ...@@ -2,75 +2,67 @@
// Copyright (c) 2017 XiaoMi All rights reserved. // Copyright (c) 2017 XiaoMi All rights reserved.
// //
#include "mace/core/operator.h"
#include "mace/core/testing/test_benchmark.h" #include "mace/core/testing/test_benchmark.h"
#include "mace/kernels/batch_norm.h" #include "mace/ops/ops_test_util.h"
namespace mace { namespace mace {
template <DeviceType D, typename T> template <DeviceType D, typename T>
static void BatchNorm(int iters, int batch, int channels, int height, int width) { static void BatchNorm(int iters, int batch, int channels, int height, int width) {
std::random_device rd; mace::testing::StopTiming();
std::mt19937 gen(rd());
std::normal_distribution<float> nd(0, 1);
index_t input_size = batch * channels * height * width; OpsTestNet net;
std::vector<T> input(input_size, 0.0); OpDefBuilder("BatchNorm", "BatchNormBM")
std::vector<T> scale(channels, 0.0); .Input("Input")
std::vector<T> offset(channels, 0.0); .Input("Scale")
std::vector<T> mean(channels, 0.0); .Input("Offset")
std::vector<T> var(channels, 0.0); .Input("Mean")
.Input("Var")
.Output("Output")
.Finalize(net.operator_def());
for (int i = 0; i < input_size; ++i) { // Add input data
input[i] = nd(gen); net.AddRandomInput<T>("Input", {batch, channels, height, width});
} net.AddRandomInput<T>("Scale", {channels});
for (int i = 0; i < channels; ++i) { net.AddRandomInput<T>("Offset", {channels});
scale[i] = nd(gen); net.AddRandomInput<T>("Mean", {channels});
offset[i] = nd(gen); net.AddRandomInput<T>("Var", {channels}, true);
mean[i] = nd(gen);
var[i] = std::abs(nd(gen));
}
// declare output // Warm-up
std::unique_ptr<T[]> output(new T[input_size]); for (int i = 0; i < 5; ++i) {
auto functor = kernels::BatchNormFunctor<D, T>(1e-5); net.RunOp(D);
}
mace::testing::StartTiming();
while(iters--) { while(iters--) {
functor(input.data(), net.RunOp(D);
scale.data(),
offset.data(),
mean.data(),
var.data(),
batch,
channels,
height * width,
output.get());
} }
} }
#define BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, DEVICE) \ #define BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, DEVICE) \
static void BM_BATCH_NORM_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE( \ static void BM_BATCH_NORM_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE( \
int iters) { \ int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \ const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::ItemsProcessed(tot); \ mace::testing::ItemsProcessed(tot); \
mace::testing::BytesProcessed(tot * (sizeof(TYPE)));\ mace::testing::BytesProcessed(tot * (sizeof(TYPE))); \
BatchNorm<DEVICE, TYPE>(iters, N, C, H, W); \ BatchNorm<DEVICE, TYPE>(iters, N, C, H, W); \
} \ } \
BENCHMARK(BM_BATCH_NORM_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE) BENCHMARK(BM_BATCH_NORM_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE)
#define BM_BATCH_NORM(N, C, H, W, TYPE) \ #define BM_BATCH_NORM(N, C, H, W, TYPE) \
BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, CPU); \ BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, CPU); \
BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, NEON); BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, NEON);
BM_BATCH_NORM(1, 1, 128, 128, float);
BM_BATCH_NORM(1, 1, 512, 512, float); BM_BATCH_NORM(1, 1, 512, 512, float);
BM_BATCH_NORM(1, 1, 1024, 1024, float); BM_BATCH_NORM(1, 1, 1024, 1024, float);
BM_BATCH_NORM(16, 1, 256, 256, float);
BM_BATCH_NORM(32, 1, 256, 256, float);
BM_BATCH_NORM(64, 1, 256, 256, float);
BM_BATCH_NORM(1, 3, 128, 128, float); BM_BATCH_NORM(1, 3, 128, 128, float);
BM_BATCH_NORM(1, 3, 512, 512, float); BM_BATCH_NORM(1, 3, 512, 512, float);
BM_BATCH_NORM(1, 3, 1024, 1024, float); BM_BATCH_NORM(1, 3, 1024, 1024, float);
BM_BATCH_NORM(16, 3, 256, 256, float); BM_BATCH_NORM(1, 64, 256, 256, float);
BM_BATCH_NORM(1, 64, 512, 512, float);
BM_BATCH_NORM(1, 128, 256, 256, float);
BM_BATCH_NORM(1, 128, 512, 512, float);
BM_BATCH_NORM(32, 1, 256, 256, float);
BM_BATCH_NORM(32, 3, 256, 256, float); BM_BATCH_NORM(32, 3, 256, 256, float);
BM_BATCH_NORM(64, 3, 256, 256, float);
} // namespace mace } // namespace mace
\ No newline at end of file
...@@ -9,7 +9,7 @@ namespace mace { ...@@ -9,7 +9,7 @@ namespace mace {
class BatchNormOpTest : public OpsTestBase {}; class BatchNormOpTest : public OpsTestBase {};
TEST_F(BatchNormOpTest, Simple) { TEST_F(BatchNormOpTest, SimpleCPU) {
// Construct graph // Construct graph
auto& net = test_net(); auto& net = test_net();
OpDefBuilder("BatchNorm", "BatchNormTest") OpDefBuilder("BatchNorm", "BatchNormTest")
...@@ -40,4 +40,42 @@ TEST_F(BatchNormOpTest, Simple) { ...@@ -40,4 +40,42 @@ TEST_F(BatchNormOpTest, Simple) {
ExpectTensorNear<float>(expected, *net.GetOutput("Output"), 0.01); ExpectTensorNear<float>(expected, *net.GetOutput("Output"), 0.01);
} }
TEST_F(BatchNormOpTest, SimpleNeon) {
srand(time(NULL));
// generate random input
index_t batch = 1 + rand() % 10;
index_t channels = 3 + rand() % 50;
index_t height = 10 + rand() % 50;
index_t width = 10 + rand() % 50;
// Construct graph
auto& net = test_net();
OpDefBuilder("BatchNorm", "BatchNormTest")
.Input("Input")
.Input("Scale")
.Input("Offset")
.Input("Mean")
.Input("Var")
.Output("Output")
.Finalize(net.operator_def());
// Add input data
net.AddRandomInput<float>("Input", {batch, channels, height, width});
net.AddRandomInput<float>("Scale", {channels});
net.AddRandomInput<float>("Offset", {channels});
net.AddRandomInput<float>("Mean", {channels});
net.AddRandomInput<float>("Var", {channels}, true);
// run cpu
net.RunOp();
// Check
Tensor expected = *net.GetOutput("Output");
// Run NEON
net.RunOp(DeviceType::NEON);
ExpectTensorNear<float>(expected, *net.GetOutput("Output"), 1e-5);
}
} }
...@@ -62,5 +62,6 @@ static void Conv2d(int iters, int batch, int channels, int height, int width, ...@@ -62,5 +62,6 @@ static void Conv2d(int iters, int batch, int channels, int height, int width,
BM_CONV_2D(1, 64, 32, 32, 1, 1, 1, VALID, 128, float); BM_CONV_2D(1, 64, 32, 32, 1, 1, 1, VALID, 128, float);
BM_CONV_2D(1, 64, 32, 32, 3, 3, 1, VALID, 128, float); BM_CONV_2D(1, 64, 32, 32, 3, 3, 1, VALID, 128, float);
BM_CONV_2D(1, 64, 32, 32, 3, 3, 1, SAME, 128, float);
} // namespace mace } // namespace mace
...@@ -192,3 +192,47 @@ TEST_F(Conv2dOpTest, Conv1x1) { ...@@ -192,3 +192,47 @@ TEST_F(Conv2dOpTest, Conv1x1) {
} }
// TODO we need more tests // TODO we need more tests
TEST_F(Conv2dOpTest, Conv3x3R1) {
auto func = [&](Padding type) {
srand(time(NULL));
// generate random input
index_t batch = 1 + rand() % 5;
index_t input_channels = 3 + rand() % 50;
index_t height = 10 + rand() % 100;
index_t width = 10 + rand() % 100;
index_t output_channels = 3 + rand() % 50;
// Construct graph
auto& net = test_net();
OpDefBuilder("Conv2d", "Conv2dTest")
.Input("Input")
.Input("Filter")
.Input("Bias")
.Output("Output")
.Finalize(net.operator_def());
// Add args
net.AddIntsArg("strides", {1, 1});
net.AddIntArg("padding", type);
net.AddIntsArg("dilations", {1, 1});
// Add input data
net.AddRandomInput<float>("Input", {batch, input_channels, height, width});
net.AddRandomInput<float>("Filter", {output_channels, input_channels, 3, 3});
net.AddRandomInput<float>("Bias", {output_channels});
// run cpu
net.RunOp();
// Check
Tensor expected = *net.GetOutput("Output");
// Run NEON
net.RunOp(DeviceType::NEON);
ExpectTensorNear<float>(expected, *net.GetOutput("Output"), 1e-5);
};
func(VALID);
func(SAME);
}
...@@ -15,152 +15,167 @@ ...@@ -15,152 +15,167 @@
namespace mace { namespace mace {
class OpDefBuilder { class OpDefBuilder {
public: public:
OpDefBuilder(const char* type, const char* name) { OpDefBuilder(const char *type, const char *name) {
op_def_.set_type(type); op_def_.set_type(type);
op_def_.set_name(name); op_def_.set_name(name);
} }
OpDefBuilder& Input(const char* input_name) {
op_def_.add_input(input_name); OpDefBuilder &Input(const char *input_name) {
return *this; op_def_.add_input(input_name);
} return *this;
OpDefBuilder& Output(const char* output_name) { }
op_def_.add_output(output_name);
return *this; OpDefBuilder &Output(const char *output_name) {
} op_def_.add_output(output_name);
void Finalize(OperatorDef* op_def) const { return *this;
MACE_CHECK(op_def != nullptr, "input should not be null."); }
*op_def = op_def_;
} void Finalize(OperatorDef *op_def) const {
OperatorDef op_def_; MACE_CHECK(op_def != nullptr, "input should not be null.");
*op_def = op_def_;
}
OperatorDef op_def_;
}; };
class OpsTestNet { class OpsTestNet {
public: public:
OpsTestNet() {} OpsTestNet() {}
template <typename T> template<typename T>
void AddInputFromArray(const char* name, void AddInputFromArray(const char *name,
const std::vector<index_t>& shape, const std::vector<index_t> &shape,
const std::vector<T>& data) { const std::vector<T> &data) {
Tensor* input = ws_.CreateTensor(name, cpu_allocator(), DataTypeToEnum<T>::v()); Tensor *input = ws_.CreateTensor(name, cpu_allocator(), DataTypeToEnum<T>::v());
input->Resize(shape); input->Resize(shape);
T* input_data = input->mutable_data<T>(); T *input_data = input->mutable_data<T>();
MACE_CHECK(input->size() == data.size()); MACE_CHECK(input->size() == data.size());
memcpy(input_data, data.data(), data.size() * sizeof(T)); memcpy(input_data, data.data(), data.size() * sizeof(T));
} }
template <typename T> template<typename T>
void AddRandomInput(const char* name, const std::vector<index_t>& shape) { void AddRepeatedInput(const char *name,
Tensor* input = ws_.CreateTensor(name, cpu_allocator(), DataTypeToEnum<T>::v()); const std::vector<index_t> &shape,
input->Resize(shape); const T data) {
float* input_data = input->mutable_data<T>(); Tensor *input = ws_.CreateTensor(name, cpu_allocator(), DataTypeToEnum<T>::v());
input->Resize(shape);
T *input_data = input->mutable_data<T>();
MACE_CHECK(input->size() == data.size());
std::fill(input_data, input_data + input->size(), data);
}
std::random_device rd; template<typename T>
std::mt19937 gen(rd()); void AddRandomInput(const char *name, const std::vector<index_t> &shape, bool positive = false) {
std::normal_distribution<T> nd(0, 1); Tensor *input = ws_.CreateTensor(name, cpu_allocator(), DataTypeToEnum<T>::v());
input->Resize(shape);
float *input_data = input->mutable_data<T>();
std::generate(input_data, input_data + input->size(), std::random_device rd;
[&gen, &nd]{ return nd(gen); }); std::mt19937 gen(rd());
} std::normal_distribution<T> nd(0, 1);
void AddIntArg(const char* name, const int value) { std::generate(input_data, input_data + input->size(),
auto arg = op_def_.add_arg(); [&gen, &nd, positive] { return positive ? std::abs(nd(gen)) : nd(gen); });
arg->set_name(name); }
arg->set_i(value);
}
void AddFloatArg(const char* name, const float value) { void AddIntArg(const char *name, const int value) {
auto arg = op_def_.add_arg(); auto arg = op_def_.add_arg();
arg->set_name(name); arg->set_name(name);
arg->set_f(value); arg->set_i(value);
} }
void AddStringArg(const char* name, const char* value) { void AddFloatArg(const char *name, const float value) {
auto arg = op_def_.add_arg(); auto arg = op_def_.add_arg();
arg->set_name(name); arg->set_name(name);
arg->set_s(value); arg->set_f(value);
} }
void AddIntsArg(const char* name, const std::vector<int>& values) { void AddStringArg(const char *name, const char *value) {
auto arg = op_def_.add_arg(); auto arg = op_def_.add_arg();
arg->set_name(name); arg->set_name(name);
for (auto value : values) { arg->set_s(value);
arg->add_ints(value); }
}
void AddIntsArg(const char *name, const std::vector<int> &values) {
auto arg = op_def_.add_arg();
arg->set_name(name);
for (auto value : values) {
arg->add_ints(value);
} }
}
void AddFloatsArg(const char* name, const std::vector<float>& values) { void AddFloatsArg(const char *name, const std::vector<float> &values) {
auto arg = op_def_.add_arg(); auto arg = op_def_.add_arg();
arg->set_name(name); arg->set_name(name);
for (auto value : values) { for (auto value : values) {
arg->add_floats(value); arg->add_floats(value);
}
} }
}
void AddStringsArg(const char* name, const std::vector<const char*>& values) { void AddStringsArg(const char *name, const std::vector<const char *> &values) {
auto arg = op_def_.add_arg(); auto arg = op_def_.add_arg();
arg->set_name(name); arg->set_name(name);
for (auto value : values) { for (auto value : values) {
arg->add_strings(value); arg->add_strings(value);
}
} }
}
OperatorDef* operator_def() { return &op_def_; } OperatorDef *operator_def() { return &op_def_; }
Workspace* ws() { return &ws_; } Workspace *ws() { return &ws_; }
bool RunOp(DeviceType device) { bool RunOp(DeviceType device) {
if (!net_) { if (!net_) {
NetDef net_def; NetDef net_def;
net_def.add_op()->CopyFrom(op_def_); net_def.add_op()->CopyFrom(op_def_);
VLOG(3) << net_def.DebugString(); VLOG(3) << net_def.DebugString();
net_ = CreateNet(net_def, &ws_, device); net_ = CreateNet(net_def, &ws_, device);
}
return net_->Run();
} }
return net_->Run();
}
bool RunOp() { bool RunOp() {
return RunOp(DeviceType::CPU); return RunOp(DeviceType::CPU);
} }
Tensor* GetOutput(const char* output_name) { Tensor *GetOutput(const char *output_name) {
return ws_.GetTensor(output_name); return ws_.GetTensor(output_name);
} }
public: public:
Workspace ws_; Workspace ws_;
OperatorDef op_def_; OperatorDef op_def_;
std::unique_ptr<NetBase> net_; std::unique_ptr<NetBase> net_;
}; };
class OpsTestBase : public ::testing::Test { class OpsTestBase : public ::testing::Test {
public: public:
OpsTestNet& test_net() { return test_net_; }; OpsTestNet &test_net() { return test_net_; };
protected: protected:
virtual void TearDown() { virtual void TearDown() {
auto ws = test_net_.ws(); auto ws = test_net_.ws();
auto tensor_names = ws->Tensors(); auto tensor_names = ws->Tensors();
for (auto& name : tensor_names) { for (auto &name : tensor_names) {
ws->RemoveTensor(name); ws->RemoveTensor(name);
}
} }
}
private: private:
OpsTestNet test_net_; OpsTestNet test_net_;
}; };
template <typename T> template<typename T>
Tensor CreateTensor(const std::vector<index_t>& shape, const std::vector<T>& data) { Tensor CreateTensor(const std::vector<index_t> &shape, const std::vector<T> &data) {
Tensor res(cpu_allocator(), DataTypeToEnum<T>::v()); Tensor res(cpu_allocator(), DataTypeToEnum<T>::v());
res.Resize(shape); res.Resize(shape);
float* input_data = res.mutable_data<float>(); float *input_data = res.mutable_data<float>();
memcpy(input_data, data.data(), data.size() * sizeof(T)); memcpy(input_data, data.data(), data.size() * sizeof(T));
return res; return res;
} }
inline bool IsSameSize(const Tensor& x, const Tensor& y) { inline bool IsSameSize(const Tensor &x, const Tensor &y) {
if (x.dim_size() != y.dim_size()) return false; if (x.dim_size() != y.dim_size()) return false;
for (int d = 0; d < x.dim_size(); ++d) { for (int d = 0; d < x.dim_size(); ++d) {
if (x.dim(d) != y.dim(d)) return false; if (x.dim(d) != y.dim(d)) return false;
...@@ -168,58 +183,59 @@ inline bool IsSameSize(const Tensor& x, const Tensor& y) { ...@@ -168,58 +183,59 @@ inline bool IsSameSize(const Tensor& x, const Tensor& y) {
return true; return true;
} }
inline std::string ShapeToString(const Tensor& x) { inline std::string ShapeToString(const Tensor &x) {
std::stringstream stream; std::stringstream stream;
for (int i = 0; i < x.dim_size(); i++) { for (int i = 0; i < x.dim_size(); i++) {
if (i > 0) stream<<","; if (i > 0) stream << ",";
int64_t dim = x.dim(i); int64_t dim = x.dim(i);
if (dim < 0) { if (dim < 0) {
stream<<"?"; stream << "?";
} else { } else {
stream<<dim; stream << dim;
} }
} }
stream<<"]"; stream << "]";
return std::string(stream.str()); return std::string(stream.str());
} }
template <typename T> template<typename T>
struct is_floating_point_type { struct is_floating_point_type {
static const bool value = std::is_same<T, float>::value || static const bool value = std::is_same<T, float>::value ||
std::is_same<T, double>::value; std::is_same<T, double>::value;
}; };
template <typename T> template<typename T>
inline void ExpectEqual(const T& a, const T& b) { inline void ExpectEqual(const T &a, const T &b) {
EXPECT_EQ(a, b); EXPECT_EQ(a, b);
} }
template <> template<>
inline void ExpectEqual<float>(const float& a, const float& b) { inline void ExpectEqual<float>(const float &a, const float &b) {
EXPECT_FLOAT_EQ(a, b); EXPECT_FLOAT_EQ(a, b);
} }
template <> template<>
inline void ExpectEqual<double>(const double& a, const double& b) { inline void ExpectEqual<double>(const double &a, const double &b) {
EXPECT_DOUBLE_EQ(a, b); EXPECT_DOUBLE_EQ(a, b);
} }
inline void AssertSameTypeDims(const Tensor& x, const Tensor& y) { inline void AssertSameTypeDims(const Tensor &x, const Tensor &y) {
ASSERT_EQ(x.dtype(), y.dtype()); ASSERT_EQ(x.dtype(), y.dtype());
ASSERT_TRUE(IsSameSize(x, y)) ASSERT_TRUE(IsSameSize(x, y))
<< "x.shape [" << ShapeToString(x) << "] vs " << "x.shape [" << ShapeToString(x) << "] vs "
<< "y.shape [ " << ShapeToString(y) << "]"; << "y.shape [ " << ShapeToString(y) << "]";
} }
template <typename T, bool is_fp = is_floating_point_type<T>::value> template<typename T, bool is_fp = is_floating_point_type<T>::value>
struct Expector; struct Expector;
// Partial specialization for float and double. // Partial specialization for float and double.
template <typename T> template<typename T>
struct Expector<T, true> { struct Expector<T, true> {
static void Equal(const T& a, const T& b) { ExpectEqual(a, b); } static void Equal(const T &a, const T &b) { ExpectEqual(a, b); }
static void Equal(const Tensor& x, const Tensor& y) { static void Equal(const Tensor &x, const Tensor &y) {
ASSERT_EQ(x.dtype(), DataTypeToEnum<T>::v()); ASSERT_EQ(x.dtype(), DataTypeToEnum<T>::v());
AssertSameTypeDims(x, y); AssertSameTypeDims(x, y);
auto a = x.data<T>(); auto a = x.data<T>();
...@@ -229,22 +245,22 @@ struct Expector<T, true> { ...@@ -229,22 +245,22 @@ struct Expector<T, true> {
} }
} }
static void Near(const Tensor& x, const Tensor& y, const double abs_err) { static void Near(const Tensor &x, const Tensor &y, const double abs_err) {
ASSERT_EQ(x.dtype(), DataTypeToEnum<T>::v()); ASSERT_EQ(x.dtype(), DataTypeToEnum<T>::v());
AssertSameTypeDims(x, y); AssertSameTypeDims(x, y);
auto a = x.data<T>(); auto a = x.data<T>();
auto b = y.data<T>(); auto b = y.data<T>();
for (int i = 0; i < x.size(); ++i) { for (int i = 0; i < x.size(); ++i) {
EXPECT_NEAR(a[i], b[i], abs_err) EXPECT_NEAR(a[i], b[i], abs_err)
<< "a = " << a << " b = " << b << " index = " << i; << "a = " << a << " b = " << b << " index = " << i;
} }
} }
}; };
template <typename T> template<typename T>
void ExpectTensorNear(const Tensor& x, const Tensor& y, const double abs_err) { void ExpectTensorNear(const Tensor &x, const Tensor &y, const double abs_err) {
static_assert(is_floating_point_type<T>::value, "T is not a floating point type"); static_assert(is_floating_point_type<T>::value, "T is not a floating point type");
Expector<T>::Near(x, y ,abs_err); Expector<T>::Near(x, y, abs_err);
} }
} // namespace mace } // namespace mace
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册