提交 3b82bfb5 编写于 作者: L liuruilong

Optimize batch norm op kernel

上级 b37c8fef
......@@ -23,7 +23,6 @@ namespace operators {
template <>
void BatchNormKernel<CPU, float>::Compute(const BatchNormParam &param) const {
/// todo: test.
const Tensor *input_x = param.InputX();
auto input_x_ptr = input_x->data<float>();
const auto &x_dims = input_x->dims();
......@@ -46,50 +45,191 @@ void BatchNormKernel<CPU, float>::Compute(const BatchNormParam &param) const {
auto scale_ptr = scale->data<float>();
auto bias_ptr = bias->data<float>();
Tensor inv_std;
auto inv_std_ptr = inv_std.mutable_data<float>(make_ddim({C}));
if (C != variance->numel()) {
DLOG << "C must equal to variance.numel()";
}
assert(C == variance->numel());
/// std = (var + epsilon).sqrt();
/// inv_std = 1 / std;
for (int i = 0; i < C; i++) {
inv_std_ptr[i] =
1 / static_cast<float>(pow((variance_ptr[i] + epsilon), 0.5));
}
// Tensor inv_std;
// auto inv_std_ptr = inv_std.mutable_data<float>(make_ddim({C}));
PADDLE_MOBILE_ENFORCE(C == variance->numel(), "C must equal to variance.numel()");
int HXW = H * W;
if (HXW > 32) {
int NXC = N * C;
float *inv_std_ptr = new float[NXC * 4];
float * volatile new_scale_ptr = new float[NXC * 4];
float * volatile new_bias_ptr = new float[NXC * 4];
/// std = (var + epsilon).sqrt();
/// inv_std = 1 / std;
for (int i = 0; i < C * 4; i += 4) {
inv_std_ptr[i] =
1 / static_cast<float>(pow((variance_ptr[i/4] + epsilon), 0.5));
inv_std_ptr[i + 1] = inv_std_ptr[i];
inv_std_ptr[i + 2] = inv_std_ptr[i];
inv_std_ptr[i + 3] = inv_std_ptr[i];
new_scale_ptr[i] = inv_std_ptr[i] * scale_ptr[i/4];
new_scale_ptr[i + 1] = new_scale_ptr[i];
new_scale_ptr[i + 2] = new_scale_ptr[i];
new_scale_ptr[i + 3] = new_scale_ptr[i];
new_bias_ptr[i] = bias_ptr[i/4] - mean_ptr[i/4] * inv_std_ptr[i] * scale_ptr[i/4];
new_bias_ptr[i + 1] = new_bias_ptr[i];
new_bias_ptr[i + 2] = new_bias_ptr[i];
new_bias_ptr[i + 3] = new_bias_ptr[i];
}
for (int j = C * 4; j < NXC * 4; ++j) {
new_scale_ptr[j] = new_scale_ptr[j - C * 4];
new_bias_ptr[j] = new_bias_ptr[j - C * 4];
}
asm volatile(
"subs %[N], %[N], #1 \n\t"
"blt end_n_%= \n\t"
"loop_n_%=: \n\t"
"subs %[C], %[C], #1 \n\t"
"blt end_c_%= \n\t"
"loop_c_%=: \n\t"
"vld1.32 {q9}, [%[new_scale_ptr]]! \n\t"
"vld1.32 {q10}, [%[new_bias_ptr]]! \n\t"
"mov r6, %[HXW] \n\t"
"subs r6, r6, #32 \n\t"
"blt end_hw_%= \n\t"
"loop_hw_%=: \n\t"
"vld1.32 {q1, q2}, [%[input_x_ptr]]! \n\t"
"vld1.32 {q3, q4}, [%[input_x_ptr]]! \n\t"
"vld1.32 {q5, q6}, [%[input_x_ptr]]! \n\t"
"vld1.32 {q7, q8}, [%[input_x_ptr]]! \n\t"
Tensor new_scale;
auto new_scale_ptr = new_scale.mutable_data<float>(make_ddim({C}));
Tensor new_bias;
auto new_bias_ptr = new_bias.mutable_data<float>(make_ddim({C}));
/// ((x - est_mean) * (inv_var) * scale + bias equal to
/// (x * inv_var * scale) + (bias - est_mean * inv_var * scale)
for (int i = 0; i < C; i++) {
new_scale_ptr[i] = inv_std_ptr[i] * scale_ptr[i];
new_bias_ptr[i] = bias_ptr[i] - mean_ptr[i] * inv_std_ptr[i] * scale_ptr[i];
{
for (int n = 0; n < N; n++) {
for (int h = 0; h < H; h++) {
int tmp_index = n * stride0 + i * stride1 + h * stride2;
for (int w = 0; w < W; w++) {
int index = tmp_index + w;
out_ptr[index] =
input_x_ptr[index] * new_scale_ptr[i] + new_bias_ptr[i];
"vmul.f32 q1, q1, q9 \n\t"
"vmul.f32 q2, q2, q9 \n\t"
"vmul.f32 q3, q3, q9 \n\t"
"vmul.f32 q4, q4, q9 \n\t"
"vmul.f32 q5, q5, q9 \n\t"
"vmul.f32 q6, q6, q9 \n\t"
"vmul.f32 q7, q7, q9 \n\t"
"vmul.f32 q8, q8, q9 \n\t"
"vadd.f32 q1, q1, q10 \n\t"
"vadd.f32 q2, q2, q10 \n\t"
"vadd.f32 q3, q3, q10 \n\t"
"vadd.f32 q4, q4, q10 \n\t"
"vadd.f32 q5, q5, q10 \n\t"
"vadd.f32 q6, q6, q10 \n\t"
"vadd.f32 q7, q7, q10 \n\t"
"vadd.f32 q8, q8, q10 \n\t"
"vst1.32 {q1, q2}, [%[out_ptr]]! \n\t"
"vst1.32 {q3, q4}, [%[out_ptr]]! \n\t"
"vst1.32 {q5, q6}, [%[out_ptr]]! \n\t"
"vst1.32 {q7, q8}, [%[out_ptr]]! \n\t"
"subs r6, r6, #32 \n\t"
"bge loop_hw_%= \n\t"
"end_hw_%=: \n\t"
"cmp r6, #0 \n\t"
"bge end_remainder_%= \n\t"
"mov r5, #4 \n\t"
"mul r6, r6, r5 \n\t"
"add %[input_x_ptr], %[input_x_ptr], r6 \n\t"
"vld1.32 {q1, q2}, [%[input_x_ptr]]! \n\t"
"vld1.32 {q3, q4}, [%[input_x_ptr]]! \n\t"
"vld1.32 {q5, q6}, [%[input_x_ptr]]! \n\t"
"vld1.32 {q7, q8}, [%[input_x_ptr]]! \n\t"
"vmul.f32 q1, q1, q9 \n\t"
"vmul.f32 q2, q2, q9 \n\t"
"vmul.f32 q3, q3, q9 \n\t"
"vmul.f32 q4, q4, q9 \n\t"
"vmul.f32 q5, q5, q9 \n\t"
"vmul.f32 q6, q6, q9 \n\t"
"vmul.f32 q7, q7, q9 \n\t"
"vmul.f32 q8, q8, q9 \n\t"
"vadd.f32 q1, q1, q10 \n\t"
"vadd.f32 q2, q2, q10 \n\t"
"vadd.f32 q3, q3, q10 \n\t"
"vadd.f32 q4, q4, q10 \n\t"
"vadd.f32 q5, q5, q10 \n\t"
"vadd.f32 q6, q6, q10 \n\t"
"vadd.f32 q7, q7, q10 \n\t"
"vadd.f32 q8, q8, q10 \n\t"
"add %[out_ptr], %[out_ptr], r6 \n\t"
"vst1.32 {q1, q2}, [%[out_ptr]]! \n\t"
"vst1.32 {q3, q4}, [%[out_ptr]]! \n\t"
"vst1.32 {q5, q6}, [%[out_ptr]]! \n\t"
"vst1.32 {q7, q8}, [%[out_ptr]]! \n\t"
"end_remainder_%=: \n\t"
"subs %[C], %[C], #1 \n\t"
"bge loop_c_%= \n\t"
"end_c_%=: \n\t"
"subs %[N], %[N], #1 \n\t"
"bge loop_n_%= \n\t"
"end_n_%=: \n\t"
:
:[input_x_ptr]"r"(input_x_ptr), [out_ptr]"r"(out_ptr), [new_scale_ptr]"r"(new_scale_ptr), [new_bias_ptr]"r"(new_bias_ptr),
[N]"r"(N), [C]"r"(C), [HXW]"r"(HXW)
:"memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "r5", "r6"
);
delete [] inv_std_ptr;
delete [] new_scale_ptr;
delete [] new_bias_ptr;
} else {
float *inv_std_ptr = new float[C];
for (int i = 0; i < C; i++) {
inv_std_ptr[i] =
1 / static_cast<float>(pow((variance_ptr[i] + epsilon), 0.5));
}
Tensor new_scale;
auto new_scale_ptr = new_scale.mutable_data<float>(make_ddim({C}));
Tensor new_bias;
auto new_bias_ptr = new_bias.mutable_data<float>(make_ddim({C}));
/// ((x - est_mean) * (inv_var) * scale + bias equal to
/// (x * inv_var * scale) + (bias - est_mean * inv_var * scale)
for (int i = 0; i < C; i++) {
new_scale_ptr[i] = inv_std_ptr[i] * scale_ptr[i];
new_bias_ptr[i] = bias_ptr[i] - mean_ptr[i] * inv_std_ptr[i] * scale_ptr[i];
{
for (int n = 0; n < N; n++) {
for (int h = 0; h < H; h++) {
int tmp_index = n * stride0 + i * stride1 + h * stride2;
for (int w = 0; w < W; w++) {
int index = tmp_index + w;
out_ptr[index] =
input_x_ptr[index] * new_scale_ptr[i] + new_bias_ptr[i];
}
}
}
}
}
delete [] inv_std_ptr;
// DLOG << "input[2,5,1,0](input[102]) ,channel 5 :";
// DLOG << "input_x_ptr : " << input_x_ptr[102];
// DLOG << "variance : " << variance_ptr[5];
// DLOG << "inv_std_ptr : " << inv_std_ptr[5];
// DLOG << "new_scale_ptr : " << new_scale_ptr[5];
// DLOG << "new_bias_ptr : " << new_bias_ptr[5];
// DLOG << "out_ptr : " << out_ptr[102];
}
DLOG << "input[2,5,1,0](input[102]) ,channel 5 :";
DLOG << "input_x_ptr : " << input_x_ptr[102];
DLOG << "variance : " << variance_ptr[5];
DLOG << "inv_std_ptr : " << inv_std_ptr[5];
DLOG << "new_scale_ptr : " << new_scale_ptr[5];
DLOG << "new_bias_ptr : " << new_bias_ptr[5];
DLOG << "out_ptr : " << out_ptr[102];
}
} // namespace operators
} // namespace paddle_mobile
......
......@@ -38,70 +38,70 @@ void ReluKernel<CPU, float>::Compute(const ReluParam &param) const {
auto *out_ptr = out->mutable_data<float>();
int numel = input_x->numel();
if (numel > 32) {
asm volatile(
"pld [%[input_x_ptr], #0] \n\t"
"vmov.f32 q8, #0.0 \n\t"
"subs %[num], %[num], #32 \n\t"
"blt end_num_%= \n\t"
"loop_num_%=: \n\t"
"pld [%[input_x_ptr], #1024] \n\t"
"vld1.32 {q0, q1}, [%[input_x_ptr]]! \n\t"
"vld1.32 {q2, q3}, [%[input_x_ptr]]! \n\t"
"vld1.32 {q4, q5}, [%[input_x_ptr]]! \n\t"
"vld1.32 {q6, q7}, [%[input_x_ptr]]! \n\t"
"vmax.f32 q0, q0, q8 \n\t"
"vmax.f32 q1, q1, q8 \n\t"
"vmax.f32 q2, q2, q8 \n\t"
"vmax.f32 q3, q3, q8 \n\t"
"vmax.f32 q4, q4, q8 \n\t"
"vmax.f32 q5, q5, q8 \n\t"
"vmax.f32 q6, q6, q8 \n\t"
"vmax.f32 q7, q7, q8 \n\t"
"vst1.32 {q0, q1}, [%[out_ptr]]! \n\t"
"vst1.32 {q2, q3}, [%[out_ptr]]! \n\t"
"vst1.32 {q4, q5}, [%[out_ptr]]! \n\t"
"vst1.32 {q6, q7}, [%[out_ptr]]! \n\t"
"subs %[num], %[num], #32 \n\t"
"bge loop_num_%= \n\t"
"end_num_%=: \n\t"
"cmp %[num], #0 \n\t"
"bge end_%= \n\t"
"mov r6, #4 \n\t"
"mul r5, %[num], r6 \n\t"
"add %[input_x_ptr], %[input_x_ptr], r5 \n\t"
"vld1.32 {q0, q1}, [%[input_x_ptr]]! \n\t"
"vld1.32 {q2, q3}, [%[input_x_ptr]]! \n\t"
"vld1.32 {q4, q5}, [%[input_x_ptr]]! \n\t"
"vld1.32 {q6, q7}, [%[input_x_ptr]]! \n\t"
"vmax.f32 q0, q0, q8 \n\t"
"vmax.f32 q1, q1, q8 \n\t"
"vmax.f32 q2, q2, q8 \n\t"
"vmax.f32 q3, q3, q8 \n\t"
"vmax.f32 q4, q4, q8 \n\t"
"vmax.f32 q5, q5, q8 \n\t"
"vmax.f32 q6, q6, q8 \n\t"
"vmax.f32 q7, q7, q8 \n\t"
"add %[out_ptr], %[out_ptr], r5 \n\t"
"vst1.32 {q0, q1}, [%[out_ptr]]! \n\t"
"vst1.32 {q2, q3}, [%[out_ptr]]! \n\t"
"vst1.32 {q4, q5}, [%[out_ptr]]! \n\t"
"vst1.32 {q6, q7}, [%[out_ptr]]! \n\t"
"end_%=: \n\t"
:
:
[out_ptr] "r"(out_ptr), [input_x_ptr] "r"(input_x_ptr), [num] "r"(numel)
: "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "r5",
"r6");
} else {
// if (numel > 64) {
// asm volatile(
// "pld [%[input_x_ptr], #0] \n\t"
// "vmov.f32 q8, #0.0 \n\t"
// "subs %[num], %[num], #32 \n\t"
// "blt end_num_%= \n\t"
// "loop_num_%=: \n\t"
// "pld [%[input_x_ptr], #1024] \n\t"
//
// "vld1.32 {q0, q1}, [%[input_x_ptr]]! \n\t"
// "vld1.32 {q2, q3}, [%[input_x_ptr]]! \n\t"
// "vld1.32 {q4, q5}, [%[input_x_ptr]]! \n\t"
// "vld1.32 {q6, q7}, [%[input_x_ptr]]! \n\t"
//
// "vmax.f32 q0, q0, q8 \n\t"
// "vmax.f32 q1, q1, q8 \n\t"
// "vmax.f32 q2, q2, q8 \n\t"
// "vmax.f32 q3, q3, q8 \n\t"
// "vmax.f32 q4, q4, q8 \n\t"
// "vmax.f32 q5, q5, q8 \n\t"
// "vmax.f32 q6, q6, q8 \n\t"
// "vmax.f32 q7, q7, q8 \n\t"
//
// "vst1.32 {q0, q1}, [%[out_ptr]]! \n\t"
// "vst1.32 {q2, q3}, [%[out_ptr]]! \n\t"
// "vst1.32 {q4, q5}, [%[out_ptr]]! \n\t"
// "vst1.32 {q6, q7}, [%[out_ptr]]! \n\t"
//
// "subs %[num], %[num], #32 \n\t"
// "bge loop_num_%= \n\t"
// "end_num_%=: \n\t"
// "cmp %[num], #0 \n\t"
// "bge end_%= \n\t"
// "mov r6, #4 \n\t"
// "mul r5, %[num], r6 \n\t"
// "add %[input_x_ptr], %[input_x_ptr], r5 \n\t"
// "vld1.32 {q0, q1}, [%[input_x_ptr]]! \n\t"
// "vld1.32 {q2, q3}, [%[input_x_ptr]]! \n\t"
// "vld1.32 {q4, q5}, [%[input_x_ptr]]! \n\t"
// "vld1.32 {q6, q7}, [%[input_x_ptr]]! \n\t"
// "vmax.f32 q0, q0, q8 \n\t"
// "vmax.f32 q1, q1, q8 \n\t"
// "vmax.f32 q2, q2, q8 \n\t"
// "vmax.f32 q3, q3, q8 \n\t"
// "vmax.f32 q4, q4, q8 \n\t"
// "vmax.f32 q5, q5, q8 \n\t"
// "vmax.f32 q6, q6, q8 \n\t"
// "vmax.f32 q7, q7, q8 \n\t"
// "add %[out_ptr], %[out_ptr], r5 \n\t"
// "vst1.32 {q0, q1}, [%[out_ptr]]! \n\t"
// "vst1.32 {q2, q3}, [%[out_ptr]]! \n\t"
// "vst1.32 {q4, q5}, [%[out_ptr]]! \n\t"
// "vst1.32 {q6, q7}, [%[out_ptr]]! \n\t"
// "end_%=: \n\t"
// :
// :
// [out_ptr] "r"(out_ptr), [input_x_ptr] "r"(input_x_ptr), [num] "r"(numel)
// : "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "r5",
// "r6");
// } else {
ReluFunctor<float> func_;
math::Transform trans;
trans(input_x_ptr, input_x_ptr + numel, out_ptr, func_);
}
// }
}
} // namespace operators
} // namespace paddle_mobile
......
......@@ -137,4 +137,6 @@ else ()
ADD_EXECUTABLE(test-depthwise-conv-op operators/test_depthwise_conv_op.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-depthwise-conv-op paddle-mobile)
#add_library(test-lib-size SHARED common/test_lib_size.h common/test_lib_size.cpp)
endif()
......@@ -20,6 +20,8 @@ limitations under the License. */
#define PADDLE_MOBILE_TEST_LIB_SIZE_H
#include <vector>
#include <pthread.h>
#include <thread>
//#include <list>
//#include <tuple>
//#include <typeinfo>
......@@ -33,7 +35,7 @@ limitations under the License. */
//#include <iostream>
//#include <sstream>
#include <memory>
//#include <memory>
//#include <stdio.h>
//#include <cstring>
......@@ -44,8 +46,10 @@ void foo() {
// std::cout << "12345" << std::endl;
std::vector<int> vec = {1, 2, 3, 4, 5};
vec.push_back(2);
// std::find(vec.begin(), vec.end(), 1);
pthread_mutex_init(NULL, NULL);
pthread_attr_destroy(NULL);
// std::find(vec.begin(), vec.end(), 1);
// std::list<int> l;
......@@ -70,7 +74,7 @@ void foo() {
// int z = 10;
// }
std::shared_ptr<int> s1 = std::make_shared<int>();
// std::shared_ptr<int> s1 = std::make_shared<int>();
// std::stringstream ss;
// ss << "12345";
......
......@@ -19,11 +19,9 @@ int main() {
paddle_mobile::Loader<paddle_mobile::CPU> loader;
// ../../../test/models/googlenet
// ../../../test/models/mobilenet
auto program = loader.Load(g_googlenet, true, true);
auto program = loader.Load(g_mobilenet_ssd, false, false);
// loader.Load(g_googlenet_combine + "/model", g_googlenet_combine +
// "/params",
// true);
// "/params", true);
program.originProgram->Description("program desc: ");
return 0;
}
......@@ -18,7 +18,7 @@ limitations under the License. */
int main() {
paddle_mobile::Loader<paddle_mobile::CPU> loader;
bool optimize = false;
bool optimize = true;
auto time1 = time();
auto program = loader.Load(g_googlenet, optimize);
// auto program = loader.Load(g_googlenet_combine + "/model",
......
......@@ -41,7 +41,7 @@ class TestBatchNormOp {
for (int j = 0; j < ops.size(); ++j) {
std::shared_ptr<OpDesc> op = ops[j];
if (op->Type() == "batch_norm" &&
op->Input("X")[0] == "conv2d_0.tmp_0") {
op->Input("X")[0] == "conv2d_5.tmp_0") {
DLOG << " mul attr size: " << op->GetAttrMap().size();
DLOG << " inputs size: " << op->GetInputs().size();
DLOG << " outputs size: " << op->GetOutputs().size();
......@@ -67,29 +67,29 @@ class TestBatchNormOp {
const Tensor &t5) {
// feed
auto scope = program_.scope;
Variable *x1_feed_value = scope->Var("conv2d_0.tmp_0");
Variable *x1_feed_value = scope->Var("conv2d_5.tmp_0");
auto tensor_x1 = x1_feed_value->GetMutable<LoDTensor>();
tensor_x1->ShareDataWith(t1);
Variable *mean_feed_value = scope->Var("batch_norm_0.w_1");
Variable *mean_feed_value = scope->Var("batch_norm_10.w_1");
auto tensor_mean = mean_feed_value->GetMutable<LoDTensor>();
tensor_mean->ShareDataWith(t2);
Variable *scale_feed_value = scope->Var("batch_norm_0.w_0");
Variable *scale_feed_value = scope->Var("batch_norm_10.w_0");
auto tensor_scale = scale_feed_value->GetMutable<LoDTensor>();
tensor_scale->ShareDataWith(t3);
Variable *variance_feed_value = scope->Var("batch_norm_0.w_2");
Variable *variance_feed_value = scope->Var("batch_norm_10.w_2");
auto tensor_variance = variance_feed_value->GetMutable<LoDTensor>();
tensor_variance->ShareDataWith(t4);
Variable *bias_feed_value = scope->Var("batch_norm_0.b_0");
Variable *bias_feed_value = scope->Var("batch_norm_10.b_0");
auto tensor_bias = bias_feed_value->GetMutable<LoDTensor>();
tensor_bias->ShareDataWith(t5);
Variable *output = scope->Var("batch_norm_0.tmp_2");
Variable *output = scope->Var("batch_norm_10.tmp_2");
auto *output_tensor = output->GetMutable<LoDTensor>();
output_tensor->mutable_data<float>({4, 10, 2, 2});
output_tensor->mutable_data<float>({1, 256, 38, 38});
// DLOG << typeid(output_tensor).name();
// DLOG << "output_tensor dims: " << output_tensor->dims();
......@@ -128,30 +128,30 @@ int main() {
DLOG << "----------**********----------";
DLOG << "begin to run BatchNormOp Test";
paddle_mobile::Loader<paddle_mobile::CPU> loader;
auto program = loader.Load(std::string(g_resnet));
auto program = loader.Load(std::string(g_mobilenet_ssd));
/// input x (4,10,2,2)
paddle_mobile::framework::Tensor inputx1;
SetupTensor<float>(&inputx1, {4, 10, 2, 2}, static_cast<float>(0),
SetupTensor<float>(&inputx1, {1, 256, 38, 38}, static_cast<float>(0),
static_cast<float>(1));
auto *inputx1_ptr = inputx1.data<float>();
paddle_mobile::framework::Tensor mean;
SetupTensor<float>(&mean, {10}, static_cast<float>(0), static_cast<float>(1));
SetupTensor<float>(&mean, {256}, static_cast<float>(0), static_cast<float>(1));
auto *mean_ptr = mean.data<float>();
paddle_mobile::framework::Tensor scale;
SetupTensor<float>(&scale, {10}, static_cast<float>(0),
SetupTensor<float>(&scale, {256}, static_cast<float>(0),
static_cast<float>(1));
auto *scale_ptr = scale.data<float>();
paddle_mobile::framework::Tensor variance;
SetupTensor<float>(&variance, {10}, static_cast<float>(0),
SetupTensor<float>(&variance, {256}, static_cast<float>(0),
static_cast<float>(1));
auto *variance_ptr = variance.data<float>();
paddle_mobile::framework::Tensor bias;
SetupTensor<float>(&bias, {10}, static_cast<float>(0), static_cast<float>(1));
SetupTensor<float>(&bias, {256}, static_cast<float>(0), static_cast<float>(1));
auto *bias_ptr = bias.data<float>();
paddle_mobile::framework::TestBatchNormOp<paddle_mobile::CPU> testBatchNormOp(
......@@ -161,11 +161,13 @@ int main() {
testBatchNormOp.predict_bn(inputx1, mean, scale, variance, bias);
auto *output_bn_ptr = output_bn->data<float>();
/// [2, 5, 1, 0]
DLOG << " (" << inputx1_ptr[102] << " - " << mean_ptr[5] << ")/(("
<< variance_ptr[5] << " + 0.00001"
<< ")^0.5)* " << scale_ptr[5] << " + " << bias_ptr[5] << " = ";
DLOG << output_bn_ptr[102];
DLOG << " (" << inputx1_ptr[0] << " - " << mean_ptr[0] << ")/(("
<< variance_ptr[0] << " + 0.00001"
<< ")^0.5)* " << scale_ptr[0] << " + " << bias_ptr[0] << " = ";
DLOG << output_bn_ptr[0];
DLOG << "input_ptr 0 : " << inputx1_ptr[0];
DLOG << "output_ptr 0 : " << output_bn_ptr[0];
return 0;
}
#!/usr/bin/env sh
# push_fn [mode]
#
# Creates the bin, models and images directories under data/local/tmp on the
# device reachable through adb, then pushes the test executables and the built
# libraries into the bin directory. Unless the first argument is "npm", the
# test images and models are pushed as well.
# NOTE(review): "npm" presumably means "no push models" - confirm with the author.
#
# Only POSIX constructs are used here because the script runs under `sh`.
push_fn () {
    # Values ending in "/*" are globs; they are expanded unquoted at the
    # `adb push` call sites on purpose so adb receives every matching entry.
    MODELS_PATH="../test/models/*"
    MODELS_SRC="../test/models"
    IMAGE_PATH="../test/images/*"
    EXE_FILE="../test/build/*"

    EXE_DIR="data/local/tmp/bin"
    adb shell mkdir "${EXE_DIR}"

    MODELS_DIR="data/local/tmp/models"
    adb shell mkdir "${MODELS_DIR}"
    # Create one device-side directory per entry of the local models directory.
    # A glob is used instead of parsing `ls` output so names are not word-split.
    for model_path in "${MODELS_SRC}"/*
    do
        # An unmatched glob stays literal; skip it instead of creating "*".
        [ -e "${model_path}" ] || continue
        adb shell mkdir "${MODELS_DIR}/$(basename "${model_path}")"
    done

    IMAGES_DIR="data/local/tmp/images"
    adb shell mkdir "${IMAGES_DIR}"

    LIB_PATH="../build/release/arm-v7a/build/*"
    adb push ${EXE_FILE} "${EXE_DIR}"
    adb push ${LIB_PATH} "${EXE_DIR}"

    # `[ ... ]` replaces the bash-only `[[ ... ]]`, which a strict POSIX sh
    # rejects (the push below would then silently never run).
    if [ "$1" != "npm" ]; then
        adb push ${IMAGE_PATH} "${IMAGES_DIR}"
        adb push ${MODELS_PATH} "${MODELS_DIR}"
    fi
}
# Forward the "npm" flag to push_fn when it is the first script argument;
# otherwise call push_fn with no arguments.
# POSIX `[ ... = ... ]` replaces the bash-only `[[ ... == ... ]]`, because the
# script's shebang selects `sh`.
if [ "$1" = "npm" ]; then
    push_fn "$1"
else
    push_fn
fi
......@@ -24,8 +24,15 @@ adb shell mkdir ${IMAGES_DIR}
LIB_PATH="../build/release/arm-v7a/build/*"
adb push ${EXE_FILE} ${EXE_DIR}
adb push ${LIB_PATH} ${EXE_DIR}
if [[ $1 != "npm" ]]; then
adb push ${IMAGE_PATH} ${IMAGES_DIR}
adb push ${MODELS_PATH} ${MODELS_DIR}
fi
adb shell "cd /data/local/tmp/bin; LD_LIBRARY_PATH=. ./${TESTUNIT}"
}
if [[ $1 == "npm" ]]; then
push_fn $1
else
push_fn
fi
\ No newline at end of file
......@@ -19,12 +19,19 @@ adb shell mkdir ${IMAGES_DIR}
LIB_PATH="../../build/release/arm-v7a/build/*"
adb push ${EXE_FILE} ${EXE_DIR}
adb push ${LIB_PATH} ${EXE_DIR}
if [[ $1 != "npm" ]]; then
adb push ${IMAGE_PATH} ${IMAGES_DIR}
adb push ${MODELS_PATH} ${MODELS_DIR}
fi
echo "test-op or test-net below : "
adb shell ls /data/local/tmp/bin
echo "**** choose OP or NET to test ****"
read -p "which to test : " test_name
adb shell "cd /data/local/tmp/bin; LD_LIBRARY_PATH=. ./${test_name}"
}
if [[ $1 == "npm" ]]; then
push_fn $1
else
push_fn
fi
\ No newline at end of file
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册