diff --git a/src/operators/kernel/arm/batchnorm_kernel.cpp b/src/operators/kernel/arm/batchnorm_kernel.cpp
index 4327b7f3163f013f270ca4428227075e4883f96c..30d922a777b67a55a7d0dfa98a55144bcb569d49 100644
--- a/src/operators/kernel/arm/batchnorm_kernel.cpp
+++ b/src/operators/kernel/arm/batchnorm_kernel.cpp
@@ -23,7 +23,6 @@ namespace operators {
 
 template <>
 void BatchNormKernel::Compute(const BatchNormParam &param) const {
-  /// todo: test.
   const Tensor *input_x = param.InputX();
   auto input_x_ptr = input_x->data<float>();
   const auto &x_dims = input_x->dims();
@@ -46,50 +45,192 @@ void BatchNormKernel::Compute(const BatchNormParam &param) const {
   auto scale_ptr = scale->data<float>();
   auto bias_ptr = bias->data<float>();
 
-  Tensor inv_std;
-  auto inv_std_ptr = inv_std.mutable_data<float>(make_ddim({C}));
-  if (C != variance->numel()) {
-    DLOG << "C must equal to variance.numel()";
-  }
-  assert(C == variance->numel());
+  // Tensor inv_std;
+  // auto inv_std_ptr = inv_std.mutable_data<float>(make_ddim({C}));
 
-  /// std = (var + epsilon).sqrt();
-  /// inv_std = 1 / std;
-  for (int i = 0; i < C; i++) {
-    inv_std_ptr[i] =
-        1 / static_cast<float>(pow((variance_ptr[i] + epsilon), 0.5));
-  }
+  PADDLE_MOBILE_ENFORCE(C == variance->numel(),
+                        "C must equal to variance.numel()");
+
+  int HXW = H * W;
+  if (HXW > 32) {
+    int NXC = N * C;
+    float *inv_std_ptr = new float[NXC * 4];
+    float *volatile new_scale_ptr = new float[NXC * 4];
+    float *volatile new_bias_ptr = new float[NXC * 4];
+
+    /// std = (var + epsilon).sqrt();
+    /// inv_std = 1 / std;
+    for (int i = 0; i < C * 4; i += 4) {
+      inv_std_ptr[i] =
+          1 / static_cast<float>(pow((variance_ptr[i / 4] + epsilon), 0.5));
+      inv_std_ptr[i + 1] = inv_std_ptr[i];
+      inv_std_ptr[i + 2] = inv_std_ptr[i];
+      inv_std_ptr[i + 3] = inv_std_ptr[i];
+
+      new_scale_ptr[i] = inv_std_ptr[i] * scale_ptr[i / 4];
+      new_scale_ptr[i + 1] = new_scale_ptr[i];
+      new_scale_ptr[i + 2] = new_scale_ptr[i];
+      new_scale_ptr[i + 3] = new_scale_ptr[i];
+
+      new_bias_ptr[i] =
+          bias_ptr[i / 4] - mean_ptr[i / 4] * inv_std_ptr[i] * scale_ptr[i / 4];
+
+      new_bias_ptr[i + 1] = new_bias_ptr[i];
+      new_bias_ptr[i + 2] = new_bias_ptr[i];
+      new_bias_ptr[i + 3] = new_bias_ptr[i];
+    }
+
+    for (int j = C * 4; j < NXC * 4; ++j) {
+      new_scale_ptr[j] = new_scale_ptr[j - C * 4];
+      new_bias_ptr[j] = new_bias_ptr[j - C * 4];
+    }
+
+    asm volatile(
+        "subs %[N], %[N], #1                    \n\t"
+        "blt        end_n_%=                    \n\t"
+        "loop_n_%=:                             \n\t"
+
+        "subs %[C], %[C], #1                    \n\t"
+        "blt        end_c_%=                    \n\t"
+        "loop_c_%=:                             \n\t"
+
+        "vld1.32 {q9}, [%[new_scale_ptr]]!      \n\t"
+        "vld1.32 {q10}, [%[new_bias_ptr]]!      \n\t"
+
+        "mov r6, %[HXW]                         \n\t"
+
+        "subs r6, r6, #32                       \n\t"
+        "blt        end_hw_%=                   \n\t"
+        "loop_hw_%=:                            \n\t"
+
+        "vld1.32 {q1, q2}, [%[input_x_ptr]]!    \n\t"
+        "vld1.32 {q3, q4}, [%[input_x_ptr]]!    \n\t"
+        "vld1.32 {q5, q6}, [%[input_x_ptr]]!    \n\t"
+        "vld1.32 {q7, q8}, [%[input_x_ptr]]!    \n\t"
+
+        "vmul.f32 q1, q1, q9                    \n\t"
+        "vmul.f32 q2, q2, q9                    \n\t"
+        "vmul.f32 q3, q3, q9                    \n\t"
+        "vmul.f32 q4, q4, q9                    \n\t"
-  Tensor new_scale;
-  auto new_scale_ptr = new_scale.mutable_data<float>(make_ddim({C}));
-  Tensor new_bias;
-  auto new_bias_ptr = new_bias.mutable_data<float>(make_ddim({C}));
-
-  /// ((x - est_mean) * (inv_var) * scale + bias equal to
-  /// (x * inv_var * scale) + (bias - est_mean * inv_var * scale)
-  for (int i = 0; i < C; i++) {
-    new_scale_ptr[i] = inv_std_ptr[i] * scale_ptr[i];
-    new_bias_ptr[i] = bias_ptr[i] - mean_ptr[i] * inv_std_ptr[i] * scale_ptr[i];
-    {
-      for (int n = 0; n < N; n++) {
-        for (int h = 0; h < H; h++) {
-          int tmp_index = n * stride0 + i * stride1 + h * stride2;
-          for (int w = 0; w < W; w++) {
-            int index = tmp_index + w;
-            out_ptr[index] =
-                input_x_ptr[index] * new_scale_ptr[i] + new_bias_ptr[i];
+        "vmul.f32 q5, q5, q9                    \n\t"
+        "vmul.f32 q6, q6, q9                    \n\t"
+        "vmul.f32 q7, q7, q9                    \n\t"
+        "vmul.f32 q8, q8, q9                    \n\t"
+
+        "vadd.f32 q1, q1, q10                   \n\t"
+        "vadd.f32 q2, q2, q10                   \n\t"
+        "vadd.f32 q3, q3, q10                   \n\t"
+        "vadd.f32 q4, q4, q10                   \n\t"
+        "vadd.f32 q5, q5, q10                   \n\t"
+        "vadd.f32 q6, q6, q10                   \n\t"
+        "vadd.f32 q7, q7, q10                   \n\t"
+        "vadd.f32 q8, q8, q10                   \n\t"
+
+        "vst1.32 {q1, q2}, [%[out_ptr]]!        \n\t"
+        "vst1.32 {q3, q4}, [%[out_ptr]]!        \n\t"
+        "vst1.32 {q5, q6}, [%[out_ptr]]!        \n\t"
+        "vst1.32 {q7, q8}, [%[out_ptr]]!        \n\t"
+
+        "subs r6, r6, #32                       \n\t"
+        "bge        loop_hw_%=                  \n\t"
+        "end_hw_%=:                             \n\t"
+
+        "cmp r6, #0                             \n\t"
+        "bge        end_remainder_%=            \n\t"
+        "mov r5, #4                             \n\t"
+        "mul r6, r6, r5                         \n\t"
+        "add %[input_x_ptr], %[input_x_ptr], r6 \n\t"
+
+        "vld1.32 {q1, q2}, [%[input_x_ptr]]!    \n\t"
+        "vld1.32 {q3, q4}, [%[input_x_ptr]]!    \n\t"
+        "vld1.32 {q5, q6}, [%[input_x_ptr]]!    \n\t"
+        "vld1.32 {q7, q8}, [%[input_x_ptr]]!    \n\t"
+
+        "vmul.f32 q1, q1, q9                    \n\t"
+        "vmul.f32 q2, q2, q9                    \n\t"
+        "vmul.f32 q3, q3, q9                    \n\t"
+        "vmul.f32 q4, q4, q9                    \n\t"
+        "vmul.f32 q5, q5, q9                    \n\t"
+        "vmul.f32 q6, q6, q9                    \n\t"
+        "vmul.f32 q7, q7, q9                    \n\t"
+        "vmul.f32 q8, q8, q9                    \n\t"
+        "vadd.f32 q1, q1, q10                   \n\t"
+        "vadd.f32 q2, q2, q10                   \n\t"
+        "vadd.f32 q3, q3, q10                   \n\t"
+        "vadd.f32 q4, q4, q10                   \n\t"
+        "vadd.f32 q5, q5, q10                   \n\t"
+        "vadd.f32 q6, q6, q10                   \n\t"
+        "vadd.f32 q7, q7, q10                   \n\t"
+        "vadd.f32 q8, q8, q10                   \n\t"
+
+        "add %[out_ptr], %[out_ptr], r6         \n\t"
+        "vst1.32 {q1, q2}, [%[out_ptr]]!        \n\t"
+        "vst1.32 {q3, q4}, [%[out_ptr]]!        \n\t"
+        "vst1.32 {q5, q6}, [%[out_ptr]]!        \n\t"
+        "vst1.32 {q7, q8}, [%[out_ptr]]!        \n\t"
+
+        "end_remainder_%=:                      \n\t"
+
+        "subs %[C], %[C], #1                    \n\t"
+        "bge        loop_c_%=                   \n\t"
+        "end_c_%=:                              \n\t"
+
+        "subs %[N], %[N], #1                    \n\t"
+        "bge        loop_n_%=                   \n\t"
+        "end_n_%=:                              \n\t"
+        :
+        : [input_x_ptr] "r"(input_x_ptr), [out_ptr] "r"(out_ptr),
+          [new_scale_ptr] "r"(new_scale_ptr), [new_bias_ptr] "r"(new_bias_ptr),
+          [N] "r"(N), [C] "r"(C), [HXW] "r"(HXW)
+        : "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9",
+          "q10", "r5", "r6");
+
+    delete[] inv_std_ptr;
+    delete[] new_scale_ptr;
+    delete[] new_bias_ptr;
+
+  } else {
+    float *inv_std_ptr = new float[C];
+    for (int i = 0; i < C; i++) {
+      inv_std_ptr[i] =
+          1 / static_cast<float>(pow((variance_ptr[i] + epsilon), 0.5));
+    }
+
+    Tensor new_scale;
+    auto new_scale_ptr = new_scale.mutable_data<float>(make_ddim({C}));
+    Tensor new_bias;
+    auto new_bias_ptr = new_bias.mutable_data<float>(make_ddim({C}));
+
+    /// ((x - est_mean) * (inv_var) * scale + bias equal to
+    /// (x * inv_var * scale) + (bias - est_mean * inv_var * scale)
+    for (int i = 0; i < C; i++) {
+      new_scale_ptr[i] = inv_std_ptr[i] * scale_ptr[i];
+      new_bias_ptr[i] =
+          bias_ptr[i] - mean_ptr[i] * inv_std_ptr[i] * scale_ptr[i];
+      {
+        for (int n = 0; n < N; n++) {
+          for (int h = 0; h < H; h++) {
+            int tmp_index = n * stride0 + i * stride1 + h * stride2;
+            for (int w = 0; w < W; w++) {
+              int index = tmp_index + w;
+              out_ptr[index] =
+                  input_x_ptr[index] * new_scale_ptr[i] + new_bias_ptr[i];
+            }
           }
         }
       }
     }
+
+    delete[] inv_std_ptr;
+    // DLOG << "input[2,5,1,0](input[102]) ,channel 5 :";
+    // DLOG << "input_x_ptr : " << input_x_ptr[102];
+    // DLOG << "variance : " << variance_ptr[5];
+    // DLOG << "inv_std_ptr : " << inv_std_ptr[5];
+    // DLOG << "new_scale_ptr : " << new_scale_ptr[5];
+    // DLOG << "new_bias_ptr : " << new_bias_ptr[5];
+    // DLOG << "out_ptr : " << out_ptr[102];
   }
-  DLOG << "input[2,5,1,0](input[102]) ,channel 5 :";
-  DLOG << "input_x_ptr : " << input_x_ptr[102];
-  DLOG << "variance : " << variance_ptr[5];
-  DLOG << "inv_std_ptr : " << inv_std_ptr[5];
-  DLOG << "new_scale_ptr : " << new_scale_ptr[5];
-  DLOG << "new_bias_ptr : " << new_bias_ptr[5];
-  DLOG << "out_ptr : " << out_ptr[102];
 }
 
 }  // namespace operators
 }  // namespace paddle_mobile
\n\t" + + "end_remainder_%=: \n\t" + + "subs %[C], %[C], #1 \n\t" + "bge loop_c_%= \n\t" + "end_c_%=: \n\t" + + "subs %[N], %[N], #1 \n\t" + "bge loop_n_%= \n\t" + "end_n_%=: \n\t" + : + : [input_x_ptr] "r"(input_x_ptr), [out_ptr] "r"(out_ptr), + [new_scale_ptr] "r"(new_scale_ptr), [new_bias_ptr] "r"(new_bias_ptr), + [N] "r"(N), [C] "r"(C), [HXW] "r"(HXW) + : "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", + "q10", "r5", "r6"); + + delete[] inv_std_ptr; + delete[] new_scale_ptr; + delete[] new_bias_ptr; + + } else { + float *inv_std_ptr = new float[C]; + for (int i = 0; i < C; i++) { + inv_std_ptr[i] = + 1 / static_cast(pow((variance_ptr[i] + epsilon), 0.5)); + } + + Tensor new_scale; + auto new_scale_ptr = new_scale.mutable_data(make_ddim({C})); + Tensor new_bias; + auto new_bias_ptr = new_bias.mutable_data(make_ddim({C})); + + /// ((x - est_mean) * (inv_var) * scale + bias equal to + /// (x * inv_var * scale) + (bias - est_mean * inv_var * scale) + for (int i = 0; i < C; i++) { + new_scale_ptr[i] = inv_std_ptr[i] * scale_ptr[i]; + new_bias_ptr[i] = + bias_ptr[i] - mean_ptr[i] * inv_std_ptr[i] * scale_ptr[i]; + { + for (int n = 0; n < N; n++) { + for (int h = 0; h < H; h++) { + int tmp_index = n * stride0 + i * stride1 + h * stride2; + for (int w = 0; w < W; w++) { + int index = tmp_index + w; + out_ptr[index] = + input_x_ptr[index] * new_scale_ptr[i] + new_bias_ptr[i]; + } } } } } + + delete[] inv_std_ptr; + // DLOG << "input[2,5,1,0](input[102]) ,channel 5 :"; + // DLOG << "input_x_ptr : " << input_x_ptr[102]; + // DLOG << "variance : " << variance_ptr[5]; + // DLOG << "inv_std_ptr : " << inv_std_ptr[5]; + // DLOG << "new_scale_ptr : " << new_scale_ptr[5]; + // DLOG << "new_bias_ptr : " << new_bias_ptr[5]; + // DLOG << "out_ptr : " << out_ptr[102]; } - DLOG << "input[2,5,1,0](input[102]) ,channel 5 :"; - DLOG << "input_x_ptr : " << input_x_ptr[102]; - DLOG << "variance : " << variance_ptr[5]; - DLOG << "inv_std_ptr : " << inv_std_ptr[5]; - DLOG << "new_scale_ptr : " << new_scale_ptr[5]; - DLOG << "new_bias_ptr : " << new_bias_ptr[5]; - DLOG << "out_ptr : " << out_ptr[102]; } } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/kernel/arm/relu_kernel.cpp b/src/operators/kernel/arm/relu_kernel.cpp index e7e0941a4d0bf48d86525cc52ee33301cdcbf67e..86bf53e5a1e5ecc285c9e9f20cb412d290d535d1 100644 --- a/src/operators/kernel/arm/relu_kernel.cpp +++ b/src/operators/kernel/arm/relu_kernel.cpp @@ -38,70 +38,71 @@ void ReluKernel::Compute(const ReluParam ¶m) const { auto *out_ptr = out->mutable_data(); int numel = input_x->numel(); - if (numel > 32) { - asm volatile( - "pld [%[input_x_ptr], #0] \n\t" - "vmov.f32 q8, #0.0 \n\t" - "subs %[num], %[num], #32 \n\t" - "blt end_num_%= \n\t" - "loop_num_%=: \n\t" - "pld [%[input_x_ptr], #1024] \n\t" - - "vld1.32 {q0, q1}, [%[input_x_ptr]]! \n\t" - "vld1.32 {q2, q3}, [%[input_x_ptr]]! \n\t" - "vld1.32 {q4, q5}, [%[input_x_ptr]]! \n\t" - "vld1.32 {q6, q7}, [%[input_x_ptr]]! \n\t" - - "vmax.f32 q0, q0, q8 \n\t" - "vmax.f32 q1, q1, q8 \n\t" - "vmax.f32 q2, q2, q8 \n\t" - "vmax.f32 q3, q3, q8 \n\t" - "vmax.f32 q4, q4, q8 \n\t" - "vmax.f32 q5, q5, q8 \n\t" - "vmax.f32 q6, q6, q8 \n\t" - "vmax.f32 q7, q7, q8 \n\t" - - "vst1.32 {q0, q1}, [%[out_ptr]]! \n\t" - "vst1.32 {q2, q3}, [%[out_ptr]]! \n\t" - "vst1.32 {q4, q5}, [%[out_ptr]]! \n\t" - "vst1.32 {q6, q7}, [%[out_ptr]]! 
\n\t" - - "subs %[num], %[num], #32 \n\t" - "bge loop_num_%= \n\t" - "end_num_%=: \n\t" - "cmp %[num], #0 \n\t" - "bge end_%= \n\t" - "mov r6, #4 \n\t" - "mul r5, %[num], r6 \n\t" - "add %[input_x_ptr], %[input_x_ptr], r5 \n\t" - "vld1.32 {q0, q1}, [%[input_x_ptr]]! \n\t" - "vld1.32 {q2, q3}, [%[input_x_ptr]]! \n\t" - "vld1.32 {q4, q5}, [%[input_x_ptr]]! \n\t" - "vld1.32 {q6, q7}, [%[input_x_ptr]]! \n\t" - "vmax.f32 q0, q0, q8 \n\t" - "vmax.f32 q1, q1, q8 \n\t" - "vmax.f32 q2, q2, q8 \n\t" - "vmax.f32 q3, q3, q8 \n\t" - "vmax.f32 q4, q4, q8 \n\t" - "vmax.f32 q5, q5, q8 \n\t" - "vmax.f32 q6, q6, q8 \n\t" - "vmax.f32 q7, q7, q8 \n\t" - "add %[out_ptr], %[out_ptr], r5 \n\t" - "vst1.32 {q0, q1}, [%[out_ptr]]! \n\t" - "vst1.32 {q2, q3}, [%[out_ptr]]! \n\t" - "vst1.32 {q4, q5}, [%[out_ptr]]! \n\t" - "vst1.32 {q6, q7}, [%[out_ptr]]! \n\t" - "end_%=: \n\t" - : - : - [out_ptr] "r"(out_ptr), [input_x_ptr] "r"(input_x_ptr), [num] "r"(numel) - : "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "r5", - "r6"); - } else { - ReluFunctor func_; - math::Transform trans; - trans(input_x_ptr, input_x_ptr + numel, out_ptr, func_); - } + // if (numel > 64) { + // asm volatile( + // "pld [%[input_x_ptr], #0] \n\t" + // "vmov.f32 q8, #0.0 \n\t" + // "subs %[num], %[num], #32 \n\t" + // "blt end_num_%= \n\t" + // "loop_num_%=: \n\t" + // "pld [%[input_x_ptr], #1024] \n\t" + // + // "vld1.32 {q0, q1}, [%[input_x_ptr]]! \n\t" + // "vld1.32 {q2, q3}, [%[input_x_ptr]]! \n\t" + // "vld1.32 {q4, q5}, [%[input_x_ptr]]! \n\t" + // "vld1.32 {q6, q7}, [%[input_x_ptr]]! \n\t" + // + // "vmax.f32 q0, q0, q8 \n\t" + // "vmax.f32 q1, q1, q8 \n\t" + // "vmax.f32 q2, q2, q8 \n\t" + // "vmax.f32 q3, q3, q8 \n\t" + // "vmax.f32 q4, q4, q8 \n\t" + // "vmax.f32 q5, q5, q8 \n\t" + // "vmax.f32 q6, q6, q8 \n\t" + // "vmax.f32 q7, q7, q8 \n\t" + // + // "vst1.32 {q0, q1}, [%[out_ptr]]! \n\t" + // "vst1.32 {q2, q3}, [%[out_ptr]]! \n\t" + // "vst1.32 {q4, q5}, [%[out_ptr]]! \n\t" + // "vst1.32 {q6, q7}, [%[out_ptr]]! \n\t" + // + // "subs %[num], %[num], #32 \n\t" + // "bge loop_num_%= \n\t" + // "end_num_%=: \n\t" + // "cmp %[num], #0 \n\t" + // "bge end_%= \n\t" + // "mov r6, #4 \n\t" + // "mul r5, %[num], r6 \n\t" + // "add %[input_x_ptr], %[input_x_ptr], r5 \n\t" + // "vld1.32 {q0, q1}, [%[input_x_ptr]]! \n\t" + // "vld1.32 {q2, q3}, [%[input_x_ptr]]! \n\t" + // "vld1.32 {q4, q5}, [%[input_x_ptr]]! \n\t" + // "vld1.32 {q6, q7}, [%[input_x_ptr]]! \n\t" + // "vmax.f32 q0, q0, q8 \n\t" + // "vmax.f32 q1, q1, q8 \n\t" + // "vmax.f32 q2, q2, q8 \n\t" + // "vmax.f32 q3, q3, q8 \n\t" + // "vmax.f32 q4, q4, q8 \n\t" + // "vmax.f32 q5, q5, q8 \n\t" + // "vmax.f32 q6, q6, q8 \n\t" + // "vmax.f32 q7, q7, q8 \n\t" + // "add %[out_ptr], %[out_ptr], r5 \n\t" + // "vst1.32 {q0, q1}, [%[out_ptr]]! \n\t" + // "vst1.32 {q2, q3}, [%[out_ptr]]! \n\t" + // "vst1.32 {q4, q5}, [%[out_ptr]]! \n\t" + // "vst1.32 {q6, q7}, [%[out_ptr]]! 
\n\t" + // "end_%=: \n\t" + // : + // : + // [out_ptr] "r"(out_ptr), [input_x_ptr] "r"(input_x_ptr), [num] + // "r"(numel) : "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", + // "q7", "q8", "r5", + // "r6"); + // } else { + ReluFunctor func_; + math::Transform trans; + trans(input_x_ptr, input_x_ptr + numel, out_ptr, func_); + // } } } // namespace operators } // namespace paddle_mobile diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 124e1c2d60d81fe0d6f19ffa1936f3d1d8e7eb16..0cf3537ad64fb0dfb23d0514cf4d068b9c9c6199 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -137,4 +137,6 @@ else () ADD_EXECUTABLE(test-depthwise-conv-op operators/test_depthwise_conv_op.cpp test_helper.h test_include.h executor_for_test.h) target_link_libraries(test-depthwise-conv-op paddle-mobile) + #add_library(test-lib-size SHARED common/test_lib_size.h common/test_lib_size.cpp) + endif() diff --git a/test/common/test_lib_size.h b/test/common/test_lib_size.h index ab5cd359b73f1c5c5df4176ab444e48c85dbd95b..a00a5afe12f952a7bc47ab62ba1d07a7879cebec 100644 --- a/test/common/test_lib_size.h +++ b/test/common/test_lib_size.h @@ -19,6 +19,8 @@ limitations under the License. */ #ifndef PADDLE_MOBILE_TEST_LIB_SIZE_H #define PADDLE_MOBILE_TEST_LIB_SIZE_H +#include +#include #include //#include //#include @@ -33,7 +35,7 @@ limitations under the License. */ //#include //#include -#include +//#include //#include //#include @@ -44,8 +46,10 @@ void foo() { // std::cout << "12345" << std::endl; std::vector vec = {1, 2, 3, 4, 5}; + vec.push_back(2); - // std::find(vec.begin(), vec.end(), 1); + pthread_mutex_init(NULL, NULL); + pthread_attr_destroy(NULL); // std::find(vec.begin(), vec.end(), 1); // std::list l; @@ -70,7 +74,7 @@ void foo() { // int z = 10; // } - std::shared_ptr s1 = std::make_shared(); + // std::shared_ptr s1 = std::make_shared(); // std::stringstream ss; // ss << "12345"; diff --git a/test/framework/test_load.cpp b/test/framework/test_load.cpp index 32d314826f8d6bd4e504b16cd78464d660919a30..65d29345d51c68bc0a287ff03c8093e1cee3e0c5 100644 --- a/test/framework/test_load.cpp +++ b/test/framework/test_load.cpp @@ -19,11 +19,9 @@ int main() { paddle_mobile::Loader loader; // ../../../test/models/googlenet // ../../../test/models/mobilenet - auto program = loader.Load(g_googlenet, true, true); + auto program = loader.Load(g_mobilenet_ssd, false, false); // loader.Load(g_googlenet_combine + "/model", g_googlenet_combine + - // "/params", - // true); - + // "/params", true); program.originProgram->Description("program desc: "); return 0; } diff --git a/test/net/test_googlenet.cpp b/test/net/test_googlenet.cpp index d25a9eb7ce83876ca339adf8aff1a027b70ac611..6bae5136b60160bdf66faa6b00b0bd05607af0b6 100644 --- a/test/net/test_googlenet.cpp +++ b/test/net/test_googlenet.cpp @@ -18,7 +18,7 @@ limitations under the License. 
*/ int main() { paddle_mobile::Loader loader; - bool optimize = false; + bool optimize = true; auto time1 = time(); auto program = loader.Load(g_googlenet, optimize); // auto program = loader.Load(g_googlenet_combine + "/model", diff --git a/test/operators/test_batchnorm_op.cpp b/test/operators/test_batchnorm_op.cpp index 38d9f624909fd645c78ae56a5d9efff9fa961795..4ccad8c1512036c2400a09575b3775e75b26acce 100644 --- a/test/operators/test_batchnorm_op.cpp +++ b/test/operators/test_batchnorm_op.cpp @@ -41,7 +41,7 @@ class TestBatchNormOp { for (int j = 0; j < ops.size(); ++j) { std::shared_ptr op = ops[j]; if (op->Type() == "batch_norm" && - op->Input("X")[0] == "conv2d_0.tmp_0") { + op->Input("X")[0] == "conv2d_5.tmp_0") { DLOG << " mul attr size: " << op->GetAttrMap().size(); DLOG << " inputs size: " << op->GetInputs().size(); DLOG << " outputs size: " << op->GetOutputs().size(); @@ -67,29 +67,29 @@ class TestBatchNormOp { const Tensor &t5) { // feed auto scope = program_.scope; - Variable *x1_feed_value = scope->Var("conv2d_0.tmp_0"); + Variable *x1_feed_value = scope->Var("conv2d_5.tmp_0"); auto tensor_x1 = x1_feed_value->GetMutable(); tensor_x1->ShareDataWith(t1); - Variable *mean_feed_value = scope->Var("batch_norm_0.w_1"); + Variable *mean_feed_value = scope->Var("batch_norm_10.w_1"); auto tensor_mean = mean_feed_value->GetMutable(); tensor_mean->ShareDataWith(t2); - Variable *scale_feed_value = scope->Var("batch_norm_0.w_0"); + Variable *scale_feed_value = scope->Var("batch_norm_10.w_0"); auto tensor_scale = scale_feed_value->GetMutable(); tensor_scale->ShareDataWith(t3); - Variable *variance_feed_value = scope->Var("batch_norm_0.w_2"); + Variable *variance_feed_value = scope->Var("batch_norm_10.w_2"); auto tensor_variance = variance_feed_value->GetMutable(); tensor_variance->ShareDataWith(t4); - Variable *bias_feed_value = scope->Var("batch_norm_0.b_0"); + Variable *bias_feed_value = scope->Var("batch_norm_10.b_0"); auto tensor_bias = bias_feed_value->GetMutable(); tensor_bias->ShareDataWith(t5); - Variable *output = scope->Var("batch_norm_0.tmp_2"); + Variable *output = scope->Var("batch_norm_10.tmp_2"); auto *output_tensor = output->GetMutable(); - output_tensor->mutable_data({4, 10, 2, 2}); + output_tensor->mutable_data({1, 256, 38, 38}); // DLOG << typeid(output_tensor).name(); // DLOG << "output_tensor dims: " << output_tensor->dims(); @@ -128,30 +128,32 @@ int main() { DLOG << "----------**********----------"; DLOG << "begin to run BatchNormOp Test"; paddle_mobile::Loader loader; - auto program = loader.Load(std::string(g_resnet)); + auto program = loader.Load(std::string(g_mobilenet_ssd)); /// input x (4,10,2,2) paddle_mobile::framework::Tensor inputx1; - SetupTensor(&inputx1, {4, 10, 2, 2}, static_cast(0), + SetupTensor(&inputx1, {1, 256, 38, 38}, static_cast(0), static_cast(1)); auto *inputx1_ptr = inputx1.data(); paddle_mobile::framework::Tensor mean; - SetupTensor(&mean, {10}, static_cast(0), static_cast(1)); + SetupTensor(&mean, {256}, static_cast(0), + static_cast(1)); auto *mean_ptr = mean.data(); paddle_mobile::framework::Tensor scale; - SetupTensor(&scale, {10}, static_cast(0), + SetupTensor(&scale, {256}, static_cast(0), static_cast(1)); auto *scale_ptr = scale.data(); paddle_mobile::framework::Tensor variance; - SetupTensor(&variance, {10}, static_cast(0), + SetupTensor(&variance, {256}, static_cast(0), static_cast(1)); auto *variance_ptr = variance.data(); paddle_mobile::framework::Tensor bias; - SetupTensor(&bias, {10}, static_cast(0), static_cast(1)); + 
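The updated test now feeds a {1, 256, 38, 38} input through a batch_norm op of the mobilenet-ssd program and prints, for element 0, the reference value the kernel output should match. A standalone sketch of that per-element check, using the 0.00001 epsilon that the test's DLOG prints (the helper name is illustrative, not from the test):

#include <cmath>

// Expected batch-norm output for one element, matching the formula the
// test logs: (x - mean) / sqrt(variance + epsilon) * scale + bias.
inline float ExpectedBatchNorm(float x, float mean, float variance, float scale,
                               float bias, float epsilon = 0.00001f) {
  return (x - mean) / std::sqrt(variance + epsilon) * scale + bias;
}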
diff --git a/tools/push2android.sh b/tools/push2android.sh
new file mode 100644
index 0000000000000000000000000000000000000000..d7d1ad9950d58f415804834b8ebc0740a3e796cb
--- /dev/null
+++ b/tools/push2android.sh
@@ -0,0 +1,32 @@
+#!/usr/bin/env sh
+
+push_fn () {
+MODELS_PATH="../test/models/*"
+MODELS_SRC="../test/models"
+IMAGE_PATH="../test/images/*"
+EXE_FILE="../test/build/*"
+EXE_DIR="data/local/tmp/bin"
+adb shell mkdir ${EXE_DIR}
+MODELS_DIR="data/local/tmp/models"
+adb shell mkdir ${MODELS_DIR}
+for file in `ls ${MODELS_SRC}`
+do
+ adb shell mkdir ${MODELS_DIR}"/"${file}
+done
+
+IMAGES_DIR="data/local/tmp/images"
+adb shell mkdir ${IMAGES_DIR}
+LIB_PATH="../build/release/arm-v7a/build/*"
+adb push ${EXE_FILE} ${EXE_DIR}
+adb push ${LIB_PATH} ${EXE_DIR}
+if [[ $1 != "npm" ]]; then
+adb push ${IMAGE_PATH} ${IMAGES_DIR}
+adb push ${MODELS_PATH} ${MODELS_DIR}
+fi
+}
+
+if [[ $1 == "npm" ]]; then
+push_fn $1
+else
+push_fn
+fi
diff --git a/tools/run.sh b/tools/run.sh
index 4e00b810f6d11971c32686f9eef3237b904c9a3c..a52b8be96332bf8def075660a5e44e70327a970b 100644
--- a/tools/run.sh
+++ b/tools/run.sh
@@ -24,8 +24,15 @@ adb shell mkdir ${IMAGES_DIR}
 LIB_PATH="../build/release/arm-v7a/build/*"
 adb push ${EXE_FILE} ${EXE_DIR}
 adb push ${LIB_PATH} ${EXE_DIR}
+if [[ $1 != "npm" ]]; then
 adb push ${IMAGE_PATH} ${IMAGES_DIR}
 adb push ${MODELS_PATH} ${MODELS_DIR}
+fi
 adb shell "cd /data/local/tmp/bin; LD_LIBRARY_PATH=. ./${TESTUNIT}"
 }
+
+if [[ $1 == "npm" ]]; then
+push_fn $1
+else
 push_fn
+fi
\ No newline at end of file
diff --git a/tools/scripts/run_on_android.sh b/tools/scripts/run_on_android.sh
index 52ca8b2748603fb4d8168136993b5732bf4547c3..e9f6388cd6e9a9b3aeaf72691a8724a898aa4e44 100644
--- a/tools/scripts/run_on_android.sh
+++ b/tools/scripts/run_on_android.sh
@@ -19,12 +19,19 @@ adb shell mkdir ${IMAGES_DIR}
 LIB_PATH="../../build/release/arm-v7a/build/*"
 adb push ${EXE_FILE} ${EXE_DIR}
 adb push ${LIB_PATH} ${EXE_DIR}
+if [[ $1 != "npm" ]]; then
 adb push ${IMAGE_PATH} ${IMAGES_DIR}
 adb push ${MODELS_PATH} ${MODELS_DIR}
+fi
 echo "test-op or test-net below : "
 adb shell ls /data/local/tmp/bin
 echo "**** choose OP or NET to test ****"
 read -p "which to test : " test_name
 adb shell "cd /data/local/tmp/bin; LD_LIBRARY_PATH=. ./${test_name}"
 }
+
+if [[ $1 == "npm" ]]; then
+push_fn $1
+else
 push_fn
+fi
\ No newline at end of file
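Usage note for the script changes: push2android.sh, run.sh, and run_on_android.sh now take an optional first argument. Invoked as, for example, sh push2android.sh npm (from the tools directory, since the scripts use relative paths such as ../test/build/*), they skip the adb push of the test images and models and only push the freshly built test binaries and libraries; run with no argument they push everything, as before.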