format files

dfa731e1 · liuruilong · 3b82bfb5 · dfa731e1 · dfa731e1 · dfa731e1
4 changed files
--- a/src/operators/kernel/arm/batchnorm_kernel.cpp
+++ b/src/operators/kernel/arm/batchnorm_kernel.cpp
@@ -45,34 +45,35 @@ void BatchNormKernel<CPU, float>::Compute(const BatchNormParam &param) const {
  auto scale_ptr = scale->data<float>();
  auto bias_ptr = bias->data<float>();

-
  //  Tensor inv_std;
  //  auto inv_std_ptr = inv_std.mutable_data<float>(make_ddim({C}));

-  PADDLE_MOBILE_ENFORCE(C == variance->numel(),  "C must equal to variance.numel()");
+  PADDLE_MOBILE_ENFORCE(C == variance->numel(),
+                        "C must equal to variance.numel()");

  int HXW = H * W;
  if (HXW > 32) {
    int NXC = N * C;
    float *inv_std_ptr = new float[NXC * 4];
-    float * volatile new_scale_ptr = new float[NXC *  4];
-    float * volatile new_bias_ptr = new float[NXC * 4];
+    float *volatile new_scale_ptr = new float[NXC * 4];
+    float *volatile new_bias_ptr = new float[NXC * 4];

    /// std = (var + epsilon).sqrt();
    /// inv_std = 1 / std;
    for (int i = 0; i < C * 4; i += 4) {
      inv_std_ptr[i] =
-              1 / static_cast<float>(pow((variance_ptr[i/4] + epsilon), 0.5));
+          1 / static_cast<float>(pow((variance_ptr[i / 4] + epsilon), 0.5));
      inv_std_ptr[i + 1] = inv_std_ptr[i];
      inv_std_ptr[i + 2] = inv_std_ptr[i];
      inv_std_ptr[i + 3] = inv_std_ptr[i];

-      new_scale_ptr[i] = inv_std_ptr[i] * scale_ptr[i/4];
+      new_scale_ptr[i] = inv_std_ptr[i] * scale_ptr[i / 4];
      new_scale_ptr[i + 1] = new_scale_ptr[i];
      new_scale_ptr[i + 2] = new_scale_ptr[i];
      new_scale_ptr[i + 3] = new_scale_ptr[i];

-      new_bias_ptr[i] = bias_ptr[i/4] - mean_ptr[i/4] * inv_std_ptr[i] * scale_ptr[i/4];
+      new_bias_ptr[i] =
+          bias_ptr[i / 4] - mean_ptr[i / 4] * inv_std_ptr[i] * scale_ptr[i / 4];

      new_bias_ptr[i + 1] = new_bias_ptr[i];
      new_bias_ptr[i + 2] = new_bias_ptr[i];
@@ -84,7 +85,6 @@ void BatchNormKernel<CPU, float>::Compute(const BatchNormParam &param) const {
      new_bias_ptr[j] = new_bias_ptr[j - C * 4];
    }

-
    asm volatile(
        "subs %[N], %[N], #1                  \n\t"
        "blt        end_n_%=                  \n\t"
@@ -180,14 +180,15 @@ void BatchNormKernel<CPU, float>::Compute(const BatchNormParam &param) const {
        "bge        loop_n_%=                \n\t"
        "end_n_%=:                           \n\t"
        :
-    :[input_x_ptr]"r"(input_x_ptr), [out_ptr]"r"(out_ptr), [new_scale_ptr]"r"(new_scale_ptr), [new_bias_ptr]"r"(new_bias_ptr),
-    [N]"r"(N), [C]"r"(C), [HXW]"r"(HXW)
-    :"memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "r5", "r6"
-    );
+        : [input_x_ptr] "r"(input_x_ptr), [out_ptr] "r"(out_ptr),
+          [new_scale_ptr] "r"(new_scale_ptr), [new_bias_ptr] "r"(new_bias_ptr),
+          [N] "r"(N), [C] "r"(C), [HXW] "r"(HXW)
+        : "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9",
+          "q10", "r5", "r6");

-    delete [] inv_std_ptr;
-    delete [] new_scale_ptr;
-    delete [] new_bias_ptr;
+    delete[] inv_std_ptr;
+    delete[] new_scale_ptr;
+    delete[] new_bias_ptr;

  } else {
    float *inv_std_ptr = new float[C];
@@ -205,7 +206,8 @@ void BatchNormKernel<CPU, float>::Compute(const BatchNormParam &param) const {
    /// (x * inv_var * scale) + (bias - est_mean * inv_var * scale)
    for (int i = 0; i < C; i++) {
      new_scale_ptr[i] = inv_std_ptr[i] * scale_ptr[i];
-      new_bias_ptr[i] = bias_ptr[i] - mean_ptr[i] * inv_std_ptr[i] * scale_ptr[i];
+      new_bias_ptr[i] =
+          bias_ptr[i] - mean_ptr[i] * inv_std_ptr[i] * scale_ptr[i];
      {
        for (int n = 0; n < N; n++) {
          for (int h = 0; h < H; h++) {
@@ -220,16 +222,15 @@ void BatchNormKernel<CPU, float>::Compute(const BatchNormParam &param) const {
      }
    }

-    delete [] inv_std_ptr;
-//    DLOG << "input[2,5,1,0](input[102]) ,channel 5 :";
-//    DLOG << "input_x_ptr : " << input_x_ptr[102];
-//    DLOG << "variance : " << variance_ptr[5];
-//    DLOG << "inv_std_ptr : " << inv_std_ptr[5];
-//    DLOG << "new_scale_ptr : " << new_scale_ptr[5];
-//    DLOG << "new_bias_ptr : " << new_bias_ptr[5];
-//    DLOG << "out_ptr : " << out_ptr[102];
+    delete[] inv_std_ptr;
+    //    DLOG << "input[2,5,1,0](input[102]) ,channel 5 :";
+    //    DLOG << "input_x_ptr : " << input_x_ptr[102];
+    //    DLOG << "variance : " << variance_ptr[5];
+    //    DLOG << "inv_std_ptr : " << inv_std_ptr[5];
+    //    DLOG << "new_scale_ptr : " << new_scale_ptr[5];
+    //    DLOG << "new_bias_ptr : " << new_bias_ptr[5];
+    //    DLOG << "out_ptr : " << out_ptr[102];
  }
-
 }
 }  // namespace operators
 }  // namespace paddle_mobile

--- a/src/operators/kernel/arm/relu_kernel.cpp
+++ b/src/operators/kernel/arm/relu_kernel.cpp
@@ -38,70 +38,71 @@ void ReluKernel<CPU, float>::Compute(const ReluParam &param) const {
  auto *out_ptr = out->mutable_data<float>();

  int numel = input_x->numel();
-//  if (numel > 64) {
-//    asm volatile(
-//        "pld        [%[input_x_ptr], #0]        \n\t"
-//        "vmov.f32   q8,    #0.0                 \n\t"
-//        "subs %[num], %[num], #32                \n\t"
-//        "blt        end_num_%=                  \n\t"
-//        "loop_num_%=:                           \n\t"
-//        "pld        [%[input_x_ptr], #1024]      \n\t"
-//
-//        "vld1.32 {q0, q1}, [%[input_x_ptr]]!    \n\t"
-//        "vld1.32 {q2, q3}, [%[input_x_ptr]]!    \n\t"
-//        "vld1.32 {q4, q5}, [%[input_x_ptr]]!    \n\t"
-//        "vld1.32 {q6, q7}, [%[input_x_ptr]]!    \n\t"
-//
-//        "vmax.f32 q0, q0, q8                   \n\t"
-//        "vmax.f32 q1, q1, q8                    \n\t"
-//        "vmax.f32 q2, q2, q8                   \n\t"
-//        "vmax.f32 q3, q3, q8                   \n\t"
-//        "vmax.f32 q4, q4, q8                   \n\t"
-//        "vmax.f32 q5, q5, q8                   \n\t"
-//        "vmax.f32 q6, q6, q8                   \n\t"
-//        "vmax.f32 q7, q7, q8                   \n\t"
-//
-//        "vst1.32 {q0, q1}, [%[out_ptr]]!        \n\t"
-//        "vst1.32 {q2, q3}, [%[out_ptr]]!       \n\t"
-//        "vst1.32 {q4, q5}, [%[out_ptr]]!       \n\t"
-//        "vst1.32 {q6, q7}, [%[out_ptr]]!       \n\t"
-//
-//        "subs %[num], %[num], #32              \n\t"
-//        "bge        loop_num_%=                \n\t"
-//        "end_num_%=:                           \n\t"
-//        "cmp %[num], #0                         \n\t"
-//        "bge   end_%=                          \n\t"
-//        "mov r6, #4                             \n\t"
-//        "mul r5, %[num], r6                     \n\t"
-//        "add %[input_x_ptr], %[input_x_ptr], r5     \n\t"
-//        "vld1.32 {q0, q1}, [%[input_x_ptr]]!    \n\t"
-//        "vld1.32 {q2, q3}, [%[input_x_ptr]]!    \n\t"
-//        "vld1.32 {q4, q5}, [%[input_x_ptr]]!    \n\t"
-//        "vld1.32 {q6, q7}, [%[input_x_ptr]]!    \n\t"
-//        "vmax.f32 q0, q0, q8                   \n\t"
-//        "vmax.f32 q1, q1, q8                    \n\t"
-//        "vmax.f32 q2, q2, q8                   \n\t"
-//        "vmax.f32 q3, q3, q8                   \n\t"
-//        "vmax.f32 q4, q4, q8                   \n\t"
-//        "vmax.f32 q5, q5, q8                   \n\t"
-//        "vmax.f32 q6, q6, q8                   \n\t"
-//        "vmax.f32 q7, q7, q8                   \n\t"
-//        "add %[out_ptr], %[out_ptr], r5       \n\t"
-//        "vst1.32 {q0, q1}, [%[out_ptr]]!        \n\t"
-//        "vst1.32 {q2, q3}, [%[out_ptr]]!       \n\t"
-//        "vst1.32 {q4, q5}, [%[out_ptr]]!       \n\t"
-//        "vst1.32 {q6, q7}, [%[out_ptr]]!       \n\t"
-//        "end_%=:                                \n\t"
-//        :
-//        :
-//        [out_ptr] "r"(out_ptr), [input_x_ptr] "r"(input_x_ptr), [num] "r"(numel)
-//        : "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "r5",
-//          "r6");
-//  } else {
+  //  if (numel > 64) {
+  //    asm volatile(
+  //        "pld        [%[input_x_ptr], #0]        \n\t"
+  //        "vmov.f32   q8,    #0.0                 \n\t"
+  //        "subs %[num], %[num], #32                \n\t"
+  //        "blt        end_num_%=                  \n\t"
+  //        "loop_num_%=:                           \n\t"
+  //        "pld        [%[input_x_ptr], #1024]      \n\t"
+  //
+  //        "vld1.32 {q0, q1}, [%[input_x_ptr]]!    \n\t"
+  //        "vld1.32 {q2, q3}, [%[input_x_ptr]]!    \n\t"
+  //        "vld1.32 {q4, q5}, [%[input_x_ptr]]!    \n\t"
+  //        "vld1.32 {q6, q7}, [%[input_x_ptr]]!    \n\t"
+  //
+  //        "vmax.f32 q0, q0, q8                   \n\t"
+  //        "vmax.f32 q1, q1, q8                    \n\t"
+  //        "vmax.f32 q2, q2, q8                   \n\t"
+  //        "vmax.f32 q3, q3, q8                   \n\t"
+  //        "vmax.f32 q4, q4, q8                   \n\t"
+  //        "vmax.f32 q5, q5, q8                   \n\t"
+  //        "vmax.f32 q6, q6, q8                   \n\t"
+  //        "vmax.f32 q7, q7, q8                   \n\t"
+  //
+  //        "vst1.32 {q0, q1}, [%[out_ptr]]!        \n\t"
+  //        "vst1.32 {q2, q3}, [%[out_ptr]]!       \n\t"
+  //        "vst1.32 {q4, q5}, [%[out_ptr]]!       \n\t"
+  //        "vst1.32 {q6, q7}, [%[out_ptr]]!       \n\t"
+  //
+  //        "subs %[num], %[num], #32              \n\t"
+  //        "bge        loop_num_%=                \n\t"
+  //        "end_num_%=:                           \n\t"
+  //        "cmp %[num], #0                         \n\t"
+  //        "bge   end_%=                          \n\t"
+  //        "mov r6, #4                             \n\t"
+  //        "mul r5, %[num], r6                     \n\t"
+  //        "add %[input_x_ptr], %[input_x_ptr], r5     \n\t"
+  //        "vld1.32 {q0, q1}, [%[input_x_ptr]]!    \n\t"
+  //        "vld1.32 {q2, q3}, [%[input_x_ptr]]!    \n\t"
+  //        "vld1.32 {q4, q5}, [%[input_x_ptr]]!    \n\t"
+  //        "vld1.32 {q6, q7}, [%[input_x_ptr]]!    \n\t"
+  //        "vmax.f32 q0, q0, q8                   \n\t"
+  //        "vmax.f32 q1, q1, q8                    \n\t"
+  //        "vmax.f32 q2, q2, q8                   \n\t"
+  //        "vmax.f32 q3, q3, q8                   \n\t"
+  //        "vmax.f32 q4, q4, q8                   \n\t"
+  //        "vmax.f32 q5, q5, q8                   \n\t"
+  //        "vmax.f32 q6, q6, q8                   \n\t"
+  //        "vmax.f32 q7, q7, q8                   \n\t"
+  //        "add %[out_ptr], %[out_ptr], r5       \n\t"
+  //        "vst1.32 {q0, q1}, [%[out_ptr]]!        \n\t"
+  //        "vst1.32 {q2, q3}, [%[out_ptr]]!       \n\t"
+  //        "vst1.32 {q4, q5}, [%[out_ptr]]!       \n\t"
+  //        "vst1.32 {q6, q7}, [%[out_ptr]]!       \n\t"
+  //        "end_%=:                                \n\t"
+  //        :
+  //        :
+  //        [out_ptr] "r"(out_ptr), [input_x_ptr] "r"(input_x_ptr), [num]
+  //        "r"(numel) : "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6",
+  //        "q7", "q8", "r5",
+  //          "r6");
+  //  } else {
  ReluFunctor<float> func_;
  math::Transform trans;
  trans(input_x_ptr, input_x_ptr + numel, out_ptr, func_);
-//  }
+  //  }
 }
 }  // namespace operators
 }  // namespace paddle_mobile

--- a/test/common/test_lib_size.h
+++ b/test/common/test_lib_size.h
@@ -19,9 +19,9 @@ limitations under the License. */
 #ifndef PADDLE_MOBILE_TEST_LIB_SIZE_H
 #define PADDLE_MOBILE_TEST_LIB_SIZE_H

-#include <vector>
 #include <pthread.h>
 #include <thread>
+#include <vector>
 //#include <list>
 //#include <tuple>
 //#include <typeinfo>
@@ -74,7 +74,7 @@ void foo() {
  //    int z = 10;
  //  }

-//  std::shared_ptr<int> s1 = std::make_shared<int>();
+  //  std::shared_ptr<int> s1 = std::make_shared<int>();

  //  std::stringstream ss;
  //  ss << "12345";

--- a/test/operators/test_batchnorm_op.cpp
+++ b/test/operators/test_batchnorm_op.cpp
@@ -137,7 +137,8 @@ int main() {
  auto *inputx1_ptr = inputx1.data<float>();

  paddle_mobile::framework::Tensor mean;
-  SetupTensor<float>(&mean, {256}, static_cast<float>(0), static_cast<float>(1));
+  SetupTensor<float>(&mean, {256}, static_cast<float>(0),
+                     static_cast<float>(1));
  auto *mean_ptr = mean.data<float>();

  paddle_mobile::framework::Tensor scale;
@@ -151,7 +152,8 @@ int main() {
  auto *variance_ptr = variance.data<float>();

  paddle_mobile::framework::Tensor bias;
-  SetupTensor<float>(&bias, {256}, static_cast<float>(0), static_cast<float>(1));
+  SetupTensor<float>(&bias, {256}, static_cast<float>(0),
+                     static_cast<float>(1));
  auto *bias_ptr = bias.data<float>();

  paddle_mobile::framework::TestBatchNormOp<paddle_mobile::CPU> testBatchNormOp(