Make arm linux compiler happy for inline assembly

e583803b · 李寅 · 028b4ab3 · e583803b · e583803b · e583803b
隐藏空白更改
内联并排

Showing with 35 additions and 34 deletions

.gitlab-ci.yml .gitlab-ci.yml +2 -1

mace/kernels/sgemm.cc mace/kernels/sgemm.cc +32 -32

mace/ops/infer_conv2d_shape.h mace/ops/infer_conv2d_shape.h +1 -1

未找到文件。
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -73,6 +73,7 @@ platform_compatible_tests:
  stage: platform_compatible_tests
  script:
    - bazel build mace/core:core --define openmp=true
+    - bazel build --config arm_linux --define openmp=true --define opencl=true --define neon=true //mace/libmace:libmace.so
    - bazel build --config aarch64_linux --define openmp=true --define opencl=true --define neon=true //mace/libmace:libmace.so

 build_libraries:
@@ -86,7 +87,7 @@ ndk_versions_compatible_tests:
    - DEFAULT_NDK_PATH=$ANDROID_NDK_HOME
    - prefix_path=${DEFAULT_NDK_PATH%android-ndk-*}
    - >
-      for ndk in android-ndk-r15c android-ndk-r16 android-ndk-r17b;
+      for ndk in android-ndk-r12b android-ndk-r15c android-ndk-r16 android-ndk-r17b;
      do
      new_ndk_path=${prefix_path}${ndk};
      if [ "$new_ndk_path" != "$DEFAULT_NDK_PATH" ]; then

--- a/mace/kernels/sgemm.cc
+++ b/mace/kernels/sgemm.cc
@@ -488,34 +488,34 @@ void SGemm::RunPerBatch(const float *lhs_data,
          "vld1.f32 {d22-d23}, [%[rhs_ptr]]! \n"
          "vld1.f32 {d24-d25}, [%[rhs_ptr]]! \n"

-          "vmla.f32 %[c0], q10, d0[0] \n"
-          "vmla.f32 %[c1], q10, d0[1] \n"
-          "vmla.f32 %[c2], q10, d1[0] \n"
-          "vmla.f32 %[c3], q10, d1[1] \n"
+          "vmla.f32 %q[c0], q10, d0[0] \n"
+          "vmla.f32 %q[c1], q10, d0[1] \n"
+          "vmla.f32 %q[c2], q10, d1[0] \n"
+          "vmla.f32 %q[c3], q10, d1[1] \n"

          "vld1.f32 {d6-d7}, [%[lhs_ptr]]! \n"
          "vld1.f32 {d26-d27}, [%[rhs_ptr]]! \n"

-          "vmla.f32 %[c0], q11, d2[0] \n"
-          "vmla.f32 %[c1], q11, d2[1] \n"
-          "vmla.f32 %[c2], q11, d3[0] \n"
-          "vmla.f32 %[c3], q11, d3[1] \n"
+          "vmla.f32 %q[c0], q11, d2[0] \n"
+          "vmla.f32 %q[c1], q11, d2[1] \n"
+          "vmla.f32 %q[c2], q11, d3[0] \n"
+          "vmla.f32 %q[c3], q11, d3[1] \n"

          "vld1.f32 {d8-d9}, [%[lhs_ptr]]! \n"
          "vld1.f32 {d28-d29}, [%[rhs_ptr]]! \n"

-          "vmla.f32 %[c0], q12, d4[0] \n"
-          "vmla.f32 %[c1], q12, d4[1] \n"
-          "vmla.f32 %[c2], q12, d5[0] \n"
-          "vmla.f32 %[c3], q12, d5[1] \n"
+          "vmla.f32 %q[c0], q12, d4[0] \n"
+          "vmla.f32 %q[c1], q12, d4[1] \n"
+          "vmla.f32 %q[c2], q12, d5[0] \n"
+          "vmla.f32 %q[c3], q12, d5[1] \n"

          "vld1.f32 {d10-d11}, [%[lhs_ptr]]! \n"
          "vld1.f32 {d30-d31}, [%[rhs_ptr]]! \n"

-          "vmla.f32 %[c0], q13, d6[0] \n"
-          "vmla.f32 %[c1], q13, d6[1] \n"
-          "vmla.f32 %[c2], q13, d7[0] \n"
-          "vmla.f32 %[c3], q13, d7[1] \n"
+          "vmla.f32 %q[c0], q13, d6[0] \n"
+          "vmla.f32 %q[c1], q13, d6[1] \n"
+          "vmla.f32 %q[c2], q13, d7[0] \n"
+          "vmla.f32 %q[c3], q13, d7[1] \n"

          "vld1.f32 {d0-d1}, [%[lhs_ptr]]! \n"
          "vld1.f32 {d2-d3}, [%[lhs_ptr]]! \n"
@@ -523,27 +523,27 @@ void SGemm::RunPerBatch(const float *lhs_data,
          "vld1.f32 {d20-d21}, [%[rhs_ptr]]! \n"
          "vld1.f32 {d22-d23}, [%[rhs_ptr]]! \n"

-          "vmla.f32 %[c0], q14, d8[0] \n"
-          "vmla.f32 %[c1], q14, d8[1] \n"
-          "vmla.f32 %[c2], q14, d9[0] \n"
-          "vmla.f32 %[c3], q14, d9[1] \n"
+          "vmla.f32 %q[c0], q14, d8[0] \n"
+          "vmla.f32 %q[c1], q14, d8[1] \n"
+          "vmla.f32 %q[c2], q14, d9[0] \n"
+          "vmla.f32 %q[c3], q14, d9[1] \n"

-          "vmla.f32 %[c0], q15, d10[0] \n"
-          "vmla.f32 %[c1], q15, d10[1] \n"
-          "vmla.f32 %[c2], q15, d11[0] \n"
-          "vmla.f32 %[c3], q15, d11[1] \n"
+          "vmla.f32 %q[c0], q15, d10[0] \n"
+          "vmla.f32 %q[c1], q15, d10[1] \n"
+          "vmla.f32 %q[c2], q15, d11[0] \n"
+          "vmla.f32 %q[c3], q15, d11[1] \n"

-          "vmla.f32 %[c0], q10, d0[0] \n"
-          "vmla.f32 %[c1], q10, d0[1] \n"
-          "vmla.f32 %[c2], q10, d1[0] \n"
-          "vmla.f32 %[c3], q10, d1[1] \n"
+          "vmla.f32 %q[c0], q10, d0[0] \n"
+          "vmla.f32 %q[c1], q10, d0[1] \n"
+          "vmla.f32 %q[c2], q10, d1[0] \n"
+          "vmla.f32 %q[c3], q10, d1[1] \n"

          "subs %[block_d], %[block_d], #1 \n"

-          "vmla.f32 %[c0], q11, d2[0] \n"
-          "vmla.f32 %[c1], q11, d2[1] \n"
-          "vmla.f32 %[c2], q11, d3[0] \n"
-          "vmla.f32 %[c3], q11, d3[1] \n"
+          "vmla.f32 %q[c0], q11, d2[0] \n"
+          "vmla.f32 %q[c1], q11, d2[1] \n"
+          "vmla.f32 %q[c2], q11, d3[0] \n"
+          "vmla.f32 %q[c3], q11, d3[1] \n"

          "bne 0b \n"
        :  // outputs

--- a/mace/ops/infer_conv2d_shape.h
+++ b/mace/ops/infer_conv2d_shape.h
@@ -41,7 +41,7 @@ class InferConv2dShapeOp : public Operator<D, T> {
        OperatorBase::GetOptionalArg<int>("data_format", 0);
    const bool isNCHW = data_format == 1;

-    const Padding padding_type =
+    Padding padding_type =
        static_cast<Padding>(OperatorBase::GetOptionalArg<int>(
            "padding", static_cast<int>(SAME)));
    const std::vector<int32_t> paddings =