Merge remote-tracking branch 'origin/develop' into develop

33122506 · qnqinan · 2c668b4d · b5c14d86 · 33122506 · 33122506
21 changed files
--- a/src/fpga/api/fpga_api.cpp
+++ b/src/fpga/api/fpga_api.cpp
@@ -29,15 +29,15 @@ limitations under the License. */

 #include "fpga/api/fpga_api.h"

-namespace paddle {
-namespace mobile {
+namespace paddle_mobile {
 namespace fpga {
-namespace api {

 static int fd = -1;
 static const char *device_path = "/dev/fpgadrv0";

-static inline int do_ioctl(int req, void *arg) { return ioctl(req, arg); }
+static inline int do_ioctl(int req, void *arg) {
+  return ioctl(fd, req, arg);  // ioctl(2) order: descriptor, request, argument
+}

 int open_device() {
  if (fd == -1) {
@@ -48,8 +48,8 @@ int open_device() {

 // memory management;
 void *fpga_malloc(size_t size) {
-  return reinterpret_cast<(void *)> mmap64(NULL, size, PROT_READ | PROT_WRITE,
-                                           MAP_SHARED, fd, 0);
+  void *p = mmap64(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+  return p == MAP_FAILED ? nullptr : p;  // mmap fails with (void *)-1, not NULL
 }

 void fpga_free(void *ptr) { munmap(ptr, 0); }
@@ -58,11 +58,9 @@ void fpga_copy(void *dest, const void *src, size_t num) {
  memcpy(dest, src, num);
 }

-int ComputeFpgaConv(struct FpgaConvArgs) {}
-int ComputeFpgaPool(struct FpgaPoolArgs) {}
-int ComputeFpgaEWAdd(struct FpgaEWAddArgs) {}
+int ComputeFpgaConv(struct ConvArgs args) { return 0; }     // TODO: placeholder
+int ComputeFpgaPool(struct PoolingArgs args) { return 0; }  // TODO: placeholder
+int ComputeFpgaEWAdd(struct EWAddArgs args) { return 0; }   // TODO: placeholder

-}  // namespace api
 }  // namespace fpga
-}  // namespace mobile
-}  // namespace paddle
+}  // namespace paddle_mobile
--- a/src/fpga/api/fpga_api.h
+++ b/src/fpga/api/fpga_api.h
@@ -31,90 +31,132 @@ void* fpga_malloc(size_t size);
 void fpga_free(void* ptr);
 void fpga_copy(void* dst, const void* src, size_t num);

-struct FpgaVersionArgs {
-  void* buf;
-};
-
-struct MemoryToPhysicalArgs {
-  const void* src;
-  uint64_t physical;
+struct VersionArgs {
+  void* buffer;
 };

 struct MemoryCopyArgs {
  void* src;
-  void* dst;
+  void* dest;
  size_t size;
 };

-struct FpgaQuantArgs {
-  float scale;
-};
-
-struct FpgaBNArgs {
-  bool enabled = false;
-  void* bias_addr;
-  void* scale_addr;
+struct BNArgs {
+  bool enabled;
+  void* bias_address;
+  void* scale_address;
 };

-struct FpgaKernelArgs {
+/**
+Kernel (window) size and stride, shared by ConvArgs and PoolingArgs.
+*/
+struct KernelArgs {
  uint32_t width;
  uint32_t height;
-  uint32_t stride_h;
  uint32_t stride_w;
+  uint32_t stride_h;
 };

-struct FpgaImageArgs {
-  uint32_t width;
-  uint32_t height;
+struct ImageInputArgs {
+  void* address;         // input featuremap virtual address
+  float* scale_address;  // input scale address;
  uint32_t channels;
-  uint32_t pad_h;
-  uint32_t pad_w;
+  uint32_t width;  // featuremap width
+  uint32_t height;
+  uint32_t pad_width;  // padding width;
+  uint32_t pad_height;
+};
+
+struct ImageOutputArgs {
+  void* address;         // output result address;
+  float* scale_address;  // output scale address;
 };

-struct FpgaConvArgs {
+struct ConvArgs {
  bool relu_enabled;
-  struct FpgaBNArgs BNargs;
-  void* image_addr;
-  void* filter_addr;
-  void* bias_addr;
-  void* output_addr;
-  float quant_scale;
-  struct FpgaImageArgs image;
+  void* bias_address;
+  void* filter_address;
  uint32_t filter_num;
  uint32_t group_num;

-  struct FpgaKernelArgs kernel;
+  struct BNArgs bn;
+  struct KernelArgs kernel;
+  struct ImageInputArgs image;  // input image;
+  struct ImageOutputArgs output;
 };

-struct FpgaPoolArgs {
-  void* image_addr;
-  void* output_addr;
-  struct FpgaImageArgs image;
-  struct FpgaKernelArgs kernel;
+struct PoolingArgs {
+  struct KernelArgs kernel;
+  struct ImageInputArgs image;  // input image;
+  struct ImageOutputArgs output;
 };

-struct FpgaEWAddArgs {
+// elementwise add arguments
+struct EWAddArgs {
  bool relu_enabled;
-  void* image0_addr;
-  void* image1_addr;
-  void* result_addr;
-  uint32_t const0;
-  uint32_t const1;
-  uint32_t data_len;  // aligned element count
+
+  float const0;  // output0 = const0 x input0 + const1 x input1;
+  float const1;
+  struct ImageInputArgs image0;
+  struct ImageInputArgs image1;
+  struct ImageOutputArgs output;
+};
+
+struct FpgaRegWriteArgs {
+  uint64_t address;  // register address to write - TODO confirm address space
+  uint64_t value;    // value to be written to that register
 };

-int ComputeFpgaConv(struct FpgaConvArgs args);
-int ComputeFpgaPool(struct FpgaPoolArgs args);
-int ComputeFpgaEWAdd(struct FpgaEWAddArgs args);
+struct FpgaRegReadArgs {
+  uint64_t address;  // register address to read - TODO confirm address space
+  uint64_t value;    // presumably filled in with the value read - confirm
+};
+// NOTE(review): multi-char literal; the ioctl type field is 8 bits - confirm.
+#define IOCTL_FPGA_MAGIC 'FPGA'
+// NOTE(review): REG_READ is declared with _IOW too; _IOR/_IOWR intended?
+#define IOCTL_VERSION _IOW(IOCTL_FPGA_MAGIC, 01, struct VersionArgs)
+#define IOCTL_FPGA_REG_READ _IOW(IOCTL_FPGA_MAGIC, 02, struct FpgaRegReadArgs)
+#define IOCTL_FPGA_REG_WRITE _IOW(IOCTL_FPGA_MAGIC, 03, struct FpgaRegWriteArgs)
+
+#define IOCTL_SEPARATOR_0 10

-#define IOCTL_FPGA_MAGIC 'CNN'
-#define IOCTL_VERSION _IOW(IOCTL_FPGA_MAGIC, 1, struct FpgaVersionArgs)
-#define IOCTL_GET_QUANT _IOW(IOCTL_FPGA_MAGIC, 2, struct FpgaQuantArgs)
-#define IOCTL_SET_QUANT _IOW(IOCTL_FPGA_MAGIC, 3, struct FpgaQuantArgs)
 #define IOCTL_MEM_COPY _IOW(IOCTL_FPGA_MAGIC, 11, struct MemoryCopyArgs)
-#define IOCTL_CONFIG_CONV _IOW(IOCTL_FPGA_MAGIC, 21, struct FpgaConvArgs)
-#define IOCTL_CONFIG_POOLING _IOW(IOCTL_FPGA_MAGIC, 22, struct FpgaPoolArgs)
-#define IOCTL_CONFIG_EW _IOW(IOCTL_FPGA_MAGIC, 23, struct FpgaEWAddArgs)
+
+#define IOCTL_SEPARATOR_1 20
+
+#define IOCTL_CONFIG_CONV _IOW(IOCTL_FPGA_MAGIC, 21, struct ConvArgs)
+#define IOCTL_CONFIG_POOLING _IOW(IOCTL_FPGA_MAGIC, 22, struct PoolingArgs)
+#define IOCTL_CONFIG_EW _IOW(IOCTL_FPGA_MAGIC, 23, struct EWAddArgs)
+
+enum FPGA_ERR_TYPE {  // all negative; presumably mirrors driver codes - confirm
+  ERR_IOCTL_CMD = -1,
+  ERR_TIMEOUT = -2,
+  ERR_COMPLETION_TIMEOUT = -3,
+  ERR_INVALID_FPGA_ADDR = -4,
+  ERR_NOMEM = -5,
+  ERR_NO_RESERVE_MEM = -6,
+  ERR_COPY_FROM_USER = -7,
+  ERR_COPY_TO_USER = -8,
+  ERR_DEL_TIMER = -9,
+  ERR_ENABLE_MSI = -10,
+  ERR_REGISTER_IRQ = -11,
+  ERR_PCIE_REGISTER = -12,
+  ERR_PCIE_PROBE = -13,
+  ERR_REGISTER_BLOCK = -14,
+  ERR_ALLOC_GENDISK = -15,
+  ERR_INIT_QUEUE = -16,
+  ERR_WAIT = -17,
+  ERR_ECC_ERROR = -31,
+  ERR_FPGA_FAIL_STOP = -64,
+  ERR_FPGA_DEBUG_STOP = -113,
+  DEV_TMP_UNAVAILABLE = -128
+};
+
+//============================== API =============================
+
+int ComputeFpgaConv(struct ConvArgs args);
+int ComputeFpgaPool(struct PoolingArgs args);
+int ComputeFpgaEWAdd(struct EWAddArgs args);

 }  // namespace fpga
 }  // namespace paddle_mobile
--- a/src/framework/tensor.h
+++ b/src/framework/tensor.h
@@ -253,6 +253,18 @@ class Tensor {
                          "Tensor's dims_ is out of bound. ");
  }

+#ifdef PADDLE_MOBILE_FPGA
+  struct FPGAArgs {
+    float scale;
+    // Pointer to the scale member above.
+    inline float *scale_pointer() { return &scale; }
+  };
+  // Read-only access to the FPGA arguments stored in this tensor (fpgaArgs_).
+  const struct FPGAArgs &fpga_args() const {
+    return fpgaArgs_;
+  }
+#endif
+
 private:
  /**
   * @note    Placeholder hides type T, so it doesn't appear as a
@@ -319,6 +331,10 @@ class Tensor {
   * begins.
   */
  size_t offset_;
+
+#ifdef PADDLE_MOBILE_FPGA
+  FPGAArgs fpgaArgs_;
+#endif
 };

 #ifdef PADDLE_MOBILE_DEBUG

--- a/src/io/executor.cpp
+++ b/src/io/executor.cpp
@@ -420,6 +420,6 @@ std::vector<typename Executor<Dtype, P>::Ptype> Executor<Dtype, P>::Predict(

 template class Executor<CPU, Precision::FP32>;
 template class Executor<GPU_MALI, Precision::FP32>;
-template class Executor<FPGA, Precision::FP16>;
+template class Executor<FPGA, Precision::FP32>;

 }  // namespace paddle_mobile
--- a/src/memory/t_malloc.cpp
+++ b/src/memory/t_malloc.cpp
@@ -27,17 +27,17 @@ namespace memory {
 const int MALLOC_ALIGN = 64;

 #ifdef PADDLE_MOBILE_FPGA
-namespace api = paddle::mobile::fpga::api;
+namespace fpga = paddle_mobile::fpga;

 void Copy(void *dst, const void *src, size_t num) {
  std::memcpy(dst, src, num);
 }

-void *Alloc(size_t size) { return api::malloc(size); }
+void *Alloc(size_t size) { return fpga::fpga_malloc(size); }

 void Free(void *ptr) {
  if (ptr) {
-    api::fpga_free(ptr);
+    fpga::fpga_free(ptr);
  }
 }


--- a/src/operators/feed_op.h
+++ b/src/operators/feed_op.h
@@ -29,7 +29,7 @@ class FeedOp : public framework::OperatorBase<DeviceType> {
         std::shared_ptr<framework::Scope> scope)
      : framework::OperatorBase<DeviceType>(type, inputs, outputs, attrs,
                                            scope),
-        param_(inputs, outputs, attrs, *scope) {}
+        param_(inputs, outputs, attrs, scope.get()) {}
  void RunImpl() const { param_.Out()->ShareDataWith(*param_.InputX()); }

  void Init() {}

--- a/src/operators/kernel/arm/dropout_kernel.cpp
+++ b/src/operators/kernel/arm/dropout_kernel.cpp
@@ -14,8 +14,6 @@ limitations under the License. */

 #ifdef DROPOUT_OP

-#pragma once
-
 #include "operators/kernel/dropout_kernel.h"
 #include <operators/math/transform.h>


--- a/src/operators/kernel/dropout_kernel.h
+++ b/src/operators/kernel/dropout_kernel.h
@@ -17,7 +17,7 @@ limitations under the License. */
 #include "framework/operator.h"
 #include "operators/op_param.h"

-#pragma once;
+#pragma once

 namespace paddle_mobile {
 namespace operators {

--- a/src/operators/kernel/fpga/concat_kernel.cpp
+++ b/src/operators/kernel/fpga/concat_kernel.cpp
@@ -39,7 +39,7 @@ void ConcatKernel<FPGA, half>::Compute(const ConcatParam &param) const {

  for (int i = 0; i < inputs.size(); ++i) {
    auto input = inputs[i];
-    auto channels = input[3];
+    auto channels = input->dims()[3];
    out_offset += channels;
    auto src = input->data<half>();
    for (int j = 0; j < pixels; ++j) {

--- a/src/operators/kernel/im2sequence_kernel.h
+++ b/src/operators/kernel/im2sequence_kernel.h
@@ -20,13 +20,11 @@ limitations under the License. */
 #include "operators/math/vol2col.h"
 #include "operators/op_param.h"

-#pragma once;
+#pragma once

 namespace paddle_mobile {
 namespace operators {

-using namespace framework;
-
 template <typename DeviceType, typename T>
 class Im2SequenceKernel
    : public framework::OpKernelBase<DeviceType, Im2SequenceParam> {

--- a/src/operators/kernel/mali/fushion_fc_kernel.cpp
+++ b/src/operators/kernel/mali/fushion_fc_kernel.cpp
@@ -14,8 +14,6 @@ limitations under the License. */

 #ifdef FUSION_FC_OP

-#pragma once
-
 #include "operators/kernel/fusion_fc_kernel.h"

 namespace paddle_mobile {

--- a/src/operators/kernel/prelu_kernel.h
+++ b/src/operators/kernel/prelu_kernel.h
@@ -15,7 +15,7 @@ limitations under the License. */
 #include "framework/operator.h"
 #include "operators/op_param.h"

-#pragma once;
+#pragma once

 namespace paddle_mobile {
 namespace operators {

--- a/src/operators/kernel/scale_kernel.h
+++ b/src/operators/kernel/scale_kernel.h
@@ -15,7 +15,7 @@ limitations under the License. */
 #include "framework/operator.h"
 #include "operators/op_param.h"

-#pragma once;
+#pragma once

 namespace paddle_mobile {
 namespace operators {

--- a/src/operators/kernel/slice_kernel.h
+++ b/src/operators/kernel/slice_kernel.h
@@ -15,7 +15,7 @@ limitations under the License. */
 #include "framework/operator.h"
 #include "operators/op_param.h"

-#pragma once;
+#pragma once

 namespace paddle_mobile {
 namespace operators {

--- a/src/operators/op_param.h
+++ b/src/operators/op_param.h
@@ -262,11 +262,11 @@ class ElementwiseAddParam : OpParam {
 #ifdef PADDLE_MOBILE_FPGA

 private:
-  fpga::FpgaEWAddArgs fpga_EW_add_args;
+  fpga::EWAddArgs fpga_EW_add_args;

 public:
-  const fpga::FpgaEWAddArgs &FpgaArgs() const { return fpga_EW_add_args; }
-  void SetFpgaArgs(const fpga::FpgaEWAddArgs &args) { fpga_EW_add_args = args; }
+  const fpga::EWAddArgs &FpgaArgs() const { return fpga_EW_add_args; }
+  void SetFpgaArgs(const fpga::EWAddArgs &args) { fpga_EW_add_args = args; }
 #endif
 };

@@ -465,11 +465,11 @@ class PoolParam : public OpParam {
 #ifdef PADDLE_MOBILE_FPGA

 private:
-  fpga::FpgaPoolArgs fpga_pool_args;
+  fpga::PoolingArgs fpga_pool_args;

 public:
-  const fpga::FpgaPoolArgs &FpgaArgs() const { return fpga_pool_args; }
-  void SetFpgaArgs(const fpga::FpgaPoolArgs &args) { fpga_pool_args = args; }
+  const fpga::PoolingArgs &FpgaArgs() const { return fpga_pool_args; }
+  void SetFpgaArgs(const fpga::PoolingArgs &args) { fpga_pool_args = args; }
 #endif
 };
 #endif
@@ -651,10 +651,10 @@ class MultiClassNMSParam : public OpParam {
 class FeedParam : public OpParam {
 public:
  FeedParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
-            const AttributeMap &attrs, Scope const &scope) {
-    input_x_ = InputXFrom<LoDTensor>(inputs, scope);
-    out_ = OutFrom<LoDTensor>(outputs, scope);
-    auto var = scope.Var("batch_size");
+            const AttributeMap &attrs, Scope *scope) {
+    input_x_ = InputXFrom<LoDTensor>(inputs, *scope);
+    out_ = OutFrom<LoDTensor>(outputs, *scope);
+    auto var = scope->Var("batch_size");
    batch_size = var->GetValue<int>();
  }
  const Tensor *InputX() const { return input_x_; }
@@ -933,11 +933,11 @@ class FusionFcParam : public OpParam {
 #ifdef PADDLE_MOBILE_FPGA

 private:
-  fpga::FpgaConvArgs fpga_conv_args;
+  fpga::ConvArgs fpga_conv_args;

 public:
-  const fpga::FpgaConvArgs &FpgaArgs() const { return fpga_conv_args; }
-  void SetFpgaArgs(const fpga::FpgaConvArgs &args) { fpga_conv_args = args; }
+  const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; }
 #endif
 };

@@ -991,11 +991,11 @@ class FusionConvAddParam : public OpParam {
 #ifdef PADDLE_MOBILE_FPGA

 private:
-  fpga::FpgaConvArgs fpga_conv_args;
+  fpga::ConvArgs fpga_conv_args;

 public:
-  const fpga::FpgaConvArgs &FpgaArgs() const { return fpga_conv_args; }
-  void SetFpgaArgs(const fpga::FpgaConvArgs &args) { fpga_conv_args = args; }
+  const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; }
 #endif
 };

@@ -1096,11 +1096,11 @@ class FusionConvAddBNReluParam : public OpParam {
 #ifdef PADDLE_MOBILE_FPGA

 private:
-  fpga::FpgaConvArgs fpga_conv_args;
+  fpga::ConvArgs fpga_conv_args;

 public:
-  const fpga::FpgaConvArgs &FpgaArgs() const { return fpga_conv_args; }
-  void SetFpgaArgs(const fpga::FpgaConvArgs &args) { fpga_conv_args = args; }
+  const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; }
 #endif
 };
 #endif
@@ -1190,11 +1190,11 @@ class FusionConvAddBNParam : public OpParam {
 #ifdef PADDLE_MOBILE_FPGA

 private:
-  fpga::FpgaConvArgs fpga_conv_args;
+  fpga::ConvArgs fpga_conv_args;

 public:
-  const fpga::FpgaConvArgs &FpgaArgs() const { return fpga_conv_args; }
-  void SetFpgaArgs(const fpga::FpgaConvArgs &args) { fpga_conv_args = args; }
+  const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; }
 #endif
 };
 #endif

--- a/test/framework/test_load.cpp
+++ b/test/framework/test_load.cpp
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

+#include <string>
+
 #include "../test_helper.h"
 #include "io/loader.h"

@@ -20,12 +22,10 @@ int main() {
  //  ../../../test/models/googlenet
  //  ../../../test/models/mobilenet
  //  auto program = loader.Load(g_googlenet, true);
+  //  auto program = loader.Load(g_mobilenet_ssd, true);

-  auto program = loader.Load(g_mobilenet_ssd, true);
-  //  auto program = loader.Load(g_googlenet_combine + "/model",
-  //  g_googlenet_combine +
-  //    "/params", true);
-
+  auto program = loader.Load(std::string(g_ocr) + "/model",
+                             std::string(g_ocr) + "/params", false);
  //  program.originProgram->Description("program desc: ");
  return 0;
 }
--- a/test/test_helper.h
+++ b/test/test_helper.h
@@ -24,6 +24,7 @@ limitations under the License. */
 #include "framework/ddim.h"
 #include "framework/tensor.h"

+static const char *g_ocr = "../models/ocr";
 static const char *g_mobilenet_ssd = "../models/mobilenet+ssd";
 static const char *g_mobilenet_ssd_gesture = "../models/mobilenet+ssd_gesture";
 static const char *g_squeezenet = "../models/squeezenet";

--- a/test_gemm.cpp
+++ b/test_gemm.cpp
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include <iostream>
-#include <cstdlib>
-#include <ctime>
-#include "../test_helper.h"
-#include "common/log.h"
-#include "memory/t_malloc.h"
-#include "operators/math/gemm.h"
-
-#define a(i, j) a[(i)*lda + (j)]
-#define b(i, j) b[(i)*ldb + (j)]
-#define c(i, j) c[(i)*ldc + (j)]
-#define c1(i, j) c1[(i)*ldc + (j)]
-
-
-void print_matirx(int m, int n, int ldc, float *c) {
-    for (int i = 0; i < m; ++i) {
-        std::cout << c(i, 0);
-        for (int j = 1; j < n; ++j) {
-            std::cout << " | " << c(i, j);
-        }
-        std::cout << std::endl;
-    }
-    std::cout << std::endl;
-}
-
-int do_sgemm(int m, int n, int k, bool relu, int t1, int t2, int pr) {
-  int lda = k;
-  int ldb = n;
-  int ldc = n;
-
-  float *a = static_cast<float *>(paddle_mobile::memory::Alloc(sizeof(float) * m * k));
-  float *b = static_cast<float *>(paddle_mobile::memory::Alloc(sizeof(float) * k * n));
-  float *c = static_cast<float *>(paddle_mobile::memory::Alloc(sizeof(float) * m * n));
-  float *c1 = static_cast<float *>(paddle_mobile::memory::Alloc(sizeof(float) * m * n));
-  float* scale = static_cast<float *>(paddle_mobile::memory::Alloc(sizeof(float) * m));
-  float* bias = static_cast<float *>(paddle_mobile::memory::Alloc(sizeof(float) * m));
-  
-  srand(unsigned(time(0)));
-  for (int i = 0; i < m * k; ++i) {
-    a[i] = t1 + rand() % t2; 
-  }
-  for (int i = 0; i < k * n; ++i) {
-    b[i] = t1 + rand() % t2; 
-  }
-  for (int i = 0; i < m; ++i) {
-    scale[i] = t1 + rand() % t2; 
-  }
-  for (int i = 0; i < m; ++i) {
-    bias[i] = t1 + rand() % t2; 
-  }
-  
-  for (int i = 0; i < m; ++i) {
-    for (int j = 0; j < n; ++j) {
-      float r = 0;
-      for (int p = 0; p < k; p++) {
-        r += a(i, p) * b(p, j);
-      }
-      r *= scale[i];
-      r += bias[i];
-      if (relu && (r < 0)) {
-        r = 0;
-      }
-      c1(i, j) = r;
-    }
-  }
-  
-  paddle_mobile::operators::math::SgemmWithBn(m, n, k, 0.9, a, lda,
-                 b, ldb, 0.3, c, ldc, relu, scale, bias);
-  int eq = 0;
-  int neq = 0;
-  for (int i = 0; i < m * n; ++i) {
-    if (static_cast<int>(c[i]) == static_cast<int>(c1[i])) {
-        ++eq;
-    } else {
-        ++neq;
-    }
-  }
-  
-  if (pr > 0) {
-    std::cout << "A:" << std::endl;
-    print_matirx(m, k, lda, a);
-    
-    std::cout << "B:" << std::endl;
-    print_matirx(k, n, ldb, b);
-    
-    std::cout << "C:" << std::endl;
-    print_matirx(m, n, ldc, c);
-
-    std::cout << "C1:" << std::endl;
-    print_matirx(m, n, ldc, c1);
-  }
-    
-  std::cout << "mnk=" << m << " " << n << " " << k << 
-    " relu=" << relu <<
-    "   eq=" << eq << " neq=" << neq << std::endl;
-
-  paddle_mobile::memory::Free(a);
-  paddle_mobile::memory::Free(b);
-  paddle_mobile::memory::Free(c);
-  paddle_mobile::memory::Free(c1);
-  paddle_mobile::memory::Free(scale);
-  paddle_mobile::memory::Free(bias);
-        
-  return 0;
-}
-
-int main() {
-    
-  do_sgemm(9, 9, 9, true, 10, 10, 10);
-  do_sgemm(10, 6, 12, false, 10, 10, 0);
-  do_sgemm(512, 256, 384, false, 10, 10, 0);
-  do_sgemm(1366, 768, 256, false, 10, 10, 0);
-  do_sgemm(1255, 755, 333, false, 10, 10, 0);
-  do_sgemm(555, 777, 999, false, 10, 10, 0);
-  
-  do_sgemm(10, 6, 12, true, -4, 10, 0);
-  do_sgemm(512, 256, 384, true, -4, 10, 0);
-  do_sgemm(1366, 768, 256, true, -4, 10, 0);
-  do_sgemm(1255, 755, 333, true, -4, 10, 0);
-  do_sgemm(555, 777, 999, true, -4, 10, 0);
-  return 0;
-}
--- a/tools/build.sh
+++ b/tools/build.sh
@@ -40,8 +40,8 @@ build_for_android() {
    fi

    if [ -z "$PLATFORM" ]; then
-#        PLATFORM="arm-v7a"  # Users could choose "arm-v8a" platform.
-        PLATFORM="arm-v8a"
+        PLATFORM="arm-v7a"  # Users could choose "arm-v8a" platform.
+#        PLATFORM="arm-v8a"
    fi

    if [ "${PLATFORM}" = "arm-v7a" ]; then

--- a/tools/quantification/convert.cpp
+++ b/tools/quantification/convert.cpp
@@ -3,8 +3,8 @@
 #include "src/enforce.h"
 #include "src/var_desc.h"
 #include "src/program_desc.h"
+#include <cstring>
 #include <cstdlib>
-#include <string>
 #include <cmath>
 #include <iostream>
 #include <utility>
@@ -13,7 +13,7 @@
 #include "src/protobuf-c.h"
 #include <fstream>
 #include <iostream>
-
+#include <limits>

 const size_t kSize64 = sizeof(uint64_t);
 const size_t kSize32 = sizeof(uint32_t);

--- a/tools/quantification/src/block_desc_local.h
+++ b/tools/quantification/src/block_desc_local.h
@@ -19,6 +19,7 @@ limitations under the License. */
 #ifndef TOOLS_QUANTIFICATION_SRC_BLOCK_DESC_LOCAL_H_
 #define TOOLS_QUANTIFICATION_SRC_BLOCK_DESC_LOCAL_H_

+#include <memory>
 #include <vector>
 #include "src/var_desc.h"