diff --git a/src/operators/kernel/arm/beam_search_decode_kernel.cpp b/src/operators/kernel/arm/beam_search_decode_kernel.cpp
index f22c032347faa2eb85c6594df54a5f91f214903c..97aaffe7c2fbb7a957748a4c7779d6e9785a9d95 100644
--- a/src/operators/kernel/arm/beam_search_decode_kernel.cpp
+++ b/src/operators/kernel/arm/beam_search_decode_kernel.cpp
@@ -15,6 +15,7 @@ limitations under the License. */
 #ifdef BEAM_SEARCH_DECODE_OP
 
 #include "operators/kernel/beam_search_decode_kernel.h"
+#include <algorithm>
 #include "framework/data_type.h"
 
 namespace paddle_mobile {
diff --git a/src/operators/kernel/arm/beam_search_kernel.cpp b/src/operators/kernel/arm/beam_search_kernel.cpp
index 5e88e2f18eed1d9aefbeb954a02245ff6daae036..9128c57c64617b338d5948519298f80241a29545 100644
--- a/src/operators/kernel/arm/beam_search_kernel.cpp
+++ b/src/operators/kernel/arm/beam_search_kernel.cpp
@@ -15,6 +15,7 @@ limitations under the License. */
 #ifdef BEAM_SEARCH_OP
 
 #include "operators/kernel/beam_search_kernel.h"
+#include <cmath>
 #include <numeric>
 
 namespace paddle_mobile {
diff --git a/src/operators/math/pooling.h b/src/operators/math/pooling.h
index 0f0b4e2630294aca069883932ad8115b50eb2ed4..909d289767ca1835d397370b9e075762ccc61d94 100644
--- a/src/operators/math/pooling.h
+++ b/src/operators/math/pooling.h
@@ -157,39 +157,34 @@ inline float PoolPost<AVG>(const float &x, const float &post) {
 
 template <PoolingType P>
 struct Pooling {
-  inline void operator()(const framework::Tensor &input,
-                         const std::vector<int> &kernel_size,
-                         const std::vector<int> &strides,
-                         const std::vector<int> &paddings,
-                         framework::Tensor *output);
+  void operator()(const framework::Tensor &input,
+                  const std::vector<int> &kernel_size,
+                  const std::vector<int> &strides,
+                  const std::vector<int> &paddings, framework::Tensor *output);
 };
 
 template <PoolingType P, int Stride>
 struct Pooling2x2 {
-  inline void operator()(const framework::Tensor &input,
-                         const std::vector<int> &paddings,
-                         framework::Tensor *output);
+  void operator()(const framework::Tensor &input,
+                  const std::vector<int> &paddings, framework::Tensor *output);
 };
 
 template <PoolingType P, int Stride>
 struct Pooling3x3 {
-  inline void operator()(const framework::Tensor &input,
-                         const std::vector<int> &paddings,
-                         framework::Tensor *output);
+  void operator()(const framework::Tensor &input,
+                  const std::vector<int> &paddings, framework::Tensor *output);
 };
 
 template <PoolingType P, int Stride>
 struct Pooling5x5 {
-  inline void operator()(const framework::Tensor &input,
-                         const std::vector<int> &paddings,
-                         framework::Tensor *output);
+  void operator()(const framework::Tensor &input,
+                  const std::vector<int> &paddings, framework::Tensor *output);
 };
 
 template <PoolingType P, int Stride>
 struct Pooling7x7 {
-  inline void operator()(const framework::Tensor &input,
-                         const std::vector<int> &paddings,
-                         framework::Tensor *output);
+  void operator()(const framework::Tensor &input,
+                  const std::vector<int> &paddings, framework::Tensor *output);
 };
 
 }  // namespace math
diff --git a/test/net/test_benchmark.cpp b/test/net/test_benchmark.cpp
index 4c9a36dc26371d701b1bc62840b73b2fee295224..0b576561b7f2ee5843a7ba5ebb500a681ce8da0e 100644
--- a/test/net/test_benchmark.cpp
+++ b/test/net/test_benchmark.cpp
@@ -46,7 +46,11 @@ int main(int argc, char* argv[]) {
     std::shared_ptr<paddle_mobile::framework::Tensor> output;
     std::vector<int64_t> dims{1, 3, 224, 224};
     if (feed_shape) {
-      sscanf(feed_shape, "%ld,%ld,%ld,%ld", &dims[0], &dims[1], &dims[2],
-             &dims[3]);
+      // int64_t is `long` on LP64 but `long long` on 32-bit ARM, so neither
+      // "%ld" nor "%lld" is portable; scan into long long and copy back.
+      long long shape[4] = {dims[0], dims[1], dims[2], dims[3]};  // NOLINT
+      sscanf(feed_shape, "%lld,%lld,%lld,%lld", &shape[0], &shape[1],
+             &shape[2], &shape[3]);
+      dims.assign(shape, shape + 4);
     }
     std::cout << "feed shape: [" << dims[0] << ", " << dims[1] << ", "
diff --git a/test/net/test_googlenet.cpp b/test/net/test_googlenet.cpp
index 24e74ffeaca6c0b27f04109721ffdab61d7fbaee..ea6c6ce1556f0332165f38a7491ca5f0230bac6c 100644
--- a/test/net/test_googlenet.cpp
+++ b/test/net/test_googlenet.cpp
@@ -13,25 +13,31 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include <iostream>
+#include <sstream>
 #include "../test_helper.h"
 #include "../test_include.h"
 
-int main(int argc, char* argv[]) {
-  if (argc < 2) {
-    std::cout << "Usage: ./test_benchmark feed_shape [thread_num] [use_fuse]\n"
-              << "feed_shape: input tensor shape, such as 3,224,224.\n"
-              << "thread_num: optional int, threads count, default is 1.\n"
-              << "use_fuse: optional bool, default is 0.\n";
+int main(int argc, char *argv[]) {
+  if (argc < 4) {
+    std::cout << "Usage: ./test_googlenet fluid-model input-image image-shape "
+                 "[thread-num] [fusion]\n"
+              << " fluid-model: fluid model path. \n"
+              << " input-image: input raw image path. \n"
+              << " image-shape: input tensor shape, such as 1,3,224,224.\n"
+              << " thread-num: optional int, threads count, default is 1.\n"
+              << " fusion: optional bool, default is 0.\n";
     return 1;
   }
   int thread_num = 1;
   bool optimize = false;
-  char* feed_shape = argv[1];
-  if (argc >= 3) {
-    thread_num = atoi(argv[2]);
+  char *fluid_model = argv[1];
+  char *input_img = argv[2];
+  char *feed_shape = argv[3];
+  if (argc >= 5) {
+    thread_num = atoi(argv[4]);
   }
-  if (argc >= 4) {
-    optimize = atoi(argv[3]);
+  if (argc >= 6) {
+    optimize = atoi(argv[5]);
   }
 #ifdef PADDLE_MOBILE_FPGA
   paddle_mobile::PaddleMobile<paddle_mobile::FPGA> paddle_mobile;
@@ -42,18 +48,25 @@ int main(int argc, char* argv[]) {
   paddle_mobile.SetThreadNum(thread_num);
   auto time1 = time();
   std::vector<float> output;
-  if (paddle_mobile.Load(g_googlenet, optimize, false, 1, true)) {
+  if (paddle_mobile.Load(fluid_model, optimize, false, 1, true)) {
     auto time2 = paddle_mobile::time();
     std::cout << "load cost :" << paddle_mobile::time_diff(time1, time2) << "ms"
               << std::endl;
     std::vector<float> input;
     std::vector<int64_t> dims{1, 3, 224, 224};
     if (feed_shape) {
-      sscanf(feed_shape, "%d,%d,%d", &dims[1], &dims[2], &dims[3]);
+      // int64_t is `long` on LP64 but `long long` on 32-bit ARM, so neither
+      // "%ld" nor "%lld" is portable; scan into long long and copy back.
+      long long shape[4] = {dims[0], dims[1], dims[2], dims[3]};  // NOLINT
+      sscanf(feed_shape, "%lld,%lld,%lld,%lld", &shape[0], &shape[1],
+             &shape[2], &shape[3]);
+      dims.assign(shape, shape + 4);
     }
     std::cout << "feed shape: [" << dims[0] << ", " << dims[1] << ", "
               << dims[2] << ", " << dims[3] << "]" << std::endl;
-    GetInput<float>(g_test_image_1x3x224x224, &input, dims);
+
+    GetInput<float>(input_img, &input, dims);
+
     // warmup
     for (int i = 0; i < 10; ++i) {
       output = paddle_mobile.Predict(input, dims);
@@ -64,6 +77,15 @@ int main(int argc, char* argv[]) {
     }
     auto time4 = time();
     std::cout << "predict cost: " << time_diff(time3, time4) / 10 << "ms\n";
+
+    // Log the outputs comma-separated. Starting the loop at 0 means an empty
+    // result logs an empty string instead of reading output[0] out of bounds.
+    std::ostringstream os;
+    for (size_t i = 0; i < output.size(); ++i) {
+      if (i > 0) os << ", ";
+      os << output[i];
+    }
+    DLOG << os.str();
   }
   return 0;
 }
diff --git a/tools/build.sh b/tools/build.sh
index cac18c79d7bf5c783d85dd92e0833af17bbd2bc7..2134abdd9ee0ae3ac32fcc2030e32032bcdab48b 100755
--- a/tools/build.sh
+++ b/tools/build.sh
@@ -118,7 +118,8 @@ build_for_arm_linux() {
     fi
 
     cd "../build/release/arm-linux"
-    make -j 8
+    make -j 2
+
     cd "../../../test/"
     DIRECTORY="models"
     if [ "`ls -A $DIRECTORY`" = "" ]; then
diff --git a/tools/ci_build.sh b/tools/ci_build.sh
index 424dc1890f1b7c04863701b1d219e59a4eccb438..8bd892c22d26b3c7f4b4bccf60689abf5f42cc16 100755
--- a/tools/ci_build.sh
+++ b/tools/ci_build.sh
@@ -75,7 +75,6 @@ function build_android_armv7_cpu_only() {
     -DANDROID=true \
     -DWITH_LOGGING=OFF \
     -DCPU=ON \
-    -DGPU_MALI=OFF \
     -DGPU_CL=OFF \
     -DFPGA=OFF
 
@@ -95,7 +94,6 @@ function build_android_armv7_gpu() {
     -DANDROID=true \
     -DWITH_LOGGING=OFF \
     -DCPU=ON \
-    -DGPU_MALI=ON \
     -DGPU_CL=ON \
     -DFPGA=OFF
 
@@ -115,7 +113,6 @@ function build_android_armv8_cpu_only() {
     -DANDROID=true \
     -DWITH_LOGGING=OFF \
     -DCPU=ON \
-    -DGPU_MALI=OFF \
     -DGPU_CL=OFF \
     -DFPGA=OFF
 
@@ -135,7 +132,6 @@ function build_android_armv8_gpu() {
     -DANDROID=true \
     -DWITH_LOGGING=OFF \
     -DCPU=ON \
-    -DGPU_MALI=ON \
     -DGPU_CL=ON \
     -DFPGA=OFF
 
@@ -154,7 +150,6 @@ function build_ios_armv8_cpu_only() {
     -DIS_IOS=true \
     -DUSE_OPENMP=OFF \
     -DCPU=ON \
-    -DGPU_MALI=OFF \
     -DGPU_CL=OFF \
     -DFPGA=OFF
 
@@ -173,7 +168,6 @@ function build_ios_armv8_gpu() {
     -DIS_IOS=true \
     -DUSE_OPENMP=OFF \
     -DCPU=ON \
-    -DGPU_MALI=OFF \
     -DGPU_CL=ON \
     -DFPGA=OFF
 
@@ -188,7 +182,6 @@ function build_linux_armv7_cpu_only() {
     -DCMAKE_BUILD_TYPE="MinSizeRel" \
     -DCMAKE_TOOLCHAIN_FILE="./tools/toolchains/arm-linux-gnueabihf.cmake" \
     -DCPU=ON \
-    -DGPU_MALI=OFF \
     -DGPU_CL=OFF \
     -DFPGA=OFF
 
@@ -203,7 +196,6 @@ function build_linux_armv7_gpu() {
     -DCMAKE_BUILD_TYPE="MinSizeRel" \
     -DCMAKE_TOOLCHAIN_FILE="./tools/toolchains/arm-linux-gnueabihf.cmake" \
     -DCPU=ON \
-    -DGPU_MALI=ON \
     -DGPU_CL=ON \
     -DFPGA=OFF
 
diff --git a/tools/toolchains/arm-linux-gnueabihf.cmake b/tools/toolchains/arm-linux-gnueabihf.cmake
index 7db42c7e73f4cfabce670bb2bc691e4b5bd314a2..2b8729cd9db05f34959e936d8e0c1e2bdc529338 100644
--- a/tools/toolchains/arm-linux-gnueabihf.cmake
+++ b/tools/toolchains/arm-linux-gnueabihf.cmake
@@ -4,8 +4,7 @@ set(CMAKE_SYSTEM_NAME Linux)
 set(CMAKE_SYSTEM_PROCESSOR arm)
 set(CMAKE_SYSTEM_VERSION 1)
 
-message("if U build on platform . this is right.")
 set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
 set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
 set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
-set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
\ No newline at end of file
+set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)