refine tests. test=develop

eb2f7ed2 · dzhwinter · 0a180584 · eb2f7ed2 · eb2f7ed2 · eb2f7ed2
11 changed file
--- a/cmake/external/threadpool.cmake
+++ b/cmake/external/threadpool.cmake
@@ -3,7 +3,6 @@ INCLUDE(ExternalProject)
 SET(THREADPOOL_SOURCE_DIR ${THIRD_PARTY_PATH}/threadpool)
 SET(THREADPOOL_INCLUDE_DIR ${THREADPOOL_SOURCE_DIR}/src/extern_threadpool)
 INCLUDE_DIRECTORIES(${THREADPOOL_INCLUDE_DIR})
-message("Debug" ${THREADPOOL_INCLUDE_DIR})

 ExternalProject_Add(
    extern_threadpool

--- a/paddle/fluid/framework/data_type.h
+++ b/paddle/fluid/framework/data_type.h
@@ -25,7 +25,6 @@ namespace framework {
 extern proto::VarType::Type ToDataType(std::type_index type);
 extern std::type_index ToTypeIndex(proto::VarType::Type type);

-#if !defined(_MSC_VER)
 template <typename Visitor>
 inline void VisitDataType(proto::VarType::Type type, Visitor visitor) {
  switch (type) {
@@ -60,46 +59,6 @@ inline void VisitDataType(proto::VarType::Type type, Visitor visitor) {
      PADDLE_THROW("Not supported %d", type);
  }
 }
-#else
-// the msvc compiler do not implement two-stage name lookup correctly.
-template <typename Visitor>
-inline void VisitDataType(proto::VarType::Type type, Visitor visitor) {
-  switch (type) {
-    case proto::VarType::FP16:
-      visitor.template apply<platform::float16>();
-      break;
-    case proto::VarType::FP32:
-      visitor.template apply<float>();
-      break;
-    case proto::VarType::FP64:
-      visitor.template apply<double>();
-      break;
-    case proto::VarType::INT32:
-      visitor.template apply<int>();
-      break;
-    case proto::VarType::INT64:
-      visitor.template apply<int64_t>();
-      break;
-    case proto::VarType::BOOL:
-      visitor.template apply<bool>();
-      break;
-    case proto::VarType::UINT8:
-      visitor.template apply<uint8_t>();
-      break;
-    case proto::VarType::INT16:
-      visitor.template apply<int16_t>();
-      break;
-    default:
-      PADDLE_THROW("Not supported %d", type);
-  }
-}
-
-template <typename InT>
-void* AnyCast(const InT* t) {
-  return static_cast<void*>(const_cast<InT*>(t));
-}
-
-#endif  // _WIN32

 extern std::string DataTypeToString(const proto::VarType::Type type);
 extern size_t SizeOfType(std::type_index type);

--- a/paddle/fluid/framework/executor.cc
+++ b/paddle/fluid/framework/executor.cc
@@ -337,7 +337,6 @@ std::unique_ptr<ExecutorPrepareContext> Executor::Prepare(
      new ExecutorPrepareContext(program, block_id));
  PADDLE_ENFORCE_LT(static_cast<size_t>(block_id), program.Size());
  auto& block = program.Block(block_id);
-  int counter = 0;
  for (auto& op_desc : block.AllOps()) {
    ctx->ops_.push_back(OpRegistry::CreateOp(*op_desc));
  }

--- a/paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc
@@ -11,10 +11,9 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#include <string>

-#include <string>
 #include "paddle/fluid/framework/ir/attention_lstm_fuse_pass.h"
+#include <string>
 #include "paddle/fluid/framework/ir/graph_pattern_detector.h"
 #include "paddle/fluid/framework/ir/graph_viz_pass.h"
 #include "paddle/fluid/framework/lod_tensor.h"
@@ -212,12 +211,12 @@ void PrepareLSTMWeight(const LoDTensor& W_forget_w0,
  VLOG(3) << "LSTMWeight resized to " << out->dims();

  float* out_data = out->mutable_data<float>(platform::CPUPlace());
-  std::array<const float*, 4> tensors = {
-      W_forget_w0.data<float>(), W_input_w0.data<float>(),
-      W_output_w0.data<float>(), W_cell_w0.data<float>()};
-  std::array<const float*, 4> tensors1 = {
-      W_forget_w1.data<float>(), W_input_w1.data<float>(),
-      W_output_w1.data<float>(), W_cell_w1.data<float>()};
+  std::array<const float*, 4> tensors(
+      {{W_forget_w0.data<float>(), W_input_w0.data<float>(),
+        W_output_w0.data<float>(), W_cell_w0.data<float>()}});
+  std::array<const float*, 4> tensors1(
+      {{W_forget_w1.data<float>(), W_input_w1.data<float>(),
+        W_output_w1.data<float>(), W_cell_w1.data<float>()}});

  for (int row = 0; row < D; row++) {
    for (int col = 0; col < 4; col++) {
@@ -239,9 +238,9 @@ void PrepareLSTMWeight(const LoDTensor& W_forget_w0,
 void PrepareLSTMBias(const LoDTensor& B_forget, const LoDTensor& B_input,
                     const LoDTensor& B_output, const LoDTensor& B_cell,
                     LoDTensor* out) {
-  std::array<const float*, 4> tensors = {
-      B_forget.data<float>(), B_input.data<float>(), B_output.data<float>(),
-      B_cell.data<float>()};
+  std::array<const float*, 4> tensors(
+      {{B_forget.data<float>(), B_input.data<float>(), B_output.data<float>(),
+        B_cell.data<float>()}});

  PADDLE_ENFORCE_EQ(B_forget.dims().size(), 1);
  int D = B_forget.dims()[0];

--- a/paddle/fluid/inference/api/api_impl.cc
+++ b/paddle/fluid/inference/api/api_impl.cc
@@ -94,8 +94,6 @@ bool NativePaddlePredictor::Init(
    // All parameters are saved in a single file.
    // The file names should be consistent with that used
    // in Python API `fluid.io.save_inference_model`.
-    auto exe = executor_.get();
-    auto sc = scope_.get();
    inference_program_ = paddle::inference::Load(
        executor_.get(), scope_.get(), config_.prog_file, config_.param_file);


--- a/paddle/fluid/inference/api/demo_ci/CMakeLists.txt
+++ b/paddle/fluid/inference/api/demo_ci/CMakeLists.txt
@@ -6,13 +6,13 @@ option(WITH_STATIC_LIB "Compile demo with static/shared library, default use sta
 option(USE_TENSORRT "Compile demo with TensorRT."   OFF)

 macro(safe_set_static_flag)
-    foreach(flag_var
-        CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
-        CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO)
-      if(${flag_var} MATCHES "/MD")
-        string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}")
-      endif(${flag_var} MATCHES "/MD")
-    endforeach(flag_var)
+  foreach(flag_var
+      CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
+      CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO)
+    if(${flag_var} MATCHES "/MD")
+      string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}")
+    endif(${flag_var} MATCHES "/MD")
+  endforeach(flag_var)
 endmacro()

 if (WIN32)
@@ -42,7 +42,7 @@ if(WITH_GPU) # default gpu path
    set(CUDA_LIB "/usr/local/cuda/lib64/" CACHE STRING "CUDA Library")
  else()
    if(CUDA_LIB STREQUAL "")
-    set(CUDA_LIB "C:\\Program\ Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v8.0\\lib\\x64")
+      set(CUDA_LIB "C:\\Program\ Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v8.0\\lib\\x64")
    endif()
  endif(NOT WIN32)
 endif()
@@ -53,9 +53,9 @@ include_directories("${PADDLE_LIB}/third_party/install/glog/include")
 include_directories("${PADDLE_LIB}/third_party/install/gflags/include")
 include_directories("${PADDLE_LIB}/third_party/install/xxhash/include")
 if (NOT WIN32)
-include_directories("${PADDLE_LIB}/third_party/install/snappy/include")
-include_directories("${PADDLE_LIB}/third_party/install/snappystream/include")
-include_directories("${PADDLE_LIB}/third_party/install/zlib/include")
+  include_directories("${PADDLE_LIB}/third_party/install/snappy/include")
+  include_directories("${PADDLE_LIB}/third_party/install/snappystream/include")
+  include_directories("${PADDLE_LIB}/third_party/install/zlib/include")
 endif(NOT WIN32)

 include_directories("${PADDLE_LIB}/third_party/boost")
@@ -63,15 +63,15 @@ include_directories("${PADDLE_LIB}/third_party/eigen3")

 if (NOT WIN32)
  if (USE_TENSORRT AND WITH_GPU)
-      include_directories("${TENSORRT_INCLUDE_DIR}")
-      link_directories("${TENSORRT_LIB_DIR}")
+    include_directories("${TENSORRT_INCLUDE_DIR}")
+    link_directories("${TENSORRT_LIB_DIR}")
  endif()
 endif(NOT WIN32)

 if (NOT WIN32)
-link_directories("${PADDLE_LIB}/third_party/install/snappy/lib")
-link_directories("${PADDLE_LIB}/third_party/install/snappystream/lib")
-link_directories("${PADDLE_LIB}/third_party/install/zlib/lib")
+  link_directories("${PADDLE_LIB}/third_party/install/snappy/lib")
+  link_directories("${PADDLE_LIB}/third_party/install/snappystream/lib")
+  link_directories("${PADDLE_LIB}/third_party/install/zlib/lib")
 endif(NOT WIN32)

 link_directories("${PADDLE_LIB}/third_party/install/protobuf/lib")
@@ -80,18 +80,12 @@ link_directories("${PADDLE_LIB}/third_party/install/gflags/lib")
 link_directories("${PADDLE_LIB}/third_party/install/xxhash/lib")
 link_directories("${PADDLE_LIB}/paddle/lib")

-# add_executable(${DEMO_NAME} ${DEMO_NAME}.cc)
- # add_library(${DEMO_NAME} ${DEMO_NAME}.cc)
-add_executable(real_data_icnet_tester real_data_icnet_tester.cc)
-
-# add_library(${DEMO_NAME} SHARED  ${DEMO_NAME}.cc)
-# add_executable(test test.cc)
-add_executable(thread_icnet_test thread_icnet_test.cc)
+add_executable(${DEMO_NAME} ${DEMO_NAME}.cc)

 if(WITH_MKL)
  include_directories("${PADDLE_LIB}/third_party/install/mklml/include")
  set(MATH_LIB ${PADDLE_LIB}/third_party/install/mklml/lib/libmklml_intel${CMAKE_SHARED_LIBRARY_SUFFIX}
-               ${PADDLE_LIB}/third_party/install/mklml/lib/libiomp5${CMAKE_SHARED_LIBRARY_SUFFIX})
+    ${PADDLE_LIB}/third_party/install/mklml/lib/libiomp5${CMAKE_SHARED_LIBRARY_SUFFIX})
  set(MKLDNN_PATH "${PADDLE_LIB}/third_party/install/mkldnn")
  if(EXISTS ${MKLDNN_PATH})
    include_directories("${MKLDNN_PATH}/include")
@@ -104,25 +98,25 @@ endif()
 # Note: libpaddle_inference_api.so/a must put before libpaddle_fluid.so/a
 if(WITH_STATIC_LIB)
  set(DEPS
-	  ${PADDLE_LIB}/paddle/fluid/inference/libpaddle_fluid${CMAKE_STATIC_LIBRARY_SUFFIX})
+    ${PADDLE_LIB}/paddle/lib/libpaddle_fluid${CMAKE_STATIC_LIBRARY_SUFFIX})
 else()
  set(DEPS
-      ${PADDLE_LIB}/paddle/fluid/inference/libpaddle_fluid${CMAKE_SHARED_LIBRARY_SUFFIX})
+    ${PADDLE_LIB}/paddle/lib/libpaddle_fluid${CMAKE_SHARED_LIBRARY_SUFFIX})
 endif()

 if (NOT WIN32)
-set(EXTERNAL_LIB "-lrt -ldl -lpthread")
-set(DEPS ${DEPS}
+  set(EXTERNAL_LIB "-lrt -ldl -lpthread")
+  set(DEPS ${DEPS}
    ${MATH_LIB} ${MKLDNN_LIB}
    glog gflags protobuf snappystream snappy z xxhash
    ${EXTERNAL_LIB})
 else()
-set(DEPS ${DEPS}
+  set(DEPS ${DEPS}
    ${MATH_LIB} ${MKLDNN_LIB}
    ${CMAKE_STATIC_LIBRARY_PREFIX}glog  ${CMAKE_STATIC_LIBRARY_PREFIX}gflags  ${CMAKE_STATIC_LIBRARY_PREFIX}protobuf
    ${EXTERNAL_LIB})
-# NOTE(dzhwinter) shlwapi is deprecated.
-set(DEPS ${DEPS} libcmt shlwapi)
+  # NOTE(dzhwinter) shlwapi will be deprecated.
+  set(DEPS ${DEPS} libcmt shlwapi)
 endif(NOT WIN32)

 if(WITH_GPU)
@@ -134,14 +128,9 @@ if(WITH_GPU)
    set(DEPS ${DEPS} ${CUDA_LIB}/libcudart${CMAKE_SHARED_LIBRARY_SUFFIX})
  else()
    set(DEPS ${DEPS} ${CUDA_LIB}/cudart${CMAKE_STATIC_LIBRARY_SUFFIX} )
-  set(DEPS ${DEPS} ${CUDA_LIB}/cublas${CMAKE_STATIC_LIBRARY_SUFFIX} )
-  set(DEPS ${DEPS} ${CUDA_LIB}/cudnn${CMAKE_STATIC_LIBRARY_SUFFIX} )
+    set(DEPS ${DEPS} ${CUDA_LIB}/cublas${CMAKE_STATIC_LIBRARY_SUFFIX} )
+    set(DEPS ${DEPS} ${CUDA_LIB}/cudnn${CMAKE_STATIC_LIBRARY_SUFFIX} )
  endif()
 endif()

-target_link_libraries(real_data_icnet_tester ${DEPS})
-
-# target_link_libraries(${DEMO_NAME} ${DEPS})
-# target_link_libraries(test ${DEMO_NAME} )
-target_link_libraries(thread_icnet_test ${DEPS})
-# target_compile_definitions(${DEMO_NAME} PRIVATE "API_DEFINITION")
+target_link_libraries(${DEMO_NAME} ${DEPS})
--- a/paddle/fluid/inference/api/demo_ci/inference_icnet.cc
+++ b/paddle/fluid/inference/api/demo_ci/inference_icnet.cc
@@ -11,152 +11,89 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
-
-#include <cassert>
-#include <chrono>
-#include <iostream>
+#define GOOGLE_GLOG_DLL_DECL
+#include <gflags/gflags.h>
+#include <glog/logging.h>
+#include <chrono>  // NOLINT
 #include <fstream>
-#include <algorithm>
-#include <vector>
-#include <string>
-#include <memory>
-
-#include "paddle/fluid/inference/api/paddle_inference_api.h"
-#include "inference_icnet.h"
-
-// 数据格式
-// "<space splitted floats as data>\t<space splitted ints as shape"
-// 1. 存储为float32格式。
-// 2. 必须减去均值。 CHW三个通道为 mean = 112.15, 109.41, 185.42
-using namespace paddle;
-
-class Predictor {
-private:
-	std::unique_ptr<PaddlePredictor> predictor;
-	struct Record
-	{
-		std::vector<float> data;
-		std::vector<int32_t> shape;
-	};
-
-	const int C = 3; // image channel
-	const int H = 449; // image height
-	const int W = 581; // image width
-
-	using Time = decltype(std::chrono::high_resolution_clock::now());
-
-	Time time() { return std::chrono::high_resolution_clock::now(); };
-
-	double time_diff(Time t1, Time t2) {
-		typedef std::chrono::microseconds ms;
-		auto diff = t2 - t1;
-		ms counter = std::chrono::duration_cast<ms>(diff);
-		return counter.count() / 1000.0;
-	}
-
-	static void split(const std::string& str, char sep,
-		std::vector<std::string>* pieces) {
-		pieces->clear();
-		if (str.empty()) {
-			return;
-		}
-		size_t pos = 0;
-		size_t next = str.find(sep, pos);
-		while (next != std::string::npos) {
-			pieces->push_back(str.substr(pos, next - pos));
-			pos = next + 1;
-			next = str.find(sep, pos);
-		}
-		if (!str.substr(pos).empty()) {
-			pieces->push_back(str.substr(pos));
-		}
-	}
-
-	Record ProcessALine(const std::string& line) {
-		std::vector<std::string> columns;
-		split(line, '\t', &columns);
-
-		Record record;
-		std::vector<std::string> data_strs;
-		split(columns[0], ' ', &data_strs);
-		for (auto& d : data_strs) {
-			record.data.push_back(std::stof(d));
-		}
-
-		std::vector<std::string> shape_strs;
-		split(columns[1], ' ', &shape_strs);
-		for (auto& s : shape_strs) {
-			record.shape.push_back(std::stoi(s));
-		}
-		return record;
-	}
-
-public:
-	Predictor (const char* prog_file,
-		const char* param_file, const float fraction_of_gpu_memory,
-		const bool use_gpu, const int device) {
-
-		NativeConfig config;
-		config.prog_file = prog_file;
-		config.param_file = param_file;
-		config.fraction_of_gpu_memory = fraction_of_gpu_memory;
-		config.use_gpu = use_gpu;
-		config.device = device;
-
-		predictor = CreatePaddlePredictor<NativeConfig>(config);
-	}
-
-	void predict(float* input, const int channel, const int height, const int width, 
-		int64_t** output, int* output_length, int batch_size) {
-		std::vector<float> data;
-		int intput_length = channel * height * width * batch_size;
-		for (int i = 0; i < intput_length; i++) {
-			data.push_back(*((float*)input + i));
-		}
-
-		// initialize the input data 
-		PaddleTensor tensor;
-		tensor.shape = std::vector<int>({ batch_size, channel, height, width });
-		tensor.data.Resize(sizeof(float) * batch_size * channel * height * width);
-		std::copy(data.begin(), data.end(), static_cast<float*>(tensor.data.data()));
-
-		tensor.dtype = PaddleDType::FLOAT32;
-		std::vector<PaddleTensor> paddle_tensor_feeds(1, tensor);
-
-		// initialize the output data
-		PaddleTensor tensor_out;
-		std::vector<PaddleTensor> outputs(1, tensor_out);
-		predictor->Run(paddle_tensor_feeds, &outputs, batch_size);
-		*output_length = (int)outputs[0].data.length();
-		std::memcpy(static_cast<void *>(*output), outputs[0].data.data(), outputs[0].data.length());
-		int64_t sum_out = 0;
-		for(int i=0; i < outputs[0].data.length()/sizeof(int64_t); ++i) {
-			int64_t item = static_cast<int64_t*>(outputs[0].data.data())[i];
-			sum_out += item;
-			if (item != 0) {
-				std::cout << item << std::endl;
-			}
-		}
+#include <iostream>
+#include <thread>  // NOLINT
+#include <utility>
+#include "paddle/fluid/inference/paddle_inference_api.h"
+
+namespace paddle {
+
+NativeConfig GetConfig() {
+  NativeConfig config;
+  config.prog_file = "hs_lb_without_bn_cudnn/__model__";
+  config.param_file = "hs_lb_without_bn_cudnn/__params__";
+  config.fraction_of_gpu_memory = 0.0;
+  config.use_gpu = true;
+  config.device = 0;
+  return config;
+}

-		std::cout << "sum_out" << sum_out << std::endl;
-	}
-};
+using Time = decltype(std::chrono::high_resolution_clock::now());
+Time TimeNow() { return std::chrono::high_resolution_clock::now(); }
+double TimeDiff(Time t1, Time t2) {
+  typedef std::chrono::microseconds ms;
+  auto diff = t2 - t1;
+  ms counter = std::chrono::duration_cast<ms>(diff);
+  return counter.count() / 1000.0;
+}

-API_REFERENCE void * init_predictor(const char* prog_file,
-	const char* param_file, const float fraction_of_gpu_memory,
-	const bool use_gpu, const int device) {
-	return new Predictor(prog_file, param_file, fraction_of_gpu_memory, use_gpu, device);
+std::vector<PaddleTensor> PrepareData() {
+  int height = 449;
+  int width = 581;
+  std::vector<float> data;
+  for (int i = 0; i < 3 * height * width; ++i) {
+    data.push_back(0.0);
+  }
+  PaddleTensor tensor;
+  tensor.shape = std::vector<int>({batch_size, 3, height, width});
+  tensor.data.Resize(sizeof(float) * batch_size * 3 * height * width);
+  std::copy(data.begin(), data.end(), static_cast<float*>(tensor.data.data()));
+  tensor.dtype = PaddleDType::FLOAT32;
+  std::vector<PaddleTensor> paddle_tensor_feeds(1, tensor);
+  return std::move(paddle_tensor_feeds);
 }

-API_REFERENCE void predict(void* handle, float* input, const int channel, const int height, const int width, 
-	int64_t** output, int* output_length, int batch_size) {
-	assert(handle != nullptr);
-	((Predictor*)handle)->predict(input, channel, height, width, output, output_length, batch_size);
+void TestNaive(int batch_size, int thread_num) {
+  NativeConfig config = GetConfig();
+
+  int num_jobs = thread_num;   // parallel jobs.
+  constexpr int epoches = 10;  // each job run epoches.
+  std::vector<std::thread> threads;
+  std::vector<std::unique_ptr<PaddlePredictor>> predictors;
+  for (int tid = 0; tid < num_jobs; ++tid) {
+    auto& pred = CreatePaddlePredictor<NativeConfig>(config);
+    predictors.emplace_back(std::move(pred));
+  }
+
+  auto time1 = TimeNow();
+  for (int tid = 0; tid < num_jobs; ++tid) {
+    threads.emplace_back([&, tid]() {
+      auto& predictor = predictors[tid];
+      PaddleTensor tensor_out;
+      std::vector<PaddleTensor> outputs(1, tensor_out);
+      for (size_t i = 0; i < epoches; i++) {
+        ASSERT_TRUE(predictor->Run(paddle_tensor_feeds, &outputs));
+        VLOG(3) << "tid : " << tid << " run: " << i << "finished";
+        ASSERT_EQ(outputs.size(), 1UL);
+      }
+    });
+  }
+  for (int i = 0; i < num_jobs; ++i) {
+    threads[i].join();
+  }
+  auto time2 = TimeNow();
+  VLOG(3) << "Thread num " << thread_num << "total time cost"
+          << (time2 - time1);
 }
+}  // namespace paddle

-API_REFERENCE void destory_predictor(void *handle) {
-	if (handle) {
-		delete handle;
-		handle = nullptr;
-	}
+int main(int argc, char** argv) {
+  paddle::TestNaive(1, 1);  // single thread.
+  paddle::TestNaive(1, 5);  // 5 threads.
+  return 0;
 }
--- a/paddle/fluid/inference/api/demo_ci/inference_icnet.h
+++ b/paddle/fluid/inference/api/demo_ci/inference_icnet.h
-
-#ifdef _WIN32
-#ifdef inference_icnet_EXPORTS
-#define API_REFERENCE extern "C" __declspec(dllexport) 
-#else
-#define API_REFERENCE extern "C" __declspec(dllimport) 
-#endif
-#else
-#define API_REFERENCE
-#endif
-
-//API_REFERENCE void * init_predictor();
-//API_REFERENCE void destory_predictor(void *handle);
-//API_REFERENCE void predict(void *handle, int n);
-
-API_REFERENCE void * init_predictor(const char* prog_file,
-	const char* param_file, const float fraction_of_gpu_memory,
-	const bool use_gpu, const int device);
-API_REFERENCE void predict(void* handle, float* input, const int channel, const int height,
-	const int width, int64_t** output, int* output_length, int batch_size);
-API_REFERENCE void destory_predictor(void *handle);
--- a/paddle/fluid/inference/api/demo_ci/real_data_icnet_tester.cc
+++ b/paddle/fluid/inference/api/demo_ci/real_data_icnet_tester.cc
-// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#define GOOGLE_GLOG_DLL_DECL
-#include <gflags/gflags.h>
-#include <glog/logging.h>
-#include <chrono>
-#include <fstream>
-#include <iostream>
-#include "paddle/fluid/inference/paddle_inference_api.h"
-
-namespace paddle {
-NativeConfig GetConfig() {
-  NativeConfig config;
-
-  // config.model_dir = FLAGS_dirname;
-  config.prog_file = "hs_lb_without_bn_cudnn/__model__";
-  config.param_file = "hs_lb_without_bn_cudnn/__params__";
-  // config.prog_file = "hs_lb_without_bn_cuda/__model__";
-  // config.param_file = "hs_lb_without_bn_cuda/__params__";
-  config.fraction_of_gpu_memory = 0.0;
-  config.use_gpu = true;
-  config.device = 0;
-  return config;
-}
-
-using Time = decltype(std::chrono::high_resolution_clock::now());
-Time time() { return std::chrono::high_resolution_clock::now(); };
-double time_diff(Time t1, Time t2) {
-  typedef std::chrono::microseconds ms;
-  auto diff = t2 - t1;
-  ms counter = std::chrono::duration_cast<ms>(diff);
-  return counter.count() / 1000.0;
-}
-
-void test_naive(int batch_size) {
-  NativeConfig config = GetConfig();
-  auto predictor = CreatePaddlePredictor<NativeConfig>(config);
-  int height = 449;
-  int width = 581;
-
-  // =============read file list =============
-  std::ifstream infile("new_file.list");
-  std::string temp_s;
-  std::vector<std::string> all_files;
-  while (!infile.eof()) {
-    infile >> temp_s;
-    all_files.push_back(temp_s);
-  }
-
-  // size_t file_num = all_files.size();
-  infile.close();
-  // =============read file list =============
-  for (size_t f_k = 0; f_k < 1; f_k++) {
-    std::ifstream in_img(all_files[f_k]);
-    std::cout << all_files[f_k] << std::endl;
-    float temp_v;
-
-    float sum_n = 0.0;
-    std::vector<float> data;
-    while (!in_img.eof()) {
-      in_img >> temp_v;
-      data.push_back(float(temp_v));
-      // std::cout << temp_v << " ";
-      sum_n += temp_v;
-    }
-
-    in_img.close();
-    std::cout << "sum: " << sum_n << std::endl;
-
-    PaddleTensor tensor;
-    tensor.shape = std::vector<int>({batch_size, 3, height, width});
-    tensor.data.Resize(sizeof(float) * batch_size * 3 * height * width);
-    std::copy(data.begin(), data.end(),
-              static_cast<float*>(tensor.data.data()));
-    tensor.dtype = PaddleDType::FLOAT32;
-    std::vector<PaddleTensor> paddle_tensor_feeds(1, tensor);
-    PaddleTensor tensor_out;
-
-    std::vector<PaddleTensor> outputs(1, tensor_out);
-    // predictor->Run(paddle_tensor_feeds, &outputs, batch_size);
-    std::cout << "start predict123:" << std::endl;
-    auto time1 = time();
-    int steps = 100;
-    for (size_t i = 0; i < steps; i++) {
-      if (i == 5) time1 = time();
-      predictor->Run(paddle_tensor_feeds, &outputs, batch_size);
-    }
-
-    auto time2 = time();
-    std::ofstream ofresult("naive_test_result.txt", std::ios::app);
-
-    std::cout << "batch: " << batch_size
-              << " predict cost: " << time_diff(time1, time2) / steps << "ms"
-              << std::endl;
-    std::cout << outputs.size() << std::endl;
-    int64_t* data_o = static_cast<int64_t*>(outputs[0].data.data());
-    int64_t sum_out = 0;
-    for (size_t j = 0; j < outputs[0].data.length() / sizeof(int64_t); ++j) {
-      ofresult << std::to_string(data_o[j]) << " ";
-      sum_out += data_o[j];
-    }
-    std::cout << "sum_out " << sum_out << std::endl;
-    ofresult << std::endl;
-    ofresult.close();
-  }
-}
-
-}  // namespace paddle
-
-int main(int argc, char** argv) {
-  //  google::ParseCommandLineFlags(&argc, &argv, true);
-  paddle::test_naive(1 << 0);
-  return 0;
-}
--- a/paddle/fluid/inference/api/demo_ci/thread_icnet_test.cc
+++ b/paddle/fluid/inference/api/demo_ci/thread_icnet_test.cc
-// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#define GOOGLE_GLOG_DLL_DECL
-
-#include <gflags/gflags.h>
-#include <glog/logging.h>
-//#include <gtest/gtest.h>
-#include <chrono>
-#include <fstream>
-#include <iostream>
-#include <thread>  // NOLINT
-#include <utility>
-#include "paddle/fluid/inference/api/paddle_inference_api.h"
-
-#define ASSERT_TRUE(x) x
-#define ASSERT_EQ(x, y) assert(x == y)
-
-// DEFINE_string(dirname, "./LB_icnet_model",
-//               "Directory of the inference model.");
-namespace paddle {
-NativeConfig GetConfig() {
-  NativeConfig config;
-  config.prog_file = "./hs_lb_without_bn_cuda/__model__";
-  config.param_file = "./hs_lb_without_bn_cuda/__params__";
-  config.fraction_of_gpu_memory = 0.0;
-  config.use_gpu = true;
-  config.device = 0;
-  return config;
-}
-
-using Time = decltype(std::chrono::high_resolution_clock::now());
-Time time() { return std::chrono::high_resolution_clock::now(); };
-double time_diff(Time t1, Time t2) {
-  typedef std::chrono::microseconds ms;
-  auto diff = t2 - t1;
-  ms counter = std::chrono::duration_cast<ms>(diff);
-  return counter.count() / 1000.0;
-}
-
-void test_naive(int batch_size, std::string model_path) {
-  NativeConfig config = GetConfig();
-  int height = 449;
-  int width = 581;
-  std::vector<float> data;
-  for (int i = 0; i < 3 * height * width; ++i) {
-    data.push_back(0.0);
-  }
-
-  // read data
-  // std::ifstream infile("new_file.list");
-  // std::string temp_s;
-  // std::vector<std::string> all_files;
-  // while (!infile.eof()) {
-  //   infile >> temp_s;
-  //   all_files.push_back(temp_s);
-  // }
-
-  // // size_t file_num = all_files.size();
-  // infile.close();
-  // // =============read file list =============
-  // for (size_t f_k = 0; f_k < 1; f_k++) {
-  //   std::ifstream in_img(all_files[f_k]);
-  //   std::cout << all_files[f_k] << std::endl;
-  //   float temp_v;
-
-  //   float sum_n = 0.0;
-  //   std::vector<float> data;
-  //   while (!in_img.eof()) {
-  //     in_img >> temp_v;
-  //     data.push_back(float(temp_v));
-
-  //     sum_n += temp_v;
-  //   }
-  //   in_img.close();
-  //   std::cout << "sum: " << sum_n << std::endl;
-
-  PaddleTensor tensor;
-  tensor.shape = std::vector<int>({batch_size, 3, height, width});
-  tensor.data.Resize(sizeof(float) * batch_size * 3 * height * width);
-  std::copy(data.begin(), data.end(), static_cast<float*>(tensor.data.data()));
-  tensor.dtype = PaddleDType::FLOAT32;
-  std::vector<PaddleTensor> paddle_tensor_feeds(1, tensor);
-
-  constexpr int num_jobs = 5;  // each job run 1 batch
-  std::vector<std::thread> threads;
-  // using PtrPred = std::vector<std::unique_ptr<PaddlePredictor>>;
-  std::vector<std::unique_ptr<PaddlePredictor>> predictors;
-  for (int tid = 0; tid < num_jobs; ++tid) {
-    auto& pred = CreatePaddlePredictor<NativeConfig>(config);
-    predictors.emplace_back(std::move(pred));
-  }
-
-  using namespace std::chrono_literals;
-  // std::this_thread::sleep_for(std::chrono::seconds(20));
-  std::cout << "before start predict";
-
-  int epoches = 100000;
-  for (int tid = 0; tid < num_jobs; ++tid) {
-    threads.emplace_back([&, tid]() {
-      // auto predictor = CreatePaddlePredictor<NativeConfig>(config);
-      auto& predictor = predictors[tid];
-      // auto& predictor = predictors[tid];
-      // auto predictor = preds[tid];
-      // std::this_thread::sleep_for(std::chrono::seconds(20));
-      PaddleTensor tensor_out;
-      std::vector<PaddleTensor> outputs(1, tensor_out);
-      for (size_t i = 0; i < epoches; i++) {
-        ASSERT_TRUE(predictor->Run(paddle_tensor_feeds, &outputs));
-        VLOG(0) << "tid : " << tid << " run: " << i << "finished";
-        // std::cout <<"tid : " << tid << " run: " << i << "finished" <<
-        // std::endl;
-        ASSERT_EQ(outputs.size(), 1UL);
-        // int64_t* data_o = static_cast<int64_t*>(outputs[0].data.data());
-        // int64_t sum_out = 0;
-        // for (size_t j = 0; j < outputs[0].data.length() / sizeof(int64_t);
-        //      ++j) {
-        //   sum_out += data_o[j];
-        // }
-        // std::cout << "tid : " << tid << "pass : " << i << " " << sum_out
-        //           << std::endl;
-      }
-    });
-  }
-  for (int i = 0; i < num_jobs; ++i) {
-    threads[i].join();
-  }
-}
-// }
-}  // namespace paddle
-
-int main(int argc, char** argv) {
-  paddle::test_naive(1 << 0, "");
-  return 0;
-}
--- a/paddle/fluid/operators/conv_op.cc
+++ b/paddle/fluid/operators/conv_op.cc
@@ -11,9 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#define GLOG_NO_ABBREVIATED_SEVERITIES
-#define GOOGLE_GLOG_DLL_DECL
-#include <glog/logging.h>

 #include "paddle/fluid/operators/conv_op.h"

@@ -38,7 +35,6 @@ void ConvOp::InferShape(framework::InferShapeContext* ctx) const {
  PADDLE_ENFORCE(ctx->HasOutput("Output"),
                 "Output(Output) of ConvOp should not be null.");

-  VLOG(3) << "Conv op infershape";
  auto in_dims = ctx->GetInputDim("Input");
  auto filter_dims = ctx->GetInputDim("Filter");

@@ -46,51 +42,32 @@ void ConvOp::InferShape(framework::InferShapeContext* ctx) const {
  std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
  int groups = ctx->Attrs().Get<int>("groups");
  std::vector<int> dilations = ctx->Attrs().Get<std::vector<int>>("dilations");
-  VLOG(3) << "Conv op Before check";
-  in_dims.size() == 4 || in_dims.size() == 5;
-  // PADDLE_ENFORCE(in_dims.size() == 4 || in_dims.size() == 5,
-  //               "Conv intput should be 4-D or 5-D tensor.");
-  VLOG(3) << "check0";
-
-  // PADDLE_ENFORCE_EQ(
-  //    in_dims.size(), filter_dims.size(),
-  //    "Conv input dimension and filter dimension should be the same.");
-  in_dims.size() == filter_dims.size();
-  VLOG(3) << "enforce check0";
+
+  PADDLE_ENFORCE(in_dims.size() == 4 || in_dims.size() == 5,
+                 "Conv intput should be 4-D or 5-D tensor.");
+  PADDLE_ENFORCE_EQ(
+      in_dims.size(), filter_dims.size(),
+      "Conv input dimension and filter dimension should be the same.");
  PADDLE_ENFORCE(
      in_dims.size() - strides.size() == 2U,
      "Conv input dimension and strides dimension should be consistent.");
-  VLOG(3) << "check1";
  PADDLE_ENFORCE_EQ(
      paddings.size(), strides.size(),
      "Conv paddings dimension and Conv strides dimension should be the same.");

-  VLOG(3) << "check2";
-  // in_dims[1] == filter_dims[1] * groups;
-  // PADDLE_ENFORCE_EQ(in_dims[1], filter_dims[1] * groups,
-  //                  "The number of input channels should be equal to filter "
-  //                  "channels * groups.");
-  VLOG(3) << "check3";
-  // filter_dims[0] % groups == 0 ;
-  // PADDLE_ENFORCE_EQ(
-  //    filter_dims[0] % groups, 0,
-  //    "The number of output channels should be divided by groups.");
-  VLOG(3) << "filter" << filter_dims.size();
-  VLOG(3) << "filter" << filter_dims[0];
-  VLOG(3) << "check4";
-  VLOG(3) << "filter" << filter_dims[1];
-  VLOG(3) << "dims" << in_dims[0];
+  PADDLE_ENFORCE_EQ(in_dims[1], filter_dims[1] * groups,
+                    "The number of input channels should be equal to filter "
+                    "channels * groups.");
+  PADDLE_ENFORCE_EQ(
+      filter_dims[0] % groups, 0,
+      "The number of output channels should be divided by groups.");

  std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
-  VLOG(3) << "output shape";
  for (size_t i = 0; i < strides.size(); ++i) {
-    VLOG(3) << "check5";
    output_shape.push_back(ConvOutputSize(in_dims[i + 2], filter_dims[i + 2],
                                          dilations[i], paddings[i],
                                          strides[i]));
-    VLOG(3) << "check pass";
  }
-  VLOG(3) << "Conv InferShape Pass";
  ctx->SetOutputDim("Output", framework::make_ddim(output_shape));
  ctx->ShareLoD("Input", "Output");
 }