From c3e1fb5a3e708bf164d09450f83fb30c4fde8e3f Mon Sep 17 00:00:00 2001
From: dzhwinter
Date: Wed, 12 Sep 2018 13:35:19 +0800
Subject: [PATCH] add demo

---
 CMakeLists.txt                                |   5 +-
 cmake/configure.cmake                         |  21 +-
 cmake/external/boost.cmake                    |   7 +-
 cmake/external/gflags.cmake                   |   2 +
 cmake/external/glog.cmake                     |   2 +-
 cmake/external/gtest.cmake                    |   2 +-
 cmake/external/openblas.cmake                 |   1 +
 cmake/flags.cmake                             |  21 +-
 paddle/fluid/framework/CMakeLists.txt         |  12 ++
 paddle/fluid/framework/eigen.h                |   3 +
 paddle/fluid/framework/op_registry.h          |   1 +
 paddle/fluid/framework/operator.cc            |   3 +
 paddle/fluid/framework/tensor.h               |   1 +
 paddle/fluid/inference/CMakeLists.txt         |   3 +-
 paddle/fluid/inference/api/api_impl.cc        |  13 +-
 paddle/fluid/inference/api/api_impl.h         |   6 +
 .../inference/api/demo_ci/CMakeLists.txt      |  46 ++++-
 .../inference/api/demo_ci/inference_icnet.cc  | 184 ++++++++++++++++++
 paddle/fluid/inference/api/demo_ci/run.sh     |  54 ++---
 .../inference/api/paddle_inference_api.h      |  23 +--
 .../fluid/memory/detail/system_allocator.cc   |   1 +
 paddle/fluid/operators/CMakeLists.txt         |   3 +-
 .../fluid/operators/elementwise_op_function.h |   5 +-
 paddle/fluid/operators/lstm_unit_op.h         |   2 +-
 paddle/fluid/operators/print_op.cc            |   1 +
 paddle/fluid/platform/enforce.h               |   1 +
 paddle/fluid/platform/init.cc                 |   2 +
 paddle/fluid/platform/init.h                  |   3 +
 paddle/fluid/platform/macros.h                |  10 +
 paddle/fluid/platform/port.h                  |   1 +
 30 files changed, 362 insertions(+), 77 deletions(-)
 create mode 100644 paddle/fluid/inference/api/demo_ci/inference_icnet.cc

diff --git a/CMakeLists.txt b/CMakeLists.txt
index bc020792a6..11f543d4ba 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -26,6 +26,7 @@ message(STATUS "C compiler: ${CMAKE_C_COMPILER}, version: "
         "${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}")
 if(WIN32)
   set(CMAKE_STATIC_LIBRARY_PREFIX lib)
+  set(CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS} "/MT") # use the static multithreaded runtime library
 endif(WIN32)
 
 if(NOT CMAKE_CROSSCOMPILING)
@@ -33,7 +34,7 @@ if(NOT CMAKE_CROSSCOMPILING)
 endif(NOT CMAKE_CROSSCOMPILING)
 find_package(Git REQUIRED)
 find_package(Threads REQUIRED)
-
+include(flags) # set paddle compile flags
 include(simd)
 
 ################################ Configurations #######################################
@@ -206,8 +207,6 @@ endif()
 
 include(external/threadpool)
-
-include(flags) # set paddle compile flags
 include(cudnn) # set cudnn libraries, must before configure
 include(configure) # add paddle env configuration
 
diff --git a/cmake/configure.cmake b/cmake/configure.cmake
index ce1857582b..42ad79aac2 100644
--- a/cmake/configure.cmake
+++ b/cmake/configure.cmake
@@ -62,8 +62,27 @@ if(NOT CMAKE_CROSSCOMPILING)
 endif()
 
 if(WIN32)
-  # windows stupid compile option for all targets.
+  # windows header option for all targets.
   add_definitions(-D_XKEYCHECK_H)
+  # Use response files instead of absolute paths to shorten the CMake link command.
+  SET(CMAKE_C_USE_RESPONSE_FILE_FOR_LIBRARIES 1)
+  SET(CMAKE_CXX_USE_RESPONSE_FILE_FOR_LIBRARIES 1)
+  SET(CMAKE_C_USE_RESPONSE_FILE_FOR_OBJECTS 1)
+  SET(CMAKE_CXX_USE_RESPONSE_FILE_FOR_OBJECTS 1)
+  SET(CMAKE_C_USE_RESPONSE_FILE_FOR_INCLUDES 1)
+  SET(CMAKE_CXX_USE_RESPONSE_FILE_FOR_INCLUDES 1)
+  SET(CMAKE_C_RESPONSE_FILE_LINK_FLAG "@")
+  SET(CMAKE_CXX_RESPONSE_FILE_LINK_FLAG "@")
+
+  # Specify the program to use when building static libraries
+  SET(CMAKE_C_CREATE_STATIC_LIBRARY " lib ")
+  SET(CMAKE_CXX_CREATE_STATIC_LIBRARY " lib ")
+
+  # set definition for the dll export
+  if (NOT MSVC)
+    message(FATAL_ERROR "The Windows build only supports MSVC,
which is bound to the NVIDIA nvcc compiler.")
+  endif(NOT MSVC)
+  add_definitions(/DPADDLE_COMPILE)
 endif(WIN32)
 
 if(NOT WITH_GOLANG)

diff --git a/cmake/external/boost.cmake b/cmake/external/boost.cmake
index 497764e0ef..65f55b64ca 100644
--- a/cmake/external/boost.cmake
+++ b/cmake/external/boost.cmake
@@ -46,14 +46,9 @@ ExternalProject_Add(
     ${BOOST_PROJECT}
     ${EXTERNAL_PROJECT_LOG_ARGS}
     DOWNLOAD_DIR ${BOOST_DOWNLOAD_DIR}
-<<<<<<< HEAD
-    DOWNLOAD_COMMAND "wget --no-check-certificate ${BOOST_URL} -c -q -O ${BOOST_TAR}.tar.gz
-    && tar zxf ${BOOST_TAR}.tar.gz"
-=======
     DOWNLOAD_COMMAND wget --no-check-certificate ${BOOST_URL} -c -q -O ${BOOST_TAR}.tar.gz
                      && tar zxf ${BOOST_TAR}.tar.gz
->>>>>>> origin/develop
-    DOWNLOAD_NO_PROGRESS 1
+DOWNLOAD_NO_PROGRESS 1
     PREFIX ${BOOST_SOURCES_DIR}
     CONFIGURE_COMMAND ""
     BUILD_COMMAND ""

diff --git a/cmake/external/gflags.cmake b/cmake/external/gflags.cmake
index cf58cc3976..d9aa10c532 100644
--- a/cmake/external/gflags.cmake
+++ b/cmake/external/gflags.cmake
@@ -35,7 +35,9 @@ ExternalProject_Add(
     CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
                -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
                -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
+               -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
                -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
+               -DBUILD_STATIC_LIBS=ON
                -DCMAKE_INSTALL_PREFIX=${GFLAGS_INSTALL_DIR}
                -DCMAKE_POSITION_INDEPENDENT_CODE=ON
                -DBUILD_TESTING=OFF

diff --git a/cmake/external/glog.cmake b/cmake/external/glog.cmake
index 25ef2970ac..a205d4ec77 100644
--- a/cmake/external/glog.cmake
+++ b/cmake/external/glog.cmake
@@ -34,7 +34,6 @@ ELSE()
     SET(GLOG_REPOSITORY "https://github.com/google/glog.git")
     SET(GLOG_TAG "v0.3.5")
 ENDIF()
-
 ExternalProject_Add(
     extern_glog
     ${EXTERNAL_PROJECT_LOG_ARGS}
@@ -46,6 +45,7 @@ ExternalProject_Add(
     CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
                -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
                -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
+               -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
                -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
                -DCMAKE_INSTALL_PREFIX=${GLOG_INSTALL_DIR}
                -DCMAKE_INSTALL_LIBDIR=${GLOG_INSTALL_DIR}/lib

diff --git a/cmake/external/gtest.cmake b/cmake/external/gtest.cmake
index d335298742..bfb04916dc 100644
--- a/cmake/external/gtest.cmake
+++ b/cmake/external/gtest.cmake
@@ -51,6 +51,7 @@ IF(WITH_TESTING)
                -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
                -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
                -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
+               -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
                -DCMAKE_INSTALL_PREFIX=${GTEST_INSTALL_DIR}
                -DCMAKE_POSITION_INDEPENDENT_CODE=ON
                -DBUILD_GMOCK=ON
@@ -70,6 +71,5 @@ IF(WITH_TESTING)
     ADD_LIBRARY(gtest_main STATIC IMPORTED GLOBAL)
     SET_PROPERTY(TARGET gtest_main PROPERTY IMPORTED_LOCATION ${GTEST_MAIN_LIBRARIES})
     ADD_DEPENDENCIES(gtest_main extern_gtest)
-
     LIST(APPEND external_project_dependencies gtest gtest_main)
 ENDIF(WITH_TESTING)

diff --git a/cmake/external/openblas.cmake b/cmake/external/openblas.cmake
index c3fbe4dbdb..5509817680 100644
--- a/cmake/external/openblas.cmake
+++ b/cmake/external/openblas.cmake
@@ -124,6 +124,7 @@ INCLUDE_DIRECTORIES(${CBLAS_INCLUDE_DIR})
 # linear algebra libraries for cc_library(xxx SRCS xxx.c DEPS cblas)
 SET(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/cblas_dummy.c)
 FILE(WRITE ${dummyfile} "const char *dummy_cblas = \"${dummyfile}\";")
+
 ADD_LIBRARY(cblas STATIC ${dummyfile})
 
 IF("${CBLAS_PROVIDER}" STREQUAL "MKLML")

diff --git a/cmake/flags.cmake b/cmake/flags.cmake
index 683da7f6e4..cf0ca71d12 100644
--- a/cmake/flags.cmake
+++ b/cmake/flags.cmake
@@ -70,6 +70,20 @@ macro(safe_set_nvflag flag_name)
 endif()
 endmacro()
 
+macro(safe_set_static_flag) # set c_flags and cxx_flags to static or shared
+  if (BUILD_SHARED_LIBS)
+    return() # when building shared libs, keep the default '/MD' runtime flags
+  endif(BUILD_SHARED_LIBS)
+  foreach(flag_var
+      CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
+      CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO
+      CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE
+      CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO)
+    if(${flag_var} MATCHES "/MD")
+      string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}")
+    endif(${flag_var} MATCHES "/MD")
+  endforeach(flag_var)
+endmacro()
 
 CHECK_CXX_SYMBOL_EXISTS(UINT64_MAX "stdint.h" UINT64_MAX_EXISTS)
 if(NOT UINT64_MAX_EXISTS)
@@ -133,7 +147,8 @@ set(GPU_COMMON_FLAGS
 else(NOT WIN32)
 set(COMMON_FLAGS
-    "/w") #disable all warnings
+    "/w") #disable all warnings.
+
 set(GPU_COMMON_FLAGS
     "") #disable all warnings
@@ -167,3 +182,7 @@ endforeach()
 foreach(flag ${GPU_COMMON_FLAGS})
     safe_set_nvflag(${flag})
 endforeach()
+
+if(MSVC)
+  safe_set_static_flag()
+endif(MSVC)
\ No newline at end of file

diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt
index b344661f18..1e36114c67 100644
--- a/paddle/fluid/framework/CMakeLists.txt
+++ b/paddle/fluid/framework/CMakeLists.txt
@@ -10,10 +10,22 @@ function(windows_symbolic TARGET)
     if (NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${src}.cc OR NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${src}.cu)
       message(FATAL " ${src}.cc and ${src}.cu must exsits, and ${src}.cu must be symbolic file.")
     endif()
+
+    # only copy the xx.cu to .xx.cu when the content is modified
+    set(copy_flag 1)
+    if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/.${src}.cu)
+      file(READ ${CMAKE_CURRENT_SOURCE_DIR}/${src}.cc SOURCE_STR)
+      file(READ ${CMAKE_CURRENT_SOURCE_DIR}/.${src}.cu TARGET_STR)
+      if (SOURCE_STR STREQUAL TARGET_STR)
+        set(copy_flag 0)
+      endif()
+    endif()
+    if (copy_flag)
     add_custom_command(OUTPUT .${src}.cu
       COMMAND ${CMAKE_COMMAND} -E remove ${CMAKE_CURRENT_SOURCE_DIR}/.${src}.cu
       COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/${src}.cc" "${CMAKE_CURRENT_SOURCE_DIR}/.${src}.cu"
       COMMENT "create hidden file of ${src}.cu")
+    endif(copy_flag)
     add_custom_target(${TARGET} ALL DEPENDS .${src}.cu)
   endforeach()
 endfunction()

diff --git a/paddle/fluid/framework/eigen.h b/paddle/fluid/framework/eigen.h
index f13e9d3cc2..2b265a773f 100644
--- a/paddle/fluid/framework/eigen.h
+++ b/paddle/fluid/framework/eigen.h
@@ -15,6 +15,9 @@ limitations under the License. */
 #pragma once
 // logging.h and windows.h conflict
 #define GLOG_NO_ABBREVIATED_SEVERITIES
+// solve static linking error in windows
+// https://github.com/google/glog/issues/301
+#define GOOGLE_GLOG_DLL_DECL
 
 #include "paddle/fluid/framework/tensor.h"
 #include "unsupported/Eigen/CXX11/Tensor"

diff --git a/paddle/fluid/framework/op_registry.h b/paddle/fluid/framework/op_registry.h
index 2bd2dd4200..ef2eb334a4 100644
--- a/paddle/fluid/framework/op_registry.h
+++ b/paddle/fluid/framework/op_registry.h
@@ -25,6 +25,7 @@ limitations under the License.
*/ #if defined(_WIN32) #define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h +#define GOOGLE_GLOG_DLL_DECL #endif #include "glog/logging.h" // For VLOG() diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index d58d6e4f3e..73306912ce 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -11,6 +11,9 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#define GLOG_NO_ABBREVIATED_SEVERITIES +#define GOOGLE_GLOG_DLL_DECL + #include #include diff --git a/paddle/fluid/framework/tensor.h b/paddle/fluid/framework/tensor.h index bb52787b4b..ff25d7b961 100644 --- a/paddle/fluid/framework/tensor.h +++ b/paddle/fluid/framework/tensor.h @@ -22,6 +22,7 @@ limitations under the License. */ #if defined(_WIN32) #define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h +#define GOOGLE_GLOG_DLL_DECL #endif #include "paddle/fluid/framework/data_layout.h" diff --git a/paddle/fluid/inference/CMakeLists.txt b/paddle/fluid/inference/CMakeLists.txt index 0b515b79c6..f275af5509 100644 --- a/paddle/fluid/inference/CMakeLists.txt +++ b/paddle/fluid/inference/CMakeLists.txt @@ -26,8 +26,9 @@ cc_library(paddle_fluid_origin DEPS ${fluid_modules} paddle_fluid_api) #endif() # Create static library - +if (WIN32) cc_library(paddle_fluid DEPS ${fluid_modules} ${fluid_third_partys} paddle_fluid_api paddle_inference_api) +endif(WIN32) if(NOT APPLE) # TODO(liuyiqu: Temporarily disable the link flag because it is not support on Mac. set(LINK_FLAGS "-Wl,--retain-symbols-file ${CMAKE_CURRENT_SOURCE_DIR}/paddle_fluid.sym") diff --git a/paddle/fluid/inference/api/api_impl.cc b/paddle/fluid/inference/api/api_impl.cc index bc939f417b..0ce78b3965 100644 --- a/paddle/fluid/inference/api/api_impl.cc +++ b/paddle/fluid/inference/api/api_impl.cc @@ -26,18 +26,8 @@ limitations under the License. 
*/ #include "paddle/fluid/platform/profiler.h" DEFINE_bool(profile, false, "Turn on profiler for fluid"); -using Timer = paddle::inference::Timer; namespace paddle { -namespace { - -template -std::string num2str(T a) { - std::stringstream istr; - istr << a; - return istr.str(); -} -} // namespace void NativePaddlePredictor::PrepareFeedFetch() { for (auto *op : inference_program_->Block(0).AllOps()) { @@ -130,6 +120,7 @@ bool NativePaddlePredictor::Run(const std::vector &inputs, std::vector *output_data, int batch_size) { VLOG(3) << "Predictor::predict"; + using Timer = paddle::inference::Timer; Timer timer; timer.tic(); // set feed variable @@ -307,7 +298,7 @@ std::unique_ptr CreatePaddlePredictor< config.fraction_of_gpu_memory <= 0.95f) { flags.push_back("dummpy"); std::string flag = "--fraction_of_gpu_memory_to_use=" + - num2str(config.fraction_of_gpu_memory); + std::to_string(config.fraction_of_gpu_memory); flags.push_back(flag); VLOG(3) << "set flag: " << flag; framework::InitGflags(flags); diff --git a/paddle/fluid/inference/api/api_impl.h b/paddle/fluid/inference/api/api_impl.h index ec801c5885..6386d60126 100644 --- a/paddle/fluid/inference/api/api_impl.h +++ b/paddle/fluid/inference/api/api_impl.h @@ -14,6 +14,12 @@ #pragma once +// logging.h and windows.h conflict +#define GLOG_NO_ABBREVIATED_SEVERITIES +// solve static linking error in windows +// https://github.com/google/glog/issues/301 +#define GOOGLE_GLOG_DLL_DECL + #include #include #include diff --git a/paddle/fluid/inference/api/demo_ci/CMakeLists.txt b/paddle/fluid/inference/api/demo_ci/CMakeLists.txt index afb46a7139..f161550655 100644 --- a/paddle/fluid/inference/api/demo_ci/CMakeLists.txt +++ b/paddle/fluid/inference/api/demo_ci/CMakeLists.txt @@ -1,13 +1,31 @@ cmake_minimum_required(VERSION 3.0) - project(cpp_inference_demo CXX C) +option(WITH_MKL "Compile demo with MKL/OpenBlas support, default use MKL." ON) +option(WITH_GPU "Compile demo with GPU/CPU, default use CPU." OFF) +option(WITH_STATIC_LIB "Compile demo with static/shared library, default use static." ON) + +macro(safe_set_static_flag) + foreach(flag_var + CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE + CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO) + if(${flag_var} MATCHES "/MD") + string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}") + endif(${flag_var} MATCHES "/MD") + endforeach(flag_var) +endmacro() -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") if (WIN32) -set(CMAKE_STATIC_LIBRARY_PREFIX "lib") + if (WITH_STATIC_LIB) + safe_set_static_flag() + set(CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS} "/w") + set(CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE} "/w") + endif() + set(CMAKE_STATIC_LIBRARY_PREFIX "lib") else() -set(CMAKE_STATIC_LIBRARY_PREFIX "") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") + set(CMAKE_STATIC_LIBRARY_PREFIX "") endif() +message("flags" ${CMAKE_CXX_FLAGS}) if(NOT DEFINED PADDLE_LIB) message(FATAL_ERROR "please set PADDLE_LIB with -DPADDLE_LIB=/path/paddle/lib") @@ -16,14 +34,18 @@ if(NOT DEFINED DEMO_NAME) message(FATAL_ERROR "please set DEMO_NAME with -DDEMO_NAME=demo_name") endif() -option(WITH_MKL "Compile demo with MKL/OpenBlas support, default use MKL." ON) -option(WITH_GPU "Compile demo with GPU/CPU, default use CPU." OFF) -option(WITH_STATIC_LIB "Compile demo with static/shared library, default use static." 
ON)
 
 if(WITH_GPU)
-  set(CUDA_LIB "/usr/local/cuda/lib64/" CACHE STRING "CUDA Library")
+  if(NOT WIN32)
+    set(CUDA_LIB "/usr/local/cuda/lib64/" CACHE STRING "CUDA Library")
+  else()
+    if(CUDA_LIB STREQUAL "")
+      set(CUDA_LIB "C:\\Program\ Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v8.0\\lib\\x64")
+    endif()
+  endif(NOT WIN32)
 endif()
 
+include_directories("D:/Paddle/")
 include_directories("${PADDLE_LIB}")
 include_directories("${PADDLE_LIB}/third_party/install/protobuf/include")
 include_directories("${PADDLE_LIB}/third_party/install/glog/include")
@@ -83,10 +105,16 @@ set(DEPS ${DEPS}
     ${MATH_LIB} ${MKLDNN_LIB}
     ${CMAKE_STATIC_LIBRARY_PREFIX}glog ${CMAKE_STATIC_LIBRARY_PREFIX}gflags ${CMAKE_STATIC_LIBRARY_PREFIX}protobuf ${EXTERNAL_LIB})
+# NOTE(dzhwinter) shlwapi is deprecated.
+set(DEPS ${DEPS} libcmt shlwapi)
 endif(NOT WIN32)
 
 if(WITH_GPU)
-  set(DEPS ${DEPS} ${CUDA_LIB}/libcudart${CMAKE_SHARED_LIBRARY_SUFFIX})
+  if(NOT WIN32)
+    set(DEPS ${DEPS} ${CUDA_LIB}/libcudart${CMAKE_SHARED_LIBRARY_SUFFIX})
+  else()
+    set(DEPS ${DEPS} ${CUDA_LIB}/cudart${CMAKE_STATIC_LIBRARY_SUFFIX})
+  endif()
 endif()
 
 target_link_libraries(${DEMO_NAME} ${DEPS})

diff --git a/paddle/fluid/inference/api/demo_ci/inference_icnet.cc b/paddle/fluid/inference/api/demo_ci/inference_icnet.cc
new file mode 100644
index 0000000000..5e06c3161e
--- /dev/null
+++ b/paddle/fluid/inference/api/demo_ci/inference_icnet.cc
@@ -0,0 +1,184 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+/*
+ * This file contains a simple demo for how to take a model for inference.
+ */
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include <thread>  // NOLINT
+#include "paddle/fluid/inference/paddle_inference_api.h"
+
+std::string DIRNAME = "";  /* "Directory of the inference model." */  // NOLINT
+bool USE_GPU = false;      /* "Whether to use the GPU." */
+
+auto message_err = []() {
+  std::cout << "Copyright (c) 2018 PaddlePaddle Authors." << std::endl;
+  std::cout << "Demo Case for windows inference. "
+            << "\n"
+            << "Usage: enter the model path and use_gpu as prompted; "
+            << "the demo then runs inference and prints the result."
+            << std::endl;
+  std::cout << std::endl;
+};
+
+void ParseArgs() {
+  message_err();
+  std::cout << "DIRNAME:[D:/Paddle/xxx/path_to_model_dir]" << std::endl;
+  std::cin >> DIRNAME;
+  std::cout << "USE_GPU:[yes|no]";
+  std::string value;
+  std::cin >> value;
+  std::transform(value.begin(), value.end(), value.begin(), ::toupper);
+  USE_GPU = (value == "YES") ? true : false;
+}
+
+namespace paddle {
+namespace demo {
+std::string ToString(const NativeConfig& config) {
+  std::stringstream ss;
+  ss << "Use GPU : " << (config.use_gpu ? "True" : "False") << "\n"
+     << "Device : " << config.device << "\n"
+     << "fraction_of_gpu_memory : " << config.fraction_of_gpu_memory << "\n"
+     << "specify_input_name : "
+     << (config.specify_input_name ? "True" : "False") << "\n"
+     << "Program File : " << config.prog_file << "\n"
+     << "Param File : " << config.param_file;
+  return ss.str();
+}
+
+void Main(bool use_gpu) {
+  //# 1. Create PaddlePredictor with a config.
+  NativeConfig config;
+  config.model_dir = DIRNAME;
+  config.use_gpu = use_gpu;
+  config.fraction_of_gpu_memory = 0.15;
+  config.device = 0;
+  std::cout << ToString(config) << std::endl;
+  auto predictor =
+      CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
+
+  for (int batch_id = 0; batch_id < 3; batch_id++) {
+    //# 2. Prepare input.
+    int64_t data[4] = {1, 2, 3, 4};
+
+    PaddleTensor tensor;
+    tensor.shape = std::vector<int>({4, 1});
+    tensor.data = PaddleBuf(data, sizeof(data));
+    tensor.dtype = PaddleDType::INT64;
+
+    // For simplicity, we set all the slots with the same data.
+    std::vector<PaddleTensor> slots(4, tensor);
+
+    //# 3. Run
+    std::vector<PaddleTensor> outputs;
+    assert(predictor->Run(slots, &outputs) == true &&
+           "Predict run expect true");
+
+    //# 4. Get output.
+    assert(outputs.size() == 1UL);
+    // Check the output buffer size and result of each tid.
+    assert(outputs.front().data.length() == 33168UL);
+    float result[5] = {0.00129761, 0.00151112, 0.000423564, 0.00108815,
+                       0.000932706};
+    const size_t num_elements = outputs.front().data.length() / sizeof(float);
+    // The outputs' buffers are in CPU memory.
+    for (size_t i = 0; i < std::min(static_cast<size_t>(5), num_elements);
+         i++) {
+      assert(static_cast<float*>(outputs.front().data.data())[i] == result[i]);
+      std::cout << "expect the output "
+                << static_cast<float*>(outputs.front().data.data())[i]
+                << std::endl;
+    }
+  }
+}
+
+void MainThreads(int num_threads, bool USE_GPU) {
+  // Multi-threading is only supported on CPU.
+  // 0. Create PaddlePredictor with a config.
+  NativeConfig config;
+  config.model_dir = DIRNAME;
+  config.use_gpu = USE_GPU;
+  config.fraction_of_gpu_memory = 0.15;
+  config.device = 0;
+  std::cout << ToString(config) << std::endl;
+  auto main_predictor =
+      CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
+
+  std::vector<std::thread> threads;
+  for (int tid = 0; tid < num_threads; ++tid) {
+    threads.emplace_back([&, tid]() {
+      // 1. clone a predictor which shares the same parameters
+      auto predictor = main_predictor->Clone();
+      constexpr int num_batches = 3;
+      for (int batch_id = 0; batch_id < num_batches; ++batch_id) {
+        // 2. Dummy Input Data
+        int64_t data[4] = {1, 2, 3, 4};
+        PaddleTensor tensor;
+        tensor.shape = std::vector<int>({4, 1});
+        tensor.data = PaddleBuf(data, sizeof(data));
+        tensor.dtype = PaddleDType::INT64;
+
+        std::vector<PaddleTensor> inputs(4, tensor);
+        std::vector<PaddleTensor> outputs;
+        // 3. Run
+        assert(predictor->Run(inputs, &outputs) == true);
+
+        // 4. Get output.
+        assert(outputs.size() == 1UL);
+        // Check the output buffer size and result of each tid.
+        assert(outputs.front().data.length() == 33168UL);
+        float result[5] = {0.00129761, 0.00151112, 0.000423564, 0.00108815,
+                           0.000932706};
+        const size_t num_elements =
+            outputs.front().data.length() / sizeof(float);
+        // The outputs' buffers are in CPU memory.
+        for (size_t i = 0; i < std::min(static_cast<size_t>(5), num_elements);
+             i++) {
+          assert(static_cast<float*>(outputs.front().data.data())[i] ==
+                 result[i]);
+        }
+      }
+    });
+  }
+  for (int i = 0; i < num_threads; ++i) {
+    threads[i].join();
+  }
+}
+
+}  // namespace demo
+}  // namespace paddle
+
+int main(int argc, char** argv) {
+  // ParseArgs();
+  DIRNAME = "./icnet";
+  USE_GPU = true;
+  paddle::demo::Main(false /* USE_GPU*/);
+  paddle::demo::MainThreads(1, false /* USE_GPU*/);
+  paddle::demo::MainThreads(4, false /* USE_GPU*/);
+  if (USE_GPU) {
+    paddle::demo::Main(true /*USE_GPU*/);
+    paddle::demo::MainThreads(1, true /*USE_GPU*/);
+    paddle::demo::MainThreads(4, true /*USE_GPU*/);
+  }
+  system("pause");
+  return 0;
+}

diff --git a/paddle/fluid/inference/api/demo_ci/run.sh b/paddle/fluid/inference/api/demo_ci/run.sh
index 7824ef2649..639997d35a 100755
--- a/paddle/fluid/inference/api/demo_ci/run.sh
+++ b/paddle/fluid/inference/api/demo_ci/run.sh
@@ -29,13 +29,13 @@ function download() {
   fi
   cd ..
 }
-mkdir -p data
-cd data
-vis_demo_list='se_resnext50 ocr mobilenet'
-for vis_demo_name in $vis_demo_list; do
-  download $vis_demo_name
-done
-cd ..
+# mkdir -p data
+# cd data
+# vis_demo_list='se_resnext50 ocr mobilenet'
+# for vis_demo_name in $vis_demo_list; do
+#   download $vis_demo_name
+# done
+# cd ..
 
 # compile and test the demo
 mkdir -p build
@@ -63,25 +63,25 @@ for WITH_STATIC_LIB in ON OFF; do
     done
   fi
   # ---------vis_demo---------
-  rm -rf *
-  cmake .. -DPADDLE_LIB=${PADDLE_ROOT}/build/fluid_install_dir/ \
-    -DWITH_MKL=$TURN_ON_MKL \
-    -DDEMO_NAME=vis_demo \
-    -DWITH_GPU=$TEST_GPU_CPU \
-    -DWITH_STATIC_LIB=$WITH_STATIC_LIB
-  make -j
-  for use_gpu in $use_gpu_list; do
-    for vis_demo_name in $vis_demo_list; do
-      ./vis_demo \
-        --modeldir=../data/$vis_demo_name/model \
-        --data=../data/$vis_demo_name/data.txt \
-        --refer=../data/$vis_demo_name/result.txt \
-        --use_gpu=$use_gpu
-      if [ $? -ne 0 ]; then
-        echo "vis demo $vis_demo_name runs fail."
-        exit 1
-      fi
-    done
-  done
+  # rm -rf *
+  # cmake .. -DPADDLE_LIB=${PADDLE_ROOT}/build/fluid_install_dir/ \
+  #   -DWITH_MKL=$TURN_ON_MKL \
+  #   -DDEMO_NAME=vis_demo \
+  #   -DWITH_GPU=$TEST_GPU_CPU \
+  #   -DWITH_STATIC_LIB=$WITH_STATIC_LIB
+  # make -j
+  # for use_gpu in $use_gpu_list; do
+  #   for vis_demo_name in $vis_demo_list; do
+  #     ./vis_demo \
+  #       --modeldir=../data/$vis_demo_name/model \
+  #       --data=../data/$vis_demo_name/data.txt \
+  #       --refer=../data/$vis_demo_name/result.txt \
+  #       --use_gpu=$use_gpu
+  #     if [ $? -ne 0 ]; then
+  #       echo "vis demo $vis_demo_name runs fail."
+  #       exit 1
+  #     fi
+  #   done
+  # done
 done
 set +x

diff --git a/paddle/fluid/inference/api/paddle_inference_api.h b/paddle/fluid/inference/api/paddle_inference_api.h
index 1baa64c249..4b084009ff 100644
--- a/paddle/fluid/inference/api/paddle_inference_api.h
+++ b/paddle/fluid/inference/api/paddle_inference_api.h
@@ -25,6 +25,7 @@ limitations under the License. */
 #include 
 #include 
 #include 
+#include "paddle/fluid/platform/macros.h"
 
 namespace paddle {
 
@@ -33,7 +34,7 @@ enum PaddleDType {
   INT64,
 };
 
-class PaddleBuf {
+class PADDLE_DLL PaddleBuf {
  public:
   PaddleBuf() = default;
   PaddleBuf(PaddleBuf&& other);
@@ -45,7 +46,7 @@ class PaddleBuf {
   PaddleBuf(void* data, size_t length)
       : data_(data), length_(length), memory_owned_{false} {}
   // Own memory.
-  PaddleBuf(size_t length)
+  explicit PaddleBuf(size_t length)
       : data_(new char[length]), length_(length), memory_owned_(true) {}
   // Resize to `length` bytes.
   void Resize(size_t length);
@@ -64,7 +65,7 @@ class PaddleBuf {
   bool memory_owned_{true};
 };
 
-struct PaddleTensor {
+struct PADDLE_DLL PaddleTensor {
   PaddleTensor() = default;
   std::string name;  // variable name.
   std::vector<int> shape;
@@ -87,7 +88,7 @@ enum class PaddleEngineKind {
  * A simple Inference API for Paddle. Currently this API can be used by
  * non-sequence scenerios.
  */
-class PaddlePredictor {
+class PADDLE_DLL PaddlePredictor {
  public:
   struct Config;
   PaddlePredictor() = default;
@@ -96,7 +97,6 @@ class PaddlePredictor {
 
   // Predict an record.
   // The caller should be responsible for allocating and releasing the memory of
-  // `inputs`. `inputs` should be available until Run returns. Caller should be
   // responsible for the output tensor's buffer, either allocated or passed from
   // outside.
   virtual bool Run(const std::vector<PaddleTensor>& inputs,
@@ -111,12 +111,12 @@ class PaddlePredictor {
   virtual ~PaddlePredictor() = default;
 
   // The common configs for all the predictors.
-  struct Config {
+  struct PADDLE_DLL Config {
     std::string model_dir;  // path to the model directory.
   };
 };
 
-struct NativeConfig : public PaddlePredictor::Config {
+struct PADDLE_DLL NativeConfig : public PaddlePredictor::Config {
   // GPU related fields.
   bool use_gpu{false};
   int device{0};
@@ -129,7 +129,7 @@ struct NativeConfig : public PaddlePredictor::Config {
 };
 
 // Configurations for Anakin engine.
-struct AnakinConfig : public PaddlePredictor::Config {
+struct PADDLE_DLL AnakinConfig : public PaddlePredictor::Config {
   enum TargetType { NVGPU = 0, X86 };
   int device;
   std::string model_file;
@@ -137,7 +137,7 @@ struct AnakinConfig : public PaddlePredictor::Config {
   TargetType target_type;
 };
 
-struct TensorRTConfig : public NativeConfig {
+struct PADDLE_DLL TensorRTConfig : public NativeConfig {
   // Determine whether a subgraph will be executed by TRT.
   int min_subgraph_size{1};
   // While TensorRT allows an engine optimized for a given max batch size
@@ -159,8 +159,9 @@ struct TensorRTConfig : public NativeConfig {
 //
 // Similarly, each engine kind should map to a unique predictor implementation.
 template <typename ConfigT, PaddleEngineKind engine = PaddleEngineKind::kNative>
-std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(const ConfigT& config);
+PADDLE_DLL std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(
+    const ConfigT& config);
 
-int PaddleDtypeSize(PaddleDType dtype);
+PADDLE_DLL int PaddleDtypeSize(PaddleDType dtype);
 
 }  // namespace paddle

diff --git a/paddle/fluid/memory/detail/system_allocator.cc b/paddle/fluid/memory/detail/system_allocator.cc
index 1b96798d23..92849bc2c0 100644
--- a/paddle/fluid/memory/detail/system_allocator.cc
+++ b/paddle/fluid/memory/detail/system_allocator.cc
@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 #define GLOG_NO_ABBREVIATED_SEVERITIES
+#define GOOGLE_GLOG_DLL_DECL
 
 #include "paddle/fluid/memory/detail/system_allocator.h"
 
diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt
index 98e6cdc01a..50add28179 100644
--- a/paddle/fluid/operators/CMakeLists.txt
+++ b/paddle/fluid/operators/CMakeLists.txt
@@ -87,7 +87,8 @@ function(op_library TARGET)
     if (WIN32)
     # no nccl, no avx instructions ops.
foreach(windows_unsupport_op "nccl_op" "gen_nccl_id_op" "warpctc_op" "hierarchical_sigmoid_op" - "crf_decoding_op" "select_op" "lstmp_op" "gru_op" "lstm_op" "fusion_lstm_op" "cumsum_op") + "crf_decoding_op" "select_op" "lstmp_op" "gru_op" "fusion_gru_op" "lstm_op" "fusion_lstm_op" "cumsum_op" + "channel_send_op" "channel_create_op" "channel_close_op" "channel_recv_op") if ("${TARGET}" STREQUAL "${windows_unsupport_op}") return() endif() diff --git a/paddle/fluid/operators/elementwise_op_function.h b/paddle/fluid/operators/elementwise_op_function.h index 52d2de60f6..57bb20dfd3 100644 --- a/paddle/fluid/operators/elementwise_op_function.h +++ b/paddle/fluid/operators/elementwise_op_function.h @@ -14,7 +14,6 @@ limitations under the License. */ #pragma once -#include #include #include #include @@ -99,7 +98,7 @@ class MidWiseTransformIterator; template class RowwiseTransformIterator : public std::iterator { + std::ptrdiff_t, typename T *, typename T &> { public: RowwiseTransformIterator(const T *ptr, int n) : ptr_(ptr), i_(0), n_(n) {} @@ -132,7 +131,7 @@ class RowwiseTransformIterator template class MidWiseTransformIterator : public std::iterator { + T *, T &> { public: MidWiseTransformIterator(const T *ptr, int n, int post) : ptr_(ptr), i_(0), j_(0), n_(n), post_(post) {} diff --git a/paddle/fluid/operators/lstm_unit_op.h b/paddle/fluid/operators/lstm_unit_op.h index 4ead9c2293..5d1d667fe1 100644 --- a/paddle/fluid/operators/lstm_unit_op.h +++ b/paddle/fluid/operators/lstm_unit_op.h @@ -4,7 +4,7 @@ Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, diff --git a/paddle/fluid/operators/print_op.cc b/paddle/fluid/operators/print_op.cc index e7f1caf4d3..e18bc17fd6 100644 --- a/paddle/fluid/operators/print_op.cc +++ b/paddle/fluid/operators/print_op.cc @@ -13,6 +13,7 @@ limitations under the License. */ #include +#include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/var_type.h" diff --git a/paddle/fluid/platform/enforce.h b/paddle/fluid/platform/enforce.h index 29cbf6a398..78bca5cb33 100644 --- a/paddle/fluid/platform/enforce.h +++ b/paddle/fluid/platform/enforce.h @@ -21,6 +21,7 @@ limitations under the License. */ #if defined(_WIN32) #define NOMINMAX // msvc max/min macro conflict with std::min/max #define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h +#define GOOGLE_GLOG_DLL_DECL #endif #ifdef PADDLE_WITH_CUDA diff --git a/paddle/fluid/platform/init.cc b/paddle/fluid/platform/init.cc index 4c99f4be32..2a7bf87d10 100644 --- a/paddle/fluid/platform/init.cc +++ b/paddle/fluid/platform/init.cc @@ -137,7 +137,9 @@ void InitGLOG(const std::string &prog_name) { // glog will not hold the ARGV[0] inside. // Use strdup to alloc a new string. google::InitGoogleLogging(strdup(prog_name.c_str())); +#if !defined(_WIN32) google::InstallFailureSignalHandler(); +#endif } } // namespace framework diff --git a/paddle/fluid/platform/init.h b/paddle/fluid/platform/init.h index 0e30594672..992ca5e6f6 100644 --- a/paddle/fluid/platform/init.h +++ b/paddle/fluid/platform/init.h @@ -16,6 +16,9 @@ limitations under the License. 
 */
 
 #include 
 #include 
+#define GLOG_NO_ABBREVIATED_SEVERITIES
+#define GOOGLE_GLOG_DLL_DECL
+
 #include "gflags/gflags.h"
 #include "glog/logging.h"
 
diff --git a/paddle/fluid/platform/macros.h b/paddle/fluid/platform/macros.h
index 32b7efc04c..bbb1c60f09 100644
--- a/paddle/fluid/platform/macros.h
+++ b/paddle/fluid/platform/macros.h
@@ -28,3 +28,13 @@ limitations under the License. */
 #if defined(__FLT_MAX__)
 #define FLT_MAX __FLT_MAX__
 #endif  // __FLT_MAX__
+
+#ifdef _WIN32
+#ifdef PADDLE_COMPILE
+#define PADDLE_DLL __declspec(dllexport)
+#else
+#define PADDLE_DLL __declspec(dllimport)
+#endif
+#else
+#define PADDLE_DLL
+#endif

diff --git a/paddle/fluid/platform/port.h b/paddle/fluid/platform/port.h
index 85923dea07..ec681f8b2a 100644
--- a/paddle/fluid/platform/port.h
+++ b/paddle/fluid/platform/port.h
@@ -20,6 +20,7 @@
 #include 
 
 #define GLOG_NO_ABBREVIATED_SEVERITIES  // msvc conflict logging with windows.h
+#define GOOGLE_GLOG_DLL_DECL
 #include "glog/logging.h"
 
 #if !defined(_WIN32)
--
GitLab
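
Usage note: a minimal sketch of building and running the new inference_icnet demo with the WITH_MKL / WITH_GPU / WITH_STATIC_LIB options this patch adds to demo_ci/CMakeLists.txt. It mirrors the cmake invocation already used in run.sh; PADDLE_ROOT and the model path are placeholders (assumptions, not part of the patch), and the model directory follows the DIRNAME = "./icnet" hard-coded in the demo.

    #!/bin/bash
    # Assumptions: a POSIX shell, and a Paddle inference build already
    # installed to ${PADDLE_ROOT}/build/fluid_install_dir.
    PADDLE_ROOT=/path/to/Paddle        # placeholder: your Paddle checkout/build tree

    mkdir -p build && cd build
    # Configure the demo against the installed inference library.
    cmake .. -DPADDLE_LIB=${PADDLE_ROOT}/build/fluid_install_dir/ \
             -DDEMO_NAME=inference_icnet \
             -DWITH_MKL=ON \
             -DWITH_GPU=OFF \
             -DWITH_STATIC_LIB=ON
    make -j

    # The demo reads the model from ./icnet relative to the working directory.
    cp -r /path/to/icnet_model ./icnet  # placeholder model path
    ./inference_icnet

On Windows the same options apply, with CUDA_LIB defaulting to the CUDA v8.0 x64 directory when left empty, as shown in the demo_ci/CMakeLists.txt hunk above.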