[windows compile]support inference library compiling on windows (#3403)

58b2d7dd · silingtong123 · GitHub · eb4f381c · 58b2d7dd · 58b2d7dd
31 changed file
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -36,6 +36,31 @@ message(STATUS "C compiler: ${CMAKE_C_COMPILER}, version: "
        "${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}")
 message(STATUS "AR tools: ${CMAKE_AR}")

+
+if(WIN32)
+    # Global settings that only apply to the Windows build.
+    option(MSVC_STATIC_CRT "use static C Runtime library by default" ON)
+
+    set(CMAKE_SUPPRESS_REGENERATION ON)
+    set(CMAKE_STATIC_LIBRARY_PREFIX lib)
+    add_definitions("/DGOOGLE_GLOG_DLL_DECL=")
+
+    # Static CRT: append /bigobj plus /MTd (Debug) or /MT (Release) for C and C++.
+    if(MSVC_STATIC_CRT)
+      foreach(crt_lang C CXX)
+        set(CMAKE_${crt_lang}_FLAGS_DEBUG "${CMAKE_${crt_lang}_FLAGS_DEBUG} /bigobj /MTd")
+        set(CMAKE_${crt_lang}_FLAGS_RELEASE "${CMAKE_${crt_lang}_FLAGS_RELEASE} /bigobj /MT")
+      endforeach()
+    endif()
+
+    # Disable the listed compiler warnings and enable parallel compilation.
+    add_compile_options(
+      /wd4068 /wd4129 /wd4244 /wd4267 /wd4297 /wd4530 /wd4577 /wd4819 /wd4838
+      /MP)
+    message(STATUS "Using parallel compiling (/MP)")
+
+    # Append the same /IGNORE list to the static, shared and executable linker flags.
+    set(PADDLE_LINK_FLAGS "/IGNORE:4006 /IGNORE:4098 /IGNORE:4217 /IGNORE:4221")
+    foreach(link_kind STATIC SHARED EXE)
+      set(CMAKE_${link_kind}_LINKER_FLAGS "${CMAKE_${link_kind}_LINKER_FLAGS} ${PADDLE_LINK_FLAGS}")
+    endforeach()
+
+endif()
+
 if(NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
    find_package(CUDA QUIET)
 endif()
@@ -106,9 +131,16 @@ set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING

 # CMAKE_BUILD_TYPE
 if(NOT CMAKE_BUILD_TYPE)
+    if(WIN32)
+        set(CMAKE_BUILD_TYPE "Release" CACHE STRING
+        "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel"
+        FORCE)
+    else()
+    
    set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING
            "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel"
            FORCE)
+    endif()
 endif()
 message(STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}")

@@ -190,6 +222,7 @@ endif()

 include(external/mklml)     # download mklml package
 include(external/xbyak)     # download xbyak package
+
 include(external/libxsmm)   # download, build, install libxsmm
 include(external/gflags)    # download, build, install gflags
 include(external/glog)      # download, build, install glog

--- a/build.bat
+++ b/build.bat
+@echo off
+setlocal
+setlocal enabledelayedexpansion
+
+rem Configures and builds the x86 inference library with the
+rem "Visual Studio 14 2015 Win64" generator. The script prompts for
+rem vcvarsall.bat and python.exe, prepares the third-party sources, runs cmake
+rem in build.lite.x86 and builds lite\publish_inference.vcxproj with msbuild.
+
+set source_path=%~dp0
+rem  global variables
+set BUILD_EXTRA=OFF
+set BUILD_JAVA=ON
+set BUILD_PYTHON=OFF
+set BUILD_DIR=%source_path%
+set OPTMODEL_DIR=""
+set BUILD_TAILOR=OFF
+set BUILD_CV=OFF
+set "SHUTDOWN_LOG=ON"
+
+set THIRDPARTY_TAR=https://paddle-inference-dist.bj.bcebos.com/PaddleLite/third-party-05b862.tar.gz
+
+set workspace=%source_path%
+
+rem Derived paths. They have to be defined before the clean-rebuild check
+rem further down, otherwise that check tests an empty path and never fires.
+set root_dir=%workspace%
+set build_directory=%BUILD_DIR%\build.lite.x86
+set GEN_CODE_PATH_PREFIX=%build_directory%\lite\gen_code
+set DEBUG_TOOL_PATH_PREFIX=%build_directory%\lite\tools\debug
+
+:set_vcvarsall_dir
+SET /P vcvarsall_dir="Please input the path of visual studio command Prompt, such as C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat   =======>"
+set tmp_var=!vcvarsall_dir!
+call:remove_space
+rem Quoted assignment: blanks at the end of the line must not end up in the value.
+set "vcvarsall_dir=!tmp_var!"
+IF NOT EXIST "%vcvarsall_dir%" (
+    echo "------------%vcvarsall_dir% not exist------------"
+    goto set_vcvarsall_dir
+)
+
+call:prepare_thirdparty
+
+rem Always start from an empty build directory.
+if EXIST "%build_directory%" (
+    call:rm_rebuild_dir "%build_directory%"
+    md "%build_directory%"
+)
+
+rem for code gen, a source file is generated after a test, but some cmake targets depend on it.
+rem here we fake an empty file to make cmake work.
+if NOT EXIST "%GEN_CODE_PATH_PREFIX%" (
+    md "%GEN_CODE_PATH_PREFIX%"
+)
+
+type nul >"%GEN_CODE_PATH_PREFIX%\__generated_code__.cc"
+
+if NOT EXIST "%DEBUG_TOOL_PATH_PREFIX%" (
+     md "%DEBUG_TOOL_PATH_PREFIX%"
+)
+
+copy "%root_dir%\lite\tools\debug\analysis_tool.py" "%DEBUG_TOOL_PATH_PREFIX%\"
+
+rem /d also switches the drive when the script is started from another one.
+cd /d "%build_directory%"
+
+cmake .. -G "Visual Studio 14 2015 Win64" -T host=x64 ^
+            -DWITH_MKL=ON ^
+            -DWITH_MKLDNN=OFF ^
+            -DLITE_WITH_X86=ON ^
+            -DLITE_WITH_PROFILE=OFF ^
+            -DWITH_LITE=ON ^
+            -DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=OFF ^
+            -DLITE_WITH_ARM=OFF ^
+            -DWITH_GPU=OFF ^
+            -DLITE_BUILD_EXTRA=ON ^
+            -DLITE_WITH_PYTHON=ON ^
+            -DPYTHON_EXECUTABLE="%python_path%"
+
+rem Building a project that failed to configure only hides the real error.
+if errorlevel 1 (
+    echo "------------cmake configure failed------------"
+    exit /b 1
+)
+
+call "%vcvarsall_dir%" amd64
+
+rem The build output is redirected to mylog.txt in the build directory.
+msbuild /m /p:Configuration=Release lite\publish_inference.vcxproj >mylog.txt 2>&1
+goto:eof
+
+:prepare_thirdparty 
+    rem Asks for the python interpreter, unpacks the third-party archive into
+    rem the workspace (downloading it first when it is missing) and updates the
+    rem git submodules. An empty answer selects python.exe from PATH.
+    SET /P python_path="Please input the path of python.exe, such as C:\Python35\python.exe, C:\Python35\python3.exe   =======>"
+    set tmp_var=!python_path!
+    call:remove_space
+    rem Quoted assignment: blanks at the end of the line must not end up in the
+    rem value, otherwise the empty-answer test below can never succeed.
+    set "python_path=!tmp_var!"
+    if "!python_path!"=="" (
+      set python_path=python.exe
+    ) else (
+      if NOT exist "!python_path!" (
+        echo "------------!python_path! not exist------------" 
+        rem Ask again instead of continuing the build with an unusable path.
+        rem The stale value is cleared so an empty answer picks the default.
+        set "python_path="
+        goto prepare_thirdparty
+      )  
+    )
+
+    rem The interpreter is quoted so that paths containing blanks still run.
+    if  EXIST "%workspace%\third-party" (
+        if NOT EXIST "%workspace%\third-party-05b862.tar.gz" (
+            echo "The directory of third_party exists, the third-party-05b862.tar.gz not exists."            
+        ) else (
+            echo "The directory of third_party exists, the third-party-05b862.tar.gz exists."
+            call:rm_rebuild_dir "%workspace%\third-party"
+            "!python_path!" %workspace%\untar.py %source_path%\third-party-05b862.tar.gz %workspace%
+        )
+    ) else (
+        if NOT EXIST "%workspace%\third-party-05b862.tar.gz" (
+            echo "The directory of third_party not exists, the third-party-05b862.tar.gz not exists."
+            call:download_third_party
+            "!python_path!" %workspace%\untar.py %source_path%\third-party-05b862.tar.gz %workspace%
+        ) else (
+            echo "The directory of third_party not exists, the third-party-05b862.tar.gz exists."
+            "!python_path!" %workspace%\untar.py %source_path%\third-party-05b862.tar.gz %workspace%
+        )
+
+    )
+    git submodule update --init --recursive
+goto:eof
+
+:download_third_party
+rem Downloads the archive named by THIRDPARTY_TAR into the workspace root.
+rem workspace comes from %~dp0 and therefore already ends with a backslash.
+powershell.exe -Command "(New-Object System.Net.WebClient).DownloadFile('%THIRDPARTY_TAR%', '%workspace%third-party-05b862.tar.gz')"
+goto:eof
+
+:rm_rebuild_dir
+    rem Removes the directory passed as the first argument.
+    rem Files are force-deleted recursively first, then the tree itself is
+    rem removed; all output and errors of both commands are discarded.
+    del /f /s /q "%~1\*.*"  >nul 2>&1
+    rd /s /q  "%~1" >nul 2>&1
+goto:eof
+
+
+:remove_space
+rem Strips leading and trailing blanks from tmp_var in place.
+rem Callers put the text into tmp_var, call this routine and read tmp_var back.
+:remove_left_space
+rem An undefined variable would expand to the literal substring syntax, so stop here.
+if not defined tmp_var goto:eof
+if "%tmp_var:~0,1%"==" " (
+    set "tmp_var=%tmp_var:~1%"
+    goto remove_left_space
+)
+
+:remove_right_space
+if not defined tmp_var goto:eof
+if "%tmp_var:~-1%"==" " (
+    set "tmp_var=%tmp_var:~0,-1%"
+    goto remove_right_space
+)
+goto:eof
\ No newline at end of file
--- a/cmake/configure.cmake
+++ b/cmake/configure.cmake
@@ -34,6 +34,15 @@ elseif(SSE3_FOUND)
    set(SIMD_FLAG ${SSE3_FLAG})
 endif()

+if(WIN32)
+  # windows header option for all targets.
+  add_definitions(-D_XKEYCHECK_H)
+
+  # Abort configuration for non-MSVC toolchains. The mode must be FATAL_ERROR:
+  # any other first word is treated as message text and configuration continues.
+  if(NOT MSVC)
+    message(FATAL_ERROR "Windows build only support msvc. Which was binded by the nvcc compiler of NVIDIA.")
+  endif()
+endif()
+
 if(LITE_WITH_CUDA)
    add_definitions(-DLITE_WITH_CUDA)
    add_definitions(-DEIGEN_USE_GPU)

--- a/cmake/external/mklml.cmake
+++ b/cmake/external/mklml.cmake
@@ -32,6 +32,7 @@ IF(WIN32)
    SET(MKLML_LIB                 ${MKLML_LIB_DIR}/mklml.lib)
    SET(MKLML_IOMP_LIB            ${MKLML_LIB_DIR}/libiomp5md.lib)
    SET(MKLML_SHARED_LIB          ${MKLML_LIB_DIR}/mklml.dll)
+    SET(MKLML_SHARED_LIB_DEPS     ${MKLML_LIB_DIR}/msvcr120.dll)
    SET(MKLML_SHARED_IOMP_LIB     ${MKLML_LIB_DIR}/libiomp5md.dll)
 ELSEIF(APPLE)
    #TODO(intel-huying):

--- a/cmake/lite.cmake
+++ b/cmake/lite.cmake
@@ -168,8 +168,10 @@ function(lite_cc_library TARGET)
    else()
        cc_library(${TARGET} SRCS ${args_SRCS} DEPS ${deps})
    endif()
-    target_compile_options(${TARGET} BEFORE PRIVATE -Wno-ignored-qualifiers)

+    if(NOT WIN32)
+      target_compile_options(${TARGET} BEFORE PRIVATE -Wno-ignored-qualifiers)
+    endif()
    # collect targets need to compile for lite
    if (args_SRCS AND NOT args_EXCLUDE_COMPILE_DEPS)
        add_dependencies(lite_compile_deps ${TARGET})
@@ -207,7 +209,9 @@ function(lite_cc_binary TARGET)
            MLU_DEPS ${args_MLU_DEPS}
            )
    cc_binary(${TARGET} SRCS ${args_SRCS} DEPS ${deps})
+    if(NOT WIN32)
      target_compile_options(${TARGET} BEFORE PRIVATE -Wno-ignored-qualifiers)
+    endif()
    if (NOT APPLE)
        # strip binary target to reduce size
        if(NOT "${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
@@ -272,7 +276,9 @@ function(lite_cc_test TARGET)
                "${TARGET}"
                COMMENT "Strip debug symbols done on final executable file.")
    endif()
+    if(NOT WIN32)
      target_compile_options(${TARGET} BEFORE PRIVATE -Wno-ignored-qualifiers)
+    endif()
    file(APPEND ${offline_test_registry_file} "${TARGET}\n")

    # collect targets need to compile for lite
@@ -516,6 +522,29 @@ function(add_operator TARGET level)
      )
 endfunction()

+# Only for Windows.
+# create_static_lib(<TARGET_NAME> <lib>...)
+# Merges the given static libraries into TARGET_NAME through merge_static_libs.
+# The de-duplicated inputs are first merged, batch by batch, into intermediate
+# ${TARGET_NAME}_dummy_<n> targets; those targets are then merged into
+# TARGET_NAME.
+function(create_static_lib TARGET_NAME)
+  set(libs ${ARGN})
+  # With no libraries the variable does not exist, and list(REMOVE_DUPLICATES)
+  # may reject that, so only de-duplicate a non-empty list.
+  list(LENGTH libs libs_len)
+  if(libs_len GREATER 0)
+    list(REMOVE_DUPLICATES libs)
+    list(LENGTH libs libs_len)
+  endif()
+
+  set(dummy_index 1)
+  set(dummy_offset 1)
+  # the dummy target would be consisted of limit size libraries
+  set(dummy_limit 60)
+  # A function starts with a copy of the caller's variables; reset both
+  # accumulators so nothing is inherited by accident.
+  set(dummy_list)
+  set(${TARGET_NAME}_dummy_list)
+
+  foreach(lib ${libs})
+    list(APPEND dummy_list ${lib})
+    list(LENGTH dummy_list listlen)
+    # Flush the batch once it holds more than dummy_limit entries, or when the
+    # last library has been appended.
+    if ((${listlen} GREATER ${dummy_limit}) OR (${dummy_offset} EQUAL ${libs_len}))
+      merge_static_libs(${TARGET_NAME}_dummy_${dummy_index} ${dummy_list})
+      set(dummy_list)
+      list(APPEND ${TARGET_NAME}_dummy_list ${TARGET_NAME}_dummy_${dummy_index})
+      math(EXPR dummy_index "${dummy_index}+1")
+    endif()
+    math(EXPR dummy_offset "${dummy_offset}+1")
+  endforeach()
+  merge_static_libs(${TARGET_NAME} ${${TARGET_NAME}_dummy_list})
+endfunction()

 # Bundle several static libraries into one.
 function(bundle_static_library tgt_name bundled_tgt_name fake_target)
@@ -559,7 +588,22 @@ function(bundle_static_library tgt_name bundled_tgt_name fake_target)
  set(bundled_tgt_full_name
    ${CMAKE_BINARY_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}${bundled_tgt_name}${CMAKE_STATIC_LIBRARY_SUFFIX})

-  #message(STATUS "bundled_tgt_full_name: ${bundled_tgt_full_name}")
+  message(STATUS "bundled_tgt_full_name:  ${CMAKE_BINARY_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}${bundled_tgt_name}${CMAKE_STATIC_LIBRARY_SUFFIX}")
+  
+  if(WIN32)
+    set(dummy_tgt_name dummy_${bundled_tgt_name})
+    create_static_lib(${bundled_tgt_name} ${static_libs})
+    add_custom_target(${fake_target} ALL DEPENDS ${bundled_tgt_name})
+    add_dependencies(${fake_target} ${tgt_name})
+  
+    add_library(${dummy_tgt_name} STATIC IMPORTED)
+    set_target_properties(${dummy_tgt_name}
+      PROPERTIES
+        IMPORTED_LOCATION ${bundled_tgt_full_name}
+        INTERFACE_INCLUDE_DIRECTORIES $<TARGET_PROPERTY:${tgt_name},INTERFACE_INCLUDE_DIRECTORIES>)
+    add_dependencies(${dummy_tgt_name} ${fake_target})
+    return()
+  endif()

  if(NOT IOS)
    file(WRITE ${CMAKE_BINARY_DIR}/${bundled_tgt_name}.ar.in

--- a/lite/CMakeLists.txt
+++ b/lite/CMakeLists.txt
@@ -87,6 +87,38 @@ message(STATUS "publish inference lib to ${INFER_LITE_PUBLISH_ROOT}")

 # add python lib
 if (LITE_WITH_PYTHON)
+    if(WIN32)   
+        # Copy the built lite_pybind binary to lite.pyd.
+        # NOTE(review): ${CMAKE_BUILD_TYPE} is fixed at configure time, while
+        # the Visual Studio generator picks the configuration at build time --
+        # confirm the two always match for this path.
+        set(LITE_CORE "${CMAKE_BINARY_DIR}/lite/api/python/pybind/${CMAKE_BUILD_TYPE}/lite.pyd")
+        set(LITE_CORE_DEPS ${LITE_CORE})
+        # Use ${CMAKE_COMMAND} so the copy does not depend on cmake being on PATH.
+        add_custom_command(OUTPUT   ${LITE_CORE}
+            COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:lite_pybind> ${LITE_CORE}
+            DEPENDS lite_pybind)
+        add_custom_target(copy_lite_pybind ALL DEPENDS ${LITE_CORE_DEPS})
+
+        # Stage setup.py, the package __init__.py and lite.pyd into the publish tree.
+        add_custom_target(publish_inference_python_lib ${TARGET}
+            COMMAND ${CMAKE_COMMAND} -E make_directory "${INFER_LITE_PUBLISH_ROOT}/python/lib"
+            COMMAND ${CMAKE_COMMAND} -E make_directory "${INFER_LITE_PUBLISH_ROOT}/python/install/libs"
+            COMMAND ${CMAKE_COMMAND} -E make_directory "${INFER_LITE_PUBLISH_ROOT}/python/install/lite"
+            COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_BINARY_DIR}/lite/api/python/setup.py" "${INFER_LITE_PUBLISH_ROOT}/python/install/"
+            COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_SOURCE_DIR}/lite/api/python/__init__.py" "${INFER_LITE_PUBLISH_ROOT}/python/install/lite"
+            COMMAND ${CMAKE_COMMAND} -E copy "${LITE_CORE}" "${INFER_LITE_PUBLISH_ROOT}/python/install/lite/lite.pyd"
+            COMMAND ${CMAKE_COMMAND} -E copy "${LITE_CORE}" "${INFER_LITE_PUBLISH_ROOT}/python/lib/lite.pyd"
+            DEPENDS copy_lite_pybind
+            )
+
+        # Build the wheel from the staged install directory.
+        add_custom_target(publish_inference_python_installer ${TARGET}
+            COMMAND ${PYTHON_EXECUTABLE} setup.py bdist_wheel
+            WORKING_DIRECTORY ${INFER_LITE_PUBLISH_ROOT}/python/install/
+            DEPENDS  publish_inference_python_lib)
+        # Copy the two python demo scripts into the publish tree.
+        add_custom_target(publish_inference_python_light_demo ${TARGET}
+            COMMAND ${CMAKE_COMMAND} -E make_directory "${INFER_LITE_PUBLISH_ROOT}/demo/python"
+            COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_SOURCE_DIR}/lite/demo/python/mobilenetv1_light_api.py" "${INFER_LITE_PUBLISH_ROOT}/demo/python/"
+            COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_SOURCE_DIR}/lite/demo/python/mobilenetv1_full_api.py" "${INFER_LITE_PUBLISH_ROOT}/demo/python/"
+            )
+        add_dependencies(publish_inference publish_inference_python_lib)
+        add_dependencies(publish_inference publish_inference_python_installer)
+        add_dependencies(publish_inference publish_inference_python_light_demo)
+    else()
    if(APPLE)
        add_custom_target(publish_inference_python_lib ${TARGET}
                COMMAND mkdir -p "${INFER_LITE_PUBLISH_ROOT}/python/lib"
@@ -123,6 +155,7 @@ if (LITE_WITH_PYTHON)
    add_dependencies(publish_inference publish_inference_python_lib)
    add_dependencies(publish_inference publish_inference_python_installer)
    add_dependencies(publish_inference publish_inference_python_light_demo)
+    endif(WIN32)
 endif()

 if (LITE_WITH_CUDA OR LITE_WITH_X86)
@@ -141,7 +174,7 @@ if (LITE_WITH_CUDA OR LITE_WITH_X86)
        add_dependencies(publish_inference_cxx_lib paddle_light_api_shared)
        add_dependencies(publish_inference publish_inference_cxx_lib)
        add_dependencies(publish_inference publish_inference_third_party)
-    else()
+    elseif(NOT WIN32)
        add_custom_target(publish_inference_cxx_lib ${TARGET}
            COMMAND mkdir -p "${INFER_LITE_PUBLISH_ROOT}/cxx/lib"
            COMMAND mkdir -p "${INFER_LITE_PUBLISH_ROOT}/bin"
@@ -164,6 +197,36 @@ if (LITE_WITH_CUDA OR LITE_WITH_X86)
 endif()

 if (LITE_WITH_X86)
+  if(WIN32)
+        # Stage the public C++ API headers and the two bundled static libraries
+        # into the publish tree (cxx/include and cxx/lib); bin/ is created as well.
+        add_custom_target(publish_inference_x86_cxx_lib ${TARGET}
+            COMMAND ${CMAKE_COMMAND} -E make_directory "${INFER_LITE_PUBLISH_ROOT}/cxx/lib"
+            COMMAND ${CMAKE_COMMAND} -E make_directory "${INFER_LITE_PUBLISH_ROOT}/bin"
+            COMMAND ${CMAKE_COMMAND} -E make_directory "${INFER_LITE_PUBLISH_ROOT}/cxx/include"
+            COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_SOURCE_DIR}/lite/api/paddle_api.h" "${INFER_LITE_PUBLISH_ROOT}/cxx/include"
+            COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_SOURCE_DIR}/lite/api/paddle_place.h" "${INFER_LITE_PUBLISH_ROOT}/cxx/include"
+            COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_SOURCE_DIR}/lite/api/paddle_use_kernels.h" "${INFER_LITE_PUBLISH_ROOT}/cxx/include"
+            COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_SOURCE_DIR}/lite/api/paddle_use_ops.h" "${INFER_LITE_PUBLISH_ROOT}/cxx/include"
+            COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_SOURCE_DIR}/lite/api/paddle_use_passes.h" "${INFER_LITE_PUBLISH_ROOT}/cxx/include"
+            COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_SOURCE_DIR}/lite/api/paddle_lite_factory_helper.h" "${INFER_LITE_PUBLISH_ROOT}/cxx/include"
+            COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_BINARY_DIR}/lite/api/${CMAKE_BUILD_TYPE}/libpaddle_api_full_bundled.lib" "${INFER_LITE_PUBLISH_ROOT}/cxx/lib"
+            COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_BINARY_DIR}/lite/api/${CMAKE_BUILD_TYPE}/libpaddle_api_light_bundled.lib" "${INFER_LITE_PUBLISH_ROOT}/cxx/lib"
+        )
+
+        # Order the staging after bundle_full_api / bundle_light_api
+        # (presumably the fake targets that produce the bundled libraries --
+        # TODO confirm) and hook it into publish_inference.
+        add_dependencies(publish_inference_x86_cxx_lib bundle_full_api)
+        add_dependencies(publish_inference_x86_cxx_lib bundle_light_api)
+        add_dependencies(publish_inference publish_inference_x86_cxx_lib)
+
+        # Copy the third-party install tree and eigen3 into the publish tree and
+        # create the C++ demo directory.
+        # NOTE(review): copy_directory copies the *contents* of each source
+        # directory, so both trees are merged directly into third_party/ --
+        # confirm this layout is intended.
+        add_custom_target(publish_inference_x86_cxx_demos ${TARGET}
+            COMMAND ${CMAKE_COMMAND} -E make_directory "${INFER_LITE_PUBLISH_ROOT}/third_party"
+            COMMAND ${CMAKE_COMMAND} -E copy_directory "${CMAKE_BINARY_DIR}/third_party/install" "${INFER_LITE_PUBLISH_ROOT}/third_party"
+            COMMAND ${CMAKE_COMMAND} -E copy_directory "${CMAKE_BINARY_DIR}/third_party/eigen3" "${INFER_LITE_PUBLISH_ROOT}/third_party"
+            COMMAND ${CMAKE_COMMAND} -E make_directory "${INFER_LITE_PUBLISH_ROOT}/demo/cxx"
+        )
+        # On Windows the demos target is reached through publish_inference_x86_cxx_lib.
+        add_dependencies(publish_inference_x86_cxx_lib publish_inference_x86_cxx_demos)
+        add_dependencies(publish_inference_x86_cxx_demos paddle_api_full_bundled eigen3)
+
+  else()
+
    add_custom_target(publish_inference_x86_cxx_lib ${TARGET}
            COMMAND mkdir -p "${INFER_LITE_PUBLISH_ROOT}/bin"
            COMMAND cp "${CMAKE_BINARY_DIR}/lite/api/test_model_bin" "${INFER_LITE_PUBLISH_ROOT}/bin"
@@ -178,6 +241,7 @@ if (LITE_WITH_X86)
    add_dependencies(publish_inference_x86_cxx_demos paddle_full_api_shared eigen3)
    add_dependencies(publish_inference publish_inference_x86_cxx_lib)
    add_dependencies(publish_inference publish_inference_x86_cxx_demos)
+  endif()
 endif()

 if(LITE_WITH_CUDA)

--- a/lite/api/CMakeLists.txt
+++ b/lite/api/CMakeLists.txt
@@ -23,6 +23,9 @@ if ((NOT LITE_ON_TINY_PUBLISH) AND (LITE_WITH_CUDA OR LITE_WITH_X86 OR LITE_WITH
        if (NOT LITE_ON_MODEL_OPTIMIZE_TOOL)
            add_dependencies(paddle_full_api_shared dynload_mklml)
        endif()
+        if(WIN32)
+             target_link_libraries(paddle_full_api_shared shlwapi.lib)
+        endif()
    endif()
    if(LITE_WITH_CUDA)
        target_link_libraries(paddle_full_api_shared ${math_cuda} "-Wl,--whole-archive" ${cuda_kernels} "-Wl,--no-whole-archive")
@@ -39,7 +42,7 @@ if ((NOT LITE_ON_TINY_PUBLISH) AND (LITE_WITH_CUDA OR LITE_WITH_X86 OR LITE_WITH

    add_dependencies(paddle_light_api_shared op_list_h kernel_list_h)
    target_link_libraries(paddle_light_api_shared ${light_lib_DEPS} ${arm_kernels} ${npu_kernels} ${rknpu_kernels})
-    if(NOT APPLE)
+    if(NOT APPLE AND NOT WIN32)
        set(LINK_MAP_FILE "${PADDLE_SOURCE_DIR}/lite/core/lite.map")
        set(LINK_FLAGS "-Wl,--version-script ${LINK_MAP_FILE}")
        add_custom_command(OUTPUT ${LINK_MAP_FILE} COMMAND ...)

--- a/lite/api/benchmark.cc
+++ b/lite/api/benchmark.cc
@@ -13,7 +13,13 @@
 // limitations under the License.

 #include <gflags/gflags.h>
+#if !defined(_WIN32)
 #include <sys/time.h>
+#else
+#include <windows.h>
+#include "lite/backends/x86/port.h"
+#endif
+#define GLOG_NO_ABBREVIATED_SEVERITIES  // msvc conflict logging with windows.h
 #include <time.h>
 #include <algorithm>
 #include <cstdio>

--- a/lite/api/cxx_api_impl.cc
+++ b/lite/api/cxx_api_impl.cc
@@ -19,6 +19,11 @@
 #include "lite/api/paddle_api.h"
 #include "lite/core/device_info.h"
 #include "lite/core/version.h"
+
+#ifndef LITE_ON_TINY_PUBLISH
+#include "lite/api/paddle_use_passes.h"
+#endif
+
 #if (defined LITE_WITH_X86) && (defined PADDLE_WITH_MKLML) && \
    !(defined LITE_ON_MODEL_OPTIMIZE_TOOL) && !defined(__APPLE__)
 #include <omp.h>

--- a/lite/api/paddle_lite_factory_helper.h
+++ b/lite/api/paddle_lite_factory_helper.h
@@ -18,20 +18,21 @@
 */
 #pragma once

+// some platform-independent definition
+#include "lite/utils/macros.h"
+
 #define USE_LITE_OP(op_type__)       \
  extern int touch_op_##op_type__(); \
-  int LITE_OP_REGISTER_FAKE(op_type__) __attribute__((unused)) = \
-      touch_op_##op_type__();
+  int LITE_OP_REGISTER_FAKE(op_type__) UNUSED = touch_op_##op_type__();

 #define USE_LITE_KERNEL(op_type__, target__, precision__, layout__, alias__) \
  extern int touch_##op_type__##target__##precision__##layout__##alias__();  \
  int op_type__##target__##precision__##layout__##alias__##__use_lite_kernel \
-      __attribute__((unused)) =                                              \
-          touch_##op_type__##target__##precision__##layout__##alias__();
+      UNUSED = touch_##op_type__##target__##precision__##layout__##alias__();

 #define USE_MIR_PASS(name__)                      \
  extern bool mir_pass_registry##name__##_fake(); \
-  static bool mir_pass_usage##name__ __attribute__((unused)) = \
+  static bool mir_pass_usage##name__ UNUSED =     \
      mir_pass_registry##name__##_fake();

 #define LITE_OP_REGISTER_FAKE(op_type__) op_type__##__registry__
--- a/lite/api/python/__init__.py
+++ b/lite/api/python/__init__.py
@@ -11,3 +11,12 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+import os 
+import sys
+
+if os.name =='nt':
+    current_path = os.path.abspath(os.path.dirname(__file__))
+    third_lib_path = current_path + os.sep + 'libs'
+    os.environ['path'] =  third_lib_path+ ';' + os.environ['path']
+    sys.path.insert(0, third_lib_path)
--- a/lite/api/python/pybind/CMakeLists.txt
+++ b/lite/api/python/pybind/CMakeLists.txt
@@ -3,7 +3,14 @@ if (NOT LITE_ON_TINY_PUBLISH)
   set(PYBIND_DEPS ${PYBIND_DEPS} paddle_api_full opt_base)
 endif()

-lite_cc_library(lite_pybind SHARED SRCS pybind.cc DEPS ${PYBIND_DEPS})
+if(WIN32)
+   lite_cc_library(lite_pybind SHARED SRCS pybind.cc DEPS ${PYBIND_DEPS})
+   get_property (os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES)
+   target_link_libraries(lite_pybind ${os_dependency_modules})
+else()
+   lite_cc_library(lite_pybind SHARED SRCS pybind.cc DEPS ${PYBIND_DEPS})
+endif(WIN32)
+
 if (LITE_ON_TINY_PUBLISH)
   set_target_properties(lite_pybind PROPERTIES COMPILE_FLAGS "-flto -fdata-sections")
 endif()
--- a/lite/api/python/setup.py.in
+++ b/lite/api/python/setup.py.in
@@ -34,21 +34,28 @@ else:

 # core lib of paddlelite is stored as lite.so
 LITE_PATH = '${PADDLE_BINARY_DIR}/inference_lite_lib/python/install/lite'
-PACKAGE_DATA = {'paddlelite': ['lite.so']}
+PACKAGE_DATA = {'paddlelite': ['lite.so' if os.name!='nt' else 'lite.pyd']}
 # put all thirdparty libraries in paddlelite.libs
 PACKAGE_DATA['paddlelite.libs'] = []
 LIB_PATH = '${PADDLE_BINARY_DIR}/inference_lite_lib/python/install/libs'
 if '${WITH_MKL}' == 'ON':
    shutil.copy('${MKLML_SHARED_IOMP_LIB}', LIB_PATH)
    shutil.copy('${MKLML_SHARED_LIB}', LIB_PATH)
+    if os.name != 'nt':
        PACKAGE_DATA['paddlelite.libs'] += ['libmklml_intel.so', 'libiomp5.so']
-
+    else:
+        PACKAGE_DATA['paddlelite.libs'] += ['libiomp5md.dll', 'mklml.dll']
+        shutil.copy('${MKLML_SHARED_LIB_DEPS}', LIB_PATH)
+        PACKAGE_DATA['paddlelite.libs'] += ['msvcr120.dll']
 # link lite.so to paddlelite.libs
-COMMAND = "patchelf --set-rpath '$ORIGIN/../libs/' ${PADDLE_BINARY_DIR}\
-/inference_lite_lib/python/install/lite/lite.so"
-if os.system(COMMAND) != 0:
+if os.name != 'nt':
+    COMMAND = "patchelf --set-rpath '$ORIGIN/../libs/' ${PADDLE_BINARY_DIR}\
+    /inference_lite_lib/python/install/lite/lite.so"
+    if os.system(COMMAND) != 0:
        raise Exception("patch third_party libs failed, command: %s" % COMMAND)

+  
+
 # remove unused paddle/libs/__init__.py
 if os.path.isfile(LIB_PATH+'/__init__.py'):
    os.remove(LIB_PATH+'/__init__.py')
@@ -61,6 +68,14 @@ PACKAGE_DIR = {
    'paddlelite': LITE_PATH
 }

+if os.name == 'nt':
+    # fix the path separator under windows
+    fix_package_dir = {}
+    for k, v in PACKAGE_DIR.items():
+        fix_package_dir[k] = v.replace('/', '\\')
+    PACKAGE_DIR = fix_package_dir
+
+
 setup(
    name='paddlelite',
    version=PADDLELITE_VERSION,

--- a/lite/api/test_helper.h
+++ b/lite/api/test_helper.h
@@ -15,7 +15,12 @@
 #pragma once

 #include <gflags/gflags.h>
+#if !defined(_WIN32)
 #include <sys/time.h>
+#else
+#include <windows.h>
+#include "lite/backends/x86/port.h"
+#endif
 #include <time.h>
 #include <cmath>


--- a/lite/backends/x86/jit/gen_base.cc
+++ b/lite/backends/x86/jit/gen_base.cc
@@ -28,6 +28,12 @@
 #define posix_memalign_free free
 #endif

+#ifdef _WIN32
+#define posix_memalign_free _aligned_free
+#define posix_memalign(p, a, s) \
+  (((*(p)) = _aligned_malloc((s), (a))), *(p) ? 0 : errno)
+#endif
+
 // DEFINE_bool(dump_jitcode, false, "Whether to dump the jitcode to file");
 bool dump_jitcode = paddle::lite::GetBoolFromEnv("dump_jitcode");

@@ -53,10 +59,14 @@ void GenBase::dumpCode(const unsigned char* code) const {
 void* GenBase::operator new(size_t size) {
  void* ptr;
  constexpr size_t alignment = 32ul;
+#ifdef _WIN32
+  ptr = _aligned_malloc(size, alignment);
+#else
  PADDLE_ENFORCE_EQ(posix_memalign(&ptr, alignment, size),
                    0,
                    "GenBase Alloc %ld error!",
                    size);
+#endif
  PADDLE_ENFORCE(ptr, "Fail to allocate GenBase CPU memory: size = %d .", size);
  return ptr;
 }

--- a/lite/backends/x86/parallel.h
+++ b/lite/backends/x86/parallel.h
@@ -38,7 +38,7 @@ static inline int64_t GetMaxThreads() {
  // Do not support nested omp parallem.
  num_threads = omp_in_parallel() ? 1 : omp_get_max_threads();
 #endif
-  return std::max(num_threads, 1L);
+  return std::max<int>(num_threads, 1L);
 }

 using ThreadHandler =

--- a/lite/backends/x86/port.h
+++ b/lite/backends/x86/port.h
@@ -14,10 +14,10 @@

 #pragma once

+#include <time.h>
 #include <cstdio>
 #include <stdexcept>

-#include <time.h>
 #include <memory>
 #include <string>

@@ -37,7 +37,9 @@
 #define GOOGLE_GLOG_DLL_DECL
 #include <io.h>  // _popen, _pclose
 #include <stdio.h>
+#define NOMINMAX  // msvc max/min macro conflict with std::min/max
 #include <windows.h>
+#include <winsock.h>
 #include <numeric>  // std::accumulate in msvc
 #ifndef S_ISDIR     // windows port for sys/stat.h
 #define S_ISDIR(mode) (((mode)&S_IFMT) == S_IFDIR)
@@ -62,6 +64,7 @@ static void *dlopen(const char *filename, int flag) {
  return reinterpret_cast<void *>(hModule);
 }

+extern struct timeval;
 static int gettimeofday(struct timeval *tp, void *tzp) {
  time_t clock;
  struct tm tm;

--- a/lite/core/mir/pass_registry.h
+++ b/lite/core/mir/pass_registry.h
@@ -59,6 +59,9 @@ class PassRegistry {
 }  // namespace lite
 }  // namespace paddle

+// some platform-independent definition
+#include "lite/utils/macros.h"
+
 #define REGISTER_MIR_PASS(name__, class__)                                \
  paddle::lite::mir::PassRegistry mir_pass_registry##name__(#name__,      \
                                                            new class__); \
@@ -66,4 +69,4 @@ class PassRegistry {
    return mir_pass_registry##name__.Touch();                             \
  }                                                                       \
  static paddle::lite::mir::PassRegistry mir_pass_registry_func_##name__  \
-      __attribute__((unused)) = mir_pass_registry##name__
+      UNUSED = mir_pass_registry##name__
--- a/lite/core/op_registry.h
+++ b/lite/core/op_registry.h
@@ -465,8 +465,7 @@ class KernelRegistor : public lite::Registor<KernelType> {
    return 0;                                                                 \
  }                                                                           \
  static bool LITE_KERNEL_PARAM_INSTANCE(                                     \
-      op_type__, target__, precision__, layout__, alias__)                     \
-      __attribute__((unused)) =                                                \
+      op_type__, target__, precision__, layout__, alias__) UNUSED =           \
      paddle::lite::ParamTypeRegistry::NewInstance<TARGET(target__),          \
                                                   PRECISION(precision__),    \
                                                   DATALAYOUT(layout__)>(     \

--- a/lite/core/types.h
+++ b/lite/core/types.h
@@ -30,7 +30,7 @@ namespace core {
 // TODO(Superjomn) unify all the type representation across the lite framework.
 enum class Type {
  UNK = -1,
-  // primary types
+  // primary types
  INT32,
  INT64,
  FLOAT32,
@@ -92,6 +92,8 @@ Type StdTypeToRepr<float>();
 template <>
 Type StdTypeToRepr<bool>();
 template <>
+Type StdTypeToRepr<double>();
+template <>
 Type StdTypeToRepr<std::vector<char>>();
 template <>
 Type StdTypeToRepr<std::string>();

--- a/lite/demo/cxx/mobile_full/mobilenetv1_full_api.cc
+++ b/lite/demo/cxx/mobile_full/mobilenetv1_full_api.cc
@@ -18,6 +18,11 @@
 #include "paddle_api.h"         // NOLINT
 #include "paddle_use_passes.h"  // NOLINT

+#if defined(_WIN32)
+#include "paddle_use_kernels.h"  // NOLINT
+#include "paddle_use_ops.h"      // NOLINT
+#endif
+
 using namespace paddle::lite_api;  // NOLINT

 DEFINE_string(model_dir, "", "Model dir path.");

--- a/lite/demo/python/mobilenetv1_full_api.py
+++ b/lite/demo/python/mobilenetv1_full_api.py
@@ -23,7 +23,7 @@ import argparse
 import sys
 sys.path.append('../../python/lib')

-from lite_core import *
+from paddlelite.lite import *

 # Command arguments
 parser = argparse.ArgumentParser()

--- a/lite/demo/python/mobilenetv1_light_api.py
+++ b/lite/demo/python/mobilenetv1_light_api.py
@@ -23,7 +23,7 @@ import argparse
 import sys
 sys.path.append('../../python/lib')

-from lite_core import *
+from paddlelite.lite import *

 # Command arguments
 parser = argparse.ArgumentParser()

--- a/lite/fluid/data_type.cc
+++ b/lite/fluid/data_type.cc
@@ -12,6 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+#define GLOG_NO_ABBREVIATED_SEVERITIES  // msvc conflict logging with windows.h
 #include "lite/fluid/data_type.h"
 #include <stdint.h>
 #include <string>

--- a/lite/kernels/npu/bridges/registry.h
+++ b/lite/kernels/npu/bridges/registry.h
@@ -58,14 +58,6 @@ class Registry {
 }  // namespace lite
 }  // namespace paddle

-// some platform-independent defintion
-#if defined(_WIN32)
-#define UNUSED
-#define __builtin_expect(EXP, C) (EXP)
-#else
-#define UNUSED __attribute__((unused))
-#endif
-
 #define STATIC_ASSERT_JITKERNEL_GLOBAL_NAMESPACE_LITE(uniq_name, msg)         \
  struct __test_global_namespace_##uniq_name##__ {};                          \
  static_assert(std::is_same<::__test_global_namespace_##uniq_name##__,       \

--- a/lite/kernels/x86/activation_compute.h
+++ b/lite/kernels/x86/activation_compute.h
@@ -16,6 +16,12 @@
 #include <algorithm>
 #include <utility>
 #include <vector>
+
+// _USE_MATH_DEFINES only takes effect when it is defined before <cmath> is
+// first included. NOTE(review): the standard headers included above may
+// already pull in <cmath>; confirm, and move this define to the top of the
+// file if so.
+#ifndef _USE_MATH_DEFINES
+#define _USE_MATH_DEFINES
+#endif
+#include <cmath>
+
 #include "lite/backends/x86/math/blas.h"
 #include "lite/core/kernel.h"
 #include "lite/core/op_lite.h"

--- a/lite/model_parser/model_parser.cc
+++ b/lite/model_parser/model_parser.cc
@@ -253,7 +253,7 @@ void LoadModelPb(const std::string &model_dir,
      std::string file_path = model_dir + "/" + var.name();
      VLOG(4) << "reading weight " << var.name();

-      std::ifstream file(file_path);
+      std::ifstream file(file_path, std::ios::binary);
      switch (var.type().type()) {
        case framework::proto::VarType_Type_LOD_TENSOR:
          LoadLoDTensor(file, scope->Var(var.name()));

--- a/lite/operators/op_params.h
+++ b/lite/operators/op_params.h
@@ -46,9 +46,9 @@ struct ParamBase {
 using param_t = Any;
 #define WITH_INT8_CONFIG             \
  bool enable_int8{false};           \
-  float input_scale{1.0};            \
+  float input_scale{1.0f};           \
  std::vector<float> weight_scale{}; \
-  float output_scale{1.0};           \
+  float output_scale{1.0f};          \
  int bit_length{8};

 /// ----------------------- Functional operators ------------------------------
@@ -346,8 +346,8 @@ struct ActivationParam : ParamBase {
  lite::Tensor* Prelu_alpha{};  // prelu param
  float Swish_beta;             // swish param
  // hard_sigmoid param
-  float hard_sigmoid_slope{0.2};
-  float hard_sigmoid_offset{0.5};
+  float hard_sigmoid_slope{0.2f};
+  float hard_sigmoid_offset{0.5f};
  // hard_swish param
  float hard_swish_threshold{6.0};
  float hard_swish_scale{6.0};
@@ -654,7 +654,7 @@ struct FakeQuantizeMovingAvgMaxAbsParam : ParamBase {
  lite::Tensor* out_accum{};
  int bit_length;
  bool is_test{true};
-  float moving_rate{0.9};
+  float moving_rate{0.9f};
 };

 struct FakeDequantizeMaxAbsParam : ParamBase {
@@ -748,9 +748,9 @@ struct LrnParam : ParamBase {
  const lite::Tensor* X{};
  lite::Tensor* Out{};
  int n{5};
-  float alpha{1e-4};
-  float beta{0.75};
-  float k{1.};
+  float alpha{1e-4f};
+  float beta{0.75f};
+  float k{1.f};
  std::string norm_region{"AcrossChannels"};
 };

@@ -792,8 +792,8 @@ struct MulticlassNmsParam : ParamBase {
  int background_label{0};
  float score_threshold{};
  int nms_top_k{};
-  float nms_threshold{0.3};
-  float nms_eta{1.0};
+  float nms_threshold{0.3f};
+  float nms_eta{1.0f};
  int keep_top_k;
  bool normalized{true};
 };
@@ -904,7 +904,7 @@ struct NormParam : ParamBase {
  lite::Tensor* Out{};
  lite::Tensor* Norm{};
  int axis{1};
-  float epsilon{1e-10};
+  float epsilon{1e-10f};
 };
 struct LayerNormParam : ParamBase {
  const lite::Tensor* X{};
@@ -914,7 +914,7 @@ struct LayerNormParam : ParamBase {
  lite::Tensor* Mean{};
  lite::Tensor* Variance{};
  int begin_norm_axis{1};
-  float epsilon{1e-5};
+  float epsilon{1e-5f};
 };

 struct LogicalParam : ParamBase {
@@ -1163,8 +1163,8 @@ struct AnchorGeneratorParam : ParamBase {
  std::vector<float> anchor_sizes{};
  std::vector<float> aspect_ratios{};
  std::vector<float> stride{};
-  std::vector<float> variances{{0.1, 0.1, 0.2, 0.2}};
-  float offset{0.5};
+  std::vector<float> variances{{0.1f, 0.1f, 0.2f, 0.2f}};
+  float offset{0.5f};

  lite::Tensor* Anchors{};
  lite::Tensor* Variances{};
@@ -1181,9 +1181,9 @@ struct GenerateProposalsParam : ParamBase {
  // attrs
  int pre_nms_topN{6000};
  int post_nms_topN{1000};
-  float nms_thresh{0.5};
-  float min_size{0.1};
-  float eta{1.0};
+  float nms_thresh{0.5f};
+  float min_size{0.1f};
+  float eta{1.0f};

  // outputs
  lite::Tensor* RpnRois{};

--- a/lite/utils/string.h
+++ b/lite/utils/string.h
@@ -16,6 +16,7 @@
 #include <stdarg.h>  // For va_start, etc.
 #include <algorithm>
 #include <cstring>
+#include <iterator>
 #include <memory>  // For std::unique_ptr
 #include <string>
 #include <vector>

--- a/lite/utils/variant.h
+++ b/lite/utils/variant.h
@@ -21,12 +21,7 @@ limitations under the License. */
 // https://github.com/PaddlePaddle/Paddle/issues/3386

 // some platform-independent defintion
-#if defined(_WIN32)
-#define UNUSED
-#define __builtin_expect(EXP, C) (EXP)
-#else
-#define UNUSED __attribute__((unused))
-#endif
+#include "lite/utils/macros.h"

 #if !defined(_WIN32)
 #define UNLIKELY(condition) __builtin_expect(static_cast<bool>(condition), 0)

--- a/mobile/src/framework/load_ops.h
+++ b/mobile/src/framework/load_ops.h
@@ -14,10 +14,13 @@ limitations under the License. */

 #pragma once

+// some platform-independent definitions
+#include "lite/utils/macros.h"
+
 #ifdef PADDLE_MOBILE_CPU
 #define LOAD_CPU_OP(op_type)                                           \
  extern int TouchOpRegistrar_##op_type##_##cpu();                     \
-  static int use_op_itself_##op_type##_##cpu __attribute__((unused)) = \
+  static int use_op_itself_##op_type##_##cpu UNUSED = \
      TouchOpRegistrar_##op_type##_##cpu()
 #else
 #define LOAD_CPU_OP(op_type)
@@ -26,7 +29,7 @@ limitations under the License. */
 #ifdef PADDLE_MOBILE_CL
 #define LOAD_GPU_CL_OP(op_type)                                       \
  extern int TouchOpRegistrar_##op_type##_##cl();                     \
-  static int use_op_itself_##op_type##_##cl __attribute__((unused)) = \
+  static int use_op_itself_##op_type##_##cl UNUSED = \
      TouchOpRegistrar_##op_type##_##cl()
 #else
 #define LOAD_GPU_CL_OP(op_type)
@@ -35,7 +38,7 @@ limitations under the License. */
 #ifdef PADDLE_MOBILE_FPGA
 #define LOAD_FPGA_OP(op_type)                                           \
  extern int TouchOpRegistrar_##op_type##_##fpga();                     \
-  static int use_op_itself_##op_type##_##fpga __attribute__((unused)) = \
+  static int use_op_itself_##op_type##_##fpga UNUSED = \
      TouchOpRegistrar_##op_type##_##fpga()
 #else
 #define LOAD_FPGA_OP(op_type)
@@ -43,7 +46,7 @@ limitations under the License. */

 #define LOAD_FUSION_MATCHER(op_type)                                       \
  extern int TouchFusionMatcherRegistrar_##op_type();                      \
-  static int use_fusion_matcher_itself_##op_type __attribute__((unused)) = \
+  static int use_fusion_matcher_itself_##op_type UNUSED = \
      TouchFusionMatcherRegistrar_##op_type();

 #define LOAD_OP(op_type)   \