diff --git a/cmake/external/xpu.cmake b/cmake/external/xpu.cmake
index 8a927d8e282a03e8a74c0814ee8d9b247451a091..07fe7d245ef57814d704d11be6f6fe45cf514b2d 100644
--- a/cmake/external/xpu.cmake
+++ b/cmake/external/xpu.cmake
@@ -4,7 +4,7 @@ endif()
 
 INCLUDE(ExternalProject)
 SET(XPU_PROJECT                 "extern_xpu")
-SET(XPU_URL    "https://kunlun1.su.bcebos.com/xpu.tar.gz" CACHE STRING "" FORCE)
+SET(XPU_URL    "https://baidu-kunlun-public.su.bcebos.com/paddle_depence/xpu.tar.gz" CACHE STRING "" FORCE)
 SET(XPU_SOURCE_DIR              "${THIRD_PARTY_PATH}/xpu")
 SET(XPU_DOWNLOAD_DIR            "${XPU_SOURCE_DIR}/src/${XPU_PROJECT}")
 SET(XPU_INSTALL_DIR             "${THIRD_PARTY_PATH}/install/xpu")
diff --git a/cmake/generic.cmake b/cmake/generic.cmake
index b0a6dfe29020781e57d57861137861366864abdb..a23862653677d6d92f244503ac3bd95c101e91bf 100644
--- a/cmake/generic.cmake
+++ b/cmake/generic.cmake
@@ -446,6 +446,9 @@ function(nv_library TARGET_NAME)
         message(FATAL "Please specify source file or library in nv_library.")
       endif()
     endif(nv_library_SRCS)
+    if (WIN32)  # attach the props file named by WIN_PROPS (set in cmake/init.cmake); quoted so an empty value stays one argument
+      set_target_properties(${TARGET_NAME} PROPERTIES VS_USER_PROPS "${WIN_PROPS}")
+    endif(WIN32)
   endif()
 endfunction(nv_library)
 
@@ -461,6 +464,9 @@ function(nv_binary TARGET_NAME)
       add_dependencies(${TARGET_NAME} ${nv_binary_DEPS})
       common_link(${TARGET_NAME})
     endif()
+    if (WIN32)  # attach the props file named by WIN_PROPS (set in cmake/init.cmake); quoted so an empty value stays one argument
+      set_target_properties(${TARGET_NAME} PROPERTIES VS_USER_PROPS "${WIN_PROPS}")
+    endif(WIN32)
   endif()
 endfunction(nv_binary)
 
@@ -482,6 +488,9 @@ function(nv_test TARGET_NAME)
     set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_cpu_deterministic=true)
     set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_init_allocated_mem=true)
     set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_cudnn_deterministic=true)
+    if (WIN32)  # attach the props file named by WIN_PROPS (set in cmake/init.cmake); quoted so an empty value stays one argument
+      set_target_properties(${TARGET_NAME} PROPERTIES VS_USER_PROPS "${WIN_PROPS}")
+    endif(WIN32)
   endif()
 endfunction(nv_test)
 
@@ -712,6 +721,7 @@ function(proto_library TARGET_NAME)
   set(proto_hdrs)
   paddle_protobuf_generate_cpp(proto_srcs proto_hdrs ${proto_library_SRCS})
   cc_library(${TARGET_NAME} SRCS ${proto_srcs} DEPS ${proto_library_DEPS} protobuf)
+  add_dependencies(extern_xxhash ${TARGET_NAME})
 endfunction()
 
 function(py_proto_compile TARGET_NAME)
diff --git a/cmake/inference_lib.cmake b/cmake/inference_lib.cmake
index f19f0eb43d34bd0f3748d7beb1fcf403fa1c9037..f4603051a0e7e9f206d5344fd948f7750a09c173 100644
--- a/cmake/inference_lib.cmake
+++ b/cmake/inference_lib.cmake
@@ -19,9 +19,8 @@ set(PADDLE_INSTALL_DIR "${CMAKE_BINARY_DIR}/paddle_install_dir" CACHE STRING
 set(PADDLE_INFERENCE_INSTALL_DIR "${CMAKE_BINARY_DIR}/paddle_inference_install_dir" CACHE STRING
   "A path setting paddle inference shared and static libraries")
   
-# TODO(zhaolong)
-# At present, the size of static lib in Windows exceeds the system limit,
-# so the generation of static lib is temporarily turned off.
+# The static library built on Windows is currently very large,
+# so its size needs to be reduced.
 if(WIN32)
     #todo: remove the option 
     option(WITH_STATIC_LIB "Compile demo with static/shared library, default use dynamic."   OFF)
@@ -196,7 +195,11 @@ set(PADDLE_INFERENCE_C_INSTALL_DIR "${CMAKE_BINARY_DIR}/paddle_inference_c_insta
 copy_part_of_thrid_party(inference_lib_dist ${PADDLE_INFERENCE_C_INSTALL_DIR})
 
 set(src_dir "${PADDLE_SOURCE_DIR}/paddle/fluid")
-set(paddle_fluid_c_lib ${PADDLE_BINARY_DIR}/paddle/fluid/inference/capi/libpaddle_fluid_c.*)
+if(WIN32)  # Windows glob: adds a ${CMAKE_BUILD_TYPE} subdirectory and drops the "lib" file-name prefix
+  set(paddle_fluid_c_lib ${PADDLE_BINARY_DIR}/paddle/fluid/inference/capi/${CMAKE_BUILD_TYPE}/paddle_fluid_c.*)
+else(WIN32)
+  set(paddle_fluid_c_lib ${PADDLE_BINARY_DIR}/paddle/fluid/inference/capi/libpaddle_fluid_c.*)
+endif(WIN32)
 
 copy(inference_lib_dist
       SRCS  ${src_dir}/inference/capi/paddle_c_api.h  ${paddle_fluid_c_lib}
diff --git a/cmake/init.cmake b/cmake/init.cmake
index 7dfe60f9dd8f021facba6925a465cb58bc5de25d..902dfb11fc0afa40ea5bd2b36543b2432a3bf384 100644
--- a/cmake/init.cmake
+++ b/cmake/init.cmake
@@ -26,4 +26,7 @@ if(WITH_GPU)
     set(CMAKE_CUDA_FLAGS_MINSIZEREL "-O1 -DNDEBUG")
 endif()
 
+if(WIN32)  # WIN_PROPS: props file that nv_library/nv_binary/nv_test pass to VS_USER_PROPS
+    set(WIN_PROPS ${CMAKE_SOURCE_DIR}/cmake/paddle_win.props)
+endif()
 
diff --git a/cmake/operators.cmake b/cmake/operators.cmake
index 21080fbe8fd2e14cf7fd805e01948f2f28535c22..7aa2766763ce9441b0e4de969930af50fb7a55e0 100644
--- a/cmake/operators.cmake
+++ b/cmake/operators.cmake
@@ -62,9 +62,9 @@ function(op_library TARGET)
             endif()
         endif()
         if(WITH_XPU)
-            string(REPLACE "_op" "_xpu_op" XPU_FILE "${TARGET}")
-            if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/xpu/${XPU_FILE}.cc)
-                list(APPEND xpu_cc_srcs xpu/${XPU_FILE}.cc)
+            string(REPLACE "_op" "_op_xpu" XPU_FILE "${TARGET}")
+            if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${XPU_FILE}.cc)
+                list(APPEND xpu_cc_srcs ${XPU_FILE}.cc)
             endif()
         endif()
     else()
@@ -83,7 +83,7 @@ function(op_library TARGET)
                 list(APPEND mkldnn_cc_srcs ${src})
             elseif(${src} MATCHES ".*\\.cu.cc$")
                 list(APPEND cu_cc_srcs ${src})
-            elseif(WITH_XPU AND ${src} MATCHES ".*_xpu_op.cc$")
+            elseif(WITH_XPU AND ${src} MATCHES ".*_op_xpu.cc$")
                 list(APPEND xpu_cc_srcs ${src})
             elseif(${src} MATCHES ".*\\.cc$")
                 list(APPEND cc_srcs ${src})
diff --git a/cmake/paddle_win.props b/cmake/paddle_win.props
new file mode 100644
index 0000000000000000000000000000000000000000..7e434c6d907cc40733a81d8a7cdbe7c285a2bd41
--- /dev/null
+++ b/cmake/paddle_win.props
@@ -0,0 +1,91 @@
+
+
+    
+        
+            
+            true
+            false
+            true
+            false
+            false
+            InheritFromHost
+            InheritFromHost
+            InheritFromHost
+            InheritFromHost
+            InheritFromHost
+
+            -ccbin "%(VCBinDir)" -x cu [GenerateRelocatableDeviceCode] [Include] [RequiredIncludes] [InterleaveSourceInPTX] [GPUDebugInfo] [GenerateLineInfo] [Keep] [KeepDir] [MaxRegCount] [PtxAsOptionV] [TargetMachinePlatform] [NvccCompilation] [CudaRuntime] [AdditionalOptions]
+            --use-local-env
+            [CodeGeneration]
+            -clean
+            
+            -Xcompiler "/EHsc [Warning] /nologo [Optimization] [ProgramDataBaseFileName] $(CudaForceSynchronousPdbWrites) [RuntimeChecks] [Runtime] [TypeInfo]"
+
+            %(BaseCommandLineTemplate) [CompileOut] "%(FullPath)"
+            %(BaseCommandLineTemplate) [HostDebugInfo] [Emulation] [FastMath] [Defines] %(HostCommandLineTemplate) [CompileOut] "%(FullPath)"
+
+            
+# (Approximate command-line.  Settings inherited from host are not visible below.)
+# (Please see the output window after a build for the full command-line)
+
+# Driver API (NVCC Compilation Type is .cubin, .gpu, or .ptx)
+set CUDAFE_FLAGS=--sdk_dir "$(WindowsSdkDir)"
+"$(CudaToolkitNvccPath)" %(BuildCommandLineTemplate) %(DriverApiCommandLineTemplate)
+
+# Runtime API (NVCC Compilation Type is hybrid object or .c file)
+set CUDAFE_FLAGS=--sdk_dir "$(WindowsSdkDir)"
+"$(CudaToolkitNvccPath)" %(BuildCommandLineTemplate) %(RuntimeApiCommandLineTemplate)
+            
+            Compiling CUDA source file %(Identity)...
+            Skipping CUDA source file %(Identity) (excluded from build).
+
+            
+            %(Filename)%(Extension).cache
+            $(IntDir)%(PropsCacheOutputFile)
+
+            $(MSBuildProjectFullPath)
+        
+
+        
+            true
+            $(IntDir)$(TargetName).device-link.obj
+
+            
+            true
+            
+            true
+
+            InheritFromProject
+            InheritFromProject
+
+            
+            
+            
+            
+            
+            
+            
+
+            
+
+            
+            -Xcompiler "/EHsc [Warning] /nologo [Optimization] [RuntimeChecks] [Runtime] [TypeInfo]"
+            "$(CudaToolkitNvccPath)" -dlink [LinkOut] %(HostCommandLineTemplate) [AdditionalLibraryDirectories] [AdditionalDependencies] [AdditionalOptions] [CodeGeneration] [GPUDebugInfo] [TargetMachinePlatform] [Inputs]
+            
+# (Approximate command-line.  Settings inherited from host are not visible below.)
+# (Please see the output window after a build for the full command-line)
+
+%(LinkCommandLineTemplate)
+            
+        
+
+        
+            %(AdditionalLibraryDirectories);$(CudaToolkitLibDir)
+        
+
+        
+            %(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir)
+        
+    
+
+
diff --git a/cmake/third_party.cmake b/cmake/third_party.cmake
index 9edfcb967abc26a25a94d368298c1c475295019f..1eb2096af91dc99ac22b000d2de269bde2efcbbf 100644
--- a/cmake/third_party.cmake
+++ b/cmake/third_party.cmake
@@ -39,6 +39,7 @@ set(third_party_deps)
 #            REPOSITORY ${TARGET_REPOSITORY}
 #            TAG        ${TARGET_TAG}
 #            DIR        ${TARGET_SOURCE_DIR})
+
 FUNCTION(cache_third_party TARGET)
     SET(options "")
     SET(oneValueArgs URL REPOSITORY TAG DIR)
@@ -269,6 +270,10 @@ if(WITH_PSLIB)
     endif()
 endif(WITH_PSLIB)
 
+if(NOT WIN32 AND NOT APPLE)  # gloo is added as a third-party dependency on every platform except Windows and macOS
+    include(external/gloo)
+    list(APPEND third_party_deps extern_gloo)
+endif()
 
 if(WITH_BOX_PS)
     include(external/box_ps)
@@ -276,10 +281,6 @@ if(WITH_BOX_PS)
 endif(WITH_BOX_PS)
 
 if(WITH_DISTRIBUTE)
-    if(WITH_GLOO)
-        include(external/gloo)
-        list(APPEND third_party_deps extern_gloo)
-    endif()
 
     if(WITH_GRPC)
         list(APPEND third_party_deps extern_grpc)
diff --git a/paddle/fluid/framework/attribute.cc b/paddle/fluid/framework/attribute.cc
index 9ca3fe31a33c78621b9e25acaf095e8240af7db6..7460686c1a383531191377cf56ceefa6fbb26a5f 100644
--- a/paddle/fluid/framework/attribute.cc
+++ b/paddle/fluid/framework/attribute.cc
@@ -14,8 +14,6 @@ limitations under the License. */
 
 #include "paddle/fluid/framework/attribute.h"
 
-#include 
-
 namespace paddle {
 namespace framework {
 
diff --git a/paddle/fluid/framework/block_desc.h b/paddle/fluid/framework/block_desc.h
index 5c6e421516269a9b9865605400efa772f944a96f..8c8fcadb05be080699fb6f7a6ab2ac3bc6328c09 100644
--- a/paddle/fluid/framework/block_desc.h
+++ b/paddle/fluid/framework/block_desc.h
@@ -30,6 +30,8 @@ namespace paddle {
 namespace framework {
 
 class ProgramDesc;
+class OpDesc;
+class VarDesc;
 
 // Each Protobuf Message, we provide a XXXBind class. In that class, we optimize
 // read/write speed. Only when we want the protobuf message, the local changes
diff --git a/paddle/fluid/framework/c/c_api.cc b/paddle/fluid/framework/c/c_api.cc
index 0dd2768ccb9ffa1dc7b85dca500095f8c10479c3..48181dac662272679556c444b3055fc950b8c74d 100644
--- a/paddle/fluid/framework/c/c_api.cc
+++ b/paddle/fluid/framework/c/c_api.cc
@@ -12,17 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include 
-#include 
-#include 
-#include 
-
-#include "paddle/fluid/framework/block_desc.h"
 #include "paddle/fluid/framework/c/c_api.h"
-#include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/framework/program_desc.h"
-#include "paddle/fluid/framework/scope.h"
-#include "paddle/fluid/platform/init.h"
 
 extern "C" {
 
diff --git a/paddle/fluid/framework/c/c_api.h b/paddle/fluid/framework/c/c_api.h
index 04dbfbebe5d7646e93678fff3418c4eed6bc691a..a9ec402f381e43b51887b6467d8d1baccf98ad37 100644
--- a/paddle/fluid/framework/c/c_api.h
+++ b/paddle/fluid/framework/c/c_api.h
@@ -24,6 +24,15 @@ limitations under the License. */
 #include "paddle/fluid/framework/op_info.h"
 #include "paddle/fluid/platform/device_context.h"
 
+namespace paddle {
+namespace framework {
+class OpInfoMap;
+}  // namespace framework
+namespace platform {
+class DeviceContextPool;
+}  // namespace platform
+}  // namespace paddle
+
 #ifdef __cplusplus
 extern "C" {
 #endif
diff --git a/paddle/fluid/framework/channel.h b/paddle/fluid/framework/channel.h
index 64a645bf8b28a84b153db74afd2ec268d79ff245..503f1513aad20c1598b034ef1fb90bd0aa1a0224 100644
--- a/paddle/fluid/framework/channel.h
+++ b/paddle/fluid/framework/channel.h
@@ -277,7 +277,7 @@ class ChannelObject {
     size_t finished = 0;
     while (finished < n && WaitForWrite(lock)) {
       size_t m =
-          std::min(n - finished, capacity_ + reading_count_ - data_.size());
+          (std::min)(n - finished, capacity_ + reading_count_ - data_.size());
       for (size_t i = 0; i < m; i++) {
         data_.push_back(std::move(p[finished++]));
       }
diff --git a/paddle/fluid/framework/copy_same_tensor_test.cc b/paddle/fluid/framework/copy_same_tensor_test.cc
index 9350c387a6e49122c6b4c3fe01f5de782a1ae5e4..5b89166e2f4826fe836c74a570abcb8a1790f58e 100644
--- a/paddle/fluid/framework/copy_same_tensor_test.cc
+++ b/paddle/fluid/framework/copy_same_tensor_test.cc
@@ -12,8 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include 
 #include 
+
 #include "gflags/gflags.h"
 #include "gtest/gtest.h"
 #include "paddle/fluid/framework/tensor.h"
diff --git a/paddle/fluid/framework/data_device_transform.h b/paddle/fluid/framework/data_device_transform.h
index 8ff97646cfce7979b1c9c570e6de4f1bd26916c3..60b52a5e7069fb8fdd0664bcfa99eaa000dbff12 100644
--- a/paddle/fluid/framework/data_device_transform.h
+++ b/paddle/fluid/framework/data_device_transform.h
@@ -21,6 +21,8 @@ limitations under the License. */
 namespace paddle {
 namespace framework {
 
+class Tensor;
+
 void TransDataDevice(const Tensor& in, const platform::Place& dst_place,
                      Tensor* out);
 
diff --git a/paddle/fluid/framework/data_feed.h b/paddle/fluid/framework/data_feed.h
index b48d152fe35826363a77104a5cbe39ad800b5eb1..da156bfc5c79f6faf7544794675c40dfd10e7349 100644
--- a/paddle/fluid/framework/data_feed.h
+++ b/paddle/fluid/framework/data_feed.h
@@ -41,6 +41,15 @@ limitations under the License. */
 #include "paddle/fluid/framework/variable.h"
 #include "paddle/fluid/string/string_helper.h"
 
+namespace paddle {
+namespace framework {
+class DataFeedDesc;
+class LoDTensor;
+class Scope;
+class Variable;
+}  // namespace framework
+}  // namespace paddle
+
 namespace paddle {
 namespace framework {
 
@@ -418,6 +427,7 @@ class MultiSlotType {
 
   std::string DebugString() {
     std::stringstream ss;
+
     ss << "\ntype: " << type_ << "\n";
     ss << "offset: ";
     ss << "[";
diff --git a/paddle/fluid/framework/data_feed_factory.cc b/paddle/fluid/framework/data_feed_factory.cc
index 1d8aec7624043f96c5aae908c5faa536e818b5a5..048d539f9b9e5039b7b3b5ec2ae672032f89be48 100644
--- a/paddle/fluid/framework/data_feed_factory.cc
+++ b/paddle/fluid/framework/data_feed_factory.cc
@@ -17,10 +17,10 @@ limitations under the License. */
 #include 
 #include 
 
-#include "paddle/fluid/framework/data_feed.h"
-
 namespace paddle {
 namespace framework {
+class DataFeed;
+
 typedef std::shared_ptr (*Createdata_feedFunction)();
 typedef std::unordered_map data_feedMap;
 data_feedMap g_data_feed_map;
diff --git a/paddle/fluid/framework/data_feed_factory.h b/paddle/fluid/framework/data_feed_factory.h
index 13678edb0b8d084a0b3016d93f6e1bc32ce0169a..49381a98706ddedb9bb0b5900fcb85ac4d4a3719 100644
--- a/paddle/fluid/framework/data_feed_factory.h
+++ b/paddle/fluid/framework/data_feed_factory.h
@@ -16,10 +16,13 @@ limitations under the License. */
 
 #include 
 #include 
+
 #include "paddle/fluid/framework/data_feed.h"
 
 namespace paddle {
 namespace framework {
+class DataFeed;
+
 class DataFeedFactory {
  public:
   static std::string DataFeedTypeList();
diff --git a/paddle/fluid/framework/data_layout_transform.cc b/paddle/fluid/framework/data_layout_transform.cc
index f757e244e38ec965d62d673e63ed082ca70c63c7..108cd9ac6d1c0778b7f614116b5739502fcfb0ee 100644
--- a/paddle/fluid/framework/data_layout_transform.cc
+++ b/paddle/fluid/framework/data_layout_transform.cc
@@ -13,8 +13,8 @@
 // limitations under the License.
 
 #include "paddle/fluid/framework/data_layout_transform.h"
+
 #include 
-#include 
 
 #include "paddle/fluid/operators/math/math_function.h"
 #ifdef PADDLE_WITH_MKLDNN
diff --git a/paddle/fluid/framework/data_layout_transform.h b/paddle/fluid/framework/data_layout_transform.h
index b92c47c2eb018603e1b3156921fb2c1702864c57..238f2d2e67914c7ae1443d09cf915439ebad4dd5 100644
--- a/paddle/fluid/framework/data_layout_transform.h
+++ b/paddle/fluid/framework/data_layout_transform.h
@@ -17,10 +17,18 @@
 #include