diff --git a/cmake/external/protobuf.cmake b/cmake/external/protobuf.cmake
index 2a028b8dc7e7f04eda63e380ef27d3a415bfd04d..f7cb7716969f5ccaa97d1ad7964510376b86870a 100644
--- a/cmake/external/protobuf.cmake
+++ b/cmake/external/protobuf.cmake
@@ -207,6 +207,10 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST)
     elseif(WITH_IPU)
         SET(PROTOBUF_REPOSITORY  ${GIT_URL}/protocolbuffers/protobuf.git)
         SET(PROTOBUF_TAG         d750fbf648256c7c631f51ffdbf67d7c18b0114e)
+    elseif(WIN32)
+        SET(PROTOBUF_REPOSITORY  ${GIT_URL}/protocolbuffers/protobuf.git)
+        # Change the tag to support building with vs2019
+        SET(PROTOBUF_TAG         01a05a53f40ca2ac5f0af10c6cc0810bee39b792)
     else()
         SET(PROTOBUF_REPOSITORY  ${GIT_URL}/protocolbuffers/protobuf.git)
         SET(PROTOBUF_TAG         9f75c5aa851cd877fb0d93ccc31b8567a6706546)
diff --git a/paddle/fluid/operators/elementwise/elementwise_functor.h b/paddle/fluid/operators/elementwise/elementwise_functor.h
index a62c531ff07336f433a9f2f30cb6595e8a730764..0a6866f578d011c840b3166db7fbe0f9bebe44ce 100644
--- a/paddle/fluid/operators/elementwise/elementwise_functor.h
+++ b/paddle/fluid/operators/elementwise/elementwise_functor.h
@@ -174,6 +174,27 @@ struct FMaxFunctor<paddle::platform::float16> {
   }
 };
 
+template <>
+struct FMaxFunctor<int> {
+  inline HOSTDEVICE int operator()(const int& a, const int& b) const {
+    float float_a = static_cast<float>(a);
+    float float_b = static_cast<float>(b);
+    auto result = std::fmax(float_a, float_b);
+    return std::lrint(result);
+  }
+};
+
+template <>
+struct FMaxFunctor<int64_t> {
+  inline HOSTDEVICE int64_t operator()(const int64_t& a,
+                                       const int64_t& b) const {
+    double double_a = static_cast<double>(a);
+    double double_b = static_cast<double>(b);
+    auto result = std::fmax(double_a, double_b);
+    return std::llrint(result);
+  }
+};
+
 // Fmin
 template <typename T>
 struct FMinFunctor {
@@ -194,6 +215,27 @@ struct FMinFunctor<paddle::platform::float16> {
   }
 };
 
+template <>
+struct FMinFunctor<int> {
+  inline HOSTDEVICE int operator()(const int& a, const int& b) const {
+    float float_a = static_cast<float>(a);
+    float float_b = static_cast<float>(b);
+    auto result = std::fmin(float_a, float_b);
+    return std::lrint(result);
+  }
+};
+
+template <>
+struct FMinFunctor<int64_t> {
+  inline HOSTDEVICE int64_t operator()(const int64_t& a,
+                                       const int64_t& b) const {
+    double double_a = static_cast<double>(a);
+    double double_b = static_cast<double>(b);
+    auto result = std::fmin(double_a, double_b);
+    return std::llrint(result);
+  }
+};
+
 template <typename T>
 struct MulGradFunctor {
   inline HOSTDEVICE T operator()(const T& a, const T& b) const { return a * b; }
diff --git a/paddle/fluid/operators/elementwise/elementwise_pow_op.cu b/paddle/fluid/operators/elementwise/elementwise_pow_op.cu
index 5335f274ef126f228694d1bfb23cb15f6da158ee..a5570f2cb85d5a425e2831baee89c5601054d0d1 100644
--- a/paddle/fluid/operators/elementwise/elementwise_pow_op.cu
+++ b/paddle/fluid/operators/elementwise/elementwise_pow_op.cu
@@ -31,7 +31,8 @@ struct CudaPowFunctor<
   // when cast to int by default and it is wrong.
   // Use llrint to cast it to the nearest integer, which is 3.
   inline HOSTDEVICE T operator()(const T args[]) const {
-    return std::llrint(std::pow(args[0], args[1]));
+    return std::llrint(
+        std::pow(static_cast<double>(args[0]), static_cast<double>(args[1])));
   }
 };
 
diff --git a/paddle/fluid/operators/elementwise/elementwise_pow_op.h b/paddle/fluid/operators/elementwise/elementwise_pow_op.h
old mode 100755
new mode 100644
index ee718a3ecd1ecfae71aa9bf67f24f2a06e1aa7b8..256ab31ead69ca166c2479a85aa21693f4d3865b
--- a/paddle/fluid/operators/elementwise/elementwise_pow_op.h
+++ b/paddle/fluid/operators/elementwise/elementwise_pow_op.h
@@ -31,7 +31,8 @@ struct PowFunctor {
     // when cast to int by default and it is wrong.
     // Use llrint to cast it to the nearest integer, which is 3.
     if (std::is_integral<T>::value) {
-      return std::llrint(std::pow(a, b));
+      return std::llrint(
+          std::pow(static_cast<double>(a), static_cast<double>(b)));
     }
 #endif
     return std::pow(a, b);
@@ -60,13 +61,25 @@ class ElementwisePowKernel : public framework::OpKernel<T> {
 template <typename T>
 struct PowGradDX {
   HOSTDEVICE T operator()(T x, T y, T out, T dout) const {
+#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
+    if (std::is_integral<T>::value) {
+      return dout * y *
+             std::pow(static_cast<double>(x), static_cast<double>(y - 1));
+    }
+#endif
     return dout * y * std::pow(x, y - 1);
   }
 };
 
-template <typename T>
+template <typename T, typename Enable = void>
 struct PowGradDY {
   HOSTDEVICE T operator()(T x, T y, T out, T dout) const {
+#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
+    if (std::is_integral<T>::value) {
+      return dout * std::log(static_cast<double>(x)) *
+             std::pow(static_cast<double>(x), static_cast<double>(y));
+    }
+#endif
     return dout * std::log(x) * std::pow(x, y);
   }
 };
diff --git a/paddle/fluid/operators/svd_helper.h b/paddle/fluid/operators/svd_helper.h
index 8d17ddec6fbb4df536d4734d0cfc25e22e441373..8a3622a6b1b5ef974dc3ec3fe1a04246bc3bd52f 100644
--- a/paddle/fluid/operators/svd_helper.h
+++ b/paddle/fluid/operators/svd_helper.h
@@ -84,7 +84,7 @@ void BatchSvd(const T* X, T* U, T* VH, T* S, int rows, int cols, int batches,
 
 template <typename T>
 struct PowFunctor {
-  PowFunctor(const T* input, T* output, int64_t numel, float exp)
+  PowFunctor(const T* input, T* output, int64_t numel, T exp)
       : input_(input), output_(output), numel_(numel), exp_(exp) {}
 
   HOSTDEVICE void operator()(int64_t idx) const {
@@ -93,7 +93,7 @@ struct PowFunctor {
   const T* input_;
   T* output_;
   int64_t numel_;
-  float exp_;
+  T exp_;
 };
 
 template <typename T>
@@ -297,7 +297,7 @@ struct DeviceIndependenceTensorOperations {
       const framework::ExecutionContext& context)
       : context(context) {}
 
-  framework::Tensor Pow(const framework::Tensor& x, float exp) {
+  framework::Tensor Pow(const framework::Tensor& x, T exp) {
     framework::Tensor out;
     auto for_range = GetForRange(x.numel());
     int numel = x.numel();
diff --git a/paddle/scripts/paddle_build.bat b/paddle/scripts/paddle_build.bat
index 8bb21fa4ef2e1c32913292e64940c00d6bc7f831..f64acbeb72307815300093748e89748d797dbf0e 100644
--- a/paddle/scripts/paddle_build.bat
+++ b/paddle/scripts/paddle_build.bat
@@ -261,6 +261,7 @@ set ON_INFER=ON
 set WITH_TESTING=ON
 set WITH_TENSORRT=ON
 set WITH_INFERENCE_API_TEST=ON
+set WITH_TPCACHE=OFF
 
 call :cmake || goto cmake_error
 call :build || goto build_error
@@ -325,7 +326,11 @@ echo    ========================================
 rem set vs language to english to block showIncludes, this need vs has installed English language package.
 set VSLANG=1033
 rem Configure the environment for 64-bit builds. 'DISTUTILS_USE_SDK' indicates that the user has selected the compiler.
-call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvars64.bat"
+echo %task_name%|findstr wincheck_inference >nul && (
+    call "D:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvars64.bat"
+) || (
+    call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvars64.bat"
+)
 set DISTUTILS_USE_SDK=1
 rem Windows 10 Kit bin dir
 set PATH=C:\Program Files (x86)\Windows Kits\10\bin\10.0.17763.0\x64;%PATH%
diff --git a/paddle/utils/small_vector.h b/paddle/utils/small_vector.h
index e9e7996babcf7ad57a34b972c33eeaf16be90059..48af2491b89f877249bdebaccd9010c07b69bdcf 100644
--- a/paddle/utils/small_vector.h
+++ b/paddle/utils/small_vector.h
@@ -31,6 +31,7 @@
 #include <limits>
 #include <memory>
 #include <new>
+#include <stdexcept>
 #include <string>
 #include <type_traits>
 #include <utility>