diff --git a/cmake/cuda.cmake b/cmake/cuda.cmake
index f507bb41a1103c093e9569176ee868cfaac6bf7b..1cc882cce7961726b96b55ccbe3913a01c0c58be 100644
--- a/cmake/cuda.cmake
+++ b/cmake/cuda.cmake
@@ -157,6 +157,9 @@ list(APPEND EXTERNAL_LIBS ${CUDA_LIBRARIES} ${CUDA_rt_LIBRARY})
 if(NOT WITH_DSO)
     # TODO(panyx0718): CUPTI only allows DSO?
     list(APPEND EXTERNAL_LIBS ${CUDNN_LIBRARY} ${CUPTI_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY} ${NCCL_LIBRARY})
+    if(WIN32)
+      set_property(GLOBAL PROPERTY CUDA_MODULES ${CUDNN_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY})
+    endif(WIN32)
 endif(NOT WITH_DSO)
 
 # setting nvcc arch flags
diff --git a/paddle/fluid/inference/CMakeLists.txt b/paddle/fluid/inference/CMakeLists.txt
index 921bca77e9bb46999199e9ed7322a657d6b14b3e..c8a950fce0bfc2dd0856c48f7026fa7334eedeaf 100644
--- a/paddle/fluid/inference/CMakeLists.txt
+++ b/paddle/fluid/inference/CMakeLists.txt
@@ -13,10 +13,14 @@ cc_library(paddle_fluid_api
     DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB}) 
 
 get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES)
+get_property(cuda_modules GLOBAL PROPERTY CUDA_MODULES)
 
 # paddle_fluid_origin exclude inference api interface
 if(WIN32)
   sep_library(paddle_fluid_origin DEPS ${fluid_modules} paddle_fluid_api)
+  if(WITH_GPU AND NOT WITH_DSO)
+    target_link_libraries(paddle_fluid_origin ${cuda_modules})
+  endif(WITH_GPU AND NOT WITH_DSO)
 else(WIN32)
   cc_library(paddle_fluid_origin DEPS ${fluid_modules} paddle_fluid_api)
 endif(WIN32)
@@ -36,6 +40,9 @@ endif()
 # Create static library
 if(WIN32)
   sep_library(paddle_fluid DEPS ${fluid_modules} ${STATIC_INFERENCE_APIS} zero_copy_tensor reset_tensor_array)
+  if(WITH_GPU AND NOT WITH_DSO)
+    target_link_libraries(paddle_fluid ${cuda_modules})
+  endif(WITH_GPU AND NOT WITH_DSO)
 else(WIN32)
   cc_library(paddle_fluid DEPS ${fluid_modules} ${STATIC_INFERENCE_APIS} zero_copy_tensor reset_tensor_array)
 endif(WIN32)
@@ -50,6 +57,9 @@ endif()
 if(WIN32)
   sep_library(paddle_fluid_shared SHARED SRCS ${SHARED_INFERENCE_SRCS}
           DEPS ${fluid_modules} paddle_fluid_api reset_tensor_array)
+  if(WITH_GPU AND NOT WITH_DSO)
+    target_link_libraries(paddle_fluid_origin ${cuda_modules})
+  endif(WITH_GPU AND NOT WITH_DSO)
 else(WIN32)
   cc_library(paddle_fluid_shared SHARED SRCS ${SHARED_INFERENCE_SRCS}
       DEPS ${fluid_modules} paddle_fluid_api reset_tensor_array)
diff --git a/paddle/fluid/operators/pad_constant_like_op.cc b/paddle/fluid/operators/pad_constant_like_op.cc
index 37646c7b4c50fc7409002aca56e5462bde93cc30..685ebc393794337e03de0b9ce5134d6ce382c8bf 100644
--- a/paddle/fluid/operators/pad_constant_like_op.cc
+++ b/paddle/fluid/operators/pad_constant_like_op.cc
@@ -74,7 +74,7 @@ PadConstantLikeOp Operator.
 
 Pad input(Y) with a pad_value, the number of values padded to the edges of each
 axis is specified by the difference of the shape of X and Y.
-((0, shape_x_0 - shape_y_0), … (0, shape_x_n - shape_y_n)) unique pad widths for
+((0, shape_x_0 - shape_y_0), ... (0, shape_x_n - shape_y_n)) unique pad widths for
 each axis.
 The input should be a k-D tensor(k > 0 and k < 7). As an example:
 
diff --git a/paddle/fluid/operators/roi_pool_op.cc b/paddle/fluid/operators/roi_pool_op.cc
index 8e29761ec208764e263e357a0b3c9456c932d093..043ea680d1506e7b7e33ba5537a71f37feaf81be 100644
--- a/paddle/fluid/operators/roi_pool_op.cc
+++ b/paddle/fluid/operators/roi_pool_op.cc
@@ -122,7 +122,7 @@ class ROIPoolOpMaker : public framework::OpProtoAndCheckerMaker {
               "(Tensor), "
               "Argmaxes corresponding to indices in X used "
               "for gradient computation. Only output "
-              "if arg “is_test” is false.")
+              "if arg \"is_test\" is false.")
         .AsIntermediate();
     AddAttr<float>("spatial_scale",
                    "(float, default 1.0), "
diff --git a/paddle/fluid/operators/unpool_op.cc b/paddle/fluid/operators/unpool_op.cc
index 1d441b43b14ea194152095874645f8133c423efd..6d2ccb38f677962d52ff97df25321bf195759dcd 100644
--- a/paddle/fluid/operators/unpool_op.cc
+++ b/paddle/fluid/operators/unpool_op.cc
@@ -57,8 +57,8 @@ class Unpool2dOpMaker : public framework::OpProtoAndCheckerMaker {
 Input shape is: $(N, C_{in}, H_{in}, W_{in})$, Output shape is:
 $(N, C_{out}, H_{out}, W_{out})$, where
 $$
-H_{out} = (H_{in}−1) * strides[0] − 2 * paddings[0] + ksize[0] \\
-W_{out} = (W_{in}−1) * strides[1] − 2 * paddings[1] + ksize[1]
+H_{out} = (H_{in}-1) * strides[0] - 2 * paddings[0] + ksize[0] \\
+W_{out} = (W_{in}-1) * strides[1] - 2 * paddings[1] + ksize[1]
 $$
 Paper: http://www.matthewzeiler.com/wp-content/uploads/2017/07/iccv2011.pdf
 )DOC");
diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt
index a4baa37c320bd34a6be0255f87b7e153ea33665f..6afa53cd36dee2004ada707a52995ce33b3650e9 100644
--- a/paddle/fluid/pybind/CMakeLists.txt
+++ b/paddle/fluid/pybind/CMakeLists.txt
@@ -22,6 +22,10 @@ if(WITH_PYTHON)
   endif(WITH_AMD_GPU)
 
   if(WIN32)
+    if(WITH_GPU AND NOT WITH_DSO)
+      get_property(cuda_modules GLOBAL PROPERTY CUDA_MODULES)
+      target_link_libraries(paddle_pybind ${cuda_modules})
+    endif(WITH_GPU AND NOT WITH_DSO)
     target_link_libraries(paddle_pybind shlwapi)
   endif(WIN32)
 
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 391094b5b2d5114da8db0e4b71ba03f89c5c9715..879d4d6bf91e2f292a6e02d08bbfdb7fa2d10715 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -61,12 +61,13 @@ IF(WIN32)
 	add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp
 #		COMMAND ${CMAKE_COMMAND} -E touch stub.cc
         COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_BINARY_DIR}/python/paddle
+        COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_BINARY_DIR}/python/paddle/libs
         COMMAND ${CMAKE_COMMAND} -E copy ${PADDLE_SOURCE_DIR}/python/paddle ${PADDLE_BINARY_DIR}/python/paddle
 		COMMAND ${CMAKE_COMMAND} -E copy ${PADDLE_SOURCE_DIR}/paddle/py_paddle ${PADDLE_BINARY_DIR}/python/
 		COMMAND ${CMAKE_COMMAND} -E env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
 		COMMAND ${CMAKE_COMMAND} -E touch ${PADDLE_PYTHON_BUILD_DIR}/.timestamp
-		COMMAND ${CMAKE_COMMAND} -E remove_directory ${PADDLE_PYTHON_BUILD_DIR}/lib-python
-		COMMAND ${CMAKE_COMMAND} -E copy ${PADDLE_PYTHON_BUILD_DIR}/libs ${PADDLE_PYTHON_BUILD_DIR}/lib-python
+#		COMMAND ${CMAKE_COMMAND} -E remove_directory ${PADDLE_PYTHON_BUILD_DIR}/libs
+#		COMMAND ${CMAKE_COMMAND} -E copy ${PADDLE_PYTHON_BUILD_DIR}/libs ${PADDLE_PYTHON_BUILD_DIR}/libs
 		DEPENDS gen_proto_py copy_paddle_pybind ${FLUID_CORE} framework_py_proto profiler_py_proto ${PY_FILES} ${external_project_dependencies} ${COPY_PADDLE_MASTER})
 ELSE(WIN32)
 	add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp
diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py
index 737c8be8147a7efaf9b89827f063430146d3c078..70c1f9589967e952598a112233b180e2865a8e40 100644
--- a/python/paddle/fluid/__init__.py
+++ b/python/paddle/fluid/__init__.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 from __future__ import print_function
+import os
 # import all class inside framework into fluid module
 from . import framework
 from .framework import *
@@ -43,16 +44,17 @@ from .lod_tensor import create_lod_tensor, create_random_int_lodtensor
 from . import clip
 from . import profiler
 from . import unique_name
-from . import recordio_writer
-from . import parallel_executor
-from .parallel_executor import *
+if os.name != 'nt':
+    from . import recordio_writer
+    from . import parallel_executor
+    from .parallel_executor import *
 from paddle.fluid.layers.math_op_patch import monkey_patch_variable
 
 Tensor = LoDTensor
 
 __all__ = framework.__all__ + executor.__all__ + \
     trainer.__all__ + inferencer.__all__ + transpiler.__all__ + \
-    parallel_executor.__all__ + lod_tensor.__all__ + [
+    lod_tensor.__all__ + [
         'io',
         'initializer',
         'layers',
@@ -78,7 +80,8 @@ __all__ = framework.__all__ + executor.__all__ + \
         'recordio_writer',
         'Scope',
     ]
-
+if os.name != 'nt':
+    __all__ += parallel_executor.__all__
 
 def __bootstrap__():
     """
@@ -110,12 +113,16 @@ def __bootstrap__():
     os.environ['OMP_NUM_THREADS'] = str(num_threads)
 
     read_env_flags = [
-        'use_pinned_memory', 'check_nan_inf', 'benchmark', 'warpctc_dir',
+        'use_pinned_memory', 'check_nan_inf', 'benchmark',
         'eager_delete_scope', 'use_mkldnn', 'initial_cpu_memory_in_mb',
         'init_allocated_mem', 'free_idle_memory', 'paddle_num_threads',
-        'dist_threadpool_size', 'cpu_deterministic', 'eager_delete_tensor_gb',
+        'dist_threadpool_size', 'eager_delete_tensor_gb',
         'reader_queue_speed_test_mode'
     ]
+    if os.name != 'nt':
+        read_env_flags.append('warpctc_dir')
+        read_env_flags.append('cpu_deterministic')
+
     if core.is_compiled_with_dist():
         read_env_flags.append('rpc_deadline')
         read_env_flags.append('rpc_server_profile_period')
diff --git a/python/paddle/fluid/contrib/inferencer.py b/python/paddle/fluid/contrib/inferencer.py
index b8d5f4ffeadca0a7b103682f175d50dc46fa258a..b966ae01d039d7e9510dae73ecadb97b494f68c2 100644
--- a/python/paddle/fluid/contrib/inferencer.py
+++ b/python/paddle/fluid/contrib/inferencer.py
@@ -15,13 +15,15 @@
 from __future__ import print_function
 
 import contextlib
+import os
 
 from .. import core
 
 from .. import executor
 from .. import framework
 from .. import io
-from .. import parallel_executor
+if os.name != 'nt':
+    from .. import parallel_executor
 from .. import unique_name
 from .trainer import check_and_get_place
 
diff --git a/python/paddle/fluid/contrib/trainer.py b/python/paddle/fluid/contrib/trainer.py
index 8569e486f91786b5562e84dcdccf6d91da0612cc..096821a5ba690074ecbf023cf87fed7e206d023f 100644
--- a/python/paddle/fluid/contrib/trainer.py
+++ b/python/paddle/fluid/contrib/trainer.py
@@ -28,7 +28,8 @@ from .. import framework
 from .. import io
 # optimizer is same as the parameter of Trainer.__init__. Rename it to opt_module
 from .. import optimizer as opt_module
-from .. import parallel_executor
+if os.name != 'nt':
+    from .. import parallel_executor
 from ..transpiler import distribute_transpiler
 
 __all__ = [
diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py
index fd03dff386cad21c727ca0f266fa1b37ad65b4ad..0282ffec16737e80c52ec3fa63da9ad15f51701e 100644
--- a/python/paddle/fluid/framework.py
+++ b/python/paddle/fluid/framework.py
@@ -536,7 +536,7 @@ class Operator(object):
     OP_WITHOUT_KERNEL_SET = {
         'feed', 'fetch', 'save', 'load', 'recurrent', 'go',
         'rnn_memory_helper_grad', 'conditional_block', 'while', 'send', 'recv',
-        'listen_and_serv', 'parallel_do', 'save_combine', 'load_combine',
+        'listen_and_serv', 'parallel_do', 'save_combine', 'loadload_combine',
         'ncclInit', 'select', 'checkpoint_notify', 'gen_nccl_id'
     }
 
diff --git a/python/paddle/fluid/layers/io.py b/python/paddle/fluid/layers/io.py
index 95e13669ad927cdda56153c7fe58325f4a3ec474..e2d304dc86e731067e63cef4ee905368965365c6 100644
--- a/python/paddle/fluid/layers/io.py
+++ b/python/paddle/fluid/layers/io.py
@@ -15,6 +15,7 @@
 from __future__ import print_function
 import contextlib
 import multiprocessing
+import os
 import six
 import threading
 
@@ -344,70 +345,71 @@ def _copy_reader_create_op_(block, op):
     return new_op
 
 
-@templatedoc(op_type='create_recordio_file_reader')
-def open_recordio_file(filename,
-                       shapes,
-                       lod_levels,
-                       dtypes,
-                       pass_num=1,
-                       for_parallel=True):
-    """
-    ${comment}
-
-    Args:
-       filename(${filename_type}): ${filename_comment}.
-       shapes(list): List of tuples which declaring data shapes.
-       lod_levels(${lod_levels_type}): ${lod_levels_comment}.
-       dtypes(list): List of strs which declaring data type.
-       pass_num(int): Number of passes to run.
-       for_parallel(Bool): Set it as True if you are going to run
-            subsequent operators in parallel.
-
-    Returns:
-       ${out_comment}.
-
-    Examples:
-
-        >>> import paddle.fluid as fluid
-        >>> reader = fluid.layers.io.open_recordio_file(
-        >>>                               filename='./data.recordio',
-        >>>                               shapes=[(3,224,224), (1)],
-        >>>                               lod_levels=[0, 0],
-        >>>                               dtypes=['float32', 'int64'])
-        >>> # Via the reader, we can use 'read_file' layer to get data:
-        >>> image, label = fluid.layers.io.read_file(reader)
-    """
-    dtypes = [convert_np_dtype_to_dtype_(dt) for dt in dtypes]
-    shape_concat = []
-    ranks = []
-
-    for shape in shapes:
-        shape_concat.extend(shape)
-        ranks.append(len(shape))
-
-    var_name = unique_name('open_recordio_file')
-
-    startup_blk = default_startup_program().current_block()
-    startup_var = startup_blk.create_var(name=var_name)
-    startup_blk.append_op(
-        type='create_recordio_file_reader',
-        outputs={'Out': [startup_var]},
-        attrs={
-            'shape_concat': shape_concat,
-            'lod_levels': lod_levels,
-            'filename': filename,
-            'ranks': ranks
-        })
+if os.name != 'nt':
+    @templatedoc(op_type='create_recordio_file_reader')
+    def open_recordio_file(filename,
+                           shapes,
+                           lod_levels,
+                           dtypes,
+                           pass_num=1,
+                           for_parallel=True):
+        """
+        ${comment}
+
+        Args:
+           filename(${filename_type}): ${filename_comment}.
+           shapes(list): List of tuples which declaring data shapes.
+           lod_levels(${lod_levels_type}): ${lod_levels_comment}.
+           dtypes(list): List of strs which declaring data type.
+           pass_num(int): Number of passes to run.
+           for_parallel(Bool): Set it as True if you are going to run
+                subsequent operators in parallel.
+
+        Returns:
+           ${out_comment}.
+
+        Examples:
+
+            >>> import paddle.fluid as fluid
+            >>> reader = fluid.layers.io.open_recordio_file(
+            >>>                               filename='./data.recordio',
+            >>>                               shapes=[(3,224,224), (1)],
+            >>>                               lod_levels=[0, 0],
+            >>>                               dtypes=['float32', 'int64'])
+            >>> # Via the reader, we can use 'read_file' layer to get data:
+            >>> image, label = fluid.layers.io.read_file(reader)
+        """
+        dtypes = [convert_np_dtype_to_dtype_(dt) for dt in dtypes]
+        shape_concat = []
+        ranks = []
+
+        for shape in shapes:
+            shape_concat.extend(shape)
+            ranks.append(len(shape))
+
+        var_name = unique_name('open_recordio_file')
+
+        startup_blk = default_startup_program().current_block()
+        startup_var = startup_blk.create_var(name=var_name)
+        startup_blk.append_op(
+            type='create_recordio_file_reader',
+            outputs={'Out': [startup_var]},
+            attrs={
+                'shape_concat': shape_concat,
+                'lod_levels': lod_levels,
+                'filename': filename,
+                'ranks': ranks
+            })
 
-    startup_var.desc.set_dtypes(dtypes)
-    startup_var.persistable = True
-    main_prog_var = _copy_reader_var_(default_main_program().current_block(),
-                                      startup_var)
+        startup_var.desc.set_dtypes(dtypes)
+        startup_var.persistable = True
+        main_prog_var = _copy_reader_var_(default_main_program().current_block(),
+                                          startup_var)
 
-    if pass_num > 1:
-        main_prog_var = multi_pass(reader=main_prog_var, pass_num=pass_num)
+        if pass_num > 1:
+            main_prog_var = multi_pass(reader=main_prog_var, pass_num=pass_num)
 
-    return monkey_patch_reader_methods(main_prog_var)
+        return monkey_patch_reader_methods(main_prog_var)
 
 
 def random_data_generator(low, high, shapes, lod_levels, for_parallel=True):
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index 110e6d5ab236a9baa645ad02ba69c59673152024..d201357e6f8607f4d5f0405889958d5282b74c23 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -18,6 +18,7 @@ All layers just related to the neural network.
 from __future__ import print_function
 
 import numpy as np
+import os
 from ..layer_helper import LayerHelper
 from ..initializer import Normal, Constant
 from ..framework import Variable, OpProtoHolder
@@ -31,12 +32,10 @@ from functools import reduce
 __all__ = [
     'fc',
     'embedding',
-    'dynamic_lstm',
     'dynamic_lstmp',
     'dynamic_gru',
     'gru_unit',
     'linear_chain_crf',
-    'crf_decoding',
     'cos_sim',
     'cross_entropy',
     'square_error_cost',
@@ -95,7 +94,6 @@ __all__ = [
     'pad',
     'pad_constant_like',
     'label_smooth',
-    'roi_pool',
     'roi_align',
     'dice_loss',
     'image_resize',
@@ -160,6 +158,10 @@ __all__ = [
     'log_loss',
     'add_position_encoding',
 ]
+if os.name != 'nt':
+    __all__.append('dynamic_lstm')
+    __all__.append('crf_decoding')
+    __all__.append('roi_pool')
 
 
 def fc(input,
@@ -334,126 +336,127 @@ def embedding(input,
     return tmp
 
 
-@templatedoc(op_type="lstm")
-def dynamic_lstm(input,
-                 size,
-                 h_0=None,
-                 c_0=None,
-                 param_attr=None,
-                 bias_attr=None,
-                 use_peepholes=True,
-                 is_reverse=False,
-                 gate_activation='sigmoid',
-                 cell_activation='tanh',
-                 candidate_activation='tanh',
-                 dtype='float32',
-                 name=None):
-    """
-    ${comment}
-
-    Args:
-        input (Variable): ${input_comment}
-        size (int): 4 * hidden size.
-        h_0(Variable): The initial hidden state is an optional input, default is zero.
-                       This is a tensor with shape (N x D), where N is the
-                       batch size and D is the hidden size.
-        c_0(Variable): The initial cell state is an optional input, default is zero.
-                       This is a tensor with shape (N x D), where N is the
-                       batch size. `h_0` and `c_0` can be NULL but only at the same time.
-        param_attr(ParamAttr|None): The parameter attribute for the learnable
-                               hidden-hidden weights.
-
-                               - Weights = {:math:`W_{ch}, W_{ih}, \
-                                                W_{fh}, W_{oh}`}
-                               - The shape is (D x 4D), where D is the hidden
-                                 size.
-
-                               If it is set to None or one attribute of ParamAttr,
-                               dynamic_lstm will create ParamAttr as param_attr.
-                               If the Initializer of the param_attr is not set, the
-                               parameter is initialized with Xavier. Default: None.
-        bias_attr (ParamAttr|None): The bias attribute for the learnable bias
-                              weights, which contains two parts, input-hidden
-                              bias weights and peephole connections weights if
-                              setting `use_peepholes` to `True`.
-
-                              1. `use_peepholes = False`
-                                 - Biases = {:math:`b_c, b_i, b_f, b_o`}.
-                                 - The shape is (1 x 4D).
-                              2. `use_peepholes = True`
-                                 - Biases = { :math:`b_c, b_i, b_f, b_o, W_{ic}, \
-                                                 W_{fc}, W_{oc}`}.
-                                 - The shape is (1 x 7D).
-
-                              If it is set to None or one attribute of ParamAttr,
-                              dynamic_lstm will create ParamAttr as bias_attr.
-                              If the Initializer of the bias_attr is not set,
-                              the bias is initialized zero. Default: None.
-        use_peepholes (bool): ${use_peepholes_comment}
-        is_reverse (bool): ${is_reverse_comment}
-        gate_activation (str): ${gate_activation_comment}
-        cell_activation (str): ${cell_activation_comment}
-        candidate_activation (str): ${candidate_activation_comment}
-        dtype (str): Data type. Choices = ["float32", "float64"], default "float32".
-        name (str|None): A name for this layer(optional). If set None, the layer
-                         will be named automatically.
-
-    Returns:
-        tuple: The hidden state, and cell state of LSTM. The shape of both \
-        is (T x D), and lod is the same with the `input`.
-
-    Examples:
-        .. code-block:: python
-
-            hidden_dim = 512
-            forward_proj = fluid.layers.fc(input=input_seq, size=hidden_dim * 4,
-                                           bias_attr=False)
-            forward, _ = fluid.layers.dynamic_lstm(
-                input=forward_proj, size=hidden_dim * 4, use_peepholes=False)
-    """
-    assert bias_attr is not False, "bias_attr should not be False in dynamic_lstmp."
-    helper = LayerHelper('lstm', **locals())
-    size = size // 4
-    weight = helper.create_parameter(
-        attr=helper.param_attr, shape=[size, 4 * size], dtype=dtype)
-    bias_size = [1, 7 * size]
-    if not use_peepholes:
-        bias_size[1] = 4 * size
-    bias = helper.create_parameter(
-        attr=helper.bias_attr, shape=bias_size, dtype=dtype, is_bias=True)
+if os.name != 'nt':
+    @templatedoc(op_type="lstm")
+    def dynamic_lstm(input,
+                     size,
+                     h_0=None,
+                     c_0=None,
+                     param_attr=None,
+                     bias_attr=None,
+                     use_peepholes=True,
+                     is_reverse=False,
+                     gate_activation='sigmoid',
+                     cell_activation='tanh',
+                     candidate_activation='tanh',
+                     dtype='float32',
+                     name=None):
+        """
+        ${comment}
+
+        Args:
+            input (Variable): ${input_comment}
+            size (int): 4 * hidden size.
+            h_0(Variable): The initial hidden state is an optional input, default is zero.
+                           This is a tensor with shape (N x D), where N is the
+                           batch size and D is the hidden size.
+            c_0(Variable): The initial cell state is an optional input, default is zero.
+                           This is a tensor with shape (N x D), where N is the
+                           batch size. `h_0` and `c_0` can be NULL but only at the same time.
+            param_attr(ParamAttr|None): The parameter attribute for the learnable
+                                   hidden-hidden weights.
+
+                                   - Weights = {:math:`W_{ch}, W_{ih}, \
+                                                    W_{fh}, W_{oh}`}
+                                   - The shape is (D x 4D), where D is the hidden
+                                     size.
+
+                                   If it is set to None or one attribute of ParamAttr,
+                                   dynamic_lstm will create ParamAttr as param_attr.
+                                   If the Initializer of the param_attr is not set, the
+                                   parameter is initialized with Xavier. Default: None.
+            bias_attr (ParamAttr|None): The bias attribute for the learnable bias
+                                  weights, which contains two parts, input-hidden
+                                  bias weights and peephole connections weights if
+                                  setting `use_peepholes` to `True`.
+
+                                  1. `use_peepholes = False`
+                                     - Biases = {:math:`b_c, b_i, b_f, b_o`}.
+                                     - The shape is (1 x 4D).
+                                  2. `use_peepholes = True`
+                                     - Biases = { :math:`b_c, b_i, b_f, b_o, W_{ic}, \
+                                                     W_{fc}, W_{oc}`}.
+                                     - The shape is (1 x 7D).
+
+                                  If it is set to None or one attribute of ParamAttr,
+                                  dynamic_lstm will create ParamAttr as bias_attr.
+                                  If the Initializer of the bias_attr is not set,
+                                  the bias is initialized zero. Default: None.
+            use_peepholes (bool): ${use_peepholes_comment}
+            is_reverse (bool): ${is_reverse_comment}
+            gate_activation (str): ${gate_activation_comment}
+            cell_activation (str): ${cell_activation_comment}
+            candidate_activation (str): ${candidate_activation_comment}
+            dtype (str): Data type. Choices = ["float32", "float64"], default "float32".
+            name (str|None): A name for this layer(optional). If set None, the layer
+                             will be named automatically.
+
+        Returns:
+            tuple: The hidden state, and cell state of LSTM. The shape of both \
+            is (T x D), and lod is the same with the `input`.
+
+        Examples:
+            .. code-block:: python
+
+                hidden_dim = 512
+                forward_proj = fluid.layers.fc(input=input_seq, size=hidden_dim * 4,
+                                               bias_attr=False)
+                forward, _ = fluid.layers.dynamic_lstm(
+                    input=forward_proj, size=hidden_dim * 4, use_peepholes=False)
+        """
+        assert bias_attr is not False, "bias_attr should not be False in dynamic_lstmp."
+        helper = LayerHelper('lstm', **locals())
+        size = size // 4
+        weight = helper.create_parameter(
+            attr=helper.param_attr, shape=[size, 4 * size], dtype=dtype)
+        bias_size = [1, 7 * size]
+        if not use_peepholes:
+            bias_size[1] = 4 * size
+        bias = helper.create_parameter(
+            attr=helper.bias_attr, shape=bias_size, dtype=dtype, is_bias=True)
 
-    hidden = helper.create_variable_for_type_inference(dtype)
-    cell = helper.create_variable_for_type_inference(dtype)
-    batch_gate = helper.create_variable_for_type_inference(dtype)
-    batch_cell_pre_act = helper.create_variable_for_type_inference(dtype)
-    inputs = {'Input': input, 'Weight': weight, 'Bias': bias}
-    batch_size = input.shape[0]
-    if h_0:
-        assert h_0.shape == (batch_size, size), \
-            'The shape of h0 should be (batch_size, %d)' % size
-        inputs['H0'] = h_0
-    if c_0:
-        assert c_0.shape == (batch_size, size), \
-            'The shape of c0 should be (batch_size, %d)' % size
-        inputs['C0'] = c_0
+        hidden = helper.create_variable_for_type_inference(dtype)
+        cell = helper.create_variable_for_type_inference(dtype)
+        batch_gate = helper.create_variable_for_type_inference(dtype)
+        batch_cell_pre_act = helper.create_variable_for_type_inference(dtype)
+        inputs = {'Input': input, 'Weight': weight, 'Bias': bias}
+        batch_size = input.shape[0]
+        if h_0:
+            assert h_0.shape == (batch_size, size), \
+                'The shape of h0 should be (batch_size, %d)' % size
+            inputs['H0'] = h_0
+        if c_0:
+            assert c_0.shape == (batch_size, size), \
+                'The shape of c0 should be (batch_size, %d)' % size
+            inputs['C0'] = c_0
 
-    helper.append_op(
-        type='lstm',
-        inputs=inputs,
-        outputs={
-            'Hidden': hidden,
-            'Cell': cell,
-            'BatchGate': batch_gate,
-            'BatchCellPreAct': batch_cell_pre_act
-        },
-        attrs={
-            'use_peepholes': use_peepholes,
-            'is_reverse': is_reverse,
-            'gate_activation': gate_activation,
-            'cell_activation': cell_activation,
-            'candidate_activation': candidate_activation
-        })
-    return hidden, cell
+        helper.append_op(
+            type='lstm',
+            inputs=inputs,
+            outputs={
+                'Hidden': hidden,
+                'Cell': cell,
+                'BatchGate': batch_gate,
+                'BatchCellPreAct': batch_cell_pre_act
+            },
+            attrs={
+                'use_peepholes': use_peepholes,
+                'is_reverse': is_reverse,
+                'gate_activation': gate_activation,
+                'cell_activation': cell_activation,
+                'candidate_activation': candidate_activation
+            })
+        return hidden, cell
 
 
 def dynamic_lstmp(input,
@@ -923,39 +926,40 @@ def linear_chain_crf(input, label, param_attr=None):
     return log_likelihood
 
 
-@templatedoc()
-def crf_decoding(input, param_attr, label=None):
-    """
-    ${comment}
+if os.name != 'nt':
+    @templatedoc()
+    def crf_decoding(input, param_attr, label=None):
+        """
+        ${comment}
 
-    Args:
-        input(${emission_type}): ${emission_comment}
+        Args:
+            input(${emission_type}): ${emission_comment}
 
-        param_attr(ParamAttr): The parameter attribute for training.
+            param_attr(ParamAttr): The parameter attribute for training.
 
-        label(${label_type}): ${label_comment}
+            label(${label_type}): ${label_comment}
 
-    Returns:
-        Variable: ${viterbi_path_comment}
+        Returns:
+            Variable: ${viterbi_path_comment}
 
-    Examples:
-        .. code-block:: python
+        Examples:
+            .. code-block:: python
 
-           crf_decode = layers.crf_decoding(
-                input=hidden, param_attr=ParamAttr(name="crfw"))
-    """
-    helper = LayerHelper('crf_decoding', **locals())
-    transition = helper.get_parameter(param_attr.name)
-    viterbi_path = helper.create_variable_for_type_inference(
-        dtype=helper.input_dtype())
-    helper.append_op(
-        type='crf_decoding',
-        inputs={"Emission": [input],
-                "Transition": transition,
-                "Label": label},
-        outputs={"ViterbiPath": [viterbi_path]})
+               crf_decode = layers.crf_decoding(
+                    input=hidden, param_attr=ParamAttr(name="crfw"))
+        """
+        helper = LayerHelper('crf_decoding', **locals())
+        transition = helper.get_parameter(param_attr.name)
+        viterbi_path = helper.create_variable_for_type_inference(
+            dtype=helper.input_dtype())
+        helper.append_op(
+            type='crf_decoding',
+            inputs={"Emission": [input],
+                    "Transition": transition,
+                    "Label": label},
+            outputs={"ViterbiPath": [viterbi_path]})
 
-    return viterbi_path
+        return viterbi_path
 
 
 @templatedoc()
@@ -5443,42 +5447,43 @@ def label_smooth(label,
     return smooth_label
 
 
-@templatedoc()
-def roi_pool(input, rois, pooled_height=1, pooled_width=1, spatial_scale=1.0):
-    """
-    ${comment}
+if os.name != 'nt':
+    @templatedoc()
+    def roi_pool(input, rois, pooled_height=1, pooled_width=1, spatial_scale=1.0):
+        """
+        ${comment}
 
-    Args:
-        input (Variable): ${x_comment}
-        rois (Variable): ROIs (Regions of Interest) to pool over.
-        pooled_height (integer): ${pooled_height_comment} Default: 1
-        pooled_width (integer): ${pooled_width_comment} Default: 1
-        spatial_scale (float): ${spatial_scale_comment} Default: 1.0
+        Args:
+            input (Variable): ${x_comment}
+            rois (Variable): ROIs (Regions of Interest) to pool over.
+            pooled_height (integer): ${pooled_height_comment} Default: 1
+            pooled_width (integer): ${pooled_width_comment} Default: 1
+            spatial_scale (float): ${spatial_scale_comment} Default: 1.0
 
-    Returns:
-        Variable: ${out_comment}.
+        Returns:
+            Variable: ${out_comment}.
 
-    Examples:
-        .. code-block:: python
+        Examples:
+            .. code-block:: python
 
-            pool_out = fluid.layers.roi_pool(input=x, rois=rois, 7, 7, 1.0)
-    """
-    helper = LayerHelper('roi_pool', **locals())
-    dtype = helper.input_dtype()
-    pool_out = helper.create_variable_for_type_inference(dtype)
-    argmaxes = helper.create_variable_for_type_inference(dtype='int32')
-    helper.append_op(
-        type="roi_pool",
-        inputs={"X": input,
-                "ROIs": rois},
-        outputs={"Out": pool_out,
-                 "Argmax": argmaxes},
-        attrs={
-            "pooled_height": pooled_height,
-            "pooled_width": pooled_width,
-            "spatial_scale": spatial_scale
-        })
-    return pool_out
+                pool_out = fluid.layers.roi_pool(input=x, rois=rois, 7, 7, 1.0)
+        """
+        helper = LayerHelper('roi_pool', **locals())
+        dtype = helper.input_dtype()
+        pool_out = helper.create_variable_for_type_inference(dtype)
+        argmaxes = helper.create_variable_for_type_inference(dtype='int32')
+        helper.append_op(
+            type="roi_pool",
+            inputs={"X": input,
+                    "ROIs": rois},
+            outputs={"Out": pool_out,
+                     "Argmax": argmaxes},
+            attrs={
+                "pooled_height": pooled_height,
+                "pooled_width": pooled_width,
+                "spatial_scale": spatial_scale
+            })
+        return pool_out
 
 
 @templatedoc()
diff --git a/python/paddle/fluid/layers/ops.py b/python/paddle/fluid/layers/ops.py
index 1ff40a26f2f24e2ff06719972489b0c1e5d140c3..df52b7042f411d9bf9e25eee78512225d92e43b2 100644
--- a/python/paddle/fluid/layers/ops.py
+++ b/python/paddle/fluid/layers/ops.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 from __future__ import print_function
+import os
 from .layer_function_generator import generate_layer_fn, generate_layer_fn_noattr
 from .. import core
 from ..framework import convert_np_dtype_to_dtype_
@@ -99,27 +100,28 @@ Examples:
     >>> result = fluid.layers.hard_shrink(x=data, threshold=0.3)
 """
 
-__all__ += ['cumsum']
+if os.name != 'nt':
+    __all__ += ['cumsum']
 
-_cum_sum_ = generate_layer_fn('cumsum')
+    _cum_sum_ = generate_layer_fn('cumsum')
 
 
-def cumsum(x, axis=None, exclusive=None, reverse=None):
-    locals_var = locals().keys()
-    kwargs = dict()
-    for name in locals_var:
-        val = locals()[name]
-        if val is not None:
-            kwargs[name] = val
-    return _cum_sum_(**kwargs)
-
+    def cumsum(x, axis=None, exclusive=None, reverse=None):
+        locals_var = locals().keys()
+        kwargs = dict()
+        for name in locals_var:
+            val = locals()[name]
+            if val is not None:
+                kwargs[name] = val
+        return _cum_sum_(**kwargs)
 
-cumsum.__doc__ = _cum_sum_.__doc__ + """
-Examples:
 
-    >>> data = fluid.layers.data(name="input", shape=[32, 784])
-    >>> result = fluid.layers.cumsum(data, axis=0)
-"""
+    cumsum.__doc__ = _cum_sum_.__doc__ + """
+    Examples:
+    
+        >>> data = fluid.layers.data(name="input", shape=[32, 784])
+        >>> result = fluid.layers.cumsum(data, axis=0)
+    """
 
 __all__ += ['thresholded_relu']
 
diff --git a/python/setup.py.in b/python/setup.py.in
index c442055208b7faf968717ee5bda82ae54a5cc658..ce65d0003fb7132b8b51f3e3eae7d43a51a0bde8 100644
--- a/python/setup.py.in
+++ b/python/setup.py.in
@@ -180,7 +180,8 @@ if '${CMAKE_BUILD_TYPE}' == 'Release':
         package_data['paddle.libs']+=['libmkldnn.so.0']
         shutil.copy('${MKLDNN_SHARED_LIB}', libs_path)
 # remove unused paddle/libs/__init__.py
-os.remove(libs_path+'/__init__.py')
+if os.path.isfile(libs_path+'/__init__.py'):
+    os.remove(libs_path+'/__init__.py')
 package_dir['paddle.libs']=libs_path
 
 # change rpath of core.so, add $ORIGIN/../libs/ to it.