Merge branch 'windows/build' into windows/online

b6d89204 · peizhilin · 8dd0f885 · 2ceb4ae9 · b6d89204 · b6d89204
10 changed files
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -130,6 +130,11 @@ if (APPLE OR WIN32)
        "Disable MKL for building on mac and windows" FORCE)
 endif()
+if (WIN32)
+    set(WITH_AVX OFF CACHE STRING
+            "Disable AVX when compiling for Windows" FORCE)
+endif()
 set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING
  "A path setting third party libraries download & build directories.")

--- a/cmake/operators.cmake
+++ b/cmake/operators.cmake
@@ -85,9 +85,7 @@ function(op_library TARGET)
    if (WIN32)
    # remove windows unsupported op, because windows has no nccl, no warpctc such ops.
    foreach(windows_unsupport_op "nccl_op" "gen_nccl_id_op" "warpctc_op"
-#     "hierarchical_sigmoid_op" "cumsum_op"
+            "channel_send_op" "channel_create_op" "channel_close_op" "channel_recv_op")
-#     "crf_decoding_op" "select_op" "lstmp_op" "gru_op" "fusion_gru_op" "lstm_op" "fusion_lstm_op"
-      "fusion_seqconv_eltadd_relu_op" "channel_send_op" "channel_create_op" "channel_close_op" "channel_recv_op")
        if ("${TARGET}" STREQUAL "${windows_unsupport_op}")
          return()
        endif()

--- a/cmake/simd.cmake
+++ b/cmake/simd.cmake
@@ -70,17 +70,20 @@ int main()
    return 0;
 }" AVX_FOUND)
-# Check AVX 2
+# AVX2 is disabled on Windows: the detection below is skipped entirely there
-set(CMAKE_REQUIRED_FLAGS ${AVX2_FLAG})
+if(NOT WIN32)
-set(AVX2_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE)
+    # Check AVX 2
-CHECK_CXX_SOURCE_RUNS("
+    set(CMAKE_REQUIRED_FLAGS ${AVX2_FLAG})
-#include <immintrin.h>
+    set(AVX2_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE)
-int main()
+    CHECK_CXX_SOURCE_RUNS("
-{
+    #include <immintrin.h>
-    __m256i a = _mm256_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4);
+    int main()
-    __m256i result = _mm256_abs_epi32 (a);
+    {
-    return 0;
+        __m256i a = _mm256_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4);
-}" AVX2_FOUND)
+        __m256i result = _mm256_abs_epi32 (a);
+        return 0;
+    }" AVX2_FOUND)
+endif(NOT WIN32)
 # Check AVX512F
 set(CMAKE_REQUIRED_FLAGS ${AVX512F_FLAG})

--- a/paddle/fluid/operators/CMakeLists.txt
+++ b/paddle/fluid/operators/CMakeLists.txt
@@ -48,9 +48,9 @@ endif()
 set(COMMON_OP_DEPS "")
 set(COMMON_OP_DEPS ${COMMON_OP_DEPS} xxhash selected_rows_functor selected_rows lod_tensor maxouting unpooling pooling lod_rank_table context_project sequence_pooling executor sequence_padding sequence_scale cos_sim_functor memory concat_and_split cross_entropy softmax vol2col im2col sampler)
-set(COMMON_OP_DEPS ${COMMON_OP_DEPS} lstm_compute matrix_bit_code gru_compute activation_functions jit_kernel)
+set(COMMON_OP_DEPS ${COMMON_OP_DEPS} lstm_compute matrix_bit_code sequence2batch gru_compute activation_functions jit_kernel)
 if (NOT WIN32)
-  set(COMMON_OP_DEPS ${COMMON_OP_DEPS} sequence2batch dynload_warpctc)
+  set(COMMON_OP_DEPS ${COMMON_OP_DEPS} dynload_warpctc)
 endif()
 if (WITH_GPU)
  set(COMMON_OP_DEPS ${COMMON_OP_DEPS} depthwise_conv cub)

--- a/paddle/fluid/operators/hierarchical_sigmoid_op.h
+++ b/paddle/fluid/operators/hierarchical_sigmoid_op.h
@@ -111,7 +111,7 @@ class HierarchicalSigmoidGradOpKernel : public framework::OpKernel<T> {
    auto pre_out_mat = EigenMatrix<T>::From(*pre_out);
    auto pre_out_grad_mat = EigenMatrix<T>::From(pre_out_grad);
    auto out_grad_mat = EigenMatrix<T>::From(*out_grad);
-    Eigen::array<int, 2> bcast({{1, static_cast<int>(pre_out_grad.dims()[1])}});
+    Eigen::array<int, 2> bcast{1, static_cast<int>(pre_out_grad.dims()[1])};
    // softrelu derivative
    pre_out_grad_mat.device(place) =

--- a/paddle/fluid/operators/math/CMakeLists.txt
+++ b/paddle/fluid/operators/math/CMakeLists.txt
@@ -81,4 +81,3 @@ if(WITH_XBYAK)
 endif()
 cc_library(jit_kernel SRCS ${JIT_KERNEL_SRCS} DEPS ${JIT_KERNEL_DEPS})
 cc_test(jit_kernel_test SRCS jit_kernel_test.cc DEPS jit_kernel)
--- a/paddle/fluid/operators/math/matrix_bit_code.h
+++ b/paddle/fluid/operators/math/matrix_bit_code.h
@@ -67,7 +67,7 @@ inline constexpr size_t FindLastSet(size_t x) {
             : (std::is_same<size_t, unsigned long>::value  // NOLINT
                    ? (x ? 8 * sizeof(x) - __builtin_clzl(x) : 0)
                    : (x ? 8 * sizeof(x) - __builtin_clzll(x) : 0));
+}
 #else
 // windows don't have built-in clz, ctz function
 template <typename T>
@@ -92,7 +92,6 @@ inline int clz(const T& value) {
 inline size_t FindLastSet(size_t x) { return sizeof(size_t) * 8 - clz(x); }
 #endif  // !_WIN32
-}
 struct SimpleCode {
  SimpleCode(size_t code, size_t num_classes) : c_(code + num_classes) {}

--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -170,12 +170,6 @@ __all__ = [
    'bilinear_tensor_product',
 ]
-# To avoid the api checker complains
-if os.name == 'nt':
-    __all__.remove('dynamic_lstm')
-    __all__.remove('crf_decoding')
-    __all__.remove('roi_pool')
 def fc(input,
       size,
@@ -349,128 +343,126 @@ def embedding(input,
    return tmp
-if os.name != 'nt':
+@templatedoc(op_type="lstm")
+def dynamic_lstm(input,
+                 size,
+                 h_0=None,
+                 c_0=None,
+                 param_attr=None,
+                 bias_attr=None,
+                 use_peepholes=True,
+                 is_reverse=False,
+                 gate_activation='sigmoid',
+                 cell_activation='tanh',
+                 candidate_activation='tanh',
+                 dtype='float32',
+                 name=None):
+    """
+    ${comment}
-    @templatedoc(op_type="lstm")
+    Args:
-    def dynamic_lstm(input,
+        input (Variable): ${input_comment}
-                     size,
+        size (int): 4 * hidden size.
-                     h_0=None,
+        h_0(Variable): The initial hidden state is an optional input, default is zero.
-                     c_0=None,
+                       This is a tensor with shape (N x D), where N is the
-                     param_attr=None,
+                       batch size and D is the hidden size.
-                     bias_attr=None,
+        c_0(Variable): The initial cell state is an optional input, default is zero.
-                     use_peepholes=True,
+                       This is a tensor with shape (N x D), where N is the
-                     is_reverse=False,
+                       batch size. `h_0` and `c_0` can be NULL but only at the same time.
-                     gate_activation='sigmoid',
+        param_attr(ParamAttr|None): The parameter attribute for the learnable
-                     cell_activation='tanh',
+                               hidden-hidden weights.
-                     candidate_activation='tanh',
-                     dtype='float32',
-                     name=None):
-        """
-        ${comment}
-        Args:
-            input (Variable): ${input_comment}
-            size (int): 4 * hidden size.
-            h_0(Variable): The initial hidden state is an optional input, default is zero.
-                           This is a tensor with shape (N x D), where N is the
-                           batch size and D is the hidden size.
-            c_0(Variable): The initial cell state is an optional input, default is zero.
-                           This is a tensor with shape (N x D), where N is the
-                           batch size. `h_0` and `c_0` can be NULL but only at the same time.
-            param_attr(ParamAttr|None): The parameter attribute for the learnable
-                                   hidden-hidden weights.
-                                   - Weights = {:math:`W_{ch}, W_{ih}, \
-                                                    W_{fh}, W_{oh}`}
-                                   - The shape is (D x 4D), where D is the hidden
-                                     size.
-                                   If it is set to None or one attribute of ParamAttr,
-                                   dynamic_lstm will create ParamAttr as param_attr.
-                                   If the Initializer of the param_attr is not set, the
-                                   parameter is initialized with Xavier. Default: None.
-            bias_attr (ParamAttr|None): The bias attribute for the learnable bias
-                                  weights, which contains two parts, input-hidden
-                                  bias weights and peephole connections weights if
-                                  setting `use_peepholes` to `True`.
-                                  1. `use_peepholes = False`
-                                     - Biases = {:math:`b_c, b_i, b_f, b_o`}.
-                                     - The shape is (1 x 4D).
-                                  2. `use_peepholes = True`
-                                     - Biases = { :math:`b_c, b_i, b_f, b_o, W_{ic}, \
-                                                     W_{fc}, W_{oc}`}.
-                                     - The shape is (1 x 7D).
-                                  If it is set to None or one attribute of ParamAttr,
-                                  dynamic_lstm will create ParamAttr as bias_attr.
-                                  If the Initializer of the bias_attr is not set,
-                                  the bias is initialized zero. Default: None.
-            use_peepholes (bool): ${use_peepholes_comment}
-            is_reverse (bool): ${is_reverse_comment}
-            gate_activation (str): ${gate_activation_comment}
-            cell_activation (str): ${cell_activation_comment}
-            candidate_activation (str): ${candidate_activation_comment}
-            dtype (str): Data type. Choices = ["float32", "float64"], default "float32".
-            name (str|None): A name for this layer(optional). If set None, the layer
-                             will be named automatically.
-        Returns:
-            tuple: The hidden state, and cell state of LSTM. The shape of both \
-            is (T x D), and lod is the same with the `input`.
-        Examples:
-            .. code-block:: python
-                hidden_dim = 512
-                forward_proj = fluid.layers.fc(input=input_seq, size=hidden_dim * 4,
-                                               bias_attr=False)
-                forward, _ = fluid.layers.dynamic_lstm(
-                    input=forward_proj, size=hidden_dim * 4, use_peepholes=False)
-        """
-        assert bias_attr is not False, "bias_attr should not be False in dynamic_lstmp."
-        helper = LayerHelper('lstm', **locals())
-        size = size // 4
-        weight = helper.create_parameter(
-            attr=helper.param_attr, shape=[size, 4 * size], dtype=dtype)
-        bias_size = [1, 7 * size]
-        if not use_peepholes:
-            bias_size[1] = 4 * size
-        bias = helper.create_parameter(
-            attr=helper.bias_attr, shape=bias_size, dtype=dtype, is_bias=True)
-        hidden = helper.create_variable_for_type_inference(dtype)
+                               - Weights = {:math:`W_{ch}, W_{ih}, \
-        cell = helper.create_variable_for_type_inference(dtype)
+                                                W_{fh}, W_{oh}`}
-        batch_gate = helper.create_variable_for_type_inference(dtype)
+                               - The shape is (D x 4D), where D is the hidden
-        batch_cell_pre_act = helper.create_variable_for_type_inference(dtype)
+                                 size.
-        inputs = {'Input': input, 'Weight': weight, 'Bias': bias}
-        batch_size = input.shape[0]
-        if h_0:
-            assert h_0.shape == (batch_size, size), \
-                'The shape of h0 should be (batch_size, %d)' % size
-            inputs['H0'] = h_0
-        if c_0:
-            assert c_0.shape == (batch_size, size), \
-                'The shape of c0 should be (batch_size, %d)' % size
-            inputs['C0'] = c_0
-        helper.append_op(
+                               If it is set to None or one attribute of ParamAttr,
-            type='lstm',
+                               dynamic_lstm will create ParamAttr as param_attr.
-            inputs=inputs,
+                               If the Initializer of the param_attr is not set, the
-            outputs={
+                               parameter is initialized with Xavier. Default: None.
-                'Hidden': hidden,
+        bias_attr (ParamAttr|None): The bias attribute for the learnable bias
-                'Cell': cell,
+                              weights, which contains two parts, input-hidden
-                'BatchGate': batch_gate,
+                              bias weights and peephole connections weights if
-                'BatchCellPreAct': batch_cell_pre_act
+                              setting `use_peepholes` to `True`.
-            },
-            attrs={
+                              1. `use_peepholes = False`
-                'use_peepholes': use_peepholes,
+                                 - Biases = {:math:`b_c, b_i, b_f, b_o`}.
-                'is_reverse': is_reverse,
+                                 - The shape is (1 x 4D).
-                'gate_activation': gate_activation,
+                              2. `use_peepholes = True`
-                'cell_activation': cell_activation,
+                                 - Biases = { :math:`b_c, b_i, b_f, b_o, W_{ic}, \
-                'candidate_activation': candidate_activation
+                                                 W_{fc}, W_{oc}`}.
-            })
+                                 - The shape is (1 x 7D).
-        return hidden, cell
+                              If it is set to None or one attribute of ParamAttr,
+                              dynamic_lstm will create ParamAttr as bias_attr.
+                              If the Initializer of the bias_attr is not set,
+                              the bias is initialized zero. Default: None.
+        use_peepholes (bool): ${use_peepholes_comment}
+        is_reverse (bool): ${is_reverse_comment}
+        gate_activation (str): ${gate_activation_comment}
+        cell_activation (str): ${cell_activation_comment}
+        candidate_activation (str): ${candidate_activation_comment}
+        dtype (str): Data type. Choices = ["float32", "float64"], default "float32".
+        name (str|None): A name for this layer(optional). If set None, the layer
+                         will be named automatically.
+    Returns:
+        tuple: The hidden state, and cell state of LSTM. The shape of both \
+        is (T x D), and lod is the same with the `input`.
+    Examples:
+        .. code-block:: python
+            hidden_dim = 512
+            forward_proj = fluid.layers.fc(input=input_seq, size=hidden_dim * 4,
+                                           bias_attr=False)
+            forward, _ = fluid.layers.dynamic_lstm(
+                input=forward_proj, size=hidden_dim * 4, use_peepholes=False)
+    """
+    assert bias_attr is not False, "bias_attr should not be False in dynamic_lstmp."
+    helper = LayerHelper('lstm', **locals())
+    size = size // 4
+    weight = helper.create_parameter(
+        attr=helper.param_attr, shape=[size, 4 * size], dtype=dtype)
+    bias_size = [1, 7 * size]
+    if not use_peepholes:
+        bias_size[1] = 4 * size
+    bias = helper.create_parameter(
+        attr=helper.bias_attr, shape=bias_size, dtype=dtype, is_bias=True)
+    hidden = helper.create_variable_for_type_inference(dtype)
+    cell = helper.create_variable_for_type_inference(dtype)
+    batch_gate = helper.create_variable_for_type_inference(dtype)
+    batch_cell_pre_act = helper.create_variable_for_type_inference(dtype)
+    inputs = {'Input': input, 'Weight': weight, 'Bias': bias}
+    batch_size = input.shape[0]
+    if h_0:
+        assert h_0.shape == (batch_size, size), \
+            'The shape of h0 should be (batch_size, %d)' % size
+        inputs['H0'] = h_0
+    if c_0:
+        assert c_0.shape == (batch_size, size), \
+            'The shape of c0 should be (batch_size, %d)' % size
+        inputs['C0'] = c_0
+    helper.append_op(
+        type='lstm',
+        inputs=inputs,
+        outputs={
+            'Hidden': hidden,
+            'Cell': cell,
+            'BatchGate': batch_gate,
+            'BatchCellPreAct': batch_cell_pre_act
+        },
+        attrs={
+            'use_peepholes': use_peepholes,
+            'is_reverse': is_reverse,
+            'gate_activation': gate_activation,
+            'cell_activation': cell_activation,
+            'candidate_activation': candidate_activation
+        })
+    return hidden, cell
 def dynamic_lstmp(input,
@@ -969,43 +961,39 @@ def linear_chain_crf(input, label, param_attr=None):
    return log_likelihood
-if os.name != 'nt':
+@templatedoc()
+def crf_decoding(input, param_attr, label=None):
-    @templatedoc()
+    """
-    def crf_decoding(input, param_attr, label=None):
+    ${comment}
-        """
-        ${comment}
-        Args:
+    Args:
-            input(${emission_type}): ${emission_comment}
+        input(${emission_type}): ${emission_comment}
-            param_attr(ParamAttr): The parameter attribute for training.
+        param_attr(ParamAttr): The parameter attribute for training.
-            label(${label_type}): ${label_comment}
+        label(${label_type}): ${label_comment}
-        Returns:
+    Returns:
-            Variable: ${viterbi_path_comment}
+        Variable: ${viterbi_path_comment}
-        Examples:
+    Examples:
-            .. code-block:: python
+        .. code-block:: python
-               crf_decode = layers.crf_decoding(
+           crf_decode = layers.crf_decoding(
-                    input=hidden, param_attr=ParamAttr(name="crfw"))
+                input=hidden, param_attr=ParamAttr(name="crfw"))
-        """
+    """
-        helper = LayerHelper('crf_decoding', **locals())
+    helper = LayerHelper('crf_decoding', **locals())
-        transition = helper.get_parameter(param_attr.name)
+    transition = helper.get_parameter(param_attr.name)
-        viterbi_path = helper.create_variable_for_type_inference(
+    viterbi_path = helper.create_variable_for_type_inference(
-            dtype=helper.input_dtype())
+        dtype=helper.input_dtype())
-        helper.append_op(
+    helper.append_op(
-            type='crf_decoding',
+        type='crf_decoding',
-            inputs={
+        inputs={"Emission": [input],
-                "Emission": [input],
                "Transition": transition,
-                "Label": label
+                "Label": label},
-            },
+        outputs={"ViterbiPath": [viterbi_path]})
-            outputs={"ViterbiPath": [viterbi_path]})
-        return viterbi_path
+    return viterbi_path
 @templatedoc()
@@ -5599,48 +5587,42 @@ def label_smooth(label,
    return smooth_label
-if os.name != 'nt':
+@templatedoc()
+def roi_pool(input, rois, pooled_height=1, pooled_width=1, spatial_scale=1.0):
-    @templatedoc()
+    """
-    def roi_pool(input,
+    ${comment}
-                 rois,
-                 pooled_height=1,
+    Args:
-                 pooled_width=1,
+        input (Variable): ${x_comment}
-                 spatial_scale=1.0):
+        rois (Variable): ROIs (Regions of Interest) to pool over.
-        """
+        pooled_height (integer): ${pooled_height_comment} Default: 1
-        ${comment}
+        pooled_width (integer): ${pooled_width_comment} Default: 1
+        spatial_scale (float): ${spatial_scale_comment} Default: 1.0
-        Args:
-            input (Variable): ${x_comment}
+    Returns:
-            rois (Variable): ROIs (Regions of Interest) to pool over.
+        Variable: ${out_comment}.
-            pooled_height (integer): ${pooled_height_comment} Default: 1
-            pooled_width (integer): ${pooled_width_comment} Default: 1
+    Examples:
-            spatial_scale (float): ${spatial_scale_comment} Default: 1.0
+        .. code-block:: python
-        Returns:
+            pool_out = fluid.layers.roi_pool(input=x, rois=rois, pooled_height=7, pooled_width=7, spatial_scale=1.0)
-            Variable: ${out_comment}.
+    """
+    helper = LayerHelper('roi_pool', **locals())
-        Examples:
+    dtype = helper.input_dtype()
-            .. code-block:: python
+    pool_out = helper.create_variable_for_type_inference(dtype)
+    argmaxes = helper.create_variable_for_type_inference(dtype='int32')
-                pool_out = fluid.layers.roi_pool(input=x, rois=rois, 7, 7, 1.0)
+    helper.append_op(
-        """
+        type="roi_pool",
-        helper = LayerHelper('roi_pool', **locals())
+        inputs={"X": input,
-        dtype = helper.input_dtype()
+                "ROIs": rois},
-        pool_out = helper.create_variable_for_type_inference(dtype)
+        outputs={"Out": pool_out,
-        argmaxes = helper.create_variable_for_type_inference(dtype='int32')
+                 "Argmax": argmaxes},
-        helper.append_op(
+        attrs={
-            type="roi_pool",
+            "pooled_height": pooled_height,
-            inputs={"X": input,
+            "pooled_width": pooled_width,
-                    "ROIs": rois},
+            "spatial_scale": spatial_scale
-            outputs={"Out": pool_out,
+        })
-                     "Argmax": argmaxes},
+    return pool_out
-            attrs={
-                "pooled_height": pooled_height,
-                "pooled_width": pooled_width,
-                "spatial_scale": spatial_scale
-            })
-        return pool_out
 @templatedoc()

--- a/python/paddle/fluid/layers/ops.py
+++ b/python/paddle/fluid/layers/ops.py
@@ -100,26 +100,27 @@ Examples:
    >>> result = fluid.layers.hard_shrink(x=data, threshold=0.3)
 """
-if os.name != 'nt':
+__all__ += ['cumsum']
-    __all__ += ['cumsum']
+_cum_sum_ = generate_layer_fn('cumsum')
-    _cum_sum_ = generate_layer_fn('cumsum')
-    def cumsum(x, axis=None, exclusive=None, reverse=None):
+def cumsum(x, axis=None, exclusive=None, reverse=None):
-        locals_var = locals().keys()
+    locals_var = locals().keys()
-        kwargs = dict()
+    kwargs = dict()
-        for name in locals_var:
+    for name in locals_var:
-            val = locals()[name]
+        val = locals()[name]
-            if val is not None:
+        if val is not None:
-                kwargs[name] = val
+            kwargs[name] = val
-        return _cum_sum_(**kwargs)
+    return _cum_sum_(**kwargs)
-    cumsum.__doc__ = _cum_sum_.__doc__ + """
-    Examples:
+cumsum.__doc__ = _cum_sum_.__doc__ + """
+Examples:
-        >>> data = fluid.layers.data(name="input", shape=[32, 784])
-        >>> result = fluid.layers.cumsum(data, axis=0)
+    >>> data = fluid.layers.data(name="input", shape=[32, 784])
-    """
+    >>> result = fluid.layers.cumsum(data, axis=0)
+"""
 __all__ += ['thresholded_relu']

--- a/python/paddle/fluid/transpiler/details/checkport.py
+++ b/python/paddle/fluid/transpiler/details/checkport.py
@@ -34,6 +34,7 @@ def wait_server_ready(endpoints):
    """
    while True:
        all_ok = True
+        not_ready_endpoints = []
        for ep in endpoints:
            ip_port = ep.split(":")
            with closing(socket.socket(socket.AF_INET,
@@ -42,8 +43,11 @@ def wait_server_ready(endpoints):
                result = sock.connect_ex((ip_port[0], int(ip_port[1])))
                if result != 0:
                    all_ok = False
+                    not_ready_endpoints.append(ep)
        if not all_ok:
            sys.stderr.write("pserver not ready, wait 3 sec to retry...\n")
+            sys.stderr.write("not ready endpoints:" + str(not_ready_endpoints) +
+                             "\n")
            sys.stderr.flush()
            time.sleep(3)
        else: