Merge branch 'develop' into resnet50_ut

83ca657f · Tao Luo · 21ee3059 · 35b713c3 · 83ca657f · 83ca657f
12 changed file
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@@ -145,14 +145,14 @@ paddle.fluid.layers.unstack ArgSpec(args=['x', 'axis', 'num'], varargs=None, key
 paddle.fluid.layers.sequence_enumerate ArgSpec(args=['input', 'win_size', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0, None))
 paddle.fluid.layers.expand ArgSpec(args=['x', 'expand_times', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.sequence_concat ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.scale ArgSpec(args=['x', 'scale', 'bias', 'bias_after_scale', 'out', 'act', 'name'], varargs=None, keywords=None, defaults=(1.0, 0.0, True, None, None, None))
-paddle.fluid.layers.elementwise_add ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None))
-paddle.fluid.layers.elementwise_div ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None))
-paddle.fluid.layers.elementwise_sub ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None))
-paddle.fluid.layers.elementwise_mul ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None))
-paddle.fluid.layers.elementwise_max ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None))
-paddle.fluid.layers.elementwise_min ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None))
-paddle.fluid.layers.elementwise_pow ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None))
+paddle.fluid.layers.scale ArgSpec(args=['x', 'scale', 'bias', 'bias_after_scale', 'act', 'name'], varargs=None, keywords=None, defaults=(1.0, 0.0, True, None, None))
+paddle.fluid.layers.elementwise_add ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None))
+paddle.fluid.layers.elementwise_div ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None))
+paddle.fluid.layers.elementwise_sub ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None))
+paddle.fluid.layers.elementwise_mul ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None))
+paddle.fluid.layers.elementwise_max ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None))
+paddle.fluid.layers.elementwise_min ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None))
+paddle.fluid.layers.elementwise_pow ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None))
 paddle.fluid.layers.uniform_random_batch_size_like ArgSpec(args=['input', 'shape', 'dtype', 'input_dim_idx', 'output_dim_idx', 'min', 'max', 'seed'], varargs=None, keywords=None, defaults=('float32', 0, 0, -1.0, 1.0, 0))
 paddle.fluid.layers.gaussian_random ArgSpec(args=['shape', 'mean', 'std', 'seed', 'dtype', 'use_mkldnn'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0, 'float32', False))
 paddle.fluid.layers.sampling_id ArgSpec(args=['x', 'min', 'max', 'seed', 'dtype'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0, 'float32'))
@@ -160,6 +160,12 @@ paddle.fluid.layers.gaussian_random_batch_size_like ArgSpec(args=['input', 'shap
 paddle.fluid.layers.sum ArgSpec(args=['x', 'use_mkldnn'], varargs=None, keywords=None, defaults=(False,))
 paddle.fluid.layers.slice ArgSpec(args=['input', 'axes', 'starts', 'ends'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.layers.shape ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.layers.logical_and ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None))
+paddle.fluid.layers.logical_or ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None))
+paddle.fluid.layers.logical_xor ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None))
+paddle.fluid.layers.logical_not ArgSpec(args=['x', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None))
+paddle.fluid.layers.clip ArgSpec(args=['x', 'min', 'max', 'name'], varargs=None, keywords=None, defaults=(None,))
+paddle.fluid.layers.clip_by_norm ArgSpec(args=['x', 'max_norm', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True))
 paddle.fluid.layers.open_files ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None))
 paddle.fluid.layers.read_file ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None)
@@ -225,12 +231,6 @@ paddle.fluid.layers.is_empty ArgSpec(args=['x', 'cond'], varargs=None, keywords=
 paddle.fluid.layers.mean ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
 paddle.fluid.layers.mul ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
 paddle.fluid.layers.sigmoid_cross_entropy_with_logits ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
-paddle.fluid.layers.clip ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
-paddle.fluid.layers.clip_by_norm ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
-paddle.fluid.layers.logical_and ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
-paddle.fluid.layers.logical_or ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
-paddle.fluid.layers.logical_xor ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
-paddle.fluid.layers.logical_not ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
 paddle.fluid.layers.maxout ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
 paddle.fluid.layers.sigmoid ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.logsigmoid ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))

--- a/paddle/fluid/framework/scope.cc
+++ b/paddle/fluid/framework/scope.cc
@@ -20,13 +20,6 @@ limitations under the License. */
 #include "paddle/fluid/framework/threadpool.h"
 #include "paddle/fluid/string/printf.h"

-// The mutex is not needed by training and inference, only for distribution.
-#if PADDLE_WITH_DISTRIBUTE
-#define WITH_LOCK 1
-#else
-#define WITH_LOCK 0
-#endif
-
 DEFINE_bool(benchmark, false,
            "Doing memory benchmark. It will make deleting scope synchronized, "
            "and add some memory usage logs."
@@ -56,24 +49,18 @@ int64_t GetEagerDeletionThreshold() {
 Scope::~Scope() { DropKids(); }

 Scope& Scope::NewScope() const {
-#if WITH_LOCK
  std::unique_lock<std::mutex> lock(mutex_);
-#endif
  kids_.push_back(new Scope(this));
  return *kids_.back();
 }

 Variable* Scope::Var(const std::string& name) {
-#if WITH_LOCK
  std::unique_lock<std::mutex> lock(mutex_);
-#endif
  return VarInternal(name);
 }

 Variable* Scope::Var(std::string* name) {
-#if WITH_LOCK
  std::unique_lock<std::mutex> lock(mutex_);
-#endif
  auto new_name = string::Sprintf("%p.%d", this, vars_.size());
  if (name != nullptr) {
    *name = new_name;
@@ -82,39 +69,29 @@ Variable* Scope::Var(std::string* name) {
 }

 Variable* Scope::FindVar(const std::string& name) const {
-#if WITH_LOCK
  std::unique_lock<std::mutex> lock(mutex_);
-#endif
  return FindVarInternal(name);
 }

 const Scope* Scope::FindScope(const Variable* var) const {
-#if WITH_LOCK
  std::unique_lock<std::mutex> lock(mutex_);
-#endif
  return FindScopeInternal(var);
 }

 void Scope::DropKids() {
-#if WITH_LOCK
  std::unique_lock<std::mutex> lock(mutex_);
-#endif
  for (Scope* s : kids_) delete s;
  kids_.clear();
 }

 bool Scope::HasKid(const Scope* scope) const {
-#if WITH_LOCK
  std::unique_lock<std::mutex> lock(mutex_);
-#endif
  auto it = std::find(this->kids_.begin(), this->kids_.end(), scope);
  return it != this->kids_.end();
 }

 std::vector<std::string> Scope::LocalVarNames() const {
-#if WITH_LOCK
  std::unique_lock<std::mutex> lock(mutex_);
-#endif
  std::vector<std::string> known_vars;
  known_vars.reserve(this->vars_.size());
  for (auto& p : vars_) {
@@ -124,9 +101,7 @@ std::vector<std::string> Scope::LocalVarNames() const {
 }

 void Scope::DeleteScope(Scope* scope) const {
-#if WITH_LOCK
  std::unique_lock<std::mutex> lock(mutex_);
-#endif
  auto it = std::find(this->kids_.begin(), this->kids_.end(), scope);
  PADDLE_ENFORCE(it != this->kids_.end(), "Cannot find %p as kid scope", scope);
  this->kids_.erase(it);
@@ -139,9 +114,7 @@ void Scope::DeleteScope(Scope* scope) const {
 }

 void Scope::EraseVars(const std::vector<std::string>& var_names) {
-#if WITH_LOCK
  std::unique_lock<std::mutex> lock(mutex_);
-#endif
  std::set<std::string> var_set(var_names.begin(), var_names.end());
  for (auto it = vars_.begin(); it != vars_.end();) {
    if (var_set.find(it->first) != var_set.end()) {
@@ -154,16 +127,12 @@ void Scope::EraseVars(const std::vector<std::string>& var_names) {

 void Scope::Rename(const std::string& origin_name,
                   const std::string& new_name) const {
-#if WITH_LOCK
  std::unique_lock<std::mutex> lock(mutex_);
-#endif
  RenameInternal(origin_name, new_name);
 }

 std::string Scope::Rename(const std::string& origin_name) const {
-#if WITH_LOCK
  std::unique_lock<std::mutex> lock(mutex_);
-#endif
  auto new_name = string::Sprintf("%p.%d", this, vars_.size());
  RenameInternal(origin_name, new_name);
  return new_name;

--- a/paddle/fluid/platform/dynload/cublas.h
+++ b/paddle/fluid/platform/dynload/cublas.h
@@ -55,7 +55,7 @@ extern void *cublas_dso_handle;
  struct DynLoad__##__name {                         \
    template <typename... Args>                      \
    inline cublasStatus_t operator()(Args... args) { \
-      return __name(args...);                        \
+      return ::__name(args...);                      \
    }                                                \
  };                                                 \
  extern DynLoad__##__name __name

--- a/paddle/fluid/platform/dynload/cudnn.h
+++ b/paddle/fluid/platform/dynload/cudnn.h
@@ -13,6 +13,9 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #pragma once
+#define GLOG_NO_ABBREVIATED_SEVERITIES
+#define GOOGLE_GLOG_DLL_DECL
+#include <glog/logging.h>

 #include <cudnn.h>
 #include <mutex>  // NOLINT
@@ -47,13 +50,13 @@ extern void EnforceCUDNNLoaded(const char* fn_name);

 #else

-#define DECLARE_DYNAMIC_LOAD_CUDNN_WRAP(__name)                  \
-  struct DynLoad__##__name {                                     \
-    template <typename... Args>                                  \
-    auto operator()(Args... args) -> decltype(__name(args...)) { \
-      return __name(args...);                                    \
-    }                                                            \
-  };                                                             \
+#define DECLARE_DYNAMIC_LOAD_CUDNN_WRAP(__name)     \
+  struct DynLoad__##__name {                        \
+    template <typename... Args>                     \
+    inline cudnnStatus_t operator()(Args... args) { \
+      return ::__name(args...);                     \
+    }                                               \
+  };                                                \
  extern DynLoad__##__name __name

 #endif

--- a/paddle/fluid/platform/dynload/curand.h
+++ b/paddle/fluid/platform/dynload/curand.h
@@ -44,7 +44,7 @@ extern void *curand_dso_handle;
  struct DynLoad__##__name {                     \
    template <typename... Args>                  \
    curandStatus_t operator()(Args... args) {    \
-      return __name(args...);                    \
+      return ::__name(args...);                  \
    }                                            \
  };                                             \
  extern DynLoad__##__name __name

--- a/paddle/fluid/platform/dynload/dynamic_loader.cc
+++ b/paddle/fluid/platform/dynload/dynamic_loader.cc
@@ -107,7 +107,11 @@ static inline void* GetDsoHandleFromDefaultPath(const std::string& dso_path,
 static inline void* GetDsoHandleFromSearchPath(const std::string& search_root,
                                               const std::string& dso_name,
                                               bool throw_on_error = true) {
+#if !defined(_WIN32)
  int dynload_flags = RTLD_LAZY | RTLD_LOCAL;
+#else
+  int dynload_flags = 0;
+#endif  // !_WIN32
  void* dso_handle = nullptr;

  std::string dlPath = dso_name;
@@ -117,10 +121,15 @@ static inline void* GetDsoHandleFromSearchPath(const std::string& search_root,
    // search xxx.so from custom path
    dlPath = join(search_root, dso_name);
    dso_handle = dlopen(dlPath.c_str(), dynload_flags);
+#if !defined(_WIN32)
+    auto errorno = dlerror();
+#else
+    auto errorno = GetLastError();
+#endif  // !_WIN32
    // if not found, search from default path
    if (nullptr == dso_handle) {
      LOG(WARNING) << "Failed to find dynamic library: " << dlPath << " ("
-                   << dlerror() << ")";
+                   << errorno << ")";
      if (dlPath.find("nccl") != std::string::npos) {
        std::cout
            << "You may need to install 'nccl2' from NVIDIA official website: "
@@ -139,10 +148,15 @@ static inline void* GetDsoHandleFromSearchPath(const std::string& search_root,
      "export LD_LIBRARY_PATH=... \n Note: After Mac OS 10.11, "
      "using the DYLD_LIBRARY_PATH is impossible unless System "
      "Integrity Protection (SIP) is disabled.";
+#if !defined(_WIN32)
+  auto errorno = dlerror();
+#else
+  auto errorno = GetLastError();
+#endif  // !_WIN32
  if (throw_on_error) {
-    PADDLE_ENFORCE(nullptr != dso_handle, error_msg, dlPath, dlerror());
+    PADDLE_ENFORCE(nullptr != dso_handle, error_msg, dlPath, errorno);
  } else if (nullptr == dso_handle) {
-    LOG(WARNING) << string::Sprintf(error_msg, dlPath, dlerror());
+    LOG(WARNING) << string::Sprintf(error_msg, dlPath, errorno);
  }

  return dso_handle;

--- a/paddle/scripts/paddle_build.sh
+++ b/paddle/scripts/paddle_build.sh
@@ -395,7 +395,7 @@ EOF
        ctest --output-on-failure -j $1     
        # make install should also be test when unittest 
        make install -j 8
-        pip install /usr/local/opt/paddle/share/wheels/*.whl
+        pip install ${INSTALL_PREFIX:-/paddle/build}/opt/paddle/share/wheels/*.whl
        if [[ ${WITH_FLUID_ONLY:-OFF} == "OFF" ]] ; then
            paddle version
        fi
@@ -750,7 +750,7 @@ function main() {
        cmake_gen ${PYTHON_ABI:-""}
        build
        run_test
-        assert_api_not_changed
+        assert_api_not_changed ${PYTHON_ABI:-""}
        ;;
      *)
        print_usage

--- a/python/paddle/fluid/clip.py
+++ b/python/paddle/fluid/clip.py
@@ -271,7 +271,8 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr):
                    "All parameters' 'clip_norm' of a same group should be the same"
                )

-        local_norm_var = layers.reduce_sum(input=layers.pow(x=grad, factor=2.0))
+        square = grad * grad
+        local_norm_var = layers.cast(layers.reduce_sum(input=square), 'float64')
        context[self.group_name].append(local_norm_var)

        self.context = context
@@ -281,6 +282,7 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr):
        if group_scale_name not in self.context:
            group_norm_var = layers.sums(input=self.context[self.group_name])
            group_norm_var = layers.sqrt(x=group_norm_var)
+            group_norm_var = layers.cast(group_norm_var, 'float32')
            clip_var = self.context[self.group_name + "_clip"]
            group_scale_var = layers.elementwise_div(
                x=clip_var,

--- a/python/paddle/fluid/layers/control_flow.py
+++ b/python/paddle/fluid/layers/control_flow.py
@@ -21,7 +21,7 @@ from .. import core
 from ..framework import Program, Variable, Operator
 from ..layer_helper import LayerHelper, unique_name
 from ..initializer import force_init_on_cpu
-from .ops import logical_and, logical_not, logical_or
+from .nn import logical_and, logical_not, logical_or
 import numpy
 import warnings
 import six

--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -51,7 +51,9 @@ __all__ = [
    'expand', 'sequence_concat', 'scale', 'elementwise_add', 'elementwise_div',
    'elementwise_sub', 'elementwise_mul', 'elementwise_max', 'elementwise_min',
    'elementwise_pow', 'uniform_random_batch_size_like', 'gaussian_random',
-    'sampling_id', 'gaussian_random_batch_size_like', 'sum', 'slice', 'shape'
+    'sampling_id', 'gaussian_random_batch_size_like', 'sum', 'slice', 'shape',
+    'logical_and', 'logical_or', 'logical_xor', 'logical_not', 'clip',
+    'clip_by_norm'
 ]


@@ -953,8 +955,8 @@ def cross_entropy(input, label, soft_label=False, ignore_index=-100):
        soft_label (bool): a flag indicating whether to
                                           interpretate the given labels as soft
                                           labels. Default: `False`.
-        ignore_index (int): Specifies a target value that is ignored and does 
-                            not contribute to the input gradient. Only valid 
+        ignore_index (int): Specifies a target value that is ignored and does
+                            not contribute to the input gradient. Only valid
                            if soft_label is set to False. Default: -100

    Returns:
@@ -2714,20 +2716,20 @@ def sequence_pad(x, pad_value, maxlen=None):

    Args:
        x(Variable): Input variable which should contain lod information.
-        pad_value(Variable): The Variable that holds values that will be fill 
-            into padded steps. It can be a scalar or a tensor whose shape 
-            equals to time steps in sequences. If it's a scalar, it will be 
+        pad_value(Variable): The Variable that holds values that will be fill
+            into padded steps. It can be a scalar or a tensor whose shape
+            equals to time steps in sequences. If it's a scalar, it will be
            automatically broadcasted to the shape of time step.
-        maxlen(int, default None): The length of padded sequences. It can be 
-            None or any positive int. When it is None, all sequences will be 
-            padded up to the length of the longest one among them; when it a 
-            certain positive value, it must be greater than the length of the 
+        maxlen(int, default None): The length of padded sequences. It can be
+            None or any positive int. When it is None, all sequences will be
+            padded up to the length of the longest one among them; when it a
+            certain positive value, it must be greater than the length of the
            longest original sequence."
-    
+
    Returns:
-        Variable: The padded sequence batch and the original lengths before 
+        Variable: The padded sequence batch and the original lengths before
                  padding. All sequences has the same length.
-    
+
    Examples:
        .. code-block:: python

@@ -4343,8 +4345,8 @@ def softmax_with_cross_entropy(logits,
            soft_label is set to true, Label is a Tensor<float/double> with
        soft_label (bool): A flag to indicate whether to interpretate the given
            labels as soft labels. By default, `soft_label` is set to False.
-        ignore_index (int): Specifies a target value that is ignored and does 
-                            not contribute to the input gradient. Only valid 
+        ignore_index (int): Specifies a target value that is ignored and does
+                            not contribute to the input gradient. Only valid
                            if soft_label is set to False. Default: -100

    Returns:
@@ -4601,14 +4603,14 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=True, name=None):

 def squeeze(input, axes, name=None):
    """
-    Remove single-dimensional entries from the shape of a tensor. Takes a 
-    parameter axes with a list of axes to squeeze. If axes is not provided, all 
-    the single dimensions will be removed from the shape. If an axis is 
+    Remove single-dimensional entries from the shape of a tensor. Takes a
+    parameter axes with a list of axes to squeeze. If axes is not provided, all
+    the single dimensions will be removed from the shape. If an axis is
    selected with shape entry not equal to one, an error is raised.
-        
+
    Examples:
    Case 1:
-      Given 
+      Given
        X.shape = (1, 3, 1, 5)
      and
        axes = [0]
@@ -4617,11 +4619,11 @@ def squeeze(input, axes, name=None):
      Case 2:
        Given
          X.shape = (1, 3, 1, 5)
-        and 
+        and
          axes = []
        we get:
          Out.shape = (3, 5)
-    
+
    Args:
        input (Variable): The input variable to be squeezed.
        axes (list): List of integers, indicating the dimensions to be squeezed.
@@ -4651,14 +4653,14 @@ def squeeze(input, axes, name=None):

 def unsqueeze(input, axes, name=None):
    """
-    Insert single-dimensional entries to the shape of a tensor. Takes one 
-    required argument axes, a list of dimensions that will be inserted. 
-    Dimension indices in axes are as seen in the output tensor. 
+    Insert single-dimensional entries to the shape of a tensor. Takes one
+    required argument axes, a list of dimensions that will be inserted.
+    Dimension indices in axes are as seen in the output tensor.

-    For example: 
-      Given a tensor such that tensor with shape [3, 4, 5], 
+    For example:
+      Given a tensor such that tensor with shape [3, 4, 5],
      then Unsqueezed tensor with axes=[0, 4] has shape [1, 3, 4, 5, 1].
-    
+
    Args:
        input (Variable): The input variable to be unsqueezed.
        axes (list): List of integers, indicating the dimensions to be inserted.
@@ -5757,39 +5759,39 @@ def pad2d(input,
    Example:

      Given that X is a channel of image from input:
-      
+
      X = [[1, 2, 3],
           [4, 5, 6]]
-      
+
      Case 0:
-      
+
        paddings = [0, 1, 2, 3],
        mode = 'constant'
        pad_value = 0
-        
+
        Out = [[0, 0, 1, 2, 3, 0, 0, 0]
               [0, 0, 4, 5, 6, 0, 0, 0]
               [0, 0, 0, 0, 0, 0, 0, 0]]
-      
+
      Case 1:
-      
+
        paddings = [0, 1, 2, 1],
        mode = 'reflect'
-        
+
        Out = [[3, 2, 1, 2, 3, 2]
               [6, 5, 4, 5, 6, 5]
               [3, 2, 1, 2, 3, 2]]
-        
+
      Case 2:
-      
+
        paddings = [0, 1, 2, 1],
        mode = 'edge'
-        
+
        Out = [[1, 1, 1, 2, 3, 3]
               [4, 4, 4, 5, 6, 6]
               [4, 4, 4, 5, 6, 6]]
-    
-  
+
+
    Args:
        input (Variable): The input image with [N, C, H, W] format or [N, H, W, C] format.
        paddings (tuple|list): The padding size. If padding is a tuple, it must
@@ -5988,7 +5990,7 @@ def prelu(x, mode, param_attr=None, name=None):
 		       channel:elements in a channel share same weight
 		       element:each element has a weight
 	name(str|None): A name for this layer(optional). If set None, the layer
-                        will be named automatically. 
+                        will be named automatically.

    Returns:
        Variable: The output tensor with the same shape as input.
@@ -6166,10 +6168,10 @@ def flatten(x, axis=1, name=None):
 def sequence_enumerate(input, win_size, pad_value=0, name=None):
    """
    Generate a new sequence for the input index sequence, which enumerates all the
-    sub-sequences with length `win_size` of the input. 
+    sub-sequences with length `win_size` of the input.
    The enumerated sequence has the same 1st dimension with variable `input`, and
    the 2nd dimension is `win_size`, padded by `pad_value` if necessary in generation.
-    
+
    Examples:
    Case 1:
      Input:
@@ -6296,20 +6298,20 @@ def unstack(x, axis=0, num=None):
    **UnStack Layer**

    This layer unstacks input :code:`x` into several tensors along axis.
-   
+
    If :code:`axis` < 0, it would be replaced with :code:`axis+rank(x)`.
    If :code:`num` is None, it would be inferred from :code:`x.shape[axis]`,
    and if :code:`x.shape[axis]` <= 0 or is unknown, :code:`ValueError` is
-    raised. 
+    raised.

    Args:
-        x (Variable): Input variable. 
+        x (Variable): Input variable.
        axis (int): The axis along which the input is unstacked.
        num (int|None): The number of output variables.
-    
+
    Returns:
        list(Variable): The unstacked variables.
-    
+
    """

    helper = LayerHelper('unstack', **locals())
@@ -6342,21 +6344,21 @@ def expand(x, expand_times, name=None):
    .. code-block:: text

        Input(X) is a 3-D tensor with shape [2, 3, 1]:
-        
+
                [
                   [[1], [2], [3]],
                   [[4], [5], [6]]
                ]
-        
+
        Attr(expand_times):  [1, 2, 2]
-        
+
        Output(Out) is a 3-D tensor with shape [2, 6, 2]:
-        
+
                [
                    [[1, 1], [2, 2], [3, 3], [1, 1], [2, 2], [3, 3]],
                    [[4, 4], [5, 5], [6, 6], [4, 4], [5, 5], [6, 6]]
                ]
-        
+
    Args:
        x (Variable): A tensor with rank in [1, 6].
        expand_times (list|tuple): Expand times number for each dimension.
@@ -6630,14 +6632,12 @@ def _elementwise_op(helper):
    assert y is not None, 'y cannot be None in {}'.format(op_type)
    axis = helper.kwargs.get('axis', -1)
    use_mkldnn = helper.kwargs.get('use_mkldnn', False)
-    out = helper.kwargs.get('out', None)
-    if out is None:
-        name = helper.kwargs.get('name', None)
-        if name is None:
-            out = helper.create_tmp_variable(dtype=x.dtype)
-        else:
-            out = helper.create_variable(
-                name=name, dtype=x.dtype, persistable=False)
+    name = helper.kwargs.get('name', None)
+    if name is None:
+        out = helper.create_tmp_variable(dtype=x.dtype)
+    else:
+        out = helper.create_variable(
+            name=name, dtype=x.dtype, persistable=False)

    helper.append_op(
        type=op_type,
@@ -6650,13 +6650,7 @@ def _elementwise_op(helper):


 @templatedoc()
-def scale(x,
-          scale=1.0,
-          bias=0.0,
-          bias_after_scale=True,
-          out=None,
-          act=None,
-          name=None):
+def scale(x, scale=1.0, bias=0.0, bias_after_scale=True, act=None, name=None):
    """
    ${comment}

@@ -6665,21 +6659,19 @@ def scale(x,
        scale(${scale_type}): ${scale_comment}
        bias(${bias_type}): ${bias_comment}
        bias_after_scale(${bias_after_scale_type}): ${bias_after_scale_comment}
-        out(Tensor): Output tensor.
        act(basestring|None): Activation applied to the output.
-        name(basestring|None): Name of the output. 
+        name(basestring|None): Name of the output.

    Returns:
        out(${out_type}): ${out_comment}
    """

    helper = LayerHelper('scale', **locals())
-    if out is None:
-        if name is None:
-            out = helper.create_tmp_variable(dtype=x.dtype)
-        else:
-            out = helper.create_variable(
-                name=name, dtype=x.dtype, persistable=False)
+    if name is None:
+        out = helper.create_tmp_variable(dtype=x.dtype)
+    else:
+        out = helper.create_variable(
+            name=name, dtype=x.dtype, persistable=False)

    helper.append_op(
        type='scale',
@@ -6693,73 +6685,31 @@ def scale(x,
    return helper.append_activation(out)


-def elementwise_add(x,
-                    y,
-                    out=None,
-                    axis=-1,
-                    use_mkldnn=False,
-                    act=None,
-                    name=None):
+def elementwise_add(x, y, axis=-1, use_mkldnn=False, act=None, name=None):
    return _elementwise_op(LayerHelper('elementwise_add', **locals()))


-def elementwise_div(x,
-                    y,
-                    out=None,
-                    axis=-1,
-                    use_mkldnn=False,
-                    act=None,
-                    name=None):
+def elementwise_div(x, y, axis=-1, use_mkldnn=False, act=None, name=None):
    return _elementwise_op(LayerHelper('elementwise_div', **locals()))


-def elementwise_sub(x,
-                    y,
-                    out=None,
-                    axis=-1,
-                    use_mkldnn=False,
-                    act=None,
-                    name=None):
+def elementwise_sub(x, y, axis=-1, use_mkldnn=False, act=None, name=None):
    return _elementwise_op(LayerHelper('elementwise_sub', **locals()))


-def elementwise_mul(x,
-                    y,
-                    out=None,
-                    axis=-1,
-                    use_mkldnn=False,
-                    act=None,
-                    name=None):
+def elementwise_mul(x, y, axis=-1, use_mkldnn=False, act=None, name=None):
    return _elementwise_op(LayerHelper('elementwise_mul', **locals()))


-def elementwise_max(x,
-                    y,
-                    out=None,
-                    axis=-1,
-                    use_mkldnn=False,
-                    act=None,
-                    name=None):
+def elementwise_max(x, y, axis=-1, use_mkldnn=False, act=None, name=None):
    return _elementwise_op(LayerHelper('elementwise_max', **locals()))


-def elementwise_min(x,
-                    y,
-                    out=None,
-                    axis=-1,
-                    use_mkldnn=False,
-                    act=None,
-                    name=None):
+def elementwise_min(x, y, axis=-1, use_mkldnn=False, act=None, name=None):
    return _elementwise_op(LayerHelper('elementwise_min', **locals()))


-def elementwise_pow(x,
-                    y,
-                    out=None,
-                    axis=-1,
-                    use_mkldnn=False,
-                    act=None,
-                    name=None):
+def elementwise_pow(x, y, axis=-1, use_mkldnn=False, act=None, name=None):
    return _elementwise_op(LayerHelper('elementwise_pow', **locals()))


@@ -6771,7 +6721,168 @@ for func in [
    func.__doc__ = _generate_doc_string_(
        op_proto,
        additional_args_lines=[
-            "out (Tensor): The output tensor of elementwise op.",
            "act (basestring|None): Activation applied to the output.",
            "name (basestring|None): Name of the output."
        ])
+
+
+def _logical_op(op_name, x, y, out=None, name=None, binary_op=True):
+    helper = LayerHelper(op_name, **locals())
+
+    if binary_op:
+        assert x.dtype == y.dtype
+
+    if out is None:
+        if name is None:
+            out = helper.create_tmp_variable(dtype=x.dtype)
+        else:
+            out = helper.create_variable(
+                name=name, dtype=x.dtype, persistable=False)
+
+    if binary_op:
+        helper.append_op(
+            type=op_name, inputs={"X": x,
+                                  "Y": y}, outputs={"Out": out})
+    else:
+        helper.append_op(type=op_name, inputs={"X": x}, outputs={"Out": out})
+
+    return out
+
+
+@templatedoc()
+def logical_and(x, y, out=None, name=None):
+    """
+    ${comment}
+
+    Args:
+        x(${x_type}): ${x_comment}
+        y(${y_type}): ${y_comment}
+        out(Tensor): Output tensor of logical operation.
+        name(basestring|None): Name of the output.
+
+    Returns:
+        out(${out_type}): ${out_comment}
+    """
+
+    return _logical_op(
+        op_name="logical_and", x=x, y=y, name=name, out=out, binary_op=True)
+
+
+@templatedoc()
+def logical_or(x, y, out=None, name=None):
+    """
+    ${comment}
+
+    Args:
+        x(${x_type}): ${x_comment}
+        y(${y_type}): ${y_comment}
+        out(Tensor): Output tensor of logical operation.
+        name(basestring|None): Name of the output.
+
+    Returns:
+        out(${out_type}): ${out_comment}
+    """
+
+    return _logical_op(
+        op_name="logical_or", x=x, y=y, name=name, out=out, binary_op=True)
+
+
+@templatedoc()
+def logical_xor(x, y, out=None, name=None):
+    """
+    ${comment}
+
+    Args:
+        x(${x_type}): ${x_comment}
+        y(${y_type}): ${y_comment}
+        out(Tensor): Output tensor of logical operation.
+        name(basestring|None): Name of the output.
+
+    Returns:
+        out(${out_type}): ${out_comment}
+    """
+
+    return _logical_op(
+        op_name="logical_xor", x=x, y=y, name=name, out=out, binary_op=True)
+
+
+@templatedoc()
+def logical_not(x, out=None, name=None):
+    """
+    ${comment}
+
+    Args:
+        x(${x_type}): ${x_comment}
+        out(Tensor): Output tensor of logical operation.
+        name(basestring|None): Name of the output.
+
+    Returns:
+        out(${out_type}): ${out_comment}
+    """
+
+    return _logical_op(
+        op_name="logical_not", x=x, y=None, name=name, out=out, binary_op=False)
+
+
+@templatedoc()
+def clip(x, min, max, name=None):
+    """
+    ${comment}
+
+    Args:
+        x(${x_type}): ${x_comment}
+        min(${min_type}): ${min_comment}
+        max(${max_type}): ${max_comment}
+        name(basestring|None): Name of the output.
+
+    Returns:
+        out(${out_type}): ${out_comment}
+    """
+
+    helper = LayerHelper("clip", **locals())
+
+    if name is None:
+        out = helper.create_tmp_variable(dtype=x.dtype)
+    else:
+        out = helper.create_variable(
+            name=name, dtype=x.dtype, persistable=False)
+
+    helper.append_op(
+        type="clip",
+        inputs={"X": x},
+        attrs={"min": min,
+               "max": max},
+        outputs={"Out": out})
+
+    return out
+
+
+@templatedoc()
+def clip_by_norm(x, max_norm, name=None):
+    """
+    ${comment}
+
+    Args:
+        x(${x_type}): ${x_comment}
+        max_norm(${max_norm_type}): ${max_norm_comment}
+        name(basestring|None): Name of the output.
+
+    Returns:
+        out(${out_type}): ${out_comment}
+    """
+
+    helper = LayerHelper("clip_by_norm", **locals())
+
+    if name is None:
+        out = helper.create_tmp_variable(dtype=x.dtype)
+    else:
+        out = helper.create_variable(
+            name=name, dtype=x.dtype, persistable=False)
+
+    helper.append_op(
+        type="clip_by_norm",
+        inputs={"X": x},
+        attrs={"max_norm": max_norm},
+        outputs={"Out": out})
+
+    return out
--- a/python/paddle/fluid/layers/ops.py
+++ b/python/paddle/fluid/layers/ops.py
@@ -39,12 +39,6 @@ __all__ = [
    'mean',
    'mul',
    'sigmoid_cross_entropy_with_logits',
-    'clip',
-    'clip_by_norm',
-    'logical_and',
-    'logical_or',
-    'logical_xor',
-    'logical_not',
    'maxout',
 ]


--- a/python/paddle/fluid/tests/unittests/test_dist_se_resnext.py
+++ b/python/paddle/fluid/tests/unittests/test_dist_se_resnext.py
@@ -22,7 +22,7 @@ class TestDistSeResneXt2x2(TestDistBase):
        self._sync_mode = True
        self._use_reader_alloc = False

-    def test_dist_train(self):
+    def no_test_dist_train(self):
        self.check_with_place("dist_se_resnext.py", delta=100)


@@ -40,7 +40,7 @@ class TestDistSeResneXt2x2Async(TestDistBase):
        self._sync_mode = False
        self._use_reader_alloc = False

-    def test_dist_train(self):
+    def no_test_dist_train(self):
        self.check_with_place("dist_se_resnext.py", delta=100)