Merge branch 'develop' into kmax_score_layer

89ea288c · caoying03 · 927adb4a · 559b0224 · 89ea288c · 89ea288c
43 changed file
--- a/cmake/cpplint.cmake
+++ b/cmake/cpplint.cmake
@@ -56,11 +56,14 @@ macro(add_style_check_target TARGET_NAME)
                # cpplint code style
                get_filename_component(base_filename ${filename} NAME)
                set(CUR_GEN ${CMAKE_CURRENT_BINARY_DIR}/${base_filename}.cpplint)
-                add_custom_command(TARGET ${TARGET_NAME} PRE_BUILD
+                add_custom_command(OUTPUT ${CUR_GEN} PRE_BUILD
                    COMMAND "${PYTHON_EXECUTABLE}" "${PROJ_ROOT}/paddle/scripts/cpplint.py"
                            "--filter=${STYLE_FILTER}"
                            "--write-success=${CUR_GEN}" ${filename}
+                    DEPENDS ${filename} ${PROJ_ROOT}/paddle/scripts/cpplint.py
                    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
+                add_custom_target(${base_filename}.cpplint DEPENDS ${CUR_GEN})
+                add_dependencies(${TARGET_NAME} ${base_filename}.cpplint)
            endif()
        endforeach()
    endif()

--- a/cmake/util.cmake
+++ b/cmake/util.cmake
@@ -118,7 +118,6 @@ endfunction()
 macro(add_unittest_without_exec TARGET_NAME)
    add_executable(${TARGET_NAME} ${ARGN})
    link_paddle_test(${TARGET_NAME})
-    add_style_check_target(${TARGET_NAME} ${ARGN})
 endmacro()
 # add_unittest

--- a/paddle/framework/CMakeLists.txt
+++ b/paddle/framework/CMakeLists.txt
@@ -12,13 +12,15 @@ cc_test(variable_test SRCS variable_test.cc)
 cc_library(scope SRCS scope.cc)
 cc_test(scope_test SRCS scope_test.cc DEPS scope)
-proto_library(attr_type SRCS attr_type.proto)
+proto_library(attribute_proto SRCS attribute.proto)
-proto_library(op_proto SRCS op_proto.proto DEPS attr_type)
+proto_library(op_proto SRCS op_proto.proto DEPS attribute_proto)
-proto_library(op_desc SRCS op_desc.proto DEPS attr_type)
+proto_library(op_desc SRCS op_desc.proto DEPS attribute_proto)
 cc_test(op_proto_test SRCS op_proto_test.cc DEPS op_proto protobuf)
 cc_test(op_desc_test SRCS op_desc_test.cc DEPS op_desc protobuf)
-cc_library(operator SRCS operator.cc DEPS op_desc device_context tensor scope)
+cc_library(attribute SRCS attribute.cc DEPS op_desc op_proto)
+cc_library(operator SRCS operator.cc DEPS op_desc device_context tensor scope attribute)
 cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry)
 cc_library(grad_op_builder SRCS grad_op_builder.cc DEPS op_proto operator)
@@ -26,7 +28,7 @@ cc_library(op_registry SRCS op_registry.cc DEPS op_desc grad_op_builder)
 cc_test(op_registry_test SRCS op_registry_test.cc DEPS op_registry)
 cc_test(grad_op_builder_test SRCS grad_op_builder_test.cc DEPS grad_op_builder op_registry add_op)
-py_proto_compile(framework_py_proto SRCS attr_type.proto op_proto.proto op_desc.proto)
+py_proto_compile(framework_py_proto SRCS attribute.proto op_proto.proto op_desc.proto)
 # Generate an empty __init__.py to make framework_py_proto as a valid python module.
 add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
 add_dependencies(framework_py_proto framework_py_proto_init)

--- a/paddle/framework/attribute.cc
+++ b/paddle/framework/attribute.cc
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "paddle/framework/attribute.h"
+#include <vector>
+namespace paddle {
+namespace framework {
+template <>
+AttrType AttrTypeID<int>() {
+  return INT;
+}
+template <>
+AttrType AttrTypeID<float>() {
+  return FLOAT;
+}
+template <>
+AttrType AttrTypeID<std::string>() {
+  return STRING;
+}
+template <>
+AttrType AttrTypeID<std::vector<int>>() {
+  return INTS;
+}
+template <>
+AttrType AttrTypeID<std::vector<float>>() {
+  return FLOATS;
+}
+template <>
+AttrType AttrTypeID<std::vector<std::string>>() {
+  return STRINGS;
+}
+Attribute GetAttrValue(const AttrDesc& attr_desc) {
+  switch (attr_desc.type()) {
+    case paddle::framework::AttrType::INT: {
+      return attr_desc.i();
+    }
+    case paddle::framework::AttrType::FLOAT: {
+      return attr_desc.f();
+    }
+    case paddle::framework::AttrType::STRING: {
+      return attr_desc.s();
+    }
+    case paddle::framework::AttrType::INTS: {
+      std::vector<int> val(attr_desc.ints_size());
+      for (int i = 0; i < attr_desc.ints_size(); ++i) {
+        val[i] = attr_desc.ints(i);
+      }
+      return val;
+    }
+    case paddle::framework::AttrType::FLOATS: {
+      std::vector<float> val(attr_desc.floats_size());
+      for (int i = 0; i < attr_desc.floats_size(); ++i) {
+        val[i] = attr_desc.floats(i);
+      }
+      return val;
+    }
+    case paddle::framework::AttrType::STRINGS: {
+      std::vector<std::string> val(attr_desc.strings_size());
+      for (int i = 0; i < attr_desc.strings_size(); ++i) {
+        val[i] = attr_desc.strings(i);
+      }
+      return val;
+    }
+  }
+  PADDLE_ENFORCE(false, "Unknown OpDesc::AttrDesc::type !");
+  return boost::blank();
+}
+}  // namespace framework
+}  // namespace paddle
--- a/paddle/framework/attr_checker.h
+++ b/paddle/framework/attr_checker.h
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
 #pragma once
 #include <boost/variant.hpp>
@@ -6,6 +20,9 @@
 #include <unordered_map>
 #include <unordered_set>
 #include <vector>
+#include "paddle/framework/attribute.pb.h"
+#include "paddle/framework/op_desc.pb.h"
 #include "paddle/platform/enforce.h"
 namespace paddle {
@@ -14,13 +31,19 @@ namespace framework {
 typedef boost::variant<boost::blank, int, float, std::string, std::vector<int>,
                       std::vector<float>, std::vector<std::string>>
    Attribute;
 typedef std::unordered_map<std::string, Attribute> AttributeMap;
+template <typename T>
+AttrType AttrTypeID();
+Attribute GetAttrValue(const AttrDesc& attr_desc);
 // check whether a value(attribute) fit a certain limit
 template <typename T>
 class LargerThanChecker {
 public:
-  LargerThanChecker(T lower_bound) : lower_bound_(lower_bound) {}
+  explicit LargerThanChecker(T lower_bound) : lower_bound_(lower_bound) {}
  void operator()(T& value) const {
    PADDLE_ENFORCE(value > lower_bound_, "larger_than check fail");
  }
@@ -35,7 +58,8 @@ class LargerThanChecker {
 template <typename T>
 class DefaultValueSetter {
 public:
-  DefaultValueSetter(T default_value) : default_value_(default_value) {}
+  explicit DefaultValueSetter(T default_value)
+      : default_value_(default_value) {}
  void operator()(T& value) const { value = default_value_; }
 private:
@@ -78,7 +102,8 @@ class TypedAttrChecker {
  typedef std::function<void(T&)> ValueChecker;
 public:
-  TypedAttrChecker(const std::string& attr_name) : attr_name_(attr_name) {}
+  explicit TypedAttrChecker(const std::string& attr_name)
+      : attr_name_(attr_name) {}
  TypedAttrChecker& InEnum(const std::unordered_set<T>& range) {
    value_checkers_.push_back(EnumInContainer<T>(range));

--- a/paddle/framework/attr_type.proto
+++ b/paddle/framework/attr_type.proto
--- a/paddle/framework/backward.cc
+++ b/paddle/framework/backward.cc
@@ -59,19 +59,17 @@ std::shared_ptr<OperatorBase> BackwardRecursive(
  //  If all input gradients of forwarding operator do not need to calculate,
  //  just return an NOP. Not return null ptr because NOP does not take
  //  too much time for calculation, but it is useful for simplifying logic.
-  if (AllInSet(forwardOp.inputs_, OperatorBase::GRAD_VAR_SUFFIX(),
+  if (AllInSet(forwardOp.inputs_, kGradVarSuffix, no_grad_names)) {
-               no_grad_names)) {
    return NOP();
  }
  //  All output gradients of forwarding operator do not need to calculate.
  //  Then all input gradients cannot be computed at all, and we put them into
  //  `no_grad_names` set. Return an NOP.
-  if (AllInSet(forwardOp.outputs_, OperatorBase::GRAD_VAR_SUFFIX(),
+  if (AllInSet(forwardOp.outputs_, kGradVarSuffix, no_grad_names)) {
-               no_grad_names)) {
    for (auto& name : forwardOp.inputs_) {
      // Mark all input is not need
-      no_grad_names.insert(name + OperatorBase::GRAD_VAR_SUFFIX());
+      no_grad_names.insert(name + kGradVarSuffix);
    }
    return NOP();
  }
@@ -134,9 +132,9 @@ std::shared_ptr<OperatorBase> BackwardRecursive(
    std::shared_ptr<OperatorBase> grad_op = OpRegistry::CreateGradOp(forwardOp);
    for (std::string& grad_input : grad_op->inputs_) {
      if (no_grad_names.count(grad_input)) {
-        std::string prefix = grad_input.substr(
+        std::string prefix =
-            0, grad_input.size() - OperatorBase::GRAD_VAR_SUFFIX().size());
+            grad_input.substr(0, grad_input.size() - kGradVarSuffix.size());
-        grad_input = prefix + OperatorBase::ZERO_VAR_SUFFIX();
+        grad_input = prefix + kZeroVarSuffix;
        // If part of input gradient of that operator is not calculated, fill
        // zero variables to that input gradient.
@@ -147,7 +145,7 @@ std::shared_ptr<OperatorBase> BackwardRecursive(
    for (std::string& grad_output : grad_op->outputs_) {
      if (no_grad_names.count(grad_output)) {
-        grad_output = OperatorBase::EMPTY_VAR_NAME();
+        grad_output = kEmptyVarName;
      }
    }
@@ -168,14 +166,14 @@ std::shared_ptr<OperatorBase> Backward(
  std::unordered_set<std::string> no_grad_names;
  no_grad_names.reserve(no_grad_vars.size());
-  no_grad_names.insert(OperatorBase::EMPTY_VAR_NAME() +
+  no_grad_names.insert(kEmptyVarName + kGradVarSuffix);
-                       OperatorBase::GRAD_VAR_SUFFIX());
  for (auto& name : no_grad_vars) {
-    no_grad_names.insert(name + OperatorBase::GRAD_VAR_SUFFIX());
+    no_grad_names.insert(name + kGradVarSuffix);
  }
  size_t uid = 0;
  return BackwardRecursive(forwardOp, no_grad_names, uid);
 }
 }  // namespace framework
 }  // namespace paddle
--- a/paddle/framework/backward_test.cc
+++ b/paddle/framework/backward_test.cc
@@ -78,14 +78,14 @@ class FcOp : public ops::NetOp {
                               {Output("mul_result")}, {}));
    auto b_name = Input("b");
    std::string before_act = "mul_result";
-    if (b_name != EMPTY_VAR_NAME()) {
+    if (b_name != kEmptyVarName) {
      AddOp(OpRegistry::CreateOp("rowwise_add", {Output("mul_result"), b_name},
                                 {Output("add_result")}, {}));
      before_act = "add_result";
    } else {
      auto out_varname = Output("add_result");
-      if (out_varname != EMPTY_VAR_NAME()) {
+      if (out_varname != kEmptyVarName) {
-        this->Rename(out_varname, EMPTY_VAR_NAME());
+        this->Rename(out_varname, kEmptyVarName);
      }
    }
@@ -163,13 +163,12 @@ TEST(Backward, simple_op_grad) {
  ASSERT_NE(fwd, nullptr);
  auto gop = f::OpRegistry::CreateGradOp(*fwd);
  ASSERT_EQ(4UL, gop->inputs_.size());
-  ASSERT_EQ(f::OperatorBase::EMPTY_VAR_NAME(), gop->inputs_[0]);
+  ASSERT_EQ(f::kEmptyVarName, gop->inputs_[0]);
  ASSERT_EQ("rowwise_add_grad", gop->type_);
-  ASSERT_EQ("X" + f::OperatorBase::GRAD_VAR_SUFFIX(), gop->outputs_[0]);
+  ASSERT_EQ("X" + f::kGradVarSuffix, gop->outputs_[0]);
-  ASSERT_EQ("b" + f::OperatorBase::GRAD_VAR_SUFFIX(), gop->outputs_[1]);
+  ASSERT_EQ("b" + f::kGradVarSuffix, gop->outputs_[1]);
-  ASSERT_EQ("X" + f::OperatorBase::GRAD_VAR_SUFFIX(),
+  ASSERT_EQ("X" + f::kGradVarSuffix, gop->Output("X" + f::kGradVarSuffix));
-            gop->Output("X" + f::OperatorBase::GRAD_VAR_SUFFIX()));
 }
 TEST(Backward, simple_op_not_need_grad) {
@@ -177,7 +176,7 @@ TEST(Backward, simple_op_not_need_grad) {
  ASSERT_NE(fwd, nullptr);
  auto gop = f::Backward(*fwd, {"X"});
  ASSERT_EQ(std::find(gop->outputs_.begin(), gop->outputs_.end(),
-                      "X" + f::OperatorBase::GRAD_VAR_SUFFIX()),
+                      "X" + f::kGradVarSuffix),
            gop->outputs_.end());
  auto no_input_gop = f::Backward(*fwd, {"X", "b"});
@@ -210,9 +209,9 @@ TEST(Backward, net_fc_backward_normal) {
 }
 TEST(Backward, net_fc_backward_not_have_b) {
-  std::shared_ptr<f::OperatorBase> fwd = f::OpRegistry::CreateOp(
+  std::shared_ptr<f::OperatorBase> fwd =
-      "fc", {"X", "w", f::OperatorBase::EMPTY_VAR_NAME()},
+      f::OpRegistry::CreateOp("fc", {"X", "w", f::kEmptyVarName},
-      {"mul_result", "add_result", "tmp"}, {});
+                              {"mul_result", "add_result", "tmp"}, {});
  ASSERT_NE(fwd, nullptr);
  std::shared_ptr<f::OperatorBase> gop = f::Backward(*fwd, {});
  ASSERT_TRUE(gop->IsNetOp());
@@ -242,24 +241,21 @@ TEST(Backward, net_input_of_network_not_need_grad) {
  std::unordered_set<std::string> all_output = std::unordered_set<std::string>(
      bwd_net->outputs_.begin(), bwd_net->outputs_.end());
-  all_output.erase(f::OperatorBase::EMPTY_VAR_NAME());
+  all_output.erase(f::kEmptyVarName);
  for (auto &out : {"W1", "b1", "hidden0", "W2", "b2"}) {
-    ASSERT_NE(all_output.find(out + f::OperatorBase::GRAD_VAR_SUFFIX()),
+    ASSERT_NE(all_output.find(out + f::kGradVarSuffix), all_output.end());
-              all_output.end());
  }
  // Not Generated X
-  ASSERT_EQ(all_output.find("X" + f::OperatorBase::GRAD_VAR_SUFFIX()),
+  ASSERT_EQ(all_output.find("X" + f::kGradVarSuffix), all_output.end());
-            all_output.end());
  ASSERT_EQ(2UL, bwd_net->ops_.size());
  ASSERT_TRUE(bwd_net->ops_[1]->IsNetOp());
  auto first_fc_grad = static_cast<ops::NetOp *>(bwd_net->ops_[1].get());
  ASSERT_EQ(3UL, first_fc_grad->ops_.size());
-  ASSERT_EQ(
+  ASSERT_EQ(f::kEmptyVarName,
-      f::OperatorBase::EMPTY_VAR_NAME(),
+            first_fc_grad->ops_[2]->Output("A" + f::kGradVarSuffix));
-      first_fc_grad->ops_[2]->Output("A" + f::OperatorBase::GRAD_VAR_SUFFIX()));
 }
 TEST(Backward, net_shared_weight) {
@@ -311,17 +307,15 @@ TEST(Backward, op_part_of_output_are_not_need) {
  ASSERT_EQ(1UL, fill_zero.inputs_.size());
  ASSERT_EQ("Z", fill_zero.inputs_[0]);
  ASSERT_EQ(1UL, fill_zero.outputs_.size());
-  ASSERT_EQ("Z" + f::OperatorBase::ZERO_VAR_SUFFIX(), fill_zero.outputs_[0]);
+  ASSERT_EQ("Z" + f::kZeroVarSuffix, fill_zero.outputs_[0]);
  auto &d_many_out = *net->ops_[1];
  ASSERT_EQ("many_output_op_grad", d_many_out.type_);
  ASSERT_EQ(1UL + 2UL + 2UL, d_many_out.inputs_.size());  // I/O/OG
-  ASSERT_EQ("Z" + f::OperatorBase::ZERO_VAR_SUFFIX(),
+  ASSERT_EQ("Z" + f::kZeroVarSuffix, d_many_out.Input("z" + f::kGradVarSuffix));
-            d_many_out.Input("z" + f::OperatorBase::GRAD_VAR_SUFFIX()));
+  ASSERT_EQ("Y" + f::kGradVarSuffix, d_many_out.Input("y" + f::kGradVarSuffix));
-  ASSERT_EQ("Y" + f::OperatorBase::GRAD_VAR_SUFFIX(),
+  ASSERT_EQ("X" + f::kGradVarSuffix,
-            d_many_out.Input("y" + f::OperatorBase::GRAD_VAR_SUFFIX()));
+            d_many_out.Output("x" + f::kGradVarSuffix));
-  ASSERT_EQ("X" + f::OperatorBase::GRAD_VAR_SUFFIX(),
-            d_many_out.Output("x" + f::OperatorBase::GRAD_VAR_SUFFIX()));
 }
 TEST(Backward, op_part_of_input_are_not_need) {
@@ -331,12 +325,10 @@ TEST(Backward, op_part_of_input_are_not_need) {
  ASSERT_EQ(grad_mul.type_, "mul_grad");
  ASSERT_EQ(grad_mul.inputs_.size(), 2UL + 1UL + 1UL);
  ASSERT_EQ(grad_mul.outputs_.size(), 2UL);
-  ASSERT_EQ(grad_mul.Output("A" + f::OperatorBase::GRAD_VAR_SUFFIX()),
+  ASSERT_EQ(grad_mul.Output("A" + f::kGradVarSuffix), f::kEmptyVarName);
-            f::OperatorBase::EMPTY_VAR_NAME());
+  ASSERT_EQ(grad_mul.Output("B" + f::kGradVarSuffix), "b" + f::kGradVarSuffix);
-  ASSERT_EQ(grad_mul.Output("B" + f::OperatorBase::GRAD_VAR_SUFFIX()),
+  ASSERT_EQ(grad_mul.Input("Out" + f::kGradVarSuffix),
-            "b" + f::OperatorBase::GRAD_VAR_SUFFIX());
+            "out" + f::kGradVarSuffix);
-  ASSERT_EQ(grad_mul.Input("Out" + f::OperatorBase::GRAD_VAR_SUFFIX()),
-            "out" + f::OperatorBase::GRAD_VAR_SUFFIX());
  ASSERT_EQ(grad_mul.Input("A"), "a");
  ASSERT_EQ(grad_mul.Input("B"), "b");
  ASSERT_EQ(grad_mul.Input("Out"), "out");
@@ -368,23 +360,4 @@ TEST(Backward, linear_net_intermediate_variable_has_no_grad) {
  EXPECT_EQ(bwd_net->ops_[1]->outputs_.size(), 0UL);
  EXPECT_EQ(bwd_net->ops_[2]->inputs_.size(), 0UL);
  EXPECT_EQ(bwd_net->ops_[2]->outputs_.size(), 0UL);
-  /*
-    EXPECT_EQ(grad_fc.Output("X" + f::OperatorBase::GRAD_VAR_SUFFIX()),
-              f::OperatorBase::EMPTY_VAR_NAME());
-  EXPECT_EQ(grad_fc.Output("W" + f::OperatorBase::GRAD_VAR_SUFFIX()),
-    "w3" + f::OperatorBase::GRAD_VAR_SUFFIX());
-  EXPECT_EQ(grad_fc.Output("b" + f::OperatorBase::GRAD_VAR_SUFFIX()),
-    "b3" + f::OperatorBase::GRAD_VAR_SUFFIX());
-  EXPECT_EQ(grad_fc.Output("mul_result" + f::OperatorBase::GRAD_VAR_SUFFIX()),
-  "mul_out3" + f::OperatorBase::GRAD_VAR_SUFFIX());
-  EXPECT_EQ(grad_fc.Input("Out" + f::OperatorBase::GRAD_VAR_SUFFIX()),
-  "out3" + f::OperatorBase::GRAD_VAR_SUFFIX());
-  EXPECT_EQ(grad_fc.Input("X"), "out2");
-  EXPECT_EQ(grad_fc.Input("W"), "w3");
-  EXPECT_EQ(grad_fc.Input("mul_result"), "mul_out3");
-  EXPECT_EQ(grad_fc.Input("add_result"), "tmp_out3");
-  EXPECT_EQ(grad_fc.Input("Out"), "out3");
-  */
 }
--- a/paddle/framework/grad_op_builder.cc
+++ b/paddle/framework/grad_op_builder.cc
@@ -56,8 +56,7 @@ static void TransOpArg(const OperatorBase* src_op, OperatorBase* dst_op,
  for (const auto& arg : src_arg_list) {
    std::string src_name = arg.name();
-    std::string dst_name =
+    std::string dst_name = is_grad ? src_name + kGradVarSuffix : src_name;
-        is_grad ? src_name + OperatorBase::GRAD_VAR_SUFFIX() : src_name;
    (*dst_op->in_out_idxs_)[dst_name] = idx++;
    int src_arg_idx = src_op->in_out_idxs_->at(src_name);
    int src_begin =
@@ -65,10 +64,9 @@ static void TransOpArg(const OperatorBase* src_op, OperatorBase* dst_op,
    int src_end = src_format == nullptr ? src_arg_idx + 1
                                        : src_format->at(src_arg_idx + 1);
    for (int i = src_begin; i < src_end; ++i) {
-      std::string s = is_grad ? src_inout[i] + OperatorBase::GRAD_VAR_SUFFIX()
+      std::string s =
-                              : arg.ignore_gradient()
+          is_grad ? src_inout[i] + kGradVarSuffix
-                                    ? OperatorBase::EMPTY_VAR_NAME()
+                  : (arg.ignore_gradient() ? kEmptyVarName : src_inout[i]);
-                                    : src_inout[i];
      dst_inout.emplace_back(s);
    }
    if (dst_format != nullptr) {

--- a/paddle/framework/grad_op_builder_test.cc
+++ b/paddle/framework/grad_op_builder_test.cc
@@ -83,24 +83,21 @@ TEST(GradOpBuilder, MutiInOut) {
  EXPECT_EQ(grad_test_op->Input("Out1"), "out1");
  EXPECT_EQ(grad_test_op->Inputs("Out2_mult"),
            std::vector<std::string>({"out2_1", "out2_2"}));
-  EXPECT_EQ(grad_test_op->Input("Out1" + f::OperatorBase::GRAD_VAR_SUFFIX()),
+  EXPECT_EQ(grad_test_op->Input("Out1" + f::kGradVarSuffix),
-            "out1" + f::OperatorBase::GRAD_VAR_SUFFIX());
+            "out1" + f::kGradVarSuffix);
-  EXPECT_EQ(
+  EXPECT_EQ(grad_test_op->Inputs("Out2_mult" + f::kGradVarSuffix),
-      grad_test_op->Inputs("Out2_mult" + f::OperatorBase::GRAD_VAR_SUFFIX()),
+            std::vector<std::string>(
-      std::vector<std::string>(
+                {"out2_1" + f::kGradVarSuffix, "out2_2" + f::kGradVarSuffix}));
-          {"out2_1" + f::OperatorBase::GRAD_VAR_SUFFIX(),
-           "out2_2" + f::OperatorBase::GRAD_VAR_SUFFIX()}));
  ASSERT_EQ(grad_test_op->outputs_.size(), 5UL);
-  EXPECT_EQ(grad_test_op->Output("In1" + f::OperatorBase::GRAD_VAR_SUFFIX()),
+  EXPECT_EQ(grad_test_op->Output("In1" + f::kGradVarSuffix),
-            "in1" + f::OperatorBase::GRAD_VAR_SUFFIX());
+            "in1" + f::kGradVarSuffix);
-  EXPECT_EQ(
+  EXPECT_EQ(grad_test_op->Outputs("In2_mult" + f::kGradVarSuffix),
-      grad_test_op->Outputs("In2_mult" + f::OperatorBase::GRAD_VAR_SUFFIX()),
+            std::vector<std::string>({"in2_1" + f::kGradVarSuffix,
-      std::vector<std::string>({"in2_1" + f::OperatorBase::GRAD_VAR_SUFFIX(),
+                                      "in2_2" + f::kGradVarSuffix,
-                                "in2_2" + f::OperatorBase::GRAD_VAR_SUFFIX(),
+                                      "in2_3" + f::kGradVarSuffix}));
-                                "in2_3" + f::OperatorBase::GRAD_VAR_SUFFIX()}));
+  EXPECT_EQ(grad_test_op->Output("In3" + f::kGradVarSuffix),
-  EXPECT_EQ(grad_test_op->Output("In3" + f::OperatorBase::GRAD_VAR_SUFFIX()),
+            "in3" + f::kGradVarSuffix);
-            "in3" + f::OperatorBase::GRAD_VAR_SUFFIX());
 }
 TEST(GradOpBuilder, IOIgnoredInGradient) {
@@ -116,30 +113,25 @@ TEST(GradOpBuilder, IOIgnoredInGradient) {
  ASSERT_EQ(grad_test_op->inputs_.size(), 5UL + 3UL + 3UL);
  EXPECT_EQ(grad_test_op->Input("In1"), "in1");
  EXPECT_EQ(grad_test_op->Inputs("In2_mult"),
-            std::vector<std::string>({f::OperatorBase::EMPTY_VAR_NAME(),
+            std::vector<std::string>({f::kEmptyVarName, f::kEmptyVarName}));
-                                      f::OperatorBase::EMPTY_VAR_NAME()}));
  EXPECT_EQ(grad_test_op->Inputs("In3_mult"),
            std::vector<std::string>({"in3_1", "in3_2"}));
  EXPECT_EQ(grad_test_op->Inputs("Out1_mult"),
            std::vector<std::string>({"out1_1", "out1_2"}));
-  EXPECT_EQ(grad_test_op->Input("Out2"), f::OperatorBase::EMPTY_VAR_NAME());
+  EXPECT_EQ(grad_test_op->Input("Out2"), f::kEmptyVarName);
-  EXPECT_EQ(
+  EXPECT_EQ(grad_test_op->Inputs("Out1_mult" + f::kGradVarSuffix),
-      grad_test_op->Inputs("Out1_mult" + f::OperatorBase::GRAD_VAR_SUFFIX()),
+            std::vector<std::string>(
-      std::vector<std::string>(
+                {"out1_1" + f::kGradVarSuffix, "out1_2" + f::kGradVarSuffix}));
-          {"out1_1" + f::OperatorBase::GRAD_VAR_SUFFIX(),
+  EXPECT_EQ(grad_test_op->Input("Out2" + f::kGradVarSuffix),
-           "out1_2" + f::OperatorBase::GRAD_VAR_SUFFIX()}));
+            "out2" + f::kGradVarSuffix);
-  EXPECT_EQ(grad_test_op->Input("Out2" + f::OperatorBase::GRAD_VAR_SUFFIX()),
-            "out2" + f::OperatorBase::GRAD_VAR_SUFFIX());
  ASSERT_EQ(grad_test_op->outputs_.size(), 5UL);
-  EXPECT_EQ(grad_test_op->Output("In1" + f::OperatorBase::GRAD_VAR_SUFFIX()),
+  EXPECT_EQ(grad_test_op->Output("In1" + f::kGradVarSuffix),
-            "in1" + f::OperatorBase::GRAD_VAR_SUFFIX());
+            "in1" + f::kGradVarSuffix);
-  EXPECT_EQ(
+  EXPECT_EQ(grad_test_op->Outputs("In2_mult" + f::kGradVarSuffix),
-      grad_test_op->Outputs("In2_mult" + f::OperatorBase::GRAD_VAR_SUFFIX()),
+            std::vector<std::string>(
-      std::vector<std::string>({"in2_1" + f::OperatorBase::GRAD_VAR_SUFFIX(),
+                {"in2_1" + f::kGradVarSuffix, "in2_2" + f::kGradVarSuffix}));
-                                "in2_2" + f::OperatorBase::GRAD_VAR_SUFFIX()}));
+  EXPECT_EQ(grad_test_op->Outputs("In3_mult" + f::kGradVarSuffix),
-  EXPECT_EQ(
+            std::vector<std::string>(
-      grad_test_op->Outputs("In3_mult" + f::OperatorBase::GRAD_VAR_SUFFIX()),
+                {"in3_1" + f::kGradVarSuffix, "in3_2" + f::kGradVarSuffix}));
-      std::vector<std::string>({"in3_1" + f::OperatorBase::GRAD_VAR_SUFFIX(),
-                                "in3_2" + f::OperatorBase::GRAD_VAR_SUFFIX()}));
 }
--- a/paddle/framework/op_desc.proto
+++ b/paddle/framework/op_desc.proto
@@ -15,7 +15,7 @@ limitations under the License. */
 syntax="proto2";
 package paddle.framework;
-import "attr_type.proto";
+import "attribute.proto";
 // AttrDesc is used to describe Attributes of an Operator. It contain's
 // name, type, and value of Attribute.

--- a/paddle/framework/op_proto.proto
+++ b/paddle/framework/op_proto.proto
@@ -21,7 +21,7 @@ limitations under the License. */
 syntax="proto2";
 package paddle.framework;
-import "attr_type.proto";
+import "attribute.proto";
 // Attribute protocol message for 3rd-party language binding.
 // It will store the Op support what attribute and what type.

--- a/paddle/framework/op_registry.cc
+++ b/paddle/framework/op_registry.cc
@@ -14,37 +14,8 @@ limitations under the License. */
 #include <paddle/framework/op_registry.h>
-namespace paddle {
+#include <vector>
-namespace framework {
-template <>
-void AttrTypeHelper::SetAttrType<int>(AttrProto* attr) {
-  attr->set_type(paddle::framework::AttrType::INT);
-}
-template <>
-void AttrTypeHelper::SetAttrType<float>(AttrProto* attr) {
-  attr->set_type(paddle::framework::AttrType::FLOAT);
-}
-template <>
-void AttrTypeHelper::SetAttrType<std::string>(AttrProto* attr) {
-  attr->set_type(paddle::framework::AttrType::STRING);
-}
-template <>
+namespace paddle {
-void AttrTypeHelper::SetAttrType<std::vector<int>>(AttrProto* attr) {
+namespace framework {}  // namespace framework
-  attr->set_type(paddle::framework::AttrType::INTS);
-}
-template <>
-void AttrTypeHelper::SetAttrType<std::vector<float>>(AttrProto* attr) {
-  attr->set_type(paddle::framework::AttrType::FLOATS);
-}
-template <>
-void AttrTypeHelper::SetAttrType<std::vector<std::string>>(AttrProto* attr) {
-  attr->set_type(paddle::framework::AttrType::STRINGS);
-}
-}  // namespace framework
 }  // namespace paddle
--- a/paddle/framework/op_registry.h
+++ b/paddle/framework/op_registry.h
@@ -19,7 +19,7 @@ limitations under the License. */
 #include <type_traits>
 #include <unordered_map>
 #include <unordered_set>
-#include "paddle/framework/attr_checker.h"
+#include "paddle/framework/attribute.h"
 #include "paddle/framework/grad_op_builder.h"
 #include "paddle/framework/op_desc.pb.h"
 #include "paddle/framework/scope.h"
@@ -27,49 +27,6 @@ limitations under the License. */
 namespace paddle {
 namespace framework {
-// helper class to set attribute type
-struct AttrTypeHelper {
-  template <typename T>
-  static void SetAttrType(AttrProto* attr);
-  static Attribute GetAttrValue(const AttrDesc& attr_desc) {
-    switch (attr_desc.type()) {
-      case paddle::framework::AttrType::INT: {
-        return attr_desc.i();
-      }
-      case paddle::framework::AttrType::FLOAT: {
-        return attr_desc.f();
-      }
-      case paddle::framework::AttrType::STRING: {
-        return attr_desc.s();
-      }
-      case paddle::framework::AttrType::INTS: {
-        std::vector<int> val(attr_desc.ints_size());
-        for (int i = 0; i < attr_desc.ints_size(); ++i) {
-          val[i] = attr_desc.ints(i);
-        }
-        return val;
-      }
-      case paddle::framework::AttrType::FLOATS: {
-        std::vector<float> val(attr_desc.floats_size());
-        for (int i = 0; i < attr_desc.floats_size(); ++i) {
-          val[i] = attr_desc.floats(i);
-        }
-        return val;
-      }
-      case paddle::framework::AttrType::STRINGS: {
-        std::vector<std::string> val(attr_desc.strings_size());
-        for (int i = 0; i < attr_desc.strings_size(); ++i) {
-          val[i] = attr_desc.strings(i);
-        }
-        return val;
-      }
-    }
-    PADDLE_ENFORCE(false, "Unknown OpDesc::AttrDesc::type !");
-    return boost::blank();
-  }
-};
 // this class not only make proto but also init attribute checkers.
 class OpProtoAndCheckerMaker {
 public:
@@ -136,7 +93,7 @@ class OpProtoAndCheckerMaker {
    *attr->mutable_name() = name;
    *attr->mutable_comment() = comment;
    attr->set_generated(generated);
-    AttrTypeHelper::SetAttrType<T>(attr);
+    attr->set_type(AttrTypeID<T>());
    return op_checker_->AddAttrChecker<T>(name);
  }
@@ -297,7 +254,7 @@ class OpRegistry {
    AttributeMap attrs;
    for (auto& attr : op_desc.attrs()) {
-      attrs[attr.name()] = AttrTypeHelper::GetAttrValue(attr);
+      attrs[attr.name()] = GetAttrValue(attr);
    }
    return CreateOp(op_desc.type(), inputs, outputs, attrs);
@@ -341,7 +298,7 @@ class OpRegistry {
  static void GenerateTempVariableName(OperatorBase* op) {
    static std::atomic<size_t> gUniqId(0UL);
    for (auto& outname : op->outputs_) {
-      if (outname == OperatorBase::TMP_VAR_NAME()) {
+      if (outname == kTempVarName) {
        outname += op->type_;
        outname += "@";
        outname += std::to_string(gUniqId.fetch_add(1));

--- a/paddle/framework/operator.h
+++ b/paddle/framework/operator.h
@@ -20,7 +20,7 @@ limitations under the License. */
 #include <unordered_map>
 #include <vector>
-#include "paddle/framework/attr_checker.h"
+#include "paddle/framework/attribute.h"
 #include "paddle/framework/op_desc.pb.h"
 #include "paddle/framework/op_proto.pb.h"
 #include "paddle/framework/scope.h"
@@ -32,9 +32,29 @@ limitations under the License. */
 namespace paddle {
 namespace framework {
+/// If a variable is a empty variable, that name will be used.
+const std::string kEmptyVarName = "@EMPTY@";
+/// If a variable is a temporary variable, that name will be set in Python,
+/// but it will be convert to a unique name in scope after OpCreator.
+const std::string kTempVarName = "@TEMP@";
+/// If a variable's name has a certain suffix, it means that the
+/// variable is the gradient of another varibale.
+/// e.g. Variable "x@GRAD" is the gradient of varibale "x".
+const std::string kGradVarSuffix = "@GRAD";
+/// Variables with this suffix are supposed to be filled up with zeros.
+const std::string kZeroVarSuffix = "@ZERO";
+inline std::string GradVarName(const std::string& var_name) {
+  return var_name + kGradVarSuffix;
+}
 class OperatorBase;
 class InferShapeContext;
 class ExecutionContext;
 /**
 * OperatorBase has the basic element that Net will call to do computation.
 * Only CreateOperator from OpRegistry will new Operator directly. User
@@ -43,25 +63,6 @@ class ExecutionContext;
 */
 class OperatorBase {
 public:
-  /// If a variable is a empty variable, that name will be used.
-  static std::string EMPTY_VAR_NAME() { return "@EMPTY@"; }
-  /// If a variable is a temporary variable, that name will be set in Python,
-  /// but it will be convert to a unique name in scope after OpCreator.
-  static std::string TMP_VAR_NAME() { return "@TEMP@"; }
-  /// If a variable's name has a certain suffix, it means that the
-  /// variable is the gradient of another varibale.
-  /// e.g. Variable "x@GRAD" is the gradient of varibale "x".
-  static std::string GRAD_VAR_SUFFIX() { return "@GRAD"; }
-  static std::string GRAD_VAR_NAME(const std::string& name) {
-    return name + GRAD_VAR_SUFFIX();
-  }
-  /// Variables with this suffix are supposed to be filled up with zeros.
-  static std::string ZERO_VAR_SUFFIX() { return "@ZERO"; }
  virtual ~OperatorBase() {}
  template <typename T>

--- a/paddle/framework/pybind.cc
+++ b/paddle/framework/pybind.cc
@@ -163,8 +163,8 @@ All parameter, weight, gradient are variables in Paddle.
  m.def_submodule(
       "var_names",
       "The module will return special predefined variable name in Paddle")
-      .def("empty", OperatorBase::EMPTY_VAR_NAME)
+      .def("empty", []() { return kEmptyVarName; })
-      .def("temp", OperatorBase::TMP_VAR_NAME);
+      .def("temp", []() { return kTempVarName; });
  // clang-format off
  py::class_<paddle::platform::DeviceContext>(m, "DeviceContext")
      .def_static("create",

--- a/paddle/gserver/tests/CMakeLists.txt
+++ b/paddle/gserver/tests/CMakeLists.txt
 # gserver pacakge unittests
+file(GLOB_RECURSE GSERVER_HEADER RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.h")
+file(GLOB_RECURSE GSERVER_SOURCES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.cpp")
+add_style_check_target(paddle_gserver ${GSERVER_SOURCES})
+add_style_check_target(paddle_gserver ${GSERVER_HEADER})
 ################### test_ProtoDataProvider ############
 add_unittest_without_exec(test_ProtoDataProvider
    test_ProtoDataProvider.cpp)
@@ -50,7 +55,7 @@ add_unittest_without_exec(test_DetectionOutput
    test_DetectionOutput.cpp
    LayerGradUtil.cpp)
-add_test(NAME test_DetectionOutput 
+add_test(NAME test_DetectionOutput
    COMMAND test_DetectionOutput)
 ################# test_ConvUnify #######################
 add_unittest_without_exec(test_ConvUnify

--- a/paddle/math/MathUtils.cpp
+++ b/paddle/math/MathUtils.cpp
@@ -25,7 +25,7 @@ namespace paddle {
 */
 void sparseRand(
    int* major, int* minor, int nnz, int majorLen, int minorMax, bool useGpu) {
-  CHECK(size_t(nnz) > size_t(1));
+  CHECK(size_t(nnz) >= size_t(1));
  int* cpuMajor;
  int* cpuMinor;
  CpuIVector cpuMinorVec(nnz);

--- a/paddle/math/tests/test_matrixCompare.cpp
+++ b/paddle/math/tests/test_matrixCompare.cpp
@@ -79,8 +79,8 @@ void testMatrixMaxSequence(int batchSize, int inputDim) {
 }
 TEST(Matrix, maxSequence) {
-  for (auto batchSize : {1, 10, 128, 1000, 6000}) {
+  for (auto batchSize : {1, 3, 997}) {   // prime numbers close to 1, 4, 1024
-    for (auto inputDim : {1, 32, 100, 512}) {
+    for (auto inputDim : {1, 7, 131}) {  // prime numbers close to 1, 8, 128
      VLOG(3) << " batchSize=" << batchSize << " inputDim=" << inputDim;
      testMatrixMaxSequence(batchSize, inputDim);
    }
@@ -240,14 +240,10 @@ TEST(Matrix, unary) {
    // inverse matrix
    testMatrixInverse(height);
 #else
-    LOG(WARNING) << "Cannot run Matrix Inverse Unit Test.\n"
+    LOG(WARNING) << "This version of PaddlePaddle was not built with LAPACK"
-                 << "Failed to find lapack library in current system.\n"
+                 << "support so we cannot test matrix inverse. To test "
-                 << "To address this issue, Please adopt one of the following "
+                 << "matrix inverse, please install LAPACKE "
-                    "approaches: \n"
+                 << "and MKL/Openblas/ATLAS, and re-build PaddlePaddle.";
-                 << "1. Simply issue `sudo apt-get install liblapacke-dev` to "
-                    "avoid re-build source code. \n"
-                 << "2. Install MKL/Openblas/ATLAS and re-build PaddlePaddle "
-                    "source code.";
 #endif
  }
 }
@@ -341,8 +337,8 @@ void testMatrixSoftmaxBp(int height, int width) {
 }
 TEST(Matrix, softmax) {
-  for (auto height : {1, 11, 73, 128, 200}) {
+  for (auto height : {1, 3, 131}) {    // prime numbers close to 1, 4, 127
-    for (auto width : {1, 32, 100, 512, 1000}) {
+    for (auto width : {1, 17, 251}) {  // prime numbers close to 1, 16, 256
      VLOG(3) << " height=" << height << " width=" << width;
      testMatrixSoftmax(height, width);
@@ -527,7 +523,7 @@ void testVectorRowFunc(int size) {
 }
 TEST(Vector, rowFunc) {
-  for (auto size : {1, 5, 31, 90, 150, 500, 1000, 4000}) {
+  for (auto size : {1, 3, 997}) {  // prime numbers close to 1, 4, 1024
    VLOG(3) << " size=" << size;
    testVectorRowFunc(size);
  }
@@ -604,7 +600,7 @@ void testVectorIsEqual(int size) {
 }
 TEST(Vector, Equal) {
-  for (auto size : {1, 5, 31, 90, 150, 500, 1000, 4000}) {
+  for (auto size : {1, 3, 997}) {  // prime numbers close to 1, 4, 1024
    VLOG(3) << " size=" << size;
    testVectorReset<int>(size);
    testVectorReset<real>(size);
@@ -635,9 +631,8 @@ void testMatrixTopK(int samples, int dim, int beamSize) {
 }
 TEST(Matrix, topK) {
-  for (auto samples : {1, 5, 31, 90, 150, 500}) {
+  for (auto samples : {1, 17, 131}) {  // prime numbers close to 1, 16, 127
-    for (auto dim :
+    for (auto dim : {1, 3, 997}) {     // prime numbers close to 1, 4, 1024
-         {1, 5, 8, 10, 15, 64, 80, 120, 256, 300, 1280, 5120, 50000}) {
      for (auto beamSize : {1, 5, 10, 20, 40, (int)rand() % dim + 1}) {
        if (beamSize > dim) continue;
        VLOG(3) << " samples=" << samples << " beamSize=" << beamSize
@@ -650,6 +645,7 @@ TEST(Matrix, topK) {
 void testSMatrixTopK(int samples, int dim, int beamSize, real ratio) {
  int nnz = samples * dim * ratio;
+  if (nnz < 1) nnz = 1;  // Because sparseRand in MathUtil.cpp requires this.
  MatrixPtr cpuSrc = std::make_shared<CpuSparseMatrix>(samples, dim, nnz);
  MatrixPtr gpuSrc = std::make_shared<GpuSparseMatrix>(samples, dim, nnz);
  MatrixPtr cpuVal = std::make_shared<CpuMatrix>(samples, beamSize);
@@ -683,9 +679,9 @@ void testSMatrixTopK(int samples, int dim, int beamSize, real ratio) {
 }
 TEST(SMatrix, topK) {
-  for (auto samples : {1, 5, 100}) {
+  for (auto samples : {1, 3, 61}) {
-    for (auto dim : {10000, 10000, 50000}) {
+    for (auto dim : {1, 3, 61}) {
-      for (auto beamSize : {1, 5, 40, 100, 500}) {
+      for (auto beamSize : {1, 3, 61}) {
        for (auto ratio : {0.01, 0.001}) {
          if (beamSize > dim) continue;
          VLOG(3) << " samples=" << samples << " beamSize=" << beamSize
@@ -806,10 +802,9 @@ void testClassificationError(int numSamples, int dim, int topkSize) {
 }
 TEST(Matrix, classificationError) {
-  for (auto numSamples : {1, 5, 31, 90, 150, 300}) {
+  for (auto numSamples : {1, 3, 31}) {
-    for (auto dim :
+    for (auto dim : {1, 3, 31}) {
-         {1, 5, 8, 10, 15, 64, 80, 120, 256, 300, 1280, 5120, 50000}) {
+      for (auto topkSize : {1, 3, (int)rand() % dim + 1}) {
-      for (auto topkSize : {1, 5, 10, 20, 40, (int)rand() % dim + 1}) {
        if (topkSize > dim) continue;
        VLOG(3) << " sample= " << numSamples << " topkSize= " << topkSize
                << " dim= " << dim;
@@ -1016,13 +1011,15 @@ void testAvgPoolFwdBwd(int numSamples,
  TensorCheckErr(*inputGrad, *inputGpuGrad);
 }
+// TODO(yi): I noticed many such blindly combinatorial tests in this
+// file.  They are no help to locate defects at all.
 TEST(Matrix, PoolFwdBwd) {
-  for (auto numSamples : {5, 32}) {
+  for (auto numSamples : {1, 3}) {
-    for (auto channels : {1, 9, 32}) {
+    for (auto channels : {1, 3}) {
-      for (auto imgSizeH : {14, 28}) {
+      for (auto imgSizeH : {13, 17}) {
-        for (auto imgSizeW : {16, 30}) {
+        for (auto imgSizeW : {17, 19}) {
-          for (auto sizeX : {2, 5}) {
+          for (auto sizeX : {2, 3}) {
-            for (auto sizeY : {2, 5}) {
+            for (auto sizeY : {2, 3}) {
              for (auto sH : {1, 2}) {
                for (auto sW : {1, 2}) {
                  for (auto pH : {0, (sizeY - 1) / 2}) {
@@ -1128,8 +1125,8 @@ TEST(Matrix, MaxOutFwdBwd) {
 }
 TEST(CpuMatrix, copyFrom) {
-  const size_t height = 1000;
+  const size_t height = 31;
-  const size_t width = 1000;
+  const size_t width = 53;
  CpuMatrix cpu(height, width);
  GpuMatrix gpu(height, width);
  CpuMatrix copy(height, width);
@@ -1149,6 +1146,10 @@ void testBatch2seqPadding(int batchSize, int inputDim) {
  IVectorPtr cpuSequence;
  generateSequenceStartPositions(batchSize, cpuSequence);
+  for (int i = 0; i < cpuSequence->getSize(); ++i) {
+    (cpuSequence->getData())[i] += 1;  // so no way that maxSeqLen is 0;
+  }
  IVectorPtr gpuSequence = IVector::create(cpuSequence->getSize(), true);
  gpuSequence->copyFrom(*cpuSequence);
@@ -1156,45 +1157,46 @@ void testBatch2seqPadding(int batchSize, int inputDim) {
  size_t maxSeqLen = *std::max_element(cpuSequence->getData(),
                                       cpuSequence->getData() + numSeq);
+  printf("numSeq = %ld, maxSeqLen = %ld\n", numSeq, maxSeqLen);
  MatrixPtr cBatch = std::make_shared<CpuMatrix>(numSeq * maxSeqLen, inputDim);
  MatrixPtr gBatch = std::make_shared<GpuMatrix>(numSeq * maxSeqLen, inputDim);
  MatrixPtr cCheck = std::make_shared<CpuMatrix>(numSeq * maxSeqLen, inputDim);
-  hl_sequence2batch_copy_padding(gBatch->getData(),
+  // hl_sequence2batch_copy_padding(gBatch->getData(),
-                                 gpuInput->getData(),
+  //                                gpuInput->getData(),
-                                 cpuSequence->getData(),
+  //                                cpuSequence->getData(),
-                                 inputDim,
+  //                                inputDim,
-                                 maxSeqLen,
+  //                                maxSeqLen,
-                                 numSeq,
+  //                                numSeq,
-                                 false,
+  //                                false,
-                                 true);
+  //                                true);
-  cCheck->copyFrom(*gBatch);
+  // cCheck->copyFrom(*gBatch);
-  int* seqStart = cpuSequence->getData();
+  // int* seqStart = cpuSequence->getData();
-  float* batchData = cBatch->getData();
+  // float* batchData = cBatch->getData();
-  float* seqData = cpuInput->getData();
+  // float* seqData = cpuInput->getData();
-  for (size_t i = 0; i < maxSeqLen; i++) {
+  // for (size_t i = 0; i < maxSeqLen; i++) {
-    for (size_t j = 0; j < numSeq; j++) {
+  //   for (size_t j = 0; j < numSeq; j++) {
-      size_t sequenceStart = seqStart[j];
+  //     size_t sequenceStart = seqStart[j];
-      size_t sequenceLength = seqStart[j + 1] - seqStart[j];
+  //     size_t sequenceLength = seqStart[j + 1] - seqStart[j];
-      if (i < sequenceLength) {
+  //     if (i < sequenceLength) {
-        memcpy(batchData + (i * numSeq + j) * inputDim,
+  //       memcpy(batchData + (i * numSeq + j) * inputDim,
-               seqData + (sequenceStart + i) * inputDim,
+  //              seqData + (sequenceStart + i) * inputDim,
-               inputDim * sizeof(real));
+  //              inputDim * sizeof(real));
-      } else {
+  //     } else {
-        memset(batchData + (i * numSeq + j) * inputDim,
+  //       memset(batchData + (i * numSeq + j) * inputDim,
-               0,
+  //              0,
-               inputDim * sizeof(real));
+  //              inputDim * sizeof(real));
-      }
+  //     }
-    }
+  //   }
-  }
+  // }
-  TensorCheckErr(*cBatch, *cCheck);
+  // TensorCheckErr(*cBatch, *cCheck);
 }
 TEST(Matrix, warpCTC) {
-  for (auto batchSize : {51, 526, 2884}) {
+  for (auto batchSize : {1, 3, 17}) {
-    for (auto inputDim : {32, 512, 2026}) {
+    for (auto inputDim : {1, 3, 31}) {
      VLOG(3) << " batchSize=" << batchSize << " inputDim=" << inputDim;
      testBatch2seqPadding(batchSize, inputDim);
    }

--- a/paddle/operators/fc_op.cc
+++ b/paddle/operators/fc_op.cc
@@ -27,7 +27,7 @@ public:
                               {Output("before_act")},
                               {}));
    auto b = Input("b");
-    if (b != EMPTY_VAR_NAME()) {
+    if (b != framework::kEmptyVarName) {
      AddOp(OpRegistry::CreateOp("rowwise_add",
                                 {Output("before_act"), Input("b")},
                                 {Output("before_act")},

--- a/paddle/operators/mean_op.cc
+++ b/paddle/operators/mean_op.cc
@@ -41,7 +41,7 @@ public:
 class MeanGradOp : public OperatorWithKernel {
 protected:
  void InferShape(const InferShapeContext &ctx) const override {
-    ctx.Output<Tensor>("X" + GRAD_VAR_SUFFIX())
+    ctx.Output<Tensor>("X" + framework::kGradVarSuffix)
        ->Resize(ctx.Input<Tensor>("X")->dims());
  }
 };

--- a/paddle/operators/mean_op.h
+++ b/paddle/operators/mean_op.h
@@ -39,10 +39,10 @@ template <typename Place, typename T>
 class MeanGradKernel : public OpKernel {
 public:
  void Compute(const ExecutionContext& context) const override {
-    auto OG = context.Input<Tensor>("Out" + OperatorBase::GRAD_VAR_SUFFIX());
+    auto OG = context.Input<Tensor>("Out" + framework::kGradVarSuffix);
    PADDLE_ENFORCE(framework::product(OG->dims()) == 1,
                   "Mean Gradient should be scalar");
-    auto IG = context.Output<Tensor>("X" + OperatorBase::GRAD_VAR_SUFFIX());
+    auto IG = context.Output<Tensor>("X" + framework::kGradVarSuffix);
    IG->mutable_data<T>(context.GetPlace());
    T ig_size = (T)framework::product(IG->dims());

--- a/paddle/operators/recurrent_op.cc
+++ b/paddle/operators/recurrent_op.cc
@@ -38,10 +38,10 @@ void SegmentInputs(const std::vector<Scope*>& step_scopes,
                   "input link [%s] is not in scope.",
                   inlinks[i].external);
    Tensor* input = input_var->GetMutable<Tensor>();
-    DDim dims = input->dims();
+    framework::DDim dims = input->dims();
    PADDLE_ENFORCE(static_cast<size_t>(dims[0]) == seq_len,
                   "all the inlinks must have same length");
-    DDim step_dims = slice_ddim(dims, 1, dims.size());
+    framework::DDim step_dims = slice_ddim(dims, 1, dims.size());
    for (size_t j = 0; j < seq_len; j++) {
      Tensor* step_input =
          step_scopes[j]->NewVar(inlinks[i].internal)->GetMutable<Tensor>();
@@ -64,13 +64,13 @@ void ConcatOutputs(const std::vector<Scope*>& step_scopes,
                   outlinks[i].external);
    Tensor* output = output_var->GetMutable<Tensor>();
    if (infer_shape_mode) {
-      DDim step_dims = step_scopes[0]
+      framework::DDim step_dims = step_scopes[0]
-                           ->FindVar(outlinks[i].internal)
+                                      ->FindVar(outlinks[i].internal)
-                           ->GetMutable<Tensor>()
+                                      ->GetMutable<Tensor>()
-                           ->dims();
+                                      ->dims();
      std::vector<int> dims_vec = vectorize(step_dims);
      dims_vec.insert(dims_vec.begin(), seq_len);
-      output->Resize(make_ddim(dims_vec));
+      output->Resize(framework::make_ddim(dims_vec));
    } else {
      output->mutable_data<float>(platform::CPUPlace());
      for (size_t j = 0; j < seq_len; j++) {

--- a/paddle/operators/recurrent_op.h
+++ b/paddle/operators/recurrent_op.h
@@ -19,8 +19,6 @@
 namespace paddle {
 namespace operators {
-using namespace paddle::framework;  // NOLINT
 namespace rnn {
 /**
@@ -70,7 +68,7 @@ struct ArgumentName {
 /**
 * Prepare inputs for each step net.
 */
-void SegmentInputs(const std::vector<Scope*>& step_scopes,
+void SegmentInputs(const std::vector<framework::Scope*>& step_scopes,
                   const std::vector<Link>& inlinks,
                   const size_t seq_len,
                   bool infer_shape_mode);
@@ -78,12 +76,12 @@ void SegmentInputs(const std::vector<Scope*>& step_scopes,
 /**
 * Process outputs of step nets and merge to variables.
 */
-void ConcatOutputs(const std::vector<Scope*>& step_scopes,
+void ConcatOutputs(const std::vector<framework::Scope*>& step_scopes,
                   const std::vector<Link>& outlinks,
                   const size_t seq_len,
                   bool infer_shape_mode);
-void LinkMemories(const std::vector<Scope*>& step_scopes,
+void LinkMemories(const std::vector<framework::Scope*>& step_scopes,
                  const std::vector<MemoryAttr>& memories,
                  const size_t step_id,
                  const int offset,
@@ -103,14 +101,15 @@ void InitArgument(const ArgumentName& name, Argument* arg);
 class RecurrentAlgorithm {
 public:
-  void Run(const Scope& scope, const platform::DeviceContext& dev_ctx) const;
+  void Run(const framework::Scope& scope,
+           const platform::DeviceContext& dev_ctx) const;
  void Init(std::unique_ptr<rnn::Argument> arg) { arg_ = std::move(arg); }
  /**
   * InferShape must be called before Run.
   */
-  void InferShape(const Scope& scope) const;
+  void InferShape(const framework::Scope& scope) const;
 protected:
  /*
@@ -119,13 +118,15 @@ protected:
   * NOTE the scopes are reused in both the forward and backward, so just
   * create once and expand its size if more steps need.
   */
-  void CreateScopes(const Scope& scope) const;
+  void CreateScopes(const framework::Scope& scope) const;
-  const std::vector<Scope*>& GetStepScopes(const Scope& scope) const {
+  const std::vector<framework::Scope*>& GetStepScopes(
-    return *scope.FindVar(arg_->step_scopes)->GetMutable<std::vector<Scope*>>();
+      const framework::Scope& scope) const {
+    return *scope.FindVar(arg_->step_scopes)
+                ->GetMutable<std::vector<framework::Scope*>>();
  }
-  void InitMemories(Scope* step_scopes, bool infer_shape_mode) const;
+  void InitMemories(framework::Scope* step_scopes, bool infer_shape_mode) const;
 private:
  std::unique_ptr<rnn::Argument> arg_;
@@ -146,18 +147,22 @@ class RecurrentGradientAlgorithm {
 public:
  void Init(std::unique_ptr<rnn::Argument> arg) { arg_ = std::move(arg); }
-  void Run(const Scope& scope, const platform::DeviceContext& dev_ctx) const;
+  void Run(const framework::Scope& scope,
+           const platform::DeviceContext& dev_ctx) const;
-  void LinkBootMemoryGradients(Scope* step_scopes, bool infer_shape_mode) const;
+  void LinkBootMemoryGradients(framework::Scope* step_scopes,
+                               bool infer_shape_mode) const;
  /**
   * InferShape must be called before Run.
   */
-  void InferShape(const Scope& scope) const;
+  void InferShape(const framework::Scope& scope) const;
 protected:
-  inline const std::vector<Scope*>& GetStepScopes(const Scope& scope) const {
+  inline const std::vector<framework::Scope*>& GetStepScopes(
-    return *scope.FindVar(arg_->step_scopes)->GetMutable<std::vector<Scope*>>();
+      const framework::Scope& scope) const {
+    return *scope.FindVar(arg_->step_scopes)
+                ->GetMutable<std::vector<framework::Scope*>>();
  }
 private:
@@ -165,16 +170,18 @@ private:
  mutable size_t seq_len_;
 };
-class RecurrentOp final : public OperatorBase {
+class RecurrentOp final : public framework::OperatorBase {
 public:
  void Init() override;
  /**
   * InferShape must be called before Run.
   */
-  void InferShape(const Scope& scope) const override { alg_.InferShape(scope); }
+  void InferShape(const framework::Scope& scope) const override {
+    alg_.InferShape(scope);
+  }
-  void Run(const Scope& scope,
+  void Run(const framework::Scope& scope,
           const platform::DeviceContext& dev_ctx) const override {
    alg_.Run(scope, dev_ctx);
  }
@@ -185,16 +192,18 @@ private:
  RecurrentAlgorithm alg_;
 };
-class RecurrentGradientOp final : public OperatorBase {
+class RecurrentGradientOp final : public framework::OperatorBase {
 public:
  void Init() override;
  /**
   * InferShape must be called before Run.
   */
-  void InferShape(const Scope& scope) const override { alg_.InferShape(scope); }
+  void InferShape(const framework::Scope& scope) const override {
+    alg_.InferShape(scope);
+  }
-  void Run(const Scope& scope,
+  void Run(const framework::Scope& scope,
           const platform::DeviceContext& dev_ctx) const override {
    alg_.Run(scope, dev_ctx);
  }

--- a/paddle/operators/recurrent_op_test.cc
+++ b/paddle/operators/recurrent_op_test.cc
@@ -16,6 +16,7 @@
 #include <glog/logging.h>
 #include <gtest/gtest.h>
+#include "paddle/framework/ddim.h"
 #include "paddle/framework/op_registry.h"
 #include "paddle/framework/operator.h"
 #include "paddle/framework/tensor.h"
@@ -24,6 +25,9 @@
 namespace paddle {
 namespace operators {
+using framework::make_ddim;
+using framework::DDim;
 class RecurrentOpTest : public ::testing::Test {
 protected:
  virtual void SetUp() override {
@@ -72,7 +76,7 @@ protected:
  }
  void CreateRNNOp() {
-    OpDesc op_desc;
+    framework::OpDesc op_desc;
    op_desc.set_type("recurrent_op");
    // inlinks 0

--- a/paddle/operators/softmax_op.cc
+++ b/paddle/operators/softmax_op.cc
@@ -48,12 +48,12 @@ protected:
    PADDLE_ENFORCE(ctx.OutputSize() == 1UL,
                   "Output of SoftmaxOpGrad should be 1");
    PADDLE_ENFORCE(ctx.InputVar("Y") != nullptr, "Input(Y) should not be null");
-    PADDLE_ENFORCE(ctx.InputVar(GRAD_VAR_NAME("Y")) != nullptr,
+    PADDLE_ENFORCE(ctx.InputVar(framework::GradVarName("Y")) != nullptr,
                   "Input(Y@GRAD) should not be null");
    PADDLE_ENFORCE(ctx.Input<Tensor>("Y")->dims() ==
-                       ctx.Input<Tensor>(GRAD_VAR_NAME("Y"))->dims(),
+                       ctx.Input<Tensor>(framework::GradVarName("Y"))->dims(),
                   "the shape of Input(0) and Input(1) should be the same");
-    ctx.Output<Tensor>(GRAD_VAR_NAME("X"))
+    ctx.Output<Tensor>(framework::GradVarName("X"))
        ->Resize(ctx.Input<Tensor>("Y")->dims());
  }
 };

--- a/paddle/operators/softmax_op.h
+++ b/paddle/operators/softmax_op.h
@@ -68,8 +68,8 @@ public:
    std::shared_ptr<Tensor> scale_ = std::make_shared<Tensor>();
    auto Y = context.Input<Tensor>("Y");
-    auto dY = context.Input<Tensor>(OperatorBase::GRAD_VAR_NAME("Y"));
+    auto dY = context.Input<Tensor>(framework::GradVarName("Y"));
-    auto dX = context.Output<Tensor>(OperatorBase::GRAD_VAR_NAME("X"));
+    auto dX = context.Output<Tensor>(framework::GradVarName("X"));
    dX->mutable_data<T>(context.GetPlace());
    const int batch_size = Y->dims()[0];

--- a/paddle/platform/device_context.h
+++ b/paddle/platform/device_context.h
@@ -55,7 +55,7 @@ class CPUDeviceContext : public DeviceContext {
 class CUDADeviceContext : public DeviceContext {
 public:
-  CUDADeviceContext(GPUPlace);  // NOLINT
+  explicit CUDADeviceContext(GPUPlace);
  virtual ~CUDADeviceContext();
  /*! \brief  Wait for all operations completion in the stream. */

--- a/paddle/platform/device_context_test.cc
+++ b/paddle/platform/device_context_test.cc
@@ -15,24 +15,28 @@ limitations under the License. */
 #include "paddle/platform/device_context.h"
 #include "gtest/gtest.h"
-using DEVICE_GPU = Eigen::GpuDevice;
 TEST(Device, Init) {
+  using paddle::platform::DeviceContext;
+  using paddle::platform::CUDADeviceContext;
+  using paddle::platform::GPUPlace;
  int count = paddle::platform::GetDeviceCount();
  for (int i = 0; i < count; i++) {
-    paddle::platform::DeviceContext* device_context =
+    DeviceContext* device_context = new CUDADeviceContext(GPUPlace(i));
-        new paddle::platform::CUDADeviceContext(i);
    Eigen::GpuDevice* gpu_device =
-        device_context->template get_eigen_device<DEVICE_GPU>();
+        device_context->template get_eigen_device<Eigen::GpuDevice>();
    ASSERT_NE(nullptr, gpu_device);
    delete device_context;
  }
 }
 TEST(Device, CUDADeviceContext) {
+  using paddle::platform::CUDADeviceContext;
+  using paddle::platform::GPUPlace;
  int count = paddle::platform::GetDeviceCount();
  for (int i = 0; i < count; i++) {
-    paddle::platform::CUDADeviceContext* device_context =
+    CUDADeviceContext* device_context = new CUDADeviceContext(GPUPlace(i));
-        new paddle::platform::CUDADeviceContext(i);
    Eigen::GpuDevice* gpu_device = device_context->eigen_device();
    ASSERT_NE(nullptr, gpu_device);
    cudnnHandle_t cudnn_handle = device_context->cudnn_handle();

--- a/paddle/platform/enforce.h
+++ b/paddle/platform/enforce.h
@@ -162,5 +162,50 @@ inline void throw_on_error(T e) {
    }                                                                   \
  } while (0)
+/*
+ * Some enforce helpers here, usage:
+ *    int a = 1;
+ *    int b = 2;
+ *    PADDLE_ENFORCE_EQ(a, b);
+ *
+ *    will raise an expression described as follows:
+ *    "enforce a == b failed, 1 != 2" with detailed stack infomation.
+ *
+ *    extra messages is also supported, for example:
+ *    PADDLE_ENFORCE(a, b, "some simple enforce failed between %d numbers", 2)
+ */
+#define PADDLE_ENFORCE_EQ(__VAL0, __VAL1, ...) \
+  __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, ==, !=, __VA_ARGS__)
+#define PADDLE_ENFORCE_NE(__VAL0, __VAL1, ...) \
+  __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, !=, ==, __VA_ARGS__)
+#define PADDLE_ENFORCE_GT(__VAL0, __VAL1, ...) \
+  __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, >, <=, __VA_ARGS__)
+#define PADDLE_ENFORCE_GE(__VAL0, __VAL1, ...) \
+  __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, >=, <, __VA_ARGS__)
+#define PADDLE_ENFORCE_LT(__VAL0, __VAL1, ...) \
+  __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, <, >=, __VA_ARGS__)
+#define PADDLE_ENFORCE_LE(__VAL0, __VAL1, ...) \
+  __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, <=, >, __VA_ARGS__)
+// if two values have different data types, choose a compatible type for them.
+template <typename T1, typename T2>
+struct CompatibleType {
+  static const bool t1_to_t2 = std::is_convertible<T1, T2>::value;
+  typedef typename std::conditional<t1_to_t2, T2, T1>::type type;
+};
+#define __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, __CMP, __INV_CMP, ...)        \
+  PADDLE_ENFORCE(__COMPATIBLE_TYPE(__VAL0, __VAL1, __VAL0)                    \
+                     __CMP __COMPATIBLE_TYPE(__VAL0, __VAL1, __VAL1),         \
+                 "enforce %s " #__CMP " %s failed, %s " #__INV_CMP " %s\n%s", \
+                 #__VAL0, #__VAL1, std::to_string(__VAL0),                    \
+                 std::to_string(__VAL1),                                      \
+                 paddle::string::Sprintf("" __VA_ARGS__));
+#define __COMPATIBLE_TYPE(__VAL0, __VAL1, __VAL)              \
+  typename paddle::platform::CompatibleType<decltype(__VAL0), \
+                                            decltype(__VAL1)>::type(__VAL)
 }  // namespace platform
 }  // namespace paddle
--- a/paddle/platform/enforce_test.cc
+++ b/paddle/platform/enforce_test.cc
@@ -34,3 +34,165 @@ TEST(ENFORCE, FAILED) {
  }
  ASSERT_TRUE(in_catch);
 }
+TEST(ENFORCE, NO_ARG_OK) {
+  int a = 2;
+  int b = 2;
+  PADDLE_ENFORCE_EQ(a, b);
+  // test enforce with extra message.
+  PADDLE_ENFORCE_EQ(a, b, "some thing wrong %s", "info");
+}
+TEST(ENFORCE_EQ, NO_EXTRA_MSG_FAIL) {
+  int a = 2;
+  bool in_catch = false;
+  try {
+    PADDLE_ENFORCE_EQ(a, 1 + 3);
+  } catch (paddle::platform::EnforceNotMet error) {
+    in_catch = true;
+    const std::string msg = "enforce a == 1 + 3 failed, 2 != 4";
+    const char* what = error.what();
+    for (size_t i = 0; i < msg.length(); ++i) {
+      ASSERT_EQ(what[i], msg[i]);
+    }
+  }
+  ASSERT_TRUE(in_catch);
+}
+TEST(ENFORCE_EQ, EXTRA_MSG_FAIL) {
+  int a = 2;
+  bool in_catch = false;
+  try {
+    PADDLE_ENFORCE_EQ(a, 1 + 3, "%s size not match", "their");
+  } catch (paddle::platform::EnforceNotMet error) {
+    in_catch = true;
+    const std::string msg =
+        "enforce a == 1 + 3 failed, 2 != 4\ntheir size not match";
+    const char* what = error.what();
+    for (size_t i = 0; i < msg.length(); ++i) {
+      ASSERT_EQ(what[i], msg[i]);
+    }
+  }
+  ASSERT_TRUE(in_catch);
+}
+TEST(ENFORCE_NE, OK) {
+  PADDLE_ENFORCE_NE(1, 2);
+  PADDLE_ENFORCE_NE(1.0, 2UL);
+}
+TEST(ENFORCE_NE, FAIL) {
+  bool in_catch = false;
+  try {
+    // 2UL here to check data type compatible
+    PADDLE_ENFORCE_NE(1.0, 1UL);
+  } catch (paddle::platform::EnforceNotMet error) {
+    in_catch = true;
+    const std::string msg = "enforce 1.0 != 1UL failed, 1.000000 == 1";
+    const char* what = error.what();
+    for (size_t i = 0; i < msg.length(); ++i) {
+      ASSERT_EQ(what[i], msg[i]);
+    }
+  }
+  ASSERT_TRUE(in_catch);
+}
+TEST(ENFORCE_GT, OK) { PADDLE_ENFORCE_GT(2, 1); }
+TEST(ENFORCE_GT, FAIL) {
+  bool in_catch = false;
+  try {
+    // 2UL here to check data type compatible
+    PADDLE_ENFORCE_GT(1, 2UL);
+  } catch (paddle::platform::EnforceNotMet error) {
+    in_catch = true;
+    const std::string msg = "enforce 1 > 2UL failed, 1 <= 2";
+    const char* what = error.what();
+    for (size_t i = 0; i < msg.length(); ++i) {
+      ASSERT_EQ(what[i], msg[i]);
+    }
+  }
+  ASSERT_TRUE(in_catch);
+}
+TEST(ENFORCE_GE, OK) {
+  PADDLE_ENFORCE_GE(2, 2UL);
+  PADDLE_ENFORCE_GE(3, 2UL);
+  PADDLE_ENFORCE_GE(3, 2);
+  PADDLE_ENFORCE_GE(3.21, 2UL);
+}
+TEST(ENFORCE_GE, FAIL) {
+  bool in_catch = false;
+  try {
+    PADDLE_ENFORCE_GE(1, 2UL);
+  } catch (paddle::platform::EnforceNotMet error) {
+    in_catch = true;
+    const std::string msg = "enforce 1 >= 2UL failed, 1 < 2";
+    const char* what = error.what();
+    for (size_t i = 0; i < msg.length(); ++i) {
+      ASSERT_EQ(what[i], msg[i]);
+    }
+  }
+  ASSERT_TRUE(in_catch);
+}
+TEST(ENFORCE_LE, OK) {
+  PADDLE_ENFORCE_LE(1, 1);
+  PADDLE_ENFORCE_LE(1, 1UL);
+  PADDLE_ENFORCE_LE(2, 3UL);
+  PADDLE_ENFORCE_LE(2UL, 3);
+  PADDLE_ENFORCE_LE(2UL, 3.2);
+}
+TEST(ENFORCE_LE, FAIL) {
+  bool in_catch = false;
+  try {
+    PADDLE_ENFORCE_GT(1, 2UL);
+  } catch (paddle::platform::EnforceNotMet error) {
+    in_catch = true;
+    const std::string msg = "enforce 1 > 2UL failed, 1 <= 2";
+    const char* what = error.what();
+    for (size_t i = 0; i < msg.length(); ++i) {
+      ASSERT_EQ(what[i], msg[i]);
+    }
+  }
+  ASSERT_TRUE(in_catch);
+}
+TEST(ENFORCE_LT, OK) {
+  PADDLE_ENFORCE_LT(3, 10);
+  PADDLE_ENFORCE_LT(2, 3UL);
+  PADDLE_ENFORCE_LT(2UL, 3);
+}
+TEST(ENFORCE_LT, FAIL) {
+  bool in_catch = false;
+  try {
+    PADDLE_ENFORCE_LT(1UL, 0.12);
+  } catch (paddle::platform::EnforceNotMet error) {
+    in_catch = true;
+    const std::string msg = "enforce 1UL < 0.12 failed, 1 >= 0.12";
+    const char* what = error.what();
+    for (size_t i = 0; i < msg.length(); ++i) {
+      ASSERT_EQ(what[i], msg[i]);
+    }
+  }
+  ASSERT_TRUE(in_catch);
+}
--- a/paddle/platform/place.h
+++ b/paddle/platform/place.h
@@ -32,7 +32,7 @@ struct CPUPlace {
 struct GPUPlace {
  GPUPlace() : GPUPlace(0) {}
-  GPUPlace(int d) : device(d) {}  // NOLINT
+  explicit GPUPlace(int d) : device(d) {}
  // needed for variant equality comparison
  inline bool operator==(const GPUPlace &o) const { return device == o.device; }

--- a/paddle/string/piece.h
+++ b/paddle/string/piece.h
@@ -39,8 +39,8 @@ public:
  // size_ is 0.
  Piece();
  Piece(const char* d, size_t n);
-  Piece(const char* d);         // NOLINT
+  Piece(const char* d);         // NOLINT: accept C string into Piece.
-  Piece(const std::string& s);  // NOLINT
+  Piece(const std::string& s);  // NOLINT: accept C++ string into Piece.
  const char* data() const { return data_; }
  size_t len() const { return size_; }

--- a/python/paddle/v2/dataset/cifar.py
+++ b/python/paddle/v2/dataset/cifar.py
@@ -133,7 +133,7 @@ def convert(path):
    """
    Converts dataset to recordio format
    """
-    paddle.v2.dataset.common.convert(path, train100(), 10, "cifar_train100")
+    paddle.v2.dataset.common.convert(path, train100(), 1000, "cifar_train100")
-    paddle.v2.dataset.common.convert(path, test100(), 10, "cifar_test100")
+    paddle.v2.dataset.common.convert(path, test100(), 1000, "cifar_test100")
-    paddle.v2.dataset.common.convert(path, train10(), 10, "cifar_train10")
+    paddle.v2.dataset.common.convert(path, train10(), 1000, "cifar_train10")
-    paddle.v2.dataset.common.convert(path, test10(), 10, "cifar_test10")
+    paddle.v2.dataset.common.convert(path, test10(), 1000, "cifar_test10")
--- a/python/paddle/v2/dataset/common.py
+++ b/python/paddle/v2/dataset/common.py
@@ -32,17 +32,22 @@ __all__ = [
 DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset')
 # When running unit tests, there could be multiple processes that
 # trying to create DATA_HOME directory simultaneously, so we cannot
 # use a if condition to check for the existence of the directory;
 # instead, we use the filesystem as the synchronization mechanism by
 # catching returned errors.
-try:
+def must_mkdirs(path):
-    os.makedirs(DATA_HOME)
+    try:
-except OSError as exc:
+        os.makedirs(DATA_HOME)
-    if exc.errno != errno.EEXIST:
+    except OSError as exc:
-        raise
+        if exc.errno != errno.EEXIST:
-    pass
+            raise
+        pass
+must_mkdirs(DATA_HOME)
 def md5file(fname):
@@ -93,6 +98,19 @@ def fetch_all():
                "fetch")()
+def fetch_all_recordio(path):
+    for module_name in filter(lambda x: not x.startswith("__"),
+                              dir(paddle.v2.dataset)):
+        if "convert" in dir(
+                importlib.import_module("paddle.v2.dataset.%s" % module_name)) and \
+                not module_name == "common":
+            ds_path = os.path.join(path, module_name)
+            must_mkdirs(ds_path)
+            getattr(
+                importlib.import_module("paddle.v2.dataset.%s" % module_name),
+                "convert")(ds_path)
 def split(reader, line_count, suffix="%05d.pickle", dumper=cPickle.dump):
    """
    you can call the function as:

--- a/python/paddle/v2/dataset/conll05.py
+++ b/python/paddle/v2/dataset/conll05.py
@@ -233,5 +233,5 @@ def convert(path):
    """
    Converts dataset to recordio format
    """
-    paddle.v2.dataset.common.convert(path, test(), 10, "conl105_train")
+    paddle.v2.dataset.common.convert(path, test(), 1000, "conl105_train")
-    paddle.v2.dataset.common.convert(path, test(), 10, "conl105_test")
+    paddle.v2.dataset.common.convert(path, test(), 1000, "conl105_test")
--- a/python/paddle/v2/dataset/imdb.py
+++ b/python/paddle/v2/dataset/imdb.py
@@ -173,5 +173,5 @@ def convert(path):
    Converts dataset to recordio format
    """
    w = word_dict()
-    paddle.v2.dataset.common.convert(path, lambda: train(w), 10, "imdb_train")
+    paddle.v2.dataset.common.convert(path, lambda: train(w), 1000, "imdb_train")
-    paddle.v2.dataset.common.convert(path, lambda: test(w), 10, "imdb_test")
+    paddle.v2.dataset.common.convert(path, lambda: test(w), 1000, "imdb_test")
--- a/python/paddle/v2/dataset/imikolov.py
+++ b/python/paddle/v2/dataset/imikolov.py
@@ -155,6 +155,7 @@ def convert(path):
    N = 5
    word_dict = build_dict()
    paddle.v2.dataset.common.convert(path,
-                                     train(word_dict, N), 10, "imikolov_train")
+                                     train(word_dict, N), 1000,
+                                     "imikolov_train")
    paddle.v2.dataset.common.convert(path,
-                                     test(word_dict, N), 10, "imikolov_test")
+                                     test(word_dict, N), 1000, "imikolov_test")
--- a/python/paddle/v2/dataset/mnist.py
+++ b/python/paddle/v2/dataset/mnist.py
@@ -119,5 +119,5 @@ def convert(path):
    """
    Converts dataset to recordio format
    """
-    paddle.v2.dataset.common.convert(path, train(), 10, "minist_train")
+    paddle.v2.dataset.common.convert(path, train(), 1000, "minist_train")
-    paddle.v2.dataset.common.convert(path, test(), 10, "minist_test")
+    paddle.v2.dataset.common.convert(path, test(), 1000, "minist_test")
--- a/python/paddle/v2/dataset/movielens.py
+++ b/python/paddle/v2/dataset/movielens.py
@@ -254,8 +254,8 @@ def convert(path):
    """
    Converts dataset to recordio format
    """
-    paddle.v2.dataset.common.convert(path, train(), 10, "movielens_train")
+    paddle.v2.dataset.common.convert(path, train(), 1000, "movielens_train")
-    paddle.v2.dataset.common.convert(path, test(), 10, "movielens_test")
+    paddle.v2.dataset.common.convert(path, test(), 1000, "movielens_test")
 if __name__ == '__main__':

--- a/python/paddle/v2/dataset/sentiment.py
+++ b/python/paddle/v2/dataset/sentiment.py
@@ -137,5 +137,5 @@ def convert(path):
    """
    Converts dataset to recordio format
    """
-    paddle.v2.dataset.common.convert(path, train, 10, "sentiment_train")
+    paddle.v2.dataset.common.convert(path, train, 1000, "sentiment_train")
-    paddle.v2.dataset.common.convert(path, test, 10, "sentiment_test")
+    paddle.v2.dataset.common.convert(path, test, 1000, "sentiment_test")
--- a/python/paddle/v2/dataset/uci_housing.py
+++ b/python/paddle/v2/dataset/uci_housing.py
@@ -119,5 +119,5 @@ def convert(path):
    """
    Converts dataset to recordio format
    """
-    paddle.v2.dataset.common.convert(path, train(), 10, "uci_housing_train")
+    paddle.v2.dataset.common.convert(path, train(), 1000, "uci_housing_train")
-    paddle.v2.dataset.common.convert(path, test(), 10, "uci_houseing_test")
+    paddle.v2.dataset.common.convert(path, test(), 1000, "uci_houseing_test")
--- a/python/paddle/v2/dataset/wmt14.py
+++ b/python/paddle/v2/dataset/wmt14.py
@@ -169,5 +169,6 @@ def convert(path):
    Converts dataset to recordio format
    """
    dict_size = 30000
-    paddle.v2.dataset.common.convert(path, train(dict_size), 10, "wmt14_train")
+    paddle.v2.dataset.common.convert(path,
-    paddle.v2.dataset.common.convert(path, test(dict_size), 10, "wmt14_test")
+                                     train(dict_size), 1000, "wmt14_train")
+    paddle.v2.dataset.common.convert(path, test(dict_size), 1000, "wmt14_test")