Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into fix_croplayer

7c09999d · wanghaoshuang · 4409255c · a98346f4 · 7c09999d · 7c09999d
40 changed files
--- a/cmake/generic.cmake
+++ b/cmake/generic.cmake
@@ -290,8 +290,22 @@ function(go_library TARGET_NAME)
    set(${TARGET_NAME}_LIB_NAME "${CMAKE_STATIC_LIBRARY_PREFIX}${TARGET_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}" CACHE STRING "output library name for target ${TARGET_NAME}")
  endif()

-  # Add dummy code to support `make target_name` under Terminal Command
  set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}_dummy.c)
+
+  # Always-run rule: its OUTPUT file is never actually created, so the
+  # build tool considers it out of date and touches ${dummyfile} every build.
+  add_custom_command(
+    OUTPUT dummy_rebuild_${TARGET_NAME}
+    COMMAND ${CMAKE_COMMAND} -E touch "${dummyfile}"
+    VERBATIM)
+  # Wrap the rule's output in a custom target so the rule can be named
+  # as a dependency in `add_dependencies` (which only accepts targets).
+  # ${CMAKE_COMMAND} is used instead of a bare `cmake` found via PATH.
+  add_custom_target(rebuild_${TARGET_NAME}
+    DEPENDS dummy_rebuild_${TARGET_NAME}
+    )
+
+  # Add dummy code to support `make target_name` under Terminal Command
  file(WRITE ${dummyfile} "const char * dummy = \"${dummyfile}\";")
  if (go_library_SHARED OR go_library_shared)
    add_library(${TARGET_NAME} SHARED ${dummyfile})
@@ -302,6 +316,12 @@ function(go_library TARGET_NAME)
    add_dependencies(${TARGET_NAME} ${go_library_DEPS})
  endif(go_library_DEPS)

+  # The "source file" of the library is `${dummyfile}`, which never
+  # changes, so the target would never rebuild. Make the target depend
+  # on the custom command that touches the library "source file", so
+  # that a rebuild always happens.
+  add_dependencies(${TARGET_NAME} rebuild_${TARGET_NAME})
+
  set(${TARGET_NAME}_LIB_PATH "${CMAKE_CURRENT_BINARY_DIR}/${${TARGET_NAME}_LIB_NAME}" CACHE STRING "output library path for target ${TARGET_NAME}")

  file(GLOB GO_SOURCE RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.go")

--- a/paddle/framework/CMakeLists.txt
+++ b/paddle/framework/CMakeLists.txt
 # ddim lib
-cc_library(enforce SRCS enforce.cc DEPS glog)
-cc_test(enforce_test SRCS enforce_test.cc DEPS enforce)
 cc_library(ddim SRCS ddim.cc DEPS eigen3)
 cc_test(ddim_test SRCS ddim_test.cc DEPS ddim)
 nv_test(dim_test SRCS dim_test.cu DEPS ddim)
-cc_library(tensor SRCS tensor.cc DEPS ddim place enforce paddle_memory)
+
+cc_library(tensor SRCS tensor.cc DEPS ddim place paddle_memory)
 cc_test(tensor_test SRCS tensor_test.cc DEPS tensor)
+cc_test(eigen_test SRCS eigen_test.cc DEPS tensor)
+
 cc_test(variable_test SRCS variable_test.cc)
 cc_test(scope_test SRCS scope_test.cc)
+
 proto_library(attr_type SRCS attr_type.proto)
 proto_library(op_proto SRCS op_proto.proto DEPS attr_type)
-cc_test(op_proto_test SRCS op_proto_test.cc DEPS op_proto protobuf)
 proto_library(op_desc SRCS op_desc.proto DEPS attr_type)
+cc_test(op_proto_test SRCS op_proto_test.cc DEPS op_proto protobuf)
 cc_test(op_desc_test SRCS op_desc_test.cc DEPS op_desc protobuf)

 cc_library(operator SRCS operator.cc DEPS op_desc device_context tensor)
 cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry)

-cc_library(op_registry SRCS op_registry.cc DEPS op_proto op_desc enforce)
+cc_library(op_registry SRCS op_registry.cc DEPS op_proto op_desc)
 cc_test(op_registry_test SRCS op_registry_test.cc DEPS op_registry operator)

 py_proto_compile(framework_py_proto SRCS attr_type.proto op_proto.proto op_desc.proto)

--- a/paddle/framework/attr_checker.h
+++ b/paddle/framework/attr_checker.h
@@ -4,8 +4,9 @@
 #include <functional>
 #include <string>
 #include <unordered_map>
+#include <unordered_set>
 #include <vector>
-#include "paddle/framework/enforce.h"
+#include "paddle/platform/enforce.h"

 namespace paddle {
 namespace framework {
@@ -41,6 +42,35 @@ class DefaultValueSetter {
  T default_value_;
 };

+// Checker functor: accepts an attribute value only if it is in a fixed set.
+template <typename T>
+class EnumInContainer {
+ public:
+  explicit EnumInContainer(const std::unordered_set<T>& c) : container_(c) {}
+  // Fails through PADDLE_ENFORCE when `val` is not one of the allowed values.
+  void operator()(T& val) const {
+    PADDLE_ENFORCE(container_.find(val) != container_.end(),
+                   "Value %s is not in enum container %s", val,
+                   ContainerDebugString());
+  }
+
+ private:
+  // Formats the allowed values as "[a, b, c]" in the set's iteration order.
+  std::string ContainerDebugString() const {
+    std::ostringstream sout;
+    sout << "[";
+    const char* separator = "";  // nothing goes before the first element
+    for (auto& v : container_) {
+      sout << separator << v;
+      separator = ", ";
+    }
+    sout << "]";
+    return sout.str();
+  }
+
+  std::unordered_set<T> container_;
+};
+
 // check whether a certain attribute fit its limits
 // an attribute can have more than one limits
 template <typename T>
@@ -50,6 +80,11 @@ class TypedAttrChecker {
 public:
  TypedAttrChecker(const std::string& attr_name) : attr_name_(attr_name) {}

+  // Accept only values contained in `range`; returns *this so that limits
+  // can be chained.
+  TypedAttrChecker& InEnum(const std::unordered_set<T>& range) {
+    EnumInContainer<T> in_range(range);
+    value_checkers_.push_back(in_range);
+    return *this;
+  }
+
  TypedAttrChecker& LargerThan(const T& lower_bound) {
    value_checkers_.push_back(LargerThanChecker<T>(lower_bound));
    return *this;

--- a/paddle/framework/ddim.cc
+++ b/paddle/framework/ddim.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include "paddle/framework/ddim.h"
-#include "paddle/framework/enforce.h"
+#include "paddle/platform/enforce.h"

 namespace paddle {
 namespace framework {

--- a/paddle/framework/ddim.h
+++ b/paddle/framework/ddim.h
@@ -19,7 +19,7 @@ limitations under the License. */
 #include <stdexcept>
 #include <vector>
 #include "paddle/framework/dim.h"
-#include "paddle/framework/enforce.h"
+#include "paddle/platform/enforce.h"
 #include "unsupported/Eigen/CXX11/Tensor"

 namespace paddle {
@@ -119,17 +119,6 @@ int arity(const DDim& ddim);

 std::ostream& operator<<(std::ostream&, const DDim&);

-template <int NDIMS>
-Eigen::DSizes<Eigen::DenseIndex, NDIMS> ToEigenDSizes(const DDim& dims) {
-  int rank = arity(dims);
-  PADDLE_ENFORCE(rank == NDIMS, "DDim and NDIMS must be same");
-  Eigen::DSizes<Eigen::DenseIndex, NDIMS> dsizes;
-  for (int d = 0; d < rank; d++) {
-    dsizes[d] = dims[d];
-  }
-  return dsizes;
-}
-
 }  // namespace framework
 }  // namespace paddle


--- a/paddle/framework/eigen.h
+++ b/paddle/framework/eigen.h
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "paddle/framework/tensor.h"
+#include "unsupported/Eigen/CXX11/Tensor"
+
+namespace paddle {
+namespace framework {
+
+// EigenDim converts paddle::framework::DDim into Eigen::DSizes.
+template <int D>
+struct EigenDim {
+  using Type = Eigen::DSizes<Eigen::DenseIndex, D>;
+
+  // Copies every extent of `dims` into a rank-D Eigen::DSizes.
+  // PADDLE_ENFORCE rejects a DDim whose arity differs from D.
+  static Type From(const DDim& dims) {
+    PADDLE_ENFORCE(arity(dims) == D, "D must match arity(DDim)");
+    Type sizes;
+    // Past the check above arity(dims) equals D, so D bounds the copy.
+    for (int axis = 0; axis < D; ++axis) {
+      sizes[axis] = dims[axis];
+    }
+    return sizes;
+  }
+};
+
+// Interpret paddle::framework::Tensor as EigenTensor and EigenConstTensor.
+// Each view is an Eigen::TensorMap built over the pointer from data<T>().
+template <typename T, size_t D, int MajorType = Eigen::RowMajor,
+          typename IndexType = Eigen::DenseIndex>
+struct EigenTensor {
+  // TODO(qijun) Now, the default type is unaligned, and we will make a
+  // benchmark on the speed of aligned and unaligned version in future.
+  using Type = Eigen::TensorMap<Eigen::Tensor<T, D, MajorType, IndexType>>;
+
+  using ConstType =
+      Eigen::TensorMap<Eigen::Tensor<const T, D, MajorType, IndexType>>;
+
+  // Mutable view over `tensor`, shaped by the caller-supplied `dims`.
+  static Type From(Tensor& tensor, DDim dims) {
+    auto buffer = tensor.data<T>();
+    return Type(buffer, EigenDim<D>::From(dims));
+  }
+
+  // Mutable view shaped by the tensor's own dims.
+  static Type From(Tensor& tensor) {
+    return From(tensor, tensor.dims_);
+  }
+
+  // Read-only view over `tensor`, shaped by the caller-supplied `dims`.
+  static ConstType From(const Tensor& tensor, DDim dims) {
+    auto buffer = tensor.data<T>();
+    return ConstType(buffer, EigenDim<D>::From(dims));
+  }
+
+  // Read-only view shaped by the tensor's own dims.
+  static ConstType From(const Tensor& tensor) {
+    return From(tensor, tensor.dims_);
+  }
+};
+
+// EigenVector is the rank-1 form of EigenTensor; From() comes from the base.
+template <typename T, int MajorType = Eigen::RowMajor,
+          typename IndexType = Eigen::DenseIndex>
+struct EigenVector : public EigenTensor<T, 1, MajorType, IndexType> {
+  // The real base class. Flatten goes through it so that MajorType and
+  // IndexType are honoured instead of falling back to the defaults of
+  // EigenTensor<T, 1>.
+  using Base = EigenTensor<T, 1, MajorType, IndexType>;
+
+  // Flatten is to reshape a Tensor into a one dimension EigenVector
+  // whose length is the product of the tensor's dims.
+  static typename Base::Type Flatten(Tensor& tensor) {
+    return Base::From(
+        tensor, make_ddim({static_cast<int>(product(tensor.dims_))}));
+  }
+
+  // Read-only overload of Flatten.
+  static typename Base::ConstType Flatten(const Tensor& tensor) {
+    return Base::From(
+        tensor, make_ddim({static_cast<int>(product(tensor.dims_))}));
+  }
+};
+
+// EigenMatrix is EigenTensor with the rank fixed to 2.
+template <typename T, int MajorType = Eigen::RowMajor,
+          typename IndexType = Eigen::DenseIndex>
+using EigenMatrix = EigenTensor<T, 2, MajorType, IndexType>;
+
+}  // namespace framework
+}  // namespace paddle
--- a/paddle/framework/eigen_test.cc
+++ b/paddle/framework/eigen_test.cc
+/*
+  Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+  http://www.apache.org/licenses/LICENSE-2.0
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+*/
+
+#include "paddle/framework/eigen.h"
+#include <gtest/gtest.h>
+
+namespace paddle {
+namespace framework {
+
+// EigenDim<3>::From must carry the three extents over in order.
+TEST(EigenDim, From) {
+  const auto sizes = EigenDim<3>::From(make_ddim({1, 2, 3}));
+  ASSERT_EQ(1, sizes[0]);
+  ASSERT_EQ(2, sizes[1]);
+  ASSERT_EQ(3, sizes[2]);
+}
+
+// A {1, 2, 3} tensor viewed as a rank-3 EigenTensor keeps its dimensions
+// and exposes its elements in row-major order.
+TEST(Eigen, Tensor) {
+  Tensor tensor;
+  float* data =
+      tensor.mutable_data<float>(make_ddim({1, 2, 3}), platform::CPUPlace());
+  const int count = 1 * 2 * 3;
+  for (int idx = 0; idx < count; ++idx) {
+    data[idx] = static_cast<float>(idx);
+  }
+
+  auto view = EigenTensor<float, 3>::From(tensor);
+
+  ASSERT_EQ(1, view.dimension(0));
+  ASSERT_EQ(2, view.dimension(1));
+  ASSERT_EQ(3, view.dimension(2));
+
+  for (int a = 0; a < 1; ++a) {
+    for (int b = 0; b < 2; ++b) {
+      for (int c = 0; c < 3; ++c) {
+        ASSERT_NEAR((a * 2 + b) * 3 + c, view(a, b, c), 1e-6f);
+      }
+    }
+  }
+}
+
+// A rank-1 tensor of six floats maps onto an EigenVector of length six.
+TEST(Eigen, VectorFrom) {
+  Tensor tensor;
+  float* data =
+      tensor.mutable_data<float>(make_ddim({6}), platform::CPUPlace());
+  const int length = 6;
+  for (int idx = 0; idx < length; ++idx) {
+    data[idx] = static_cast<float>(idx);
+  }
+
+  auto vec = EigenVector<float>::From(tensor);
+
+  ASSERT_EQ(6, vec.dimension(0));
+
+  for (int idx = 0; idx < length; ++idx) {
+    ASSERT_NEAR(idx, vec(idx), 1e-6f);
+  }
+}
+
+// Flatten turns a {1, 2, 3} tensor into one vector of 1 * 2 * 3 elements.
+TEST(Eigen, VectorFlatten) {
+  Tensor tensor;
+  float* data =
+      tensor.mutable_data<float>(make_ddim({1, 2, 3}), platform::CPUPlace());
+  const int count = 1 * 2 * 3;
+  for (int idx = 0; idx < count; ++idx) {
+    data[idx] = static_cast<float>(idx);
+  }
+
+  auto flat = EigenVector<float>::Flatten(tensor);
+
+  ASSERT_EQ(1 * 2 * 3, flat.dimension(0));
+
+  for (int idx = 0; idx < count; ++idx) {
+    ASSERT_NEAR(idx, flat(idx), 1e-6f);
+  }
+}
+
+// A {2, 3} tensor viewed as an EigenMatrix is indexed row-major.
+TEST(Eigen, Matrix) {
+  Tensor tensor;
+  float* data =
+      tensor.mutable_data<float>(make_ddim({2, 3}), platform::CPUPlace());
+  const int count = 2 * 3;
+  for (int idx = 0; idx < count; ++idx) {
+    data[idx] = static_cast<float>(idx);
+  }
+
+  auto mat = EigenMatrix<float>::From(tensor);
+
+  ASSERT_EQ(2, mat.dimension(0));
+  ASSERT_EQ(3, mat.dimension(1));
+
+  for (int row = 0; row < 2; ++row) {
+    for (int col = 0; col < 3; ++col) {
+      ASSERT_NEAR(row * 3 + col, mat(row, col), 1e-6f);
+    }
+  }
+}
+
+}  // namespace framework
+}  // namespace paddle
--- a/paddle/framework/enforce.cc
+++ b/paddle/framework/enforce.cc
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-   http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License. */
-
-#include "paddle/framework/enforce.h"
--- a/paddle/framework/enforce.h
+++ b/paddle/framework/enforce.h
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-    http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-#include <glog/logging.h>
-#include <paddle/string/printf.h>
-#include <exception>
-#include <sstream>
-
-namespace paddle {
-namespace framework {
-
-/**
- * @brief Enforce exception. Inherits std::exception
- *
- * All enforce condition not met, will throw an EnforceNotMet exception.
- */
-class EnforceNotMet : public std::exception {
- public:
-  EnforceNotMet(const std::string& msg, const char* file, int fileline) {
-    std::ostringstream sout;
-    sout << msg << " at [" << file << ":" << fileline << "];";
-    all_msg_ = sout.str();
-  }
-
-  const char* what() const noexcept override { return all_msg_.c_str(); }
-
- private:
-  std::string all_msg_;
-};
-
-// From https://stackoverflow.com/questions/30130930/
-// __buildin_expect is in C++ 11 standard. Since the condition which enforced
-// should be true in most situation, it will make the compiler generate faster
-// code by adding `UNLIKELY` macro.
-#define UNLIKELY(condition) __builtin_expect(static_cast<bool>(condition), 0)
-
-/**
- * @brief Throw a EnforceNotMet exception, automatically filled __FILE__ &
- * __LINE__
- *
- * This macro take __VA_ARGS__, user can pass any type if that type can
- * serialize to std::ostream
- */
-#define PADDLE_THROW(...)                                            \
-  do {                                                               \
-    throw ::paddle::framework::EnforceNotMet(                        \
-        ::paddle::string::Sprintf(__VA_ARGS__), __FILE__, __LINE__); \
-  } while (0)
-
-/**
- * @brief Enforce a condition, otherwise throw an EnforceNotMet
- */
-#ifdef NDEBUG
-#define PADDLE_ENFORCE(condition, ...) \
-  do {                                 \
-    if (UNLIKELY(!(condition))) {      \
-      PADDLE_THROW(__VA_ARGS__);       \
-    }                                  \
-  } while (0)
-#else
-#define PADDLE_ENFORCE(condition, ...) \
-  CHECK(condition) << ::paddle::string::Sprintf(__VA_ARGS__);
-#endif
-
-}  // namespace framework
-}  // namespace paddle
--- a/paddle/framework/net.cc
+++ b/paddle/framework/net.cc
@@ -19,7 +19,10 @@
 namespace paddle {
 namespace framework {

-void PlainNet::CompleteAddOp() {
+void PlainNet::CompleteAddOp(bool calc) {
+  add_op_done_ = true;
+  if (!calc) return;
+
  std::unordered_set<std::string> input_set;
  std::unordered_set<std::string> output_set;
  std::unordered_set<std::string> temp_output;
@@ -52,7 +55,6 @@ void PlainNet::CompleteAddOp() {
  }

  attrs_["temporary_index"] = tmp_index;
-  add_op_done_ = true;
 }

 std::string PlainNet::DebugString() const {

--- a/paddle/framework/net.h
+++ b/paddle/framework/net.h
@@ -16,7 +16,6 @@ limitations under the License. */

 #include <paddle/framework/op_desc.pb.h>
 #include <paddle/framework/operator.h>
-#include "paddle/framework/net_proto.pb.h"
 #include "paddle/framework/op_proto.pb.h"
 #include "paddle/framework/op_registry.h"
 #include "paddle/framework/scope.h"
@@ -41,7 +40,7 @@ namespace framework {
 class Net : public OperatorBase {
 public:
  virtual void AddOp(const OperatorPtr& op) = 0;
-  virtual void CompleteAddOp() = 0;
+  // `calculate` defaults to true, the same default the PlainNet override
+  // declares, so a call made through Net behaves like one made on PlainNet.
+  virtual void CompleteAddOp(bool calculate = true) = 0;
 };

 using NetPtr = std::shared_ptr<Net>;
@@ -86,7 +85,7 @@ class PlainNet : public Net {
    ops_.push_back(op);
  }

-  void CompleteAddOp() override;
+  void CompleteAddOp(bool calculate = true) override;

  std::string DebugString() const override;


--- a/paddle/framework/net_op_test.cc
+++ b/paddle/framework/net_op_test.cc
@@ -63,5 +63,5 @@ TEST(OpKernel, all) {
  ASSERT_EQ(2, infer_shape_cnt);
  ASSERT_EQ(2, run_cnt);

-  ASSERT_THROW(net->AddOp(op2), paddle::framework::EnforceNotMet);
+  ASSERT_THROW(net->AddOp(op2), std::runtime_error);
 }
--- a/paddle/framework/op_registry_test.cc
+++ b/paddle/framework/op_registry_test.cc
@@ -91,7 +91,7 @@ TEST(OpRegistry, IllegalAttr) {
  try {
    paddle::framework::OperatorPtr op __attribute__((unused)) =
        paddle::framework::OpRegistry::CreateOp(op_desc);
-  } catch (paddle::framework::EnforceNotMet err) {
+  } catch (std::runtime_error& err) {
    caught = true;
    std::string msg = "larger_than check fail";
    const char* err_msg = err.what();
@@ -138,7 +138,7 @@ TEST(OpRegistry, CustomChecker) {
  try {
    paddle::framework::OperatorPtr op __attribute__((unused)) =
        paddle::framework::OpRegistry::CreateOp(op_desc);
-  } catch (paddle::framework::EnforceNotMet err) {
+  } catch (std::runtime_error& err) {
    caught = true;
    std::string msg = "Attribute 'test_attr' is required!";
    const char* err_msg = err.what();
@@ -157,7 +157,7 @@ TEST(OpRegistry, CustomChecker) {
  try {
    paddle::framework::OperatorPtr op __attribute__((unused)) =
        paddle::framework::OpRegistry::CreateOp(op_desc);
-  } catch (paddle::framework::EnforceNotMet err) {
+  } catch (std::runtime_error& err) {
    caught = true;
    std::string msg = "'test_attr' must be even!";
    const char* err_msg = err.what();
@@ -196,7 +196,7 @@ TEST(ProtoMaker, DuplicatedAttr) {
  pd::OpProto op_proto;
  pd::OpAttrChecker op_checker;
  auto proto_maker = TestAttrProtoMaker(&op_proto, &op_checker);
-  ASSERT_THROW(proto_maker.Validate(), paddle::framework::EnforceNotMet);
+  ASSERT_THROW(proto_maker.Validate(), std::runtime_error);
 }

 class TestInOutProtoMaker : public pd::OpProtoAndCheckerMaker {
@@ -212,5 +212,5 @@ TEST(ProtoMaker, DuplicatedInOut) {
  pd::OpProto op_proto;
  pd::OpAttrChecker op_checker;
  auto proto_maker = TestInOutProtoMaker(&op_proto, &op_checker);
-  ASSERT_THROW(proto_maker.Validate(), paddle::framework::EnforceNotMet);
+  ASSERT_THROW(proto_maker.Validate(), std::runtime_error);
 }
--- a/paddle/framework/tensor.h
+++ b/paddle/framework/tensor.h
@@ -19,9 +19,8 @@ limitations under the License. */
 #include <memory>
 #include <typeindex>
 #include "paddle/framework/ddim.h"
-#include "paddle/framework/enforce.h"
-#include "paddle/framework/tensor_types.h"
 #include "paddle/memory/memory.h"
+#include "paddle/platform/enforce.h"
 #include "paddle/platform/place.h"
 #include "unsupported/Eigen/CXX11/Tensor"

@@ -35,6 +34,15 @@ struct CastToPyBufferImpl;
 namespace framework {

 class Tensor {
+  template <bool less, size_t i, typename... args>
+  friend struct paddle::pybind::details::CastToPyBufferImpl;
+
+  template <typename T, size_t D, int MajorType, typename IndexType>
+  friend struct EigenTensor;
+
+  template <typename T, int MajorType, typename IndexType>
+  friend struct EigenVector;
+
 public:
  Tensor() : offset_(0) {}

@@ -46,7 +54,7 @@ class Tensor {
  }

  template <typename T>
-  T* raw_data() const {
+  T* data() {
    CheckDims<T>();
    return reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(holder_->ptr()) +
                                offset_);
@@ -86,66 +94,6 @@ class Tensor {
                                offset_);
  }

-  template <typename T, size_t NDIMS>
-  typename TTypes<T, NDIMS>::Tensor shaped(DDim new_dims) {
-    Eigen::array<Eigen::DenseIndex, NDIMS> dims =
-        paddle::framework::ToEigenDSizes<NDIMS>(new_dims);
-    return typename TTypes<T, NDIMS>::Tensor(raw_data<T>(), dims);
-  }
-
-  template <typename T, size_t NDIMS>
-  typename TTypes<T, NDIMS>::Tensor tensor() {
-    return typename TTypes<T, NDIMS>::Tensor(
-        raw_data<T>(), paddle::framework::ToEigenDSizes<NDIMS>(dims_));
-  }
-
-  // flat to rank = 1
-  template <typename T>
-  typename TTypes<T>::Flat flat() {
-    return shaped<T, 1>(make_ddim({static_cast<int>(product(dims_))}));
-  }
-
-  // to TensorType Vec
-  template <typename T>
-  typename TTypes<T>::Vec vec() {
-    return tensor<T, 1>();
-  }
-
-  // to TensorType Matrix
-  template <typename T>
-  typename TTypes<T>::Matrix matrix() {
-    return tensor<T, 2>();
-  }
-
-  // const versions of all the methods above.
-  template <typename T, size_t NDIMS>
-  typename TTypes<T, NDIMS>::Tensor shaped(DDim new_dims) const {
-    Eigen::array<Eigen::DenseIndex, NDIMS> dims =
-        paddle::framework::ToEigenDSizes<NDIMS>(new_dims);
-    return typename TTypes<T, NDIMS>::Tensor(data<T>(), dims);
-  }
-
-  template <typename T, size_t NDIMS>
-  typename TTypes<T, NDIMS>::ConstantTensor tensor() const {
-    return typename TTypes<T, NDIMS>::Tensor(
-        data<T>(), paddle::framework::ToEigenDSizes<NDIMS>(dims_));
-  }
-
-  template <typename T>
-  typename TTypes<T>::ConstFlat flat() const {
-    return shaped<T, 1>(make_ddim({static_cast<int>(product(dims_))}));
-  }
-
-  template <typename T>
-  typename TTypes<T>::ConstVec vec() const {
-    return tensor<T, 1>();
-  }
-
-  template <typename T>
-  typename TTypes<T>::ConstMatrix matrix() const {
-    return tensor<T, 2>();
-  }
-
  template <typename T>
  void ShareDataFrom(const Tensor& src) {
    src.CheckDims<T>();
@@ -251,8 +199,6 @@ class Tensor {
  std::shared_ptr<Placeholder> holder_;  // holds the memory block if allocated.
  DDim dims_;
  size_t offset_;  // marks the begin of tensor data area.
-  template <bool less, size_t i, typename... args>
-  friend struct paddle::pybind::details::CastToPyBufferImpl;
 };

 }  // namespace framework

--- a/paddle/framework/tensor_test.cc
+++ b/paddle/framework/tensor_test.cc
@@ -33,7 +33,7 @@ TEST(Tensor, DataAssert) {
  bool caught = false;
  try {
    src_tensor.data<double>();
-  } catch (paddle::framework::EnforceNotMet err) {
+  } catch (std::runtime_error& err) {
    caught = true;
    std::string msg =
        "Tenosr holds no memory. Call Tensor::mutable_data first.";
@@ -107,7 +107,7 @@ TEST(Tensor, ShareDataFrom) {
    bool caught = false;
    try {
      dst_tensor.ShareDataFrom<float>(src_tensor);
-    } catch (EnforceNotMet err) {
+    } catch (std::runtime_error& err) {
      caught = true;
      std::string msg =
          "Tenosr holds no memory. Call Tensor::mutable_data first.";

--- a/paddle/framework/tensor_types.h
+++ b/paddle/framework/tensor_types.h
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-
-#include "unsupported/Eigen/CXX11/Tensor"
-
-namespace paddle {
-namespace framework {
-
-// Helper to define Tensor types given that the scalar is of type T.
-template <typename T, int NDIMS = 1, typename IndexType = Eigen::DenseIndex>
-struct TTypes {
-  // Rank-<NDIMS> tensor of scalar type T.
-  typedef Eigen::TensorMap<Eigen::Tensor<T, NDIMS, Eigen::RowMajor, IndexType>,
-                           Eigen::Aligned>
-      Tensor;
-  typedef Eigen::TensorMap<
-      Eigen::Tensor<const T, NDIMS, Eigen::RowMajor, IndexType>, Eigen::Aligned>
-      ConstTensor;
-
-  // Scalar tensor (implemented as a rank-0 tensor) of scalar type T.
-  typedef Eigen::TensorMap<
-      Eigen::TensorFixedSize<T, Eigen::Sizes<>, Eigen::RowMajor, IndexType>,
-      Eigen::Aligned>
-      Scalar;
-  typedef Eigen::TensorMap<Eigen::TensorFixedSize<const T, Eigen::Sizes<>,
-                                                  Eigen::RowMajor, IndexType>,
-                           Eigen::Aligned>
-      ConstScalar;
-
-  // Rank-1 tensor (vector) of scalar type T.
-  typedef Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, IndexType>,
-                           Eigen::Aligned>
-      Flat;
-  typedef Eigen::TensorMap<
-      Eigen::Tensor<const T, 1, Eigen::RowMajor, IndexType>, Eigen::Aligned>
-      ConstFlat;
-  typedef Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, IndexType>,
-                           Eigen::Aligned>
-      Vec;
-  typedef Eigen::TensorMap<
-      Eigen::Tensor<const T, 1, Eigen::RowMajor, IndexType>, Eigen::Aligned>
-      ConstVec;
-
-  // Rank-2 tensor (matrix) of scalar type T.
-  typedef Eigen::TensorMap<Eigen::Tensor<T, 2, Eigen::RowMajor, IndexType>,
-                           Eigen::Aligned>
-      Matrix;
-  typedef Eigen::TensorMap<
-      Eigen::Tensor<const T, 2, Eigen::RowMajor, IndexType>, Eigen::Aligned>
-      ConstMatrix;
-};
-
-}  // namespace framework
-}  // namespace paddle
--- a/paddle/memory/detail/system_allocator.cc
+++ b/paddle/memory/detail/system_allocator.cc
@@ -14,7 +14,7 @@ limitations under the License. */

 #include "paddle/memory/detail/system_allocator.h"
 #include "paddle/platform/assert.h"
-#include "paddle/platform/error.h"
+#include "paddle/platform/enforce.h"
 #include "paddle/platform/gpu_info.h"

 #include <stdlib.h>    // for malloc and free
@@ -128,8 +128,7 @@ void GPUAllocator::Free(void* p, size_t size, size_t index) {
  // process is terminating, in which case we don't care if
  // cudaFree succeeds.
  if (err != cudaErrorCudartUnloading) {
-    platform::throw_on_error(err,
-                             "cudaFree{Host} failed in GPUAllocator::Free.");
+    PADDLE_ENFORCE(err, "cudaFree{Host} failed in GPUAllocator::Free.");
  }
 }


--- a/paddle/operators/CMakeLists.txt
+++ b/paddle/operators/CMakeLists.txt
@@ -27,7 +27,8 @@ function(op_library TARGET)
    endif()

    list(LENGTH cu_srcs cu_srcs_len)
-    if (${cu_srcs_len} EQUAL 0)
+    list(LENGTH op_library_DEPS dep_len)
+    if (${cu_srcs_len} EQUAL 0 AND ${dep_len} EQUAL 0)
        message(WARNING "The op library ${TARGET} not support GPU!")
    endif()

@@ -47,3 +48,8 @@ op_library(mul_op SRCS mul_op.cc mul_op.cu)
 op_library(rowwise_add_op SRCS rowwise_add_op.cu rowwise_add_op.cc)
 op_library(sigmoid_op SRCS sigmoid_op.cu sigmoid_op.cc)
 op_library(softmax_op SRCS softmax_op.cc softmax_op.cu)
+
+op_library(fc_op SRCS fc_op.cc DEPS mul_op rowwise_add_op sigmoid_op
+        softmax_op net)
+
+op_library(sgd_op SRCS sgd_op.cc sgd_op.cu)
--- a/paddle/operators/add_op.h
+++ b/paddle/operators/add_op.h
@@ -14,6 +14,7 @@ limitations under the License. */

 #pragma once
 #include "glog/logging.h"
+#include "paddle/framework/eigen.h"
 #include "paddle/framework/operator.h"

 namespace paddle {
@@ -29,8 +30,10 @@ public:

    output->mutable_data<T>(context.GetPlace());

-    output->flat<T>().device(*(context.GetEigenDevice<Place>())) =
-        input0.flat<T>() + input1.flat<T>();
+    framework::EigenVector<T>::Flatten(*output).device(
+        *(context.GetEigenDevice<Place>())) =
+        framework::EigenVector<T>::Flatten(input0) +
+        framework::EigenVector<T>::Flatten(input1);
  }
 };


--- a/paddle/operators/fc_op.cc
+++ b/paddle/operators/fc_op.cc
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include "paddle/framework/net.h"
+#include "paddle/framework/op_registry.h"
+#include "paddle/framework/operator.h"
+
+namespace paddle {
+namespace operators {
+
+// FullyConnectedOp is a PlainNet assembled in Init() from registered ops:
+// "mul", an optional "rowwise_add", and the op named by the "activation"
+// attribute.
+class FullyConnectedOp : public framework::PlainNet {
+public:
+  void Init() override {
+    // before_act = mul(X, W)
+    AddOp(framework::OpRegistry::CreateOp(
+        "mul", {Input("X"), Input("W")}, {Output("before_act")}, {}));
+
+    // The bias step is added only when "b" is not the empty variable name;
+    // it reads and writes "before_act".
+    const auto bias = Input("b");
+    if (bias != framework::OperatorBase::EMPTY_VAR_NAME()) {
+      AddOp(framework::OpRegistry::CreateOp(
+          "rowwise_add",
+          {Output("before_act"), Input("b")},
+          {Output("before_act")},
+          {}));
+    }
+
+    // Y = activation(before_act)
+    const auto act_type = GetAttr<std::string>("activation");
+    AddOp(framework::OpRegistry::CreateOp(
+        act_type, {Output("before_act")}, {Output("Y")}, {}));
+
+    // false: PlainNet::CompleteAddOp marks the net as complete and returns
+    // before its input/output bookkeeping.
+    CompleteAddOp(false);
+  }
+};
+
+// Declares the inputs, outputs and attributes of the fc operator.
+class FullyConnectedOpMaker : public framework::OpProtoAndCheckerMaker {
+public:
+  FullyConnectedOpMaker(framework::OpProto *proto,
+                        framework::OpAttrChecker *op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {
+    AddInput("X", "the input of fc operator");
+    AddInput("W", "the weight of fc operator");
+    AddInput("b", "the bias of fc operator");
+
+    AddOutput("Y", "the output of fc operator");
+    // NOTE(review): the trailing `true` presumably marks "before_act" as a
+    // temporary/intermediate output -- confirm against AddOutput's signature.
+    AddOutput(
+        "before_act", "the before activation output of fc operator", true);
+    // "activation" defaults to "sigmoid" and is limited to the two values
+    // passed to InEnum.
+    AddAttr<std::string>("activation", "The activation key for fc layer")
+        .SetDefault("sigmoid")
+        .InEnum({"sigmoid", "softmax"});
+
+    //! TODO(yuyang18): Complete comment;
+    AddComment("FullyConnected Operator");
+  }
+};
+}  // namespace operators
+}  // namespace paddle
+
+USE_OP(mul);
+USE_OP(rowwise_add);
+USE_OP(sigmoid);
+USE_OP(softmax);
+
+REGISTER_OP(fc,
+            paddle::operators::FullyConnectedOp,
+            paddle::operators::FullyConnectedOpMaker);
--- a/paddle/operators/sgd_op.cc
+++ b/paddle/operators/sgd_op.cc
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/operators/sgd_op.h"
+#include "paddle/framework/op_registry.h"
+#include "paddle/framework/tensor.h"
+
+namespace paddle {
+namespace operators {
+
+// SGDOp defines only shape inference here; no computation lives in this
+// class.
+class SGDOp : public framework::OperatorWithKernel {
+protected:
+  // Requires exactly two non-null inputs with identical dims and exactly one
+  // non-null output, then gives the output the dims of inputs[0].
+  // Any violated requirement is reported through PADDLE_ENFORCE.
+  void InferShape(
+      const std::vector<const framework::Tensor *> &inputs,
+      const std::vector<framework::Tensor *> &outputs) const override {
+    // The size checks come first so the indexing below stays in range.
+    PADDLE_ENFORCE(inputs.size() == 2, "Input size of SGDOp must be two");
+    PADDLE_ENFORCE(outputs.size() == 1, "Output size of SGDOp must be one");
+    PADDLE_ENFORCE(inputs[0] != nullptr, "inputs[0] must be set");
+    PADDLE_ENFORCE(inputs[1] != nullptr, "inputs[1] must be set");
+    PADDLE_ENFORCE(outputs[0] != nullptr, "outputs[0] must be set");
+    PADDLE_ENFORCE(inputs[0]->dims() == inputs[1]->dims(),
+                   "Two input of SGD Op's dimension must be same.");
+    outputs[0]->set_dims(inputs[0]->dims());
+  }
+};
+
+// Proto maker for the "sgd" operator: declares inputs "param" and "grad",
+// output "param_out", the float attribute "learning_rate" and the operator
+// comment.
+class SGDOpMaker : public framework::OpProtoAndCheckerMaker {
+public:
+  // Forwards `proto` and `op_checker` to the base maker, then declares the
+  // operator interface.
+  SGDOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
+      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
+    AddInput("param", "input parameter");
+    AddInput("grad", "input gradient");
+    AddOutput("param_out", "output parameter");
+    // No default is set here for "learning_rate".
+    AddAttr<float>("learning_rate", "learning rate of sgd");
+    AddComment(R"DOC(
+
+Simplest sgd algorithm.
+
+param_out = param - learning_rate * grad;
+
+)DOC");
+  }
+};
+}  // namespace operators
+}  // namespace paddle
+
+// Register the sgd operator together with its proto maker.
+REGISTER_OP(sgd, paddle::operators::SGDOp, paddle::operators::SGDOpMaker);
+
+// Single-precision CPU kernel.  The alias is needed because a template-id
+// containing a comma cannot be passed as a single macro argument.
+using SGDOpKernel_CPU_float =
+    paddle::operators::SGDOpKernel<paddle::platform::CPUPlace, float>;
+REGISTER_OP_CPU_KERNEL(sgd, SGDOpKernel_CPU_float);
--- a/paddle/operators/sgd_op.cu
+++ b/paddle/operators/sgd_op.cu
+#include "paddle/operators/sgd_op.h"
+#include "paddle/framework/op_registry.h"
+
+// Single-precision GPU kernel for the sgd operator.  The alias is needed
+// because a template-id containing a comma cannot be passed as a single
+// macro argument.
+using SGDOpKernel_GPU_float =
+    paddle::operators::SGDOpKernel<paddle::platform::GPUPlace, float>;
+REGISTER_OP_GPU_KERNEL(sgd, SGDOpKernel_GPU_float);
\ No newline at end of file
--- a/paddle/operators/sgd_op.h
+++ b/paddle/operators/sgd_op.h
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+#include "glog/logging.h"
+#include "paddle/framework/eigen.h"
+#include "paddle/framework/operator.h"
+
+namespace paddle {
+namespace operators {
+
+// Kernel for the sgd operator.  Computes
+//   param_out = param - learning_rate * grad
+// over the flattened tensors, on the Eigen device obtained for `Place`, with
+// element type `T`.
+template <typename Place, typename T>
+class SGDOpKernel : public framework::OpKernel {
+public:
+  // Reads inputs "param" and "grad" and attribute "learning_rate" from `ctx`
+  // and writes the result into output 0.
+  void Compute(const framework::KernelContext& ctx) const override {
+    // NOTE(review): plain `auto` makes `param` and `grad` Tensor objects of
+    // their own rather than references -- presumably a cheap copy; confirm.
+    auto param = ctx.Input("param")->Get<framework::Tensor>();
+    auto grad = ctx.Input("grad")->Get<framework::Tensor>();
+    // The output is looked up by index while the inputs are looked up by name.
+    auto* param_out = ctx.Output(0)->GetMutable<framework::Tensor>();
+    float lr = ctx.op_.GetAttr<float>("learning_rate");
+
+    // Presumably makes sure the output owns storage of type T on ctx's place
+    // before it is written -- confirm against Tensor::mutable_data.
+    param_out->mutable_data<T>(ctx.GetPlace());
+
+    // Evaluate the update on the Eigen device and store it in param_out.
+    framework::EigenVector<T>::Flatten(*param_out)
+        .device(*(ctx.GetEigenDevice<Place>())) =
+        framework::EigenVector<T>::Flatten(param) -
+        lr * framework::EigenVector<T>::Flatten(grad);
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
--- a/paddle/operators/sgd_op_test.cc
+++ b/paddle/operators/sgd_op_test.cc
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <gtest/gtest.h>
+#include <paddle/framework/op_registry.h>
+USE_OP(sgd);
+
+// The operator registry must contain a proto stored under the key "sgd".
+TEST(SGDOp, GetOpProto) {
+  auto& registered = paddle::framework::OpRegistry::protos();
+  ASSERT_NE(registered.find("sgd"), registered.end());
+}
--- a/paddle/platform/CMakeLists.txt
+++ b/paddle/platform/CMakeLists.txt
@@ -8,6 +8,8 @@ cc_test(place_test SRCS place_test.cc DEPS place glog gflags)

 add_subdirectory(dynload)

+cc_test(enforce_test SRCS enforce_test.cc)
+
 IF(WITH_GPU)
    set(GPU_CTX_DEPS dynload_cuda dynamic_loader)
 ELSE()

--- a/paddle/platform/cpu_info.cc
+++ b/paddle/platform/cpu_info.cc
@@ -22,7 +22,6 @@ limitations under the License. */
 #endif

 #include "gflags/gflags.h"
-#include "paddle/platform/error.h"

 DEFINE_double(fraction_of_cpu_memory_to_use, 1,
              "Default use 100% of CPU memory for PaddlePaddle,"

--- a/paddle/platform/device_context.h
+++ b/paddle/platform/device_context.h
@@ -11,12 +11,13 @@ limitations under the License. */

 #pragma once

-#include "paddle/framework/enforce.h"
+#include "paddle/platform/enforce.h"
+#include "paddle/platform/place.h"
+
 #ifndef PADDLE_ONLY_CPU
 #include "paddle/platform/dynload/cublas.h"
 #include "paddle/platform/dynload/cudnn.h"
 #include "paddle/platform/dynload/curand.h"
-#include "paddle/platform/error.h"
 #include "paddle/platform/gpu_info.h"
 #define EIGEN_USE_GPU
 #endif
@@ -71,8 +72,7 @@ class CUDADeviceContext : public DeviceContext {
 public:
  explicit CUDADeviceContext(const GPUPlace gpu_place) : gpu_place_(gpu_place) {
    GPUPlaceGuard guard(gpu_place_);
-    paddle::platform::throw_on_error(cudaStreamCreate(&stream_),
-                                     "cudaStreamCreate failed");
+    PADDLE_ENFORCE(cudaStreamCreate(&stream_), "cudaStreamCreate failed");
    eigen_stream_.reset(new Eigen::CudaStreamDevice(&stream_));
    eigen_device_.reset(new Eigen::GpuDevice(eigen_stream_.get()));
  }
@@ -83,8 +83,8 @@ class CUDADeviceContext : public DeviceContext {
  }

  void Wait() {
-    paddle::platform::throw_on_error(cudaStreamSynchronize(stream_),
-                                     "cudaStreamSynchronize failed");
+    PADDLE_ENFORCE(cudaStreamSynchronize(stream_),
+                   "cudaStreamSynchronize failed");
  }

  cudaStream_t stream() { return stream_; }
@@ -94,12 +94,11 @@ class CUDADeviceContext : public DeviceContext {
  cublasHandle_t cublas_handle() {
    if (!blas_handle_) {
      GPUPlaceGuard guard(gpu_place_);
-      PADDLE_ENFORCE(paddle::platform::dynload::cublasCreate(&blas_handle_) ==
-                         CUBLAS_STATUS_SUCCESS,
+      PADDLE_ENFORCE(paddle::platform::dynload::cublasCreate(&blas_handle_),
                     "cublasCreate failed");
-      PADDLE_ENFORCE(paddle::platform::dynload::cublasSetStream(
-                         blas_handle_, stream_) == CUBLAS_STATUS_SUCCESS,
-                     "cublasSetStream failed");
+      PADDLE_ENFORCE(
+          paddle::platform::dynload::cublasSetStream(blas_handle_, stream_),
+          "cublasSetStream failed");
    }
    return blas_handle_;
  }
@@ -107,12 +106,11 @@ class CUDADeviceContext : public DeviceContext {
  cudnnHandle_t cudnn_handle() {
    if (!dnn_handle_) {
      GPUPlaceGuard guard(gpu_place_);
-      PADDLE_ENFORCE(paddle::platform::dynload::cudnnCreate(&dnn_handle_) ==
-                         CUDNN_STATUS_SUCCESS,
+      PADDLE_ENFORCE(paddle::platform::dynload::cudnnCreate(&dnn_handle_),
                     "cudnnCreate failed");
-      PADDLE_ENFORCE(paddle::platform::dynload::cudnnSetStream(
-                         dnn_handle_, stream_) == CUDNN_STATUS_SUCCESS,
-                     "cudnnSetStream failed");
+      PADDLE_ENFORCE(
+          paddle::platform::dynload::cudnnSetStream(dnn_handle_, stream_),
+          "cudnnSetStream failed");
    }
    return dnn_handle_;
  }
@@ -121,16 +119,15 @@ class CUDADeviceContext : public DeviceContext {
    if (!rand_generator_) {
      GPUPlaceGuard guard(gpu_place_);
      PADDLE_ENFORCE(paddle::platform::dynload::curandCreateGenerator(
-                         &rand_generator_, CURAND_RNG_PSEUDO_DEFAULT) ==
-                         CURAND_STATUS_SUCCESS,
+                         &rand_generator_, CURAND_RNG_PSEUDO_DEFAULT),
                     "curandCreateGenerator failed");
      PADDLE_ENFORCE(
          paddle::platform::dynload::curandSetPseudoRandomGeneratorSeed(
-              rand_generator_, random_seed_) == CURAND_STATUS_SUCCESS,
+              rand_generator_, random_seed_),
          "curandSetPseudoRandomGeneratorSeed failed");
-      PADDLE_ENFORCE(paddle::platform::dynload::curandSetStream(
-                         rand_generator_, stream_) == CURAND_STATUS_SUCCESS,
-                     "curandSetStream failed");
+      PADDLE_ENFORCE(
+          paddle::platform::dynload::curandSetStream(rand_generator_, stream_),
+          "curandSetStream failed");
    }
    return rand_generator_;
  }
@@ -138,26 +135,23 @@ class CUDADeviceContext : public DeviceContext {
  ~CUDADeviceContext() {
    Wait();
    if (blas_handle_) {
-      PADDLE_ENFORCE(paddle::platform::dynload::cublasDestroy(blas_handle_) ==
-                         CUBLAS_STATUS_SUCCESS,
+      PADDLE_ENFORCE(paddle::platform::dynload::cublasDestroy(blas_handle_),
                     "cublasDestroy failed");
    }

    if (dnn_handle_) {
-      PADDLE_ENFORCE(paddle::platform::dynload::cudnnDestroy(dnn_handle_) ==
-                         CUDNN_STATUS_SUCCESS,
+      PADDLE_ENFORCE(paddle::platform::dynload::cudnnDestroy(dnn_handle_),
                     "cudnnDestroy failed");
    }

    if (rand_generator_) {
-      PADDLE_ENFORCE(paddle::platform::dynload::curandDestroyGenerator(
-                         rand_generator_) == CURAND_STATUS_SUCCESS,
-                     "curandDestroyGenerator failed");
+      PADDLE_ENFORCE(
+          paddle::platform::dynload::curandDestroyGenerator(rand_generator_),
+          "curandDestroyGenerator failed");
    }
    eigen_stream_.reset();
    eigen_device_.reset();
-    paddle::platform::throw_on_error(cudaStreamDestroy(stream_),
-                                     "cudaStreamDestroy failed");
+    PADDLE_ENFORCE(cudaStreamDestroy(stream_), "cudaStreamDestroy failed");
  }

 private:

--- a/paddle/platform/dynload/dynamic_loader.cc
+++ b/paddle/platform/dynload/dynamic_loader.cc
@@ -19,7 +19,7 @@ limitations under the License. */
 #include <string>
 #include "gflags/gflags.h"
 #include "glog/logging.h"
-#include "paddle/framework/enforce.h"
+#include "paddle/platform/enforce.h"

 DEFINE_string(cudnn_dir, "",
              "Specify path for loading libcudnn.so. For instance, "

--- a/paddle/platform/enforce.h
+++ b/paddle/platform/enforce.h
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <paddle/string/printf.h>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+
+#ifndef PADDLE_ONLY_CPU
+
+#include "paddle/platform/dynload/cublas.h"
+#include "paddle/platform/dynload/cudnn.h"
+#include "paddle/platform/dynload/curand.h"
+
+#include <cublas_v2.h>
+#include <cudnn.h>
+#include <curand.h>
+#include <thrust/system/cuda/error.h>
+#include <thrust/system_error.h>
+
+#endif  // PADDLE_ONLY_CPU
+
+namespace paddle {
+namespace platform {
+
+// Most enforce conditions hold, so the failure branch is rarely taken.
+// UNLIKELY(condition) wraps the condition in __builtin_expect with an
+// expected value of 0, hinting to the compiler that the condition is usually
+// false so that the non-failing path is treated as the common case.
+// __builtin_expect is a GCC/Clang compiler builtin; it is not part of the
+// C++ standard.
+// For more details, please check https://stackoverflow.com/a/43870188/724872.
+#define UNLIKELY(condition) __builtin_expect(static_cast<bool>(condition), 0)
+
+#ifndef PADDLE_ONLY_CPU
+
+// CUDA runtime overload: a non-zero `e` raises thrust::system_error carrying
+// `e`, the CUDA error category and the printf-style formatted message.
+// NOTE: __FILE__/__LINE__ expand inside this header, so the reported location
+// is this function rather than the call site.
+template <typename... Args>
+inline void throw_on_error(cudaError_t e, const Args&... args) {
+  if (!UNLIKELY(e)) {
+    return;
+  }
+  const std::string location =
+      string::Sprintf(" at [%s:%s];", __FILE__, __LINE__);
+  throw thrust::system_error(e, thrust::cuda_category(),
+                             string::Sprintf(args...) + location);
+}
+
+// cuRAND overload: any status other than CURAND_STATUS_SUCCESS raises
+// thrust::system_error.  The cuRAND status itself is not forwarded; the
+// exception always carries cudaErrorLaunchFailure as its error code.
+// NOTE: __FILE__/__LINE__ expand inside this header, so the reported location
+// is this function rather than the call site.
+template <typename... Args>
+inline void throw_on_error(curandStatus_t stat, const Args&... args) {
+  if (stat == CURAND_STATUS_SUCCESS) {
+    return;
+  }
+  const std::string location =
+      string::Sprintf(" at [%s:%s];", __FILE__, __LINE__);
+  throw thrust::system_error(cudaErrorLaunchFailure, thrust::cuda_category(),
+                             string::Sprintf(args...) + location);
+}
+
+// cuDNN overload: any status other than CUDNN_STATUS_SUCCESS raises
+// std::runtime_error.  The message is the cuDNN error string, then ", ",
+// then the printf-style formatted arguments and the location suffix.  The
+// separator keeps the cuDNN error string from running into the caller's
+// message.
+// NOTE: __FILE__/__LINE__ expand inside this header, so the reported location
+// is this function rather than the call site.
+template <typename... Args>
+inline void throw_on_error(cudnnStatus_t stat, const Args&... args) {
+  if (stat == CUDNN_STATUS_SUCCESS) {
+    return;
+  }
+  // clang-format off
+  throw std::runtime_error(
+      std::string(platform::dynload::cudnnGetErrorString(stat)) + ", " +
+      string::Sprintf(args...) +
+      string::Sprintf(" at [%s:%s];", __FILE__, __LINE__));
+  // clang-format on
+}
+
+// cuBLAS overload: CUBLAS_STATUS_SUCCESS returns normally; every other status
+// raises std::runtime_error.  Known statuses contribute a "CUBLAS: ..., "
+// prefix, unknown ones an empty prefix, followed by the printf-style
+// formatted arguments and the location suffix.
+// NOTE: __FILE__/__LINE__ expand inside this header, so the reported location
+// is this function rather than the call site.
+template <typename... Args>
+inline void throw_on_error(cublasStatus_t stat, const Args&... args) {
+  const char* prefix = "";
+  switch (stat) {
+    case CUBLAS_STATUS_SUCCESS:
+      return;
+    case CUBLAS_STATUS_NOT_INITIALIZED:
+      prefix = "CUBLAS: not initialized, ";
+      break;
+    case CUBLAS_STATUS_ALLOC_FAILED:
+      prefix = "CUBLAS: alloc failed, ";
+      break;
+    case CUBLAS_STATUS_INVALID_VALUE:
+      prefix = "CUBLAS: invalid value, ";
+      break;
+    case CUBLAS_STATUS_ARCH_MISMATCH:
+      prefix = "CUBLAS: arch mismatch, ";
+      break;
+    case CUBLAS_STATUS_MAPPING_ERROR:
+      prefix = "CUBLAS: mapping error, ";
+      break;
+    case CUBLAS_STATUS_EXECUTION_FAILED:
+      prefix = "CUBLAS: execution failed, ";
+      break;
+    case CUBLAS_STATUS_INTERNAL_ERROR:
+      prefix = "CUBLAS: internal error, ";
+      break;
+    case CUBLAS_STATUS_NOT_SUPPORTED:
+      prefix = "CUBLAS: not supported, ";
+      break;
+    case CUBLAS_STATUS_LICENSE_ERROR:
+      prefix = "CUBLAS: license error, ";
+      break;
+    default:
+      break;
+  }
+  throw std::runtime_error(prefix + string::Sprintf(args...) +
+                           string::Sprintf(" at [%s:%s];", __FILE__, __LINE__));
+}
+
+#endif  // PADDLE_ONLY_CPU
+
+// Generic overload for boolean-style conditions: `stat` is the value of the
+// enforced condition, and a zero (false) value raises std::runtime_error
+// whose message starts with the printf-style formatted arguments.
+// NOTE: __FILE__/__LINE__ expand inside this header, so the reported location
+// is this function rather than the call site.
+template <typename... Args>
+inline void throw_on_error(int stat, const Args&... args) {
+  const bool failed = (stat == 0);
+  if (!UNLIKELY(failed)) {
+    return;
+  }
+  const std::string location =
+      string::Sprintf(" at [%s:%s];", __FILE__, __LINE__);
+  throw std::runtime_error(string::Sprintf(args...) + location);
+}
+
+// Unconditionally throws std::runtime_error.  The message is the
+// printf-style formatted arguments followed by " at [file:line];" of the
+// place where the macro is expanded.  Sprintf is fully qualified so the macro
+// can also be expanded outside namespace paddle.
+#define PADDLE_THROW(...)                                                \
+  do {                                                                   \
+    throw std::runtime_error(                                            \
+        ::paddle::string::Sprintf(__VA_ARGS__) +                         \
+        ::paddle::string::Sprintf(" at [%s:%s];", __FILE__, __LINE__)); \
+  } while (0)
+
+/**
+ * @brief Enforce a condition or check a status code.
+ *
+ * Forwards `condition` and the printf-style message arguments to the
+ * ::paddle::platform::throw_on_error overload selected by the type of
+ * `condition`.  On failure the overloads in this header throw
+ * std::runtime_error or thrust::system_error.  At least one message argument
+ * must be supplied after `condition`.
+ */
+#define PADDLE_ENFORCE(condition, ...)                          \
+  do {                                                          \
+    ::paddle::platform::throw_on_error(condition, __VA_ARGS__); \
+  } while (0)
+
+}  // namespace platform
+}  // namespace paddle
--- a/paddle/framework/enforce_test.cc
+++ b/paddle/framework/enforce_test.cc
@@ -9,8 +9,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include <gtest/gtest.h>
-#include <paddle/framework/enforce.h>
+#include "paddle/platform/enforce.h"
+#include "gtest/gtest.h"

 TEST(ENFORCE, OK) {
  PADDLE_ENFORCE(true, "Enforce is ok %d now %f", 123, 0.345);
@@ -23,13 +23,14 @@ TEST(ENFORCE, FAILED) {
  bool in_catch = false;
  try {
    PADDLE_ENFORCE(false, "Enforce is not ok %d at all", 123);
-  } catch (paddle::framework::EnforceNotMet err) {
+  } catch (const std::runtime_error& error) {
+    // what() must start with the message formatted by PADDLE_ENFORCE.
    in_catch = true;
    std::string msg = "Enforce is not ok 123 at all";
-    const char* what = err.what();
+    const char* what = error.what();
    for (size_t i = 0; i < msg.length(); ++i) {
      ASSERT_EQ(what[i], msg[i]);
    }
  }
  ASSERT_TRUE(in_catch);
-}
\ No newline at end of file
+}
--- a/paddle/platform/error.h
+++ b/paddle/platform/error.h
-#pragma once
-
-#include <sstream>
-#include <stdexcept>
-#include <string>
-
-#ifndef PADDLE_ONLY_CPU
-
-#include <cublas_v2.h>
-#include <cudnn.h>
-#include <curand.h>
-#include <thrust/system/cuda/error.h>
-#include <thrust/system_error.h>
-
-#endif  // PADDLE_ONLY_CPU
-
-namespace paddle {
-namespace platform {
-
-#ifndef PADDLE_ONLY_CPU
-
-inline void throw_on_error(cudaError_t e, const char* message) {
-  if (e) {
-    throw thrust::system_error(e, thrust::cuda_category(), message);
-  }
-}
-
-inline void throw_on_error(curandStatus_t stat, const char* message) {
-  if (stat != CURAND_STATUS_SUCCESS) {
-    throw thrust::system_error(cudaErrorLaunchFailure, thrust::cuda_category(),
-                               message);
-  }
-}
-
-inline void throw_on_error(cudnnStatus_t stat, const char* message) {
-  std::stringstream ss;
-  if (stat == CUDNN_STATUS_SUCCESS) {
-    return;
-  } else {
-    ss << cudnnGetErrorString(stat);
-    ss << ", " << message;
-    throw std::runtime_error(ss.str());
-  }
-}
-
-inline void throw_on_error(cublasStatus_t stat, const char* message) {
-  std::stringstream ss;
-  if (stat == CUBLAS_STATUS_SUCCESS) {
-    return;
-  } else if (stat == CUBLAS_STATUS_NOT_INITIALIZED) {
-    ss << "CUBLAS: not initialized";
-  } else if (stat == CUBLAS_STATUS_ALLOC_FAILED) {
-    ss << "CUBLAS: alloc failed";
-  } else if (stat == CUBLAS_STATUS_INVALID_VALUE) {
-    ss << "CUBLAS: invalid value";
-  } else if (stat == CUBLAS_STATUS_ARCH_MISMATCH) {
-    ss << "CUBLAS: arch mismatch";
-  } else if (stat == CUBLAS_STATUS_MAPPING_ERROR) {
-    ss << "CUBLAS: mapping error";
-  } else if (stat == CUBLAS_STATUS_EXECUTION_FAILED) {
-    ss << "CUBLAS: execution failed";
-  } else if (stat == CUBLAS_STATUS_INTERNAL_ERROR) {
-    ss << "CUBLAS: internal error";
-  } else if (stat == CUBLAS_STATUS_NOT_SUPPORTED) {
-    ss << "CUBLAS: not supported";
-  } else if (stat == CUBLAS_STATUS_LICENSE_ERROR) {
-    ss << "CUBLAS: license error";
-  }
-  ss << ", " << message;
-  throw std::runtime_error(ss.str());
-}
-
-inline void throw_on_error(cublasStatus_t stat) {
-  const char* message = "";
-  throw_on_error(stat, message);
-}
-
-#endif  // PADDLE_ONLY_CPU
-
-inline void throw_on_error(int stat, const char* message) {
-  if (stat) {
-    throw std::runtime_error(message + (", stat = " + std::to_string(stat)));
-  }
-}
-
-}  // namespace platform
-}  // namespace paddle
--- a/paddle/platform/gpu_info.cc
+++ b/paddle/platform/gpu_info.cc
@@ -14,7 +14,7 @@ limitations under the License. */

 #include "paddle/platform/gpu_info.h"
 #include "gflags/gflags.h"
-#include "paddle/platform/error.h"
+#include "paddle/platform/enforce.h"

 DEFINE_double(fraction_of_gpu_memory_to_use, 0.95,
              "Default use 95% of GPU memory for PaddlePaddle,"
@@ -25,7 +25,7 @@ namespace platform {

 int GetDeviceCount() {
  int count;
-  throw_on_error(
+  PADDLE_ENFORCE(
      cudaGetDeviceCount(&count),
      "cudaGetDeviceCount failed in paddle::platform::GetDeviceCount");
  return count;
@@ -33,19 +33,19 @@ int GetDeviceCount() {

 int GetCurrentDeviceId() {
  int device_id;
-  throw_on_error(
+  PADDLE_ENFORCE(
      cudaGetDevice(&device_id),
      "cudaGetDevice failed in paddle::platform::GetCurrentDeviceId");
  return device_id;
 }

 void SetDeviceId(int id) {
-  throw_on_error(cudaSetDevice(id),
+  PADDLE_ENFORCE(cudaSetDevice(id),
                 "cudaSetDevice failed in paddle::platform::SetDeviceId");
 }

 void GpuMemoryUsage(size_t& available, size_t& total) {
-  throw_on_error(cudaMemGetInfo(&available, &total),
+  PADDLE_ENFORCE(cudaMemGetInfo(&available, &total),
                 "cudaMemGetInfo failed in paddle::platform::GetMemoryUsage");
 }


--- a/paddle/pybind/CMakeLists.txt
+++ b/paddle/pybind/CMakeLists.txt
 cc_library(paddle_pybind SHARED SRCS pybind.cc DEPS pybind python
-        add_op mul_op rowwise_add_op sigmoid_op softmax_op)
+        add_op fc_op sgd_op)
--- a/paddle/pybind/pybind.cc
+++ b/paddle/pybind/pybind.cc
@@ -14,6 +14,7 @@ limitations under the License. */

 #include <Python.h>
 #include <paddle/framework/op_registry.h>
+#include <paddle/framework/operator.h>
 #include <paddle/framework/scope.h>
 #include <paddle/pybind/tensor_bind.h>
 #include <pybind11/numpy.h>
@@ -26,10 +27,8 @@ namespace py = pybind11;
 namespace pd = paddle::framework;

 USE_OP(add_two);
-USE_OP(softmax);
-USE_OP(mul);
-USE_OP(rowwise_add);
-USE_OP(sigmoid);
+USE_OP_WITHOUT_KERNEL(fc);
+USE_OP(sgd);

 PYBIND11_PLUGIN(core) {
  py::module m("core", "C++ core of Paddle Paddle");
@@ -53,7 +52,9 @@ PYBIND11_PLUGIN(core) {
             self.mutable_data<int>(paddle::platform::CPUPlace());
           })
      .def("set", paddle::pybind::PyTensorSetFromArray<float>)
-      .def("set", paddle::pybind::PyTensorSetFromArray<int>);
+      .def("set", paddle::pybind::PyTensorSetFromArray<int>)
+      .def("shape",
+           [](pd::Tensor& self) { return pd::vectorize(self.dims()); });

  py::class_<pd::Variable>(m, "Variable", R"DOC(Variable Class.

@@ -83,15 +84,16 @@ All parameter, weight, gradient are variables in Paddle.

  //! @note: Be careful! PyBind will return std::string as an unicode, not
  //! Python str. If you want a str object, you should cast them in Python.
-  m.def("get_all_op_protos", []() -> std::vector<std::string> {
+  m.def("get_all_op_protos", []() -> std::vector<py::bytes> {
    auto& protos = pd::OpRegistry::protos();
-    std::vector<std::string> ret_values;
+    std::vector<py::bytes> ret_values;
    for (auto it = protos.begin(); it != protos.end(); ++it) {
      PADDLE_ENFORCE(it->second.IsInitialized(),
                     "OpProto must all be initialized");
-      ret_values.emplace_back();
-      PADDLE_ENFORCE(it->second.SerializeToString(&ret_values.back()),
+      std::string str;
+      PADDLE_ENFORCE(it->second.SerializeToString(&str),
                     "Serialize OpProto Error. This could be a bug of Paddle.");
+      ret_values.push_back(py::bytes(str));
    }
    return ret_values;
  });
@@ -101,17 +103,26 @@ All parameter, weight, gradient are variables in Paddle.
      .def("empty", pd::OperatorBase::EMPTY_VAR_NAME)
      .def("temp", pd::OperatorBase::TMP_VAR_NAME);

+  py::class_<paddle::platform::DeviceContext>(m, "DeviceContext")
+      .def_static("cpu_context", []() -> paddle::platform::DeviceContext* {
+        return new paddle::platform::CPUDeviceContext();
+      });
+
  py::class_<pd::OperatorBase, pd::OperatorPtr>(m, "Operator")
      .def("__str__", &pd::OperatorBase::DebugString)
-      .def_static("create", [](const std::string& protobin) {
-        pd::OpDesc desc;
-        PADDLE_ENFORCE(desc.ParsePartialFromString(protobin),
-                       "Cannot parse user input to OpDesc");
-        PADDLE_ENFORCE(desc.IsInitialized(),
-                       "User OpDesc is not initialized, reason %s",
-                       desc.InitializationErrorString());
-        return pd::OpRegistry::CreateOp(desc);
-      });
+      .def_static("create",
+                  [](py::bytes protobin) {
+                    pd::OpDesc desc;
+                    PADDLE_ENFORCE(desc.ParsePartialFromString(protobin),
+                                   "Cannot parse user input to OpDesc");
+                    PADDLE_ENFORCE(desc.IsInitialized(),
+                                   "User OpDesc is not initialized, reason %s",
+                                   desc.InitializationErrorString());
+                    return pd::OpRegistry::CreateOp(desc);
+                  })
+      .def("infer_shape", &pd::OperatorBase::InferShape)
+      .def("run", &pd::OperatorBase::Run)
+      .def("outputs", [](const pd::OperatorPtr& op) { return op->outputs_; });

  return m.ptr();
 }
--- a/python/paddle/v2/framework/create_op_creation_methods.py
+++ b/python/paddle/v2/framework/create_op_creation_methods.py
@@ -217,6 +217,10 @@ def create_op_creation_method(op_proto):
        return core.Operator.create(opdesc.SerializeToString())

    __impl__.__doc__ = get_docstring_from_op_proto(op_proto)
+    __impl__.all_input_args = [var.name for var in op_proto.inputs]
+    __impl__.all_output_args = [var.name for var in op_proto.outputs]
+    __impl__.all_attr_args = [attr.name for attr in op_proto.attrs]
+
    return __impl__



--- a/python/paddle/v2/framework/tests/CMakeLists.txt
+++ b/python/paddle/v2/framework/tests/CMakeLists.txt
 add_python_test(test_framework test_protobuf.py test_scope.py
    test_default_scope_funcs.py test_op_creation_methods.py
-    test_tensor.py)
+    test_tensor.py test_fc_op.py test_add_two_op.py test_sgd_op.py)
--- a/python/paddle/v2/framework/tests/op_test_util.py
+++ b/python/paddle/v2/framework/tests/op_test_util.py
+import paddle.v2.framework.core as core
+import unittest
+import numpy
+import paddle.v2.framework.create_op_creation_methods as creation
+
+
+class OpTestMeta(type):
+    """
+    Metaclass for operator unit tests.
+
+    It injects a `test_all` method into the user's test class so that the
+    Python unittest module runs that method.
+
+    `test_all` reads the values stored on `self`: `self.type` selects the
+    operator creation method, and attributes named after the operator's
+    inputs, outputs and attributes supply the input arrays, the expected
+    output arrays and the attribute values. It creates the operator, runs it
+    with a CPU device context and compares each output that has an expected
+    value on `self` with the tensor the operator produced.
+
+    See `test_add_two_op` for example usage.
+    """
+
+    def __new__(cls, name, bases, attrs):
+        obj = super(OpTestMeta, cls).__new__(cls, name, bases, attrs)
+
+        def test_all(self):
+            func = getattr(creation.op_creations, self.type, None)
+            self.assertIsNotNone(func)
+
+            scope = core.Scope(None)
+            kwargs = dict()
+
+            # Inputs found on `self` are copied into scope variables of the
+            # same name; missing inputs are passed as "@EMPTY@".
+            for in_name in func.all_input_args:
+                if hasattr(self, in_name):
+                    kwargs[in_name] = in_name
+                    var = scope.create_var(in_name).get_tensor()
+                    arr = getattr(self, in_name)
+                    var.set_dims(arr.shape)
+                    var.set(arr)
+                else:
+                    kwargs[in_name] = "@EMPTY@"
+
+            # Only outputs with an expected value on `self` get a variable.
+            for out_name in func.all_output_args:
+                if hasattr(self, out_name):
+                    kwargs[out_name] = out_name
+                    scope.create_var(out_name).get_tensor()
+
+            for attr_name in func.all_attr_args:
+                if hasattr(self, attr_name):
+                    kwargs[attr_name] = getattr(self, attr_name)
+
+            op = func(**kwargs)
+
+            op.infer_shape(scope)
+
+            ctx = core.DeviceContext.cpu_context()
+            op.run(scope, ctx)
+
+            # Check only the outputs for which a variable was created above;
+            # an output without an expected value on `self` has nothing to
+            # compare against.
+            for out_name in func.all_output_args:
+                if not hasattr(self, out_name):
+                    continue
+                actual = numpy.array(scope.get_var(out_name).get_tensor())
+                expect = getattr(self, out_name)
+                numpy.testing.assert_almost_equal(actual, expect)
+
+        obj.test_all = test_all
+        return obj
--- a/python/paddle/v2/framework/tests/test_add_two_op.py
+++ b/python/paddle/v2/framework/tests/test_add_two_op.py
+import unittest
+from op_test_util import OpTestMeta
+import numpy
+
+
+class TestAddOp(unittest.TestCase):
+    """Data for the `add_two` operator test: Out is the sum of X and Y."""
+    __metaclass__ = OpTestMeta
+
+    def setUp(self):
+        # Both operands share one shape; the expected output is their sum.
+        shape = (342, 345)
+        self.type = "add_two"
+        self.X = numpy.random.random(shape).astype("float32")
+        self.Y = numpy.random.random(shape).astype("float32")
+        self.Out = numpy.add(self.X, self.Y)
+
+
+if __name__ == '__main__':
+    unittest.main()
--- a/python/paddle/v2/framework/tests/test_fc_op.py
+++ b/python/paddle/v2/framework/tests/test_fc_op.py
+import paddle.v2.framework.core as core
+import unittest
+import numpy
+import paddle.v2.framework.create_op_creation_methods as creation
+
+
+class TestFc(unittest.TestCase):
+    # Builds an fc operator on X (1000 x 784) and W (784 x 100), checks that
+    # shape inference gives Y the shape [1000, 100] and runs the operator with
+    # a CPU device context.
+    def test_fc(self):
+        scope = core.Scope(None)
+        x = scope.create_var("X")
+        x_tensor = x.get_tensor()
+        x_tensor.set_dims([1000, 784])
+        x_tensor.alloc_float()
+        # Give X actual values, as is done for W below, so the operator does
+        # not run on a tensor that was only allocated.
+        x_tensor.set(numpy.random.random((1000, 784)).astype("float32"))
+
+        w = scope.create_var("W")
+        w_tensor = w.get_tensor()
+        w_tensor.set_dims([784, 100])
+        w_tensor.alloc_float()
+
+        w_tensor.set(numpy.random.random((784, 100)).astype("float32"))
+
+        op = creation.op_creations.fc(X="X", Y="Y", W="W")
+
+        # Create a variable for every output the operator names that the
+        # scope does not have yet.
+        for out in op.outputs():
+            if scope.get_var(out) is None:
+                scope.create_var(out).get_tensor()
+
+        tensor = scope.get_var("Y").get_tensor()
+        op.infer_shape(scope)
+        self.assertEqual([1000, 100], tensor.shape())
+
+        ctx = core.DeviceContext.cpu_context()
+
+        op.run(scope, ctx)
+
+        # TODO: once all ops are complete, check that Y holds the expected
+        # values.
+
+
+if __name__ == '__main__':
+    unittest.main()
--- a/python/paddle/v2/framework/tests/test_sgd_op.py
+++ b/python/paddle/v2/framework/tests/test_sgd_op.py
+import unittest
+import numpy
+from op_test_util import OpTestMeta
+
+
+class TestSGD(unittest.TestCase):
+    """Data for the `sgd` operator test: param - learning_rate * grad."""
+    __metaclass__ = OpTestMeta
+
+    def setUp(self):
+        # Parameter and gradient share one shape; the expected output is the
+        # parameter after one update step.
+        shape = (342, 345)
+        self.type = "sgd"
+        self.param = numpy.random.random(shape).astype("float32")
+        self.grad = numpy.random.random(shape).astype("float32")
+        self.learning_rate = 0.1
+        step = self.learning_rate * self.grad
+        self.param_out = self.param - step
+
+
+if __name__ == "__main__":
+    unittest.main()