diff --git a/doc/design/graph.md b/doc/design/graph.md
index 87f696f90f164a639ad5182823ddfb14aab7e065..51b7f87638f8ddff752328a562fe0dd0fe56cfd1 100644
--- a/doc/design/graph.md
+++ b/doc/design/graph.md
@@ -1,4 +1,4 @@
-# Design Doc: Computations as Graphs
+# Design Doc: Computations as a Graph

 A primary goal of the refactorization of PaddlePaddle is a more flexible representation of deep learning computation, in particular, a graph of operators and variables, instead of sequences of layers as before.

@@ -8,6 +8,8 @@ This document explains that the construction of a graph as three steps:
 - construct the backward part
 - construct the optimization part

+## The Construction of a Graph
+
 Let us take the problem of image classification as a simple example. The application program that trains the model looks like:

 ```python
@@ -25,7 +27,9 @@ The first four lines of above program build the forward part of the graph.

 ![](images/graph_construction_example_forward_only.png)

-In particular, the first line `x = layer.data("images")` creates variable x and a Feed operator that copies a column from the minibatch to x. `y = layer.fc(x)` creates not only the FC operator and output variable y, but also two parameters, W and b.
+In particular, the first line `x = layer.data("images")` creates variable x and a Feed operator that copies a column from the minibatch to x. `y = layer.fc(x)` creates not only the FC operator and output variable y, but also two parameters, W and b, and their initialization operators.
+
+Initialization operators are a kind of "run-once" operator -- the `Run` method increments a class data member counter so that it runs at most once. This way, a parameter is not initialized repeatedly, say, in every minibatch.

 In this example, all operators are created as `OpDesc` protobuf messages, and all variables are `VarDesc`. These protobuf messages are saved in a `BlockDesc` protobuf message.

@@ -49,3 +53,18 @@ According to the chain rule of gradient computation, `ConstructBackwardGraph` wo
 For each parameter, like W and b created by `layer.fc`, marked as double circles in above graphs, `ConstructOptimizationGraph` creates an optimization operator to apply its gradient. Here results in the complete graph:

 ![](images/graph_construction_example_all.png)
+
+## Block and Graph
+
+The words block and graph are interchangeable in the design of PaddlePaddle. A [Block](https://github.com/PaddlePaddle/Paddle/pull/3708) is a metaphor for the code and local variables within a pair of curly braces in programming languages, where operators are like statements or instructions. A graph of operators and variables is a representation of the block.
+
+A Block keeps its operators in an array `BlockDesc::ops`
+
+```protobuf
+message BlockDesc {
+  repeated OpDesc ops = 1;
+  repeated VarDesc vars = 2;
+}
+```
+
+in the order in which they appear in user programs, like the Python program at the beginning of this article. We can imagine that in `ops`, we have some forward operators, followed by some gradient operators, and then some optimization operators.
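Below is a minimal sketch of the "run-once" behavior that the graph.md change above describes. The class name `InitOp`, the plain `int` counter, and the commented-out `Run` parameters are illustrative placeholders, not the actual PaddlePaddle operator interface:

```cpp
// Illustrative run-once initialization operator: Run() increments a counter
// kept as a class data member and fills the parameter only on the first call,
// so W or b is not re-initialized on every minibatch.
class InitOp {
 public:
  void Run(/* const Scope& scope, const platform::DeviceContext& dev_ctx */) {
    if (run_count_++ > 0) return;  // already ran once; later calls are no-ops
    // ... fill the output parameter (e.g., W or b) with its initial values ...
  }

 private:
  int run_count_ = 0;  // the "run-once" counter
};
```

Whether this counter needs to be thread-safe depends on how the executor schedules operators; the sketch ignores that concern.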
diff --git a/doc/design/images/graph_construction_example.dot b/doc/design/images/graph_construction_example.dot index bedb6de0111a8ccab4030d034d65cf72705fc25a..8d1b673abf6b78c851676fa379dc850c4818f0e5 100644 --- a/doc/design/images/graph_construction_example.dot +++ b/doc/design/images/graph_construction_example.dot @@ -2,6 +2,8 @@ digraph ImageClassificationGraph { ///////// The forward part ///////// FeedX [label="Feed", color=blue, shape=box]; FeedY [label="Feed", color=blue, shape=box]; + InitW [label="Init", color=blue, shape=diamond]; + Initb [label="Init", color=blue, shape=diamond]; FC [label="FC", color=blue, shape=box]; MSE [label="MSE", color=blue, shape=box]; @@ -14,6 +16,8 @@ digraph ImageClassificationGraph { FeedX -> x -> FC -> y -> MSE -> cost [color=blue]; FeedY -> l [color=blue]; + InitW -> W [color=blue]; + Initb -> b [color=blue]; W -> FC [color=blue]; b -> FC [color=blue]; l -> MSE [color=blue]; diff --git a/doc/design/images/graph_construction_example_all.png b/doc/design/images/graph_construction_example_all.png index 18d8330b60e12720bb993c8cf588d64ff8db1ea9..181187503472d15779b87284105841168b3945c4 100644 Binary files a/doc/design/images/graph_construction_example_all.png and b/doc/design/images/graph_construction_example_all.png differ diff --git a/doc/design/images/graph_construction_example_forward_backward.png b/doc/design/images/graph_construction_example_forward_backward.png index 61c3a02a04bc8891ab5b921a889829bcce386df8..3049a9315fd616464dec54e33064cb75598ca536 100644 Binary files a/doc/design/images/graph_construction_example_forward_backward.png and b/doc/design/images/graph_construction_example_forward_backward.png differ diff --git a/doc/design/images/graph_construction_example_forward_only.png b/doc/design/images/graph_construction_example_forward_only.png index 14805df11fc09f64d6bc17f5e969f1400d615148..25d19088cbf0b5f68cf734f2ff21eba8af4a2860 100644 Binary files a/doc/design/images/graph_construction_example_forward_only.png and b/doc/design/images/graph_construction_example_forward_only.png differ diff --git a/paddle/framework/framework.proto b/paddle/framework/framework.proto index 368136a9729dd2c745cc71bc391031e0a390fc87..dfcb5fb6210a08f35193b83e3b5f7cee92f618d7 100644 --- a/paddle/framework/framework.proto +++ b/paddle/framework/framework.proto @@ -87,3 +87,24 @@ message OpProto { repeated Attr attrs = 4; required string comment = 5; } + +enum DataType { + BOOL = 0; + INT16 = 1; + INT32 = 2; + INT64 = 3; + FP16 = 4; + FP32 = 5; + FP64 = 6; +} + +message LoDTensorDesc { + required DataType data_type = 1; + repeated int32 dims = 2; // [UNK, 640, 480] is saved as [-1, 640, 480] + optional int32 lod_level = 3 [ default = 0 ]; +} + +message VarDesc { + required string name = 1; + optional LoDTensorDesc lod_tensor = 2; +} diff --git a/paddle/framework/lod_tensor.cc b/paddle/framework/lod_tensor.cc index 71eac4a10b34c3010a2758120c25754af58f669d..908a1f2fd0abe0aa4016c72dbcbc18dcc144232c 100644 --- a/paddle/framework/lod_tensor.cc +++ b/paddle/framework/lod_tensor.cc @@ -19,8 +19,8 @@ namespace paddle { namespace framework { -LOD SliceLevels(const LOD& in, size_t level_begin, size_t level_end) { - LOD new_lod; +LoD SliceLevels(const LoD& in, size_t level_begin, size_t level_end) { + LoD new_lod; new_lod.reserve(level_end - level_begin); for (size_t i = level_begin; i < level_end; i++) { new_lod.emplace_back(in.at(i)); @@ -28,10 +28,10 @@ LOD SliceLevels(const LOD& in, size_t level_begin, size_t level_end) { return new_lod; } -LOD SliceInLevel(const LOD& in, 
size_t level, size_t elem_begin, +LoD SliceInLevel(const LoD& in, size_t level, size_t elem_begin, size_t elem_end) { // slice the lod. - LOD new_lod; + LoD new_lod; new_lod.reserve(in.size() - level); auto start = in.at(level)[elem_begin]; auto end = in.at(level)[elem_end]; @@ -46,13 +46,13 @@ LOD SliceInLevel(const LOD& in, size_t level, size_t elem_begin, std::transform(new_lod.back().begin(), new_lod.back().end(), new_lod.back().begin(), [start](int v) { return v - start; }); - PADDLE_ENFORCE_EQ(new_lod.back().front(), 0, "error in slice LOD"); + PADDLE_ENFORCE_EQ(new_lod.back().front(), 0, "error in slice LoD"); } PADDLE_ENFORCE_LE(new_lod.size(), in.size()); return new_lod; } -bool operator==(const LOD& a, const LOD& b) { +bool operator==(const LoD& a, const LoD& b) { if (a.size() != b.size()) { return false; } @@ -72,12 +72,12 @@ bool operator==(const LOD& a, const LOD& b) { return true; } -void LODTensor::SliceLevels(size_t level_begin, size_t level_end) { +void LoDTensor::SliceLevels(size_t level_begin, size_t level_end) { auto new_lod = framework::SliceLevels(lod_, level_begin, level_end); lod_ = new_lod; } -void LODTensor::SliceInLevel(size_t level, size_t elem_begin, size_t elem_end) { +void LoDTensor::SliceInLevel(size_t level, size_t elem_begin, size_t elem_end) { PADDLE_ENFORCE(level < NumLevels(), "level [%d] out of range [%d]", level, NumLevels()); PADDLE_ENFORCE(elem_begin < NumElements(level), diff --git a/paddle/framework/lod_tensor.h b/paddle/framework/lod_tensor.h index 9e6b6b4aca41ed464292b56bf6f2d27514f874f7..154068fef69bc96edbd85b731fe8091b3b1ff823 100644 --- a/paddle/framework/lod_tensor.h +++ b/paddle/framework/lod_tensor.h @@ -35,34 +35,34 @@ template using Vector = thrust::host_vector; #endif -using LOD = std::vector>; +using LoD = std::vector>; -LOD SliceLevels(const LOD& in, size_t level_begin, size_t level_end); +LoD SliceLevels(const LoD& in, size_t level_begin, size_t level_end); -LOD SliceInLevel(const LOD& in, size_t level, size_t elem_begin, +LoD SliceInLevel(const LoD& in, size_t level, size_t elem_begin, size_t elem_end); -bool operator==(const LOD& a, const LOD& b); +bool operator==(const LoD& a, const LoD& b); /* - * LODTensor (Level of details Tensor) + * LoDTensor (Level of details Tensor) * see https://en.wikipedia.org/wiki/Level_of_details for reference. */ -class LODTensor { +class LoDTensor { public: - LODTensor() {} - LODTensor(const LOD& lod, Tensor* t) : lod_(lod), tensor_(t) {} + LoDTensor() {} + LoDTensor(const LoD& lod, Tensor* t) : lod_(lod), tensor_(t) {} - void set_lod(const LOD& lod) { lod_ = lod; } + void set_lod(const LoD& lod) { lod_ = lod; } void set_tensor(Tensor* tensor) { tensor_ = tensor; } Tensor& tensor() { return *tensor_; } - LOD lod() { return lod_; } + LoD lod() { return lod_; } /* - * Get a element from LOD. + * Get a element from LoD. */ size_t lod_element(size_t level, size_t elem) const { PADDLE_ENFORCE(level < NumLevels(), "level [%d] out of range [%d]", level, @@ -74,7 +74,7 @@ class LODTensor { } /* - * Number of LODTensor's levels, each level has units of data, for example, + * Number of LoDTensor's levels, each level has units of data, for example, * in the sentence's view, article, paragraph, sentence are 3 levels. 
*/ size_t NumLevels() const { return lod_.size(); } @@ -100,7 +100,7 @@ class LODTensor { void SliceInLevel(size_t level, size_t elem_begin, size_t elem_end); private: - LOD lod_; + LoD lod_; Tensor* tensor_; // not owned }; } // namespace framework diff --git a/paddle/framework/lod_tensor.md b/paddle/framework/lod_tensor.md index 8dfe3ee823084cb8c38550a82e761a741eabe135..769b61f175a2f462258c1242d027c04c0abd12a9 100644 --- a/paddle/framework/lod_tensor.md +++ b/paddle/framework/lod_tensor.md @@ -94,7 +94,7 @@ Let's go on slicing this slice. Its <1,1>-slice is ||| ``` -### The General Slicing Algorithm +### The Slicing Algorithm The algorithm, with over-simplified data structure, is defined as @@ -106,17 +106,41 @@ struct LoDTensor { float* tensor_; }; -LoDTensor Slice(const LoDTensor& lodt, int level, int sequence) { +LoDTensor Slice(const LoDTensor& lodt, int level, int sequence); +``` + +Let us revisit the example above -} +``` + 3 +3 1 2 +3 2 4 1 2 3 +||| || |||| | || ||| ``` -### Slicing the Top Level +Suppose that we want to retrieve the <1,2>-slice -Please be aware that an RNN operator only slices the top level of a LoD Tensor to get the step inputs. +``` +2 +2 3 +|| ||| ``` -```c++ -LoDTensor Slice(const LoDTensor& lodt, int sequence) { +we will need to find out the starting position of this slice by summing over all leaf nodes in `LoD` to the left of the slice, i.e., 3 + 2 + 4 + 1 = 10. + +To avoid the traversal of the LoD tree at slicing time, we can do it at construction time -- instead of saving the lengths of the next level in the LoD tree, we can save the starting offset of the next level. For example, the above LoD Tensor can be transformed into + +``` + 0 +0 9 10 +0 3 5 9 10 12 +||| || |||| | || ||| +``` + +We don't really need the 0 on top, so the LoD Tensor could be -} +``` +0 9 10 +0 3 5 9 10 12 +||| || |||| | || ||| ``` diff --git a/paddle/framework/lod_tensor_test.cc b/paddle/framework/lod_tensor_test.cc index 9a351605edb5013bdab2c6193bdd9ce401acc937..1da8553134f377f7a4fbe8008d12fe8d4a0e47f4 100644 --- a/paddle/framework/lod_tensor_test.cc +++ b/paddle/framework/lod_tensor_test.cc @@ -21,7 +21,7 @@ namespace paddle { namespace framework { -class LODTensorTester : public ::testing::Test { +class LoDTensorTester : public ::testing::Test { public: virtual void SetUp() override { // tensor's batch_size: 30 @@ -29,7 +29,7 @@ class LODTensorTester : public ::testing::Test { // 0 10 20 // 0 5 10 15 20 // 0 2 5 7 10 12 15 20 - LOD lod; + LoD lod; lod.push_back(std::vector{0, 10, 20}); lod.push_back(std::vector{0, 5, 10, 15, 20}); lod.push_back(std::vector{0, 2, 5, 7, 10, 12, 15, 17, 20}); @@ -47,21 +47,21 @@ class LODTensorTester : public ::testing::Test { protected: platform::CPUPlace place; Tensor tensor; - LODTensor lod_tensor; + LoDTensor lod_tensor; }; -TEST_F(LODTensorTester, NumLevels) { ASSERT_EQ(lod_tensor.NumLevels(), 3UL); } +TEST_F(LoDTensorTester, NumLevels) { ASSERT_EQ(lod_tensor.NumLevels(), 3UL); } -TEST_F(LODTensorTester, NumElements) { +TEST_F(LoDTensorTester, NumElements) { ASSERT_EQ(lod_tensor.NumElements(0), 2UL); ASSERT_EQ(lod_tensor.NumElements(1), 4UL); ASSERT_EQ(lod_tensor.NumElements(2), 8UL); } -TEST_F(LODTensorTester, SliceLevels) { +TEST_F(LoDTensorTester, SliceLevels) { // slice 1 level for (size_t level = 0; level < 3UL; ++level) { - LODTensor new_lod_tensor = lod_tensor; + LoDTensor new_lod_tensor = lod_tensor; new_lod_tensor.SliceLevels(level, level + 1); ASSERT_EQ(new_lod_tensor.NumLevels(), 1UL); ASSERT_EQ(new_lod_tensor.NumElements(0),
lod_tensor.NumElements(level)); @@ -70,7 +70,7 @@ TEST_F(LODTensorTester, SliceLevels) { } // slice 2 level for (size_t level = 0; level < 2UL; ++level) { - LODTensor new_lod_tensor = lod_tensor; + LoDTensor new_lod_tensor = lod_tensor; new_lod_tensor.SliceLevels(level, level + 2); ASSERT_EQ(new_lod_tensor.NumLevels(), 2UL); ASSERT_EQ(new_lod_tensor.NumElements(0), lod_tensor.NumElements(level)); @@ -80,9 +80,9 @@ TEST_F(LODTensorTester, SliceLevels) { } } -TEST_F(LODTensorTester, SliceInLevel) { +TEST_F(LoDTensorTester, SliceInLevel) { size_t level = 0; - LODTensor new_lod_tensor = lod_tensor; + LoDTensor new_lod_tensor = lod_tensor; new_lod_tensor.SliceInLevel(level, 0, 2); EXPECT_EQ(new_lod_tensor.NumLevels(), 3UL); EXPECT_EQ(new_lod_tensor.NumElements(0), 2UL); diff --git a/paddle/framework/op_registry_test.cc b/paddle/framework/op_registry_test.cc index b43f6a8cc56fdd2dc483bef303cf1213b171a5e4..0e2fb27b653e88846c71a025e694bfe3d4613641 100644 --- a/paddle/framework/op_registry_test.cc +++ b/paddle/framework/op_registry_test.cc @@ -80,7 +80,7 @@ TEST(OpRegistry, CreateOp) { paddle::framework::Scope scope; paddle::platform::CPUDeviceContext dev_ctx; op->Run(scope, dev_ctx); - float scale_get = op->GetAttr("scale"); + float scale_get = op->Attr("scale"); ASSERT_EQ(scale_get, scale); } @@ -121,7 +121,7 @@ TEST(OpRegistry, DefaultValue) { paddle::framework::Scope scope; paddle::platform::CPUDeviceContext dev_ctx; op->Run(scope, dev_ctx); - ASSERT_EQ(op->GetAttr("scale"), 1.0); + ASSERT_EQ(op->Attr("scale"), 1.0); } TEST(OpRegistry, CustomChecker) { @@ -172,6 +172,6 @@ TEST(OpRegistry, CustomChecker) { paddle::platform::CPUDeviceContext dev_ctx; paddle::framework::Scope scope; op->Run(scope, dev_ctx); - int test_attr = op->GetAttr("test_attr"); + int test_attr = op->Attr("test_attr"); ASSERT_EQ(test_attr, 4); } \ No newline at end of file diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h index da92220b04e313e4743cc77241755b685d0791ad..9a98d4d3be0d1cb875d614b263f1e4365ede4113 100644 --- a/paddle/framework/operator.h +++ b/paddle/framework/operator.h @@ -69,7 +69,7 @@ class OperatorBase { virtual ~OperatorBase() {} template - inline const T& GetAttr(const std::string& name) const { + inline const T& Attr(const std::string& name) const { PADDLE_ENFORCE(attrs_.count(name) != 0, "%s should be in AttributeMap", name); return boost::get(attrs_.at(name)); @@ -238,8 +238,8 @@ class InferShapeContext { const Scope& scope() const { return scope_; } template - inline const T& GetAttr(const std::string& name) const { - return op_.GetAttr(name); + inline const T& Attr(const std::string& name) const { + return op_.Attr(name); } size_t InputSize(const std::string& name) const { diff --git a/paddle/operators/gaussian_random_op.cc b/paddle/operators/gaussian_random_op.cc index 8bb61275badfccec49953015a47b87b0879153bf..6574880c0eb6324b2dd175e39a364d2ef46e735e 100644 --- a/paddle/operators/gaussian_random_op.cc +++ b/paddle/operators/gaussian_random_op.cc @@ -19,12 +19,12 @@ template class CPUGaussianRandomKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - float mean = context.GetAttr("mean"); - float std = context.GetAttr("std"); + float mean = context.Attr("mean"); + float std = context.Attr("std"); auto* tensor = context.Output("Out"); T* data = tensor->mutable_data(context.GetPlace()); - unsigned int seed = static_cast(context.GetAttr("seed")); + unsigned int seed = static_cast(context.Attr("seed")); std::minstd_rand 
engine; if (seed == 0) { seed = std::random_device()(); @@ -45,7 +45,7 @@ class GaussianRandomOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext& context) const override { auto* tensor = context.Output("Out"); - auto dims = GetAttr>("dims"); + auto dims = Attr>("dims"); std::vector temp; temp.reserve(dims.size()); for (auto dim : dims) { diff --git a/paddle/operators/gaussian_random_op.cu b/paddle/operators/gaussian_random_op.cu index 833a82bbf293a0892531283dc681ca2edd72f6a1..d9dbc1dcfe6a6676938d64be93c879ea69148018 100644 --- a/paddle/operators/gaussian_random_op.cu +++ b/paddle/operators/gaussian_random_op.cu @@ -42,13 +42,13 @@ class GPUGaussianRandomKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& context) const override { auto* tensor = context.Output("Out"); T* data = tensor->mutable_data(context.GetPlace()); - unsigned int seed = static_cast(context.GetAttr("seed")); + unsigned int seed = static_cast(context.Attr("seed")); if (seed == 0) { std::random_device rd; seed = rd(); } - T mean = static_cast(context.GetAttr("mean")); - T std = static_cast(context.GetAttr("std")); + T mean = static_cast(context.Attr("mean")); + T std = static_cast(context.Attr("std")); thrust::counting_iterator index_sequence_begin(0); ssize_t N = framework::product(tensor->dims()); thrust::transform(index_sequence_begin, index_sequence_begin + N, diff --git a/paddle/operators/rnn/recurrent_op_utils.cc b/paddle/operators/rnn/recurrent_op_utils.cc index 69e723b4019fe553426bafbf02b3334ea4acfcf1..97872c67ac99fbf6c9c177d52f1d4069163e8548 100644 --- a/paddle/operators/rnn/recurrent_op_utils.cc +++ b/paddle/operators/rnn/recurrent_op_utils.cc @@ -109,7 +109,7 @@ void InitArgument(const ArgumentName& name, Argument* arg, arg->step_scopes = op.Output(name.step_scopes); auto inlinks = op.Inputs(name.inlinks); - auto inlink_alias = op.GetAttr>(name.inlink_alias); + auto inlink_alias = op.Attr>(name.inlink_alias); PADDLE_ENFORCE(inlinks.size() == inlink_alias.size(), "the size of inlinks and inlink_alias don't match:%d,%d", inlinks.size(), inlink_alias.size()); @@ -121,7 +121,7 @@ void InitArgument(const ArgumentName& name, Argument* arg, } auto outlinks = op.Outputs(name.outlinks); - auto outlink_alias = op.GetAttr>(name.outlink_alias); + auto outlink_alias = op.Attr>(name.outlink_alias); PADDLE_ENFORCE(outlinks.size() == outlink_alias.size(), "the size of outlinks and outlink_alias don't match:%d,%d", outlinks.size(), outlink_alias.size()); @@ -135,8 +135,8 @@ void InitArgument(const ArgumentName& name, Argument* arg, auto boot_memories = op.Inputs(name.boot_memories); // attributes - auto memories = op.GetAttr>(name.memories); - auto pre_memories = op.GetAttr>(name.pre_memories); + auto memories = op.Attr>(name.memories); + auto pre_memories = op.Attr>(name.pre_memories); PADDLE_ENFORCE(memories.size() == boot_memories.size(), "the size of memories, boot_memories don't match:%d,%d", diff --git a/paddle/operators/scale_op.cc b/paddle/operators/scale_op.cc index 005152ed71f79a5a592b942ebe7ce1d460892a55..3d82b345829b0a554a204ada91c807e42b71dc58 100644 --- a/paddle/operators/scale_op.cc +++ b/paddle/operators/scale_op.cc @@ -60,7 +60,7 @@ class ScaleGradOp : public NetOp { AppendOp(framework::OpRegistry::CreateOp( "scale", {{"X", {Input(framework::GradVarName("Out"))}}}, {{"Out", {Output(framework::GradVarName("X"))}}}, - {{"scale", GetAttr("scale")}})); + {{"scale", Attr("scale")}})); CompleteAddOp(false); } }; diff --git 
a/paddle/operators/scale_op.h b/paddle/operators/scale_op.h index 65fb77eefad812fa52ac053b791ba1b8f480375f..02fbdc52bbf89c9f2acc5eeaa1197e4ccbca9d31 100644 --- a/paddle/operators/scale_op.h +++ b/paddle/operators/scale_op.h @@ -27,7 +27,7 @@ class ScaleKernel : public framework::OpKernel { auto* in = context.Input("X"); tensor->mutable_data(in->place()); - auto scale = static_cast(context.GetAttr("scale")); + auto scale = static_cast(context.Attr("scale")); auto eigen_out = framework::EigenVector::Flatten(*tensor); auto eigen_in = framework::EigenVector::Flatten(*in); diff --git a/paddle/operators/sgd_op.h b/paddle/operators/sgd_op.h index 8422b622ee54ba76fb98b7dacfa9618031c1c88c..f8888f9c362e1c39af42236bb3a23be37aa3ae15 100644 --- a/paddle/operators/sgd_op.h +++ b/paddle/operators/sgd_op.h @@ -31,7 +31,7 @@ class SGDOpKernel : public framework::OpKernel { auto param = ctx.Input("param"); auto grad = ctx.Input("grad"); auto param_out = ctx.Output("param_out"); - float lr = ctx.GetAttr("learning_rate"); + float lr = ctx.Attr("learning_rate"); param_out->mutable_data(ctx.GetPlace()); diff --git a/paddle/operators/uniform_random_op.cc b/paddle/operators/uniform_random_op.cc index 40cef8942a3648af5629e5a5db0f021ae3d6f1c1..f2aeef6c310df8535e67fa3906301a87f8ec4694 100644 --- a/paddle/operators/uniform_random_op.cc +++ b/paddle/operators/uniform_random_op.cc @@ -26,15 +26,15 @@ class CPUUniformRandomKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& context) const override { auto* tensor = context.Output("Out"); T* data = tensor->mutable_data(context.GetPlace()); - unsigned int seed = static_cast(context.GetAttr("seed")); + unsigned int seed = static_cast(context.Attr("seed")); std::minstd_rand engine; if (seed == 0) { seed = std::random_device()(); } engine.seed(seed); std::uniform_real_distribution dist( - static_cast(context.GetAttr("min")), - static_cast(context.GetAttr("max"))); + static_cast(context.Attr("min")), + static_cast(context.Attr("max"))); int64_t size = framework::product(tensor->dims()); for (int64_t i = 0; i < size; ++i) { data[i] = dist(engine); @@ -48,10 +48,10 @@ class UniformRandomOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext& ctx) const override { - PADDLE_ENFORCE(GetAttr("min") < GetAttr("max"), + PADDLE_ENFORCE(Attr("min") < Attr("max"), "uniform_random's min must less then max"); auto* tensor = ctx.Output("Out"); - auto dims = GetAttr>("dims"); + auto dims = Attr>("dims"); std::vector temp; temp.reserve(dims.size()); for (auto dim : dims) { diff --git a/paddle/operators/uniform_random_op.cu b/paddle/operators/uniform_random_op.cu index df993c07794b0b2408e4edc8a45fae9a17aef01c..c2c041b144b6ca1f019f972e1301b756ec1c9301 100644 --- a/paddle/operators/uniform_random_op.cu +++ b/paddle/operators/uniform_random_op.cu @@ -45,13 +45,13 @@ class GPUUniformRandomKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& context) const override { auto* tensor = context.Output("Out"); T* data = tensor->mutable_data(context.GetPlace()); - unsigned int seed = static_cast(context.GetAttr("seed")); + unsigned int seed = static_cast(context.Attr("seed")); if (seed == 0) { std::random_device rd; seed = rd(); } - T min = static_cast(context.GetAttr("min")); - T max = static_cast(context.GetAttr("max")); + T min = static_cast(context.Attr("min")); + T max = static_cast(context.Attr("max")); thrust::counting_iterator index_sequence_begin(0); ssize_t N = 
framework::product(tensor->dims()); thrust::transform(index_sequence_begin, index_sequence_begin + N, diff --git a/paddle/platform/cudnn_helper.h b/paddle/platform/cudnn_helper.h index 24ddf3441caa6e5f45a7b96af26a23ed324dc1b6..2841d2a2dbec5c17ef098a06c976ca01247820f5 100644 --- a/paddle/platform/cudnn_helper.h +++ b/paddle/platform/cudnn_helper.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once +#include #include "paddle/platform/dynload/cudnn.h" #include "paddle/platform/enforce.h" #include "paddle/platform/macros.h"
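The lod_tensor.md change above stores starting offsets (in base elements) in the LoD instead of per-level lengths, so that a slice's starting position -- the 10 computed as 3 + 2 + 4 + 1 in the example -- can be read directly rather than recomputed by summing leaf lengths. Below is a minimal sketch of that lengths-to-offsets conversion; it uses plain `std::vector`s and an illustrative function name `LengthsToOffsets`, not the actual `LoD`/`Vector` types from lod_tensor.h:

```cpp
#include <cstddef>
#include <iostream>
#include <vector>

// A length-based LoD stores, per node, how many children it has (the last
// level stores base-element counts). An offset-based LoD stores, per node,
// its starting position measured in base elements.
using LengthLoD = std::vector<std::vector<std::size_t>>;
using OffsetLoD = std::vector<std::vector<std::size_t>>;

OffsetLoD LengthsToOffsets(const LengthLoD& lengths) {
  OffsetLoD offsets(lengths.size());
  // Leaf level: exclusive prefix sum of the base-element lengths.
  const auto& leaf = lengths.back();
  std::vector<std::size_t> leaf_offsets(leaf.size(), 0);
  for (std::size_t i = 1; i < leaf.size(); ++i) {
    leaf_offsets[i] = leaf_offsets[i - 1] + leaf[i - 1];
  }
  offsets.back() = leaf_offsets;
  // Upper levels: child counts give indices into the level below, which are
  // then mapped to base-element offsets.
  for (int level = static_cast<int>(lengths.size()) - 2; level >= 0; --level) {
    std::vector<std::size_t> level_offsets;
    std::size_t child = 0;
    for (std::size_t count : lengths[level]) {
      level_offsets.push_back(offsets[level + 1][child]);
      child += count;
    }
    offsets[level] = level_offsets;
  }
  return offsets;
}

int main() {
  // The example from lod_tensor.md: lengths {3, 1, 2} and {3, 2, 4, 1, 2, 3}
  // become offsets {0, 9, 10} and {0, 3, 5, 9, 10, 12}.
  for (const auto& level : LengthsToOffsets({{3, 1, 2}, {3, 2, 4, 1, 2, 3}})) {
    for (std::size_t v : level) std::cout << v << ' ';
    std::cout << '\n';
  }
  return 0;
}
```

With the offset form, the starting position of the <1,2>-slice is simply the third entry of the upper level, 10, and no tree traversal is needed at slicing time.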