diff --git a/paddle/framework/block_desc.cc b/paddle/framework/block_desc.cc index 0668b08ff7ab3c8ca4f1e989fc7af45a8ec5f63c..54498e175dacfa0a220e3d839f4feb02502b2c03 100644 --- a/paddle/framework/block_desc.cc +++ b/paddle/framework/block_desc.cc @@ -53,12 +53,12 @@ VarDesc *BlockDesc::FindVarRecursive(const std::string &name) const { return it->second.get(); } -VarDesc *BlockDesc::FindRecursiveOrCreateVar(const std::string &name_bytes) { +VarDesc &BlockDesc::FindRecursiveOrCreateVar(const std::string &name_bytes) { VarDesc *res = FindVarRecursive(name_bytes); if (res == nullptr) { res = Var(name_bytes); } - return res; + return *res; } bool BlockDesc::HasVarRecursive(const std::string &name) const { diff --git a/paddle/framework/block_desc.h b/paddle/framework/block_desc.h index 6c8c81b332d99e52db41018e117aa837be6745bc..4b609e4bcb67bb8dda5924a639e7a8165eda4353 100644 --- a/paddle/framework/block_desc.h +++ b/paddle/framework/block_desc.h @@ -57,7 +57,7 @@ class BlockDesc { VarDesc *FindVarRecursive(const std::string &name_bytes) const; - VarDesc *FindRecursiveOrCreateVar(const std::string &name_bytes); + VarDesc &FindRecursiveOrCreateVar(const std::string &name_bytes); bool HasVarRecursive(const std::string &var_name) const; diff --git a/paddle/framework/executor.cc b/paddle/framework/executor.cc index 844d98916ea5b1ffd88615825d79af37ba7d128e..c0418c9266e257bd7567861543e557f354451b17 100644 --- a/paddle/framework/executor.cc +++ b/paddle/framework/executor.cc @@ -21,6 +21,7 @@ limitations under the License. */ #include "paddle/framework/lod_rank_table.h" #include "paddle/framework/lod_tensor_array.h" #include "paddle/framework/op_registry.h" +#include "paddle/platform/place.h" DEFINE_bool(check_nan_inf, false, "Checking whether operator produce NAN/INF or not. 
It will be " @@ -49,10 +50,13 @@ static void CreateTensor(Variable* var, proto::VarDesc::VarType var_type) { var->GetMutable<LoDRankTable>(); } else if (var_type == proto::VarDesc::LOD_TENSOR_ARRAY) { var->GetMutable<LoDTensorArray>(); + } else if (var_type == proto::VarDesc::PLACE_LIST) { + var->GetMutable<platform::PlaceList>(); } else { PADDLE_THROW( "Variable type %d is not in " - "[LoDTensor, SelectedRows, FEED_MINIBATCH, FETCH_LIST, LOD_RANK_TABLE]", + "[LoDTensor, SelectedRows, FEED_MINIBATCH, FETCH_LIST, LOD_RANK_TABLE," + " PLACE_LIST]", var_type); } } diff --git a/paddle/framework/framework.proto b/paddle/framework/framework.proto index 4f2746e4b86ee5fe095897ff6ef9d3f6473e8a14..ea69b87e2ac7dc587333b623c310182bb39eb452 100644 --- a/paddle/framework/framework.proto +++ b/paddle/framework/framework.proto @@ -123,6 +123,7 @@ message VarDesc { STEP_SCOPES = 5; LOD_RANK_TABLE = 6; LOD_TENSOR_ARRAY = 7; + PLACE_LIST = 8; } required string name = 1; required VarType type = 2; diff --git a/paddle/framework/op_desc.cc b/paddle/framework/op_desc.cc index 47c91290e4bf90897d35f1b3bce2e1f10ad0782c..1c0372bb16c04e155a68a0411939e4887322107a 100644 --- a/paddle/framework/op_desc.cc +++ b/paddle/framework/op_desc.cc @@ -384,7 +384,7 @@ void OpDesc::InferVarType(BlockDesc *block) const { for (auto &out_pair : this->outputs_) { for (auto &out_var_name : out_pair.second) { block->FindRecursiveOrCreateVar(out_var_name) - ->SetType(proto::VarDesc::LOD_TENSOR); + .SetType(proto::VarDesc::LOD_TENSOR); } } } diff --git a/paddle/operators/get_places_op.cc b/paddle/operators/get_places_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..291bbbcb3a736a0b0b7ac9cff211896ef1e7a49c --- /dev/null +++ b/paddle/operators/get_places_op.cc @@ -0,0 +1,114 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include <thread> +#include "paddle/framework/op_registry.h" +#include "paddle/operators/detail/safe_ref.h" +#include "paddle/platform/place.h" +#ifdef PADDLE_WITH_CUDA +#include "paddle/platform/gpu_info.h" +#endif + +namespace paddle { +namespace operators { + +static size_t CUDADevCount() { +#ifdef PADDLE_WITH_CUDA + return platform::GetCUDADeviceCount(); +#else + return 0UL; +#endif +} + +class GetPlacesOp : public framework::OperatorBase { + public: + GetPlacesOp(const std::string &type, const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : OperatorBase(type, inputs, outputs, attrs) {} + void Run(const framework::Scope &scope, + const platform::Place &place) const override { + std::string device_type = Attr<std::string>("device_type"); + auto device_count = static_cast<size_t>(Attr<int>("device_count")); + if (device_count == 0) { + if (device_type == "CUDA") { + device_count = CUDADevCount(); + } else if (device_type == "CPU") { + device_count = std::thread::hardware_concurrency(); + } + } + PADDLE_ENFORCE_NE(device_count, 0, "Cannot indicate %s device count", + device_type); + + auto out_var_name = Output("Out"); + auto &places = + *(detail::Ref(scope.FindVar(out_var_name), + "Output variable %s cannot be found", out_var_name) + .GetMutable<platform::PlaceList>()); + places.reserve(device_count); + if (device_type == "CUDA") { + PADDLE_ENFORCE_LE(device_count, CUDADevCount(), + "Only %d CUDA devices found, cannot set to %d", + CUDADevCount(), device_count); + for (size_t i = 0; i < device_count; ++i) { + 
places.emplace_back(platform::CUDAPlace(i)); + } + } else if (device_type == "CPU") { + for (size_t i = 0; i < device_count; ++i) { + places.emplace_back(platform::CPUPlace()); + } + } + } +}; + +class GetPlacesOpProtoMaker : public framework::OpProtoAndCheckerMaker { + public: + GetPlacesOpProtoMaker(OpProto *proto, OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddOutput("Out", "vector of Place"); + AddAttr<int>("device_count", "device count").SetDefault(1); + AddAttr<std::string>("device_type", + R"(device type must be in ["CPU", "CUDA"])") + .InEnum({"CPU", "CUDA"}); + AddComment(R"DOC( +Returns a list of places based on flags. The list will be used for parallel +execution. +)DOC"); + } +}; + +class GetPlacesInferVarType : public framework::VarTypeInference { + public: + void operator()(const framework::OpDesc &op_desc, + framework::BlockDesc *block) const override { + for (auto &o_name : op_desc.Output("Out")) { + block->FindRecursiveOrCreateVar(o_name).SetType( + framework::proto::VarDesc::PLACE_LIST); + } + } +}; + +class GetPlacesInferShape : public framework::InferShapeBase { + public: + void operator()(framework::InferShapeContext *context) const override { + // Do nothing + } +}; + +} // namespace operators +} // namespace paddle +namespace ops = paddle::operators; + +REGISTER_OPERATOR(get_places, ops::GetPlacesOp, ops::GetPlacesOpProtoMaker, + ops::GetPlacesInferVarType, ops::GetPlacesInferShape); diff --git a/paddle/operators/lod_rank_table_op.cc b/paddle/operators/lod_rank_table_op.cc index 8711dd62c886fdada083d316d6aabc93a050ff82..692b9bf3710d764eceafda8390eedb8590794ddf 100644 --- a/paddle/operators/lod_rank_table_op.cc +++ b/paddle/operators/lod_rank_table_op.cc @@ -66,7 +66,7 @@ class LoDRankTableInferVarType : public framework::VarTypeInference { void operator()(const framework::OpDesc &op_desc, framework::BlockDesc *block) const override { for (auto &o : op_desc.Output("Out")) { - block->FindRecursiveOrCreateVar(o)->SetType( + 
block->FindRecursiveOrCreateVar(o).SetType( framework::proto::VarDesc::LOD_RANK_TABLE); } } diff --git a/paddle/operators/sum_op.cc b/paddle/operators/sum_op.cc index a4c08430d85ae418ec6a0c0e8e954415711cd23f..88ed67f7ba2527e1d91e6bb30762d5dcf818761d 100644 --- a/paddle/operators/sum_op.cc +++ b/paddle/operators/sum_op.cc @@ -122,17 +122,17 @@ class SumOpVarTypeInference : public framework::VarTypeInference { for (auto& name : op_desc.Input("X")) { VLOG(10) << name << " " - << block->FindRecursiveOrCreateVar(name)->GetType(); + << block->FindRecursiveOrCreateVar(name).GetType(); } bool any_input_is_lod_tensor = std::any_of( inputs.begin(), inputs.end(), [block](const std::string& name) { - return block->FindRecursiveOrCreateVar(name)->GetType() == + return block->FindRecursiveOrCreateVar(name).GetType() == framework::proto::VarDesc::LOD_TENSOR; }); auto is_tensor_array = [block](const std::string& name) { - return detail::Ref(block->FindRecursiveOrCreateVar(name)).GetType() == + return block->FindRecursiveOrCreateVar(name).GetType() == framework::proto::VarDesc::LOD_TENSOR_ARRAY; }; @@ -146,8 +146,7 @@ class SumOpVarTypeInference : public framework::VarTypeInference { std::ostringstream os; for (auto& each : inputs) { os << " " << each << " type is " - << detail::Ref(block->FindRecursiveOrCreateVar(each)).GetType() - << "\n"; + << block->FindRecursiveOrCreateVar(each).GetType() << "\n"; } PADDLE_ENFORCE(all_inputs_are_tensor_array, "Not all inputs are tensor array:\n%s", os.str()); @@ -158,7 +157,7 @@ class SumOpVarTypeInference : public framework::VarTypeInference { } auto out_var_name = op_desc.Output("Out").front(); - auto& out_var = detail::Ref(block->FindRecursiveOrCreateVar(out_var_name)); + auto& out_var = block->FindRecursiveOrCreateVar(out_var_name); out_var.SetType(var_type); auto& in_var = detail::Ref(block->FindVarRecursive(inputs.front())); out_var.SetDataType(in_var.GetDataType()); diff --git a/paddle/operators/tensor_array_read_write_op.cc 
b/paddle/operators/tensor_array_read_write_op.cc index d5ff3e3fce29b1a888b2cd4d307c2655669e3e4c..a6dceb2e3a130dc61f3cbaf35e310c5b58edb916 100644 --- a/paddle/operators/tensor_array_read_write_op.cc +++ b/paddle/operators/tensor_array_read_write_op.cc @@ -106,8 +106,7 @@ class WriteToArrayInferVarType : public framework::VarTypeInference { auto x_name = op_desc.Input("X")[0]; auto out_name = op_desc.Output("Out")[0]; VLOG(10) << "Set Variable " << out_name << " as LOD_TENSOR_ARRAY"; - auto &out = detail::Ref(block->FindRecursiveOrCreateVar(out_name), - "Cannot found %s", out_name); + auto &out = block->FindRecursiveOrCreateVar(out_name); out.SetType(framework::proto::VarDesc::LOD_TENSOR_ARRAY); auto *x = block->FindVarRecursive(x_name); if (x != nullptr) { diff --git a/paddle/platform/place.h b/paddle/platform/place.h index ba32dd3be6199371bf4624be60a2955a320dc22c..fbb43fa043a44c302e6b1cc67d83c18d791f07c5 100644 --- a/paddle/platform/place.h +++ b/paddle/platform/place.h @@ -52,6 +52,8 @@ struct IsCUDAPlace : public boost::static_visitor<bool> { typedef boost::variant<CUDAPlace, CPUPlace> Place; +using PlaceList = std::vector<Place>; + void set_place(const Place &); const Place &get_place(); diff --git a/paddle/pybind/protobuf.cc b/paddle/pybind/protobuf.cc index 564a3700011c1b0c294c27af4b86ab967a0f0e1e..4f959481537d29c089be24f9ae306f860c196c0f 100644 --- a/paddle/pybind/protobuf.cc +++ b/paddle/pybind/protobuf.cc @@ -231,7 +231,8 @@ void BindVarDsec(py::module &m) { .value("FETCH_LIST", proto::VarDesc::FETCH_LIST) .value("STEP_SCOPES", proto::VarDesc::STEP_SCOPES) .value("LOD_RANK_TABLE", proto::VarDesc::LOD_RANK_TABLE) - .value("LOD_TENSOR_ARRAY", proto::VarDesc::LOD_TENSOR_ARRAY); + .value("LOD_TENSOR_ARRAY", proto::VarDesc::LOD_TENSOR_ARRAY) + .value("PLACE_LIST", proto::VarDesc::PLACE_LIST); } void BindOpDesc(py::module &m) { diff --git a/python/paddle/v2/fluid/layers/__init__.py b/python/paddle/v2/fluid/layers/__init__.py index 
249f570e13b7a1b50397fb971d1c6f77e0359a5e..50ac0aba01a4079e7caf49d552c9361977aaa65d 100644 --- a/python/paddle/v2/fluid/layers/__init__.py +++ b/python/paddle/v2/fluid/layers/__init__.py @@ -8,6 +8,8 @@ import tensor from tensor import * import control_flow from control_flow import * +import device +from device import * __all__ = [] __all__ += nn.__all__ @@ -15,3 +17,4 @@ __all__ += io.__all__ __all__ += tensor.__all__ __all__ += control_flow.__all__ __all__ += ops.__all__ +__all__ += device.__all__ diff --git a/python/paddle/v2/fluid/layers/device.py b/python/paddle/v2/fluid/layers/device.py new file mode 100644 index 0000000000000000000000000000000000000000..c2355ed802000f4659147b103aee61e023bd847c --- /dev/null +++ b/python/paddle/v2/fluid/layers/device.py @@ -0,0 +1,22 @@ +""" +All util layers. +""" + +from ..layer_helper import LayerHelper +from ..framework import unique_name + +__all__ = ['get_places'] + + +def get_places(device_count=0, device_type="CPU"): + helper = LayerHelper('get_places', **locals()) + out_places = helper.create_variable(name=unique_name(helper.name + ".out")) + helper.append_op( + type='get_places', + outputs={"Out": [out_places]}, + attrs={ + "device_type": device_type, + 'device_count': device_count, + }) + + return out_places diff --git a/python/paddle/v2/fluid/tests/test_get_places_op.py b/python/paddle/v2/fluid/tests/test_get_places_op.py new file mode 100644 index 0000000000000000000000000000000000000000..c4346f6786c096026fa9cbd55fbd44c68f2f9981 --- /dev/null +++ b/python/paddle/v2/fluid/tests/test_get_places_op.py @@ -0,0 +1,17 @@ +import paddle.v2.fluid as fluid +import decorators +import unittest + + +class TestGetPlaces(unittest.TestCase): + @decorators.prog_scope() + def test_get_places(self): + places = fluid.layers.get_places() + cpu = fluid.CPUPlace() + exe = fluid.Executor(cpu) + exe.run(fluid.default_main_program()) + self.assertEqual(places.type, fluid.core.VarDesc.VarType.PLACE_LIST) + + +if __name__ == '__main__': + 
unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_layers.py b/python/paddle/v2/fluid/tests/test_layers.py index 77f0f11f1bcd5fa88700a33eec5a2abc2666ed02..a56277d216c28ddeb752a0aad46daca305a685e4 100644 --- a/python/paddle/v2/fluid/tests/test_layers.py +++ b/python/paddle/v2/fluid/tests/test_layers.py @@ -196,6 +196,13 @@ class TestBook(unittest.TestCase): self.assertIsNotNone(layers.sequence_softmax(x=seq)) print(str(program)) + def test_get_places(self): + program = Program() + with program_guard(program): + x = layers.get_places(device_count=4) + self.assertIsNotNone(x) + print(str(program)) + if __name__ == '__main__': unittest.main()