Polish code

test=develop

Polish code
test=develop
289aba75 · Paddle CI · minqiyang · cf7229d2 · 289aba75 · 289aba75
9 changed files
--- a/paddle/fluid/imperative/layer.cc
+++ b/paddle/fluid/imperative/layer.cc
@@ -49,8 +49,7 @@ class TensorAddToFunctor : public boost::static_visitor<> {
  void operator()(const platform::CPUPlace& place) {
    platform::CPUDeviceContext* ctx = dynamic_cast<platform::CPUDeviceContext*>(
        platform::DeviceContextPool::Instance().Get(place));
-    auto blas =
-        operators::math::GetBlas<platform::CPUDeviceContext, float>(*ctx);
+    auto blas = operators::math::GetBlas<platform::CPUDeviceContext, T>(*ctx);
    blas.AXPY(numel_, 1., x_, y_);
  }

@@ -59,8 +58,7 @@ class TensorAddToFunctor : public boost::static_visitor<> {
    platform::CUDADeviceContext* ctx =
        dynamic_cast<platform::CUDADeviceContext*>(
            platform::DeviceContextPool::Instance().Get(place));
-    auto blas =
-        operators::math::GetBlas<platform::CUDADeviceContext, float>(*ctx);
+    auto blas = operators::math::GetBlas<platform::CUDADeviceContext, T>(*ctx);
    blas.AXPY(numel_, 1., x_, y_);
  }
 #else
@@ -82,7 +80,7 @@ class TensorAddToFunctor : public boost::static_visitor<> {

 }  // namespace detail

-void AddGradTo(Variable* src, Variable* dst, platform::Place place) {
+void AddTo(Variable* src, Variable* dst, platform::Place place) {
  framework::Tensor* dst_tensor = dst->GetMutable<framework::LoDTensor>();
  framework::Tensor* src_tensor = src->GetMutable<framework::LoDTensor>();

@@ -170,27 +168,34 @@ class Autograd {
  }
 };

-framework::LoDTensor* VarBase::CopiedTensor() const {
+VarBase* VarBase::NewVarBase(const platform::Place& dst_place,
+                             const bool blocking) const {
  PADDLE_ENFORCE(var_->IsInitialized(),
                 "Variable must be initialized when getting numpy tensor");
-  platform::Place place = var_->Get<framework::LoDTensor>().place();
-  framework::LoDTensor* result = new framework::LoDTensor();
-  result->Resize(var_->Get<framework::LoDTensor>().dims());
-  result->set_lod(var_->Get<framework::LoDTensor>().lod());
-  if (platform::is_gpu_place(place)) {
-    VLOG(3) << "fetch tensor " << var_desc_->Name() << " from gpu";

-    framework::TensorCopy(var_->Get<framework::LoDTensor>(),
-                          platform::CPUPlace(), result);
+  VarBase* new_var = new VarBase();
+  framework::LoDTensor* tensor =
+      new_var->var_->GetMutable<framework::LoDTensor>();
+  tensor->Resize(var_->Get<framework::LoDTensor>().dims());
+  tensor->set_lod(var_->Get<framework::LoDTensor>().lod());

+  if (blocking) {
    platform::DeviceContext* dev_ctx =
-        platform::DeviceContextPool::Instance().Get(place);
+        platform::DeviceContextPool::Instance().Get(dst_place);
+
+    framework::TensorCopySync(var_->Get<framework::LoDTensor>(), dst_place,
+                              tensor);
+
    dev_ctx->Wait();
  } else {
-    TensorCopy(var_->Get<framework::LoDTensor>(), platform::CPUPlace(), result);
+    framework::TensorCopy(var_->Get<framework::LoDTensor>(), dst_place, tensor);
+  }
+
+  if (platform::is_gpu_place(dst_place)) {
+    VLOG(3) << "copy tensor " << var_desc_->Name() << " from gpu";
  }

-  return result;
+  return new_var;
 }

 framework::LoDTensor& VarBase::GradValue() {
@@ -235,7 +240,7 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
    PADDLE_ENFORCE_NOT_NULL(op_kernel, "only support op with kernel");

    framework::Scope scope;
-    platform::Place place = expected_place_;
+    platform::Place place = place_;
    PreparedOp p = PreparedOp::Prepare(ctx, *op_kernel, place);
    p.op.RuntimeInferShape(scope, place, ctx);
    p.func(framework::ExecutionContext(p.op, scope, *p.dev_ctx, p.ctx));
@@ -249,7 +254,7 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
    for (size_t i = 0; i < outputs.size(); ++i) {
      framework::Variable* grad = outputs[i];
      framework::Variable* orig_grad = origin_outputs[i];
-      AddGradTo(grad, orig_grad, expected_place_);
+      AddTo(grad, orig_grad, place_);
      delete grad;
    }
  }

--- a/paddle/fluid/imperative/layer.h
+++ b/paddle/fluid/imperative/layer.h
@@ -153,7 +153,8 @@ class VarBase {

  framework::LoDTensor& GradValue();

-  framework::LoDTensor* CopiedTensor() const;
+  VarBase* NewVarBase(const platform::Place& dst_place,
+                      const bool blocking) const;

  inline std::string GradName() const {
    PADDLE_ENFORCE(
@@ -184,7 +185,7 @@ class OpBase {
        forward_id_(-1),
        grad_op_desc_(nullptr),
        backward_id_(-1),
-        expected_place_(platform::CPUPlace()) {}
+        place_(platform::CPUPlace()) {}

  virtual ~OpBase() {
    if (grad_op_desc_) delete grad_op_desc_;
@@ -201,7 +202,7 @@ class OpBase {
  framework::OpDesc* grad_op_desc_;
  int backward_id_;

-  platform::Place expected_place_;
+  platform::Place place_;

  VarBasePtrMap input_vars_;
  VarBasePtrMap output_vars_;

--- a/paddle/fluid/imperative/tracer.cc
+++ b/paddle/fluid/imperative/tracer.cc
@@ -131,10 +131,9 @@ void Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
  PADDLE_ENFORCE_NOT_NULL(op_kernel, "only support op with kernel");

  framework::Scope scope;
-  op->expected_place_ = GetExpectedPlace(expected_place, inputs);
-  PreparedOp prepared_op =
-      PreparedOp::Prepare(ctx, *op_kernel, op->expected_place_);
-  prepared_op.op.RuntimeInferShape(scope, op->expected_place_, ctx);
+  op->place_ = GetExpectedPlace(expected_place, inputs);
+  PreparedOp prepared_op = PreparedOp::Prepare(ctx, *op_kernel, op->place_);
+  prepared_op.op.RuntimeInferShape(scope, op->place_, ctx);
  prepared_op.func(framework::ExecutionContext(
      prepared_op.op, scope, *prepared_op.dev_ctx, prepared_op.ctx));


--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -137,8 +137,13 @@ PYBIND11_MODULE(core, m) {
      .def("_grad_ivar",
           [](const imperative::VarBase &self) { return self.grads_; },
           py::return_value_policy::reference)
-      .def("_cpu_tensor",
-           [](const imperative::VarBase &self) { return self.CopiedTensor(); },
+      .def("_to",
+           [](const imperative::VarBase &self, const platform::CPUPlace &place,
+              bool blocking) { return self.NewVarBase(place, blocking); },
+           py::return_value_policy::take_ownership)
+      .def("_to",
+           [](const imperative::VarBase &self, const platform::CUDAPlace &place,
+              bool blocking) { return self.NewVarBase(place, blocking); },
           py::return_value_policy::take_ownership)
      .def("value", [](const imperative::VarBase &self) { return self.var_; },
           py::return_value_policy::reference)

--- a/python/paddle/fluid/framework.py
+++ b/python/paddle/fluid/framework.py
@@ -385,8 +385,8 @@ class Variable(object):
            self._ivar.stop_gradient = stop_gradient

    def _numpy(self):
-        tensor = self._ivar._cpu_tensor()
-        return np.array(tensor)
+        new_ivar = self._ivar._to(core.CPUPlace(), True)
+        return np.array(new_ivar.value().get_tensor())

    def _backward(self):
        self._ivar._run_backward()
@@ -2326,16 +2326,22 @@ def _get_var(name, program=None):


 @contextlib.contextmanager
-def _imperative_guard(tracer, place):
+def _imperative_guard(tracer):
    global _imperative_tracer_
    tmp_trace = _imperative_tracer_
    _imperative_tracer_ = tracer

+    yield
+
+    _imperative_tracer_ = tmp_trace
+
+
+@contextlib.contextmanager
+def _imperative_place_guard(place):
    global _current_expected_place_
    tmp_place = _current_expected_place_
    _current_expected_place_ = place

    yield

-    _imperative_tracer_ = tmp_trace
    _current_expected_place_ = tmp_place
--- a/python/paddle/fluid/imperative/base.py
+++ b/python/paddle/fluid/imperative/base.py
@@ -25,22 +25,21 @@ def enabled():


 @contextlib.contextmanager
-def guard(device=0):
+def guard(place=None):
    train = framework.Program()
    startup = framework.Program()
    tracer = core.Tracer(train.current_block().desc)

-    if device is None:
-        place = core.CPUPlace()
-    else:
+    if place is None:
        if core.is_compiled_with_cuda():
-            place = core.CUDAPlace(device)
+            place = core.CUDAPlace(0)
        else:
            place = core.CPUPlace()

    with framework.program_guard(train, startup):
        with framework.unique_name.guard():
-            with framework._imperative_guard(tracer, place):
+            with framework._imperative_guard(tracer):
+                with framework._imperative_place_guard(place):
                    yield



--- a/python/paddle/fluid/tests/unittests/test_imperative_gan.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_gan.py
@@ -135,7 +135,7 @@ class TestImperativeMnist(unittest.TestCase):
                    scope.find_var(param.name).get_tensor())

        dy_params = dict()
-        with fluid.imperative.guard(device=None):
+        with fluid.imperative.guard(place=fluid.CPUPlace()):
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed


--- a/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py
@@ -101,7 +101,7 @@ class TestImperativeMnist(unittest.TestCase):
    def test_mnist_cpu_float32(self):
        seed = 90

-        with fluid.imperative.guard(device=None):
+        with fluid.imperative.guard(place=fluid.CPUPlace()):
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed


--- a/python/paddle/fluid/tests/unittests/test_imperative_resnet.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_resnet.py
@@ -207,6 +207,9 @@ class TestImperativeResnet(unittest.TestCase):
    def test_resnet_gpu_float32(self):
        seed = 90

+        if not core.is_compiled_with_cuda():
+            return
+
        batch_size = train_parameters["batch_size"]
        batch_num = 1
        with fluid.imperative.guard():
@@ -370,7 +373,7 @@ class TestImperativeResnet(unittest.TestCase):

        batch_size = train_parameters["batch_size"]
        batch_num = 1
-        with fluid.imperative.guard(device=None):
+        with fluid.imperative.guard(place=fluid.CPUPlace()):
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed