Commit 289aba75 authored by Paddle CI, committed by minqiyang

Polish code

test=develop

Parent cf7229d2
@@ -49,8 +49,7 @@ class TensorAddToFunctor : public boost::static_visitor<> {
   void operator()(const platform::CPUPlace& place) {
     platform::CPUDeviceContext* ctx = dynamic_cast<platform::CPUDeviceContext*>(
         platform::DeviceContextPool::Instance().Get(place));
-    auto blas =
-        operators::math::GetBlas<platform::CPUDeviceContext, float>(*ctx);
+    auto blas = operators::math::GetBlas<platform::CPUDeviceContext, T>(*ctx);
     blas.AXPY(numel_, 1., x_, y_);
   }
@@ -59,8 +58,7 @@ class TensorAddToFunctor : public boost::static_visitor<> {
     platform::CUDADeviceContext* ctx =
         dynamic_cast<platform::CUDADeviceContext*>(
             platform::DeviceContextPool::Instance().Get(place));
-    auto blas =
-        operators::math::GetBlas<platform::CUDADeviceContext, float>(*ctx);
+    auto blas = operators::math::GetBlas<platform::CUDADeviceContext, T>(*ctx);
     blas.AXPY(numel_, 1., x_, y_);
   }
 #else
@@ -82,7 +80,7 @@ class TensorAddToFunctor : public boost::static_visitor<> {
 }  // namespace detail
 
-void AddGradTo(Variable* src, Variable* dst, platform::Place place) {
+void AddTo(Variable* src, Variable* dst, platform::Place place) {
   framework::Tensor* dst_tensor = dst->GetMutable<framework::LoDTensor>();
   framework::Tensor* src_tensor = src->GetMutable<framework::LoDTensor>();
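For orientation: the functor above and AddTo accumulate one tensor into another through BLAS AXPY (y = alpha * x + y, with alpha fixed at 1). A minimal NumPy sketch of that accumulation, assuming dense tensors of equal numel — not the Paddle implementation itself:

    import numpy as np

    def add_to(src, dst):
        # Equivalent of blas.AXPY(numel_, 1., x_, y_): dst = 1.0 * src + dst
        assert src.size == dst.size, "AddTo requires matching numel"
        dst += src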
@@ -170,27 +168,34 @@ class Autograd {
   }
 };
 
-framework::LoDTensor* VarBase::CopiedTensor() const {
+VarBase* VarBase::NewVarBase(const platform::Place& dst_place,
+                             const bool blocking) const {
   PADDLE_ENFORCE(var_->IsInitialized(),
                  "Variable must be initialized when getting numpy tensor");
-  platform::Place place = var_->Get<framework::LoDTensor>().place();
-  framework::LoDTensor* result = new framework::LoDTensor();
-  result->Resize(var_->Get<framework::LoDTensor>().dims());
-  result->set_lod(var_->Get<framework::LoDTensor>().lod());
-  if (platform::is_gpu_place(place)) {
-    VLOG(3) << "fetch tensor " << var_desc_->Name() << " from gpu";
-    framework::TensorCopy(var_->Get<framework::LoDTensor>(),
-                          platform::CPUPlace(), result);
+
+  VarBase* new_var = new VarBase();
+  framework::LoDTensor* tensor =
+      new_var->var_->GetMutable<framework::LoDTensor>();
+  tensor->Resize(var_->Get<framework::LoDTensor>().dims());
+  tensor->set_lod(var_->Get<framework::LoDTensor>().lod());
+
+  if (blocking) {
     platform::DeviceContext* dev_ctx =
-        platform::DeviceContextPool::Instance().Get(place);
+        platform::DeviceContextPool::Instance().Get(dst_place);
+
+    framework::TensorCopySync(var_->Get<framework::LoDTensor>(), dst_place,
+                              tensor);
+
     dev_ctx->Wait();
   } else {
-    TensorCopy(var_->Get<framework::LoDTensor>(), platform::CPUPlace(), result);
+    framework::TensorCopy(var_->Get<framework::LoDTensor>(), dst_place, tensor);
+  }
+
+  if (platform::is_gpu_place(dst_place)) {
+    VLOG(3) << "copy tensor " << var_desc_->Name() << " from gpu";
   }
 
-  return result;
+  return new_var;
 }
 
 framework::LoDTensor& VarBase::GradValue() {
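NewVarBase returns a fresh VarBase holding a copy of the tensor on dst_place; with blocking set, it copies via TensorCopySync and waits on the destination device context, so the result is immediately readable. A hedged sketch of the Python-side consumer (to_numpy is a hypothetical helper; it mirrors Variable._numpy() in framework.py further down in this diff):

    import numpy as np
    from paddle.fluid import core

    def to_numpy(ivar):
        # Blocking copy to CPU through the `_to` binding added in this
        # commit, then wrap the copied LoDTensor in a NumPy array.
        new_ivar = ivar._to(core.CPUPlace(), True)
        return np.array(new_ivar.value().get_tensor())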
@@ -235,7 +240,7 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
     PADDLE_ENFORCE_NOT_NULL(op_kernel, "only support op with kernel");
 
     framework::Scope scope;
-    platform::Place place = expected_place_;
+    platform::Place place = place_;
     PreparedOp p = PreparedOp::Prepare(ctx, *op_kernel, place);
     p.op.RuntimeInferShape(scope, place, ctx);
     p.func(framework::ExecutionContext(p.op, scope, *p.dev_ctx, p.ctx));
@@ -249,7 +254,7 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
     for (size_t i = 0; i < outputs.size(); ++i) {
       framework::Variable* grad = outputs[i];
       framework::Variable* orig_grad = origin_outputs[i];
-      AddGradTo(grad, orig_grad, expected_place_);
+      AddTo(grad, orig_grad, place_);
       delete grad;
     }
   }
......
@@ -153,7 +153,8 @@ class VarBase {
   framework::LoDTensor& GradValue();
 
-  framework::LoDTensor* CopiedTensor() const;
+  VarBase* NewVarBase(const platform::Place& dst_place,
+                      const bool blocking) const;
 
   inline std::string GradName() const {
     PADDLE_ENFORCE(
...@@ -184,7 +185,7 @@ class OpBase { ...@@ -184,7 +185,7 @@ class OpBase {
forward_id_(-1), forward_id_(-1),
grad_op_desc_(nullptr), grad_op_desc_(nullptr),
backward_id_(-1), backward_id_(-1),
expected_place_(platform::CPUPlace()) {} place_(platform::CPUPlace()) {}
virtual ~OpBase() { virtual ~OpBase() {
if (grad_op_desc_) delete grad_op_desc_; if (grad_op_desc_) delete grad_op_desc_;
@@ -201,7 +202,7 @@ class OpBase {
   framework::OpDesc* grad_op_desc_;
   int backward_id_;
 
-  platform::Place expected_place_;
+  platform::Place place_;
 
   VarBasePtrMap input_vars_;
   VarBasePtrMap output_vars_;
......
@@ -131,10 +131,9 @@ void Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
   PADDLE_ENFORCE_NOT_NULL(op_kernel, "only support op with kernel");
 
   framework::Scope scope;
-  op->expected_place_ = GetExpectedPlace(expected_place, inputs);
-  PreparedOp prepared_op =
-      PreparedOp::Prepare(ctx, *op_kernel, op->expected_place_);
-  prepared_op.op.RuntimeInferShape(scope, op->expected_place_, ctx);
+  op->place_ = GetExpectedPlace(expected_place, inputs);
+  PreparedOp prepared_op = PreparedOp::Prepare(ctx, *op_kernel, op->place_);
+  prepared_op.op.RuntimeInferShape(scope, op->place_, ctx);
   prepared_op.func(framework::ExecutionContext(
       prepared_op.op, scope, *prepared_op.dev_ctx, prepared_op.ctx));
......
@@ -137,8 +137,13 @@ PYBIND11_MODULE(core, m) {
       .def("_grad_ivar",
           [](const imperative::VarBase &self) { return self.grads_; },
           py::return_value_policy::reference)
-      .def("_cpu_tensor",
-           [](const imperative::VarBase &self) { return self.CopiedTensor(); },
+      .def("_to",
+           [](const imperative::VarBase &self, const platform::CPUPlace &place,
+              bool blocking) { return self.NewVarBase(place, blocking); },
+           py::return_value_policy::take_ownership)
+      .def("_to",
+           [](const imperative::VarBase &self, const platform::CUDAPlace &place,
+              bool blocking) { return self.NewVarBase(place, blocking); },
           py::return_value_policy::take_ownership)
       .def("value", [](const imperative::VarBase &self) { return self.var_; },
           py::return_value_policy::reference)
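Both bindings share the name _to; pybind11 dispatches on the runtime type of the place argument, and take_ownership hands the heap-allocated VarBase from NewVarBase to Python for garbage collection. A sketch with a hypothetical helper, assuming ivar is an existing VarBase handle and a CUDA build for the GPU branch:

    from paddle.fluid import core

    def copy_var(ivar, use_gpu=False):
        # pybind11 picks the `_to` overload whose place type matches.
        if use_gpu and core.is_compiled_with_cuda():
            return ivar._to(core.CUDAPlace(0), False)  # non-blocking, GPU 0
        return ivar._to(core.CPUPlace(), True)         # blocking, CPU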
......
@@ -385,8 +385,8 @@ class Variable(object):
         self._ivar.stop_gradient = stop_gradient
 
     def _numpy(self):
-        tensor = self._ivar._cpu_tensor()
-        return np.array(tensor)
+        new_ivar = self._ivar._to(core.CPUPlace(), True)
+        return np.array(new_ivar.value().get_tensor())
 
     def _backward(self):
         self._ivar._run_backward()
@@ -2326,16 +2326,22 @@ def _get_var(name, program=None):
 @contextlib.contextmanager
-def _imperative_guard(tracer, place):
+def _imperative_guard(tracer):
     global _imperative_tracer_
     tmp_trace = _imperative_tracer_
     _imperative_tracer_ = tracer
 
+    yield
+
+    _imperative_tracer_ = tmp_trace
+
+
+@contextlib.contextmanager
+def _imperative_place_guard(place):
     global _current_expected_place_
     tmp_place = _current_expected_place_
     _current_expected_place_ = place
 
     yield
 
-    _imperative_tracer_ = tmp_trace
     _current_expected_place_ = tmp_place
@@ -25,22 +25,21 @@ def enabled():
 @contextlib.contextmanager
-def guard(device=0):
+def guard(place=None):
     train = framework.Program()
     startup = framework.Program()
     tracer = core.Tracer(train.current_block().desc)
-    if device is None:
-        place = core.CPUPlace()
-    else:
-        if core.is_compiled_with_cuda():
-            place = core.CUDAPlace(device)
-        else:
-            place = core.CPUPlace()
+
+    if place is None:
+        if core.is_compiled_with_cuda():
+            place = core.CUDAPlace(0)
+        else:
+            place = core.CPUPlace()
+
     with framework.program_guard(train, startup):
         with framework.unique_name.guard():
-            with framework._imperative_guard(tracer, place):
+            with framework._imperative_guard(tracer):
+                with framework._imperative_place_guard(place):
                     yield
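Callers now pass a Place object instead of a device ordinal, and omitting place falls back to CUDAPlace(0) on CUDA builds and CPUPlace() otherwise. A usage sketch consistent with the updated tests below:

    import paddle.fluid as fluid

    # Pin imperative execution to the CPU regardless of the build.
    with fluid.imperative.guard(place=fluid.CPUPlace()):
        pass  # construct and run imperative ops here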
......
@@ -135,7 +135,7 @@ class TestImperativeMnist(unittest.TestCase):
                 scope.find_var(param.name).get_tensor())
 
         dy_params = dict()
-        with fluid.imperative.guard(device=None):
+        with fluid.imperative.guard(place=fluid.CPUPlace()):
             fluid.default_startup_program().random_seed = seed
             fluid.default_main_program().random_seed = seed
......
@@ -101,7 +101,7 @@ class TestImperativeMnist(unittest.TestCase):
     def test_mnist_cpu_float32(self):
         seed = 90
-        with fluid.imperative.guard(device=None):
+        with fluid.imperative.guard(place=fluid.CPUPlace()):
             fluid.default_startup_program().random_seed = seed
             fluid.default_main_program().random_seed = seed
......
@@ -207,6 +207,9 @@ class TestImperativeResnet(unittest.TestCase):
     def test_resnet_gpu_float32(self):
         seed = 90
 
+        if not core.is_compiled_with_cuda():
+            return
+
         batch_size = train_parameters["batch_size"]
         batch_num = 1
         with fluid.imperative.guard():
@@ -370,7 +373,7 @@ class TestImperativeResnet(unittest.TestCase):
         batch_size = train_parameters["batch_size"]
         batch_num = 1
-        with fluid.imperative.guard(device=None):
+        with fluid.imperative.guard(place=fluid.CPUPlace()):
             fluid.default_startup_program().random_seed = seed
             fluid.default_main_program().random_seed = seed
......