Commit 289aba75 authored by Paddle CI, committed by minqiyang

Polish code

test=develop
Parent cf7229d2
@@ -49,8 +49,7 @@ class TensorAddToFunctor : public boost::static_visitor<> {
   void operator()(const platform::CPUPlace& place) {
     platform::CPUDeviceContext* ctx = dynamic_cast<platform::CPUDeviceContext*>(
         platform::DeviceContextPool::Instance().Get(place));
-    auto blas =
-        operators::math::GetBlas<platform::CPUDeviceContext, float>(*ctx);
+    auto blas = operators::math::GetBlas<platform::CPUDeviceContext, T>(*ctx);
     blas.AXPY(numel_, 1., x_, y_);
   }
@@ -59,8 +58,7 @@ class TensorAddToFunctor : public boost::static_visitor<> {
     platform::CUDADeviceContext* ctx =
         dynamic_cast<platform::CUDADeviceContext*>(
             platform::DeviceContextPool::Instance().Get(place));
-    auto blas =
-        operators::math::GetBlas<platform::CUDADeviceContext, float>(*ctx);
+    auto blas = operators::math::GetBlas<platform::CUDADeviceContext, T>(*ctx);
     blas.AXPY(numel_, 1., x_, y_);
   }
 #else
@@ -82,7 +80,7 @@ class TensorAddToFunctor : public boost::static_visitor<> {
 }  // namespace detail
 
-void AddGradTo(Variable* src, Variable* dst, platform::Place place) {
+void AddTo(Variable* src, Variable* dst, platform::Place place) {
   framework::Tensor* dst_tensor = dst->GetMutable<framework::LoDTensor>();
   framework::Tensor* src_tensor = src->GetMutable<framework::LoDTensor>();
@@ -170,27 +168,34 @@ class Autograd {
   }
 };
 
-framework::LoDTensor* VarBase::CopiedTensor() const {
+VarBase* VarBase::NewVarBase(const platform::Place& dst_place,
+                             const bool blocking) const {
   PADDLE_ENFORCE(var_->IsInitialized(),
                  "Variable must be initialized when getting numpy tensor");
-  platform::Place place = var_->Get<framework::LoDTensor>().place();
-  framework::LoDTensor* result = new framework::LoDTensor();
-  result->Resize(var_->Get<framework::LoDTensor>().dims());
-  result->set_lod(var_->Get<framework::LoDTensor>().lod());
-  if (platform::is_gpu_place(place)) {
-    VLOG(3) << "fetch tensor " << var_desc_->Name() << " from gpu";
-    framework::TensorCopy(var_->Get<framework::LoDTensor>(),
-                          platform::CPUPlace(), result);
+
+  VarBase* new_var = new VarBase();
+  framework::LoDTensor* tensor =
+      new_var->var_->GetMutable<framework::LoDTensor>();
+  tensor->Resize(var_->Get<framework::LoDTensor>().dims());
+  tensor->set_lod(var_->Get<framework::LoDTensor>().lod());
+
+  if (blocking) {
     platform::DeviceContext* dev_ctx =
-        platform::DeviceContextPool::Instance().Get(place);
+        platform::DeviceContextPool::Instance().Get(dst_place);
+
+    framework::TensorCopySync(var_->Get<framework::LoDTensor>(), dst_place,
+                              tensor);
+
     dev_ctx->Wait();
   } else {
-    TensorCopy(var_->Get<framework::LoDTensor>(), platform::CPUPlace(), result);
+    framework::TensorCopy(var_->Get<framework::LoDTensor>(), dst_place, tensor);
   }
-  return result;
+
+  if (platform::is_gpu_place(dst_place)) {
+    VLOG(3) << "copy tensor " << var_desc_->Name() << " from gpu";
+  }
+
+  return new_var;
 }
 
 framework::LoDTensor& VarBase::GradValue() {
@@ -235,7 +240,7 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
     PADDLE_ENFORCE_NOT_NULL(op_kernel, "only support op with kernel");
 
     framework::Scope scope;
-    platform::Place place = expected_place_;
+    platform::Place place = place_;
     PreparedOp p = PreparedOp::Prepare(ctx, *op_kernel, place);
     p.op.RuntimeInferShape(scope, place, ctx);
     p.func(framework::ExecutionContext(p.op, scope, *p.dev_ctx, p.ctx));
@@ -249,7 +254,7 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
   for (size_t i = 0; i < outputs.size(); ++i) {
     framework::Variable* grad = outputs[i];
     framework::Variable* orig_grad = origin_outputs[i];
-    AddGradTo(grad, orig_grad, expected_place_);
+    AddTo(grad, orig_grad, place_);
     delete grad;
   }
 }
......
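The functor change above matters because TensorAddToFunctor is templated on T but previously instantiated its BLAS handle with float unconditionally; with GetBlas<..., T> the AXPY call now runs in the functor's actual element type. For reference, a minimal Python sketch of the y := alpha * x + y update that AddTo relies on to accumulate a fresh gradient into an existing buffer (numpy stands in for the BLAS routine; this axpy helper is illustrative, not a Paddle API):

import numpy as np

def axpy(alpha, x, y):
    # BLAS AXPY: y := alpha * x + y, updating y in place.
    y += alpha * x

y = np.array([0.1, 0.2], dtype=np.float32)  # existing gradient buffer (y_)
x = np.array([1.0, 2.0], dtype=np.float32)  # newly computed gradient (x_)
axpy(1.0, x, y)                             # AddTo accumulates with alpha = 1
print(y)                                    # [1.1 2.2]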
@@ -153,7 +153,8 @@ class VarBase {
   framework::LoDTensor& GradValue();
 
-  framework::LoDTensor* CopiedTensor() const;
+  VarBase* NewVarBase(const platform::Place& dst_place,
+                      const bool blocking) const;
 
   inline std::string GradName() const {
     PADDLE_ENFORCE(
@@ -184,7 +185,7 @@ class OpBase {
         forward_id_(-1),
         grad_op_desc_(nullptr),
         backward_id_(-1),
-        expected_place_(platform::CPUPlace()) {}
+        place_(platform::CPUPlace()) {}
 
   virtual ~OpBase() {
     if (grad_op_desc_) delete grad_op_desc_;
@@ -201,7 +202,7 @@ class OpBase {
   framework::OpDesc* grad_op_desc_;
   int backward_id_;
 
-  platform::Place expected_place_;
+  platform::Place place_;
 
   VarBasePtrMap input_vars_;
   VarBasePtrMap output_vars_;
......
@@ -131,10 +131,9 @@ void Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
     PADDLE_ENFORCE_NOT_NULL(op_kernel, "only support op with kernel");
 
     framework::Scope scope;
-    op->expected_place_ = GetExpectedPlace(expected_place, inputs);
-    PreparedOp prepared_op =
-        PreparedOp::Prepare(ctx, *op_kernel, op->expected_place_);
-    prepared_op.op.RuntimeInferShape(scope, op->expected_place_, ctx);
+    op->place_ = GetExpectedPlace(expected_place, inputs);
+    PreparedOp prepared_op = PreparedOp::Prepare(ctx, *op_kernel, op->place_);
+    prepared_op.op.RuntimeInferShape(scope, op->place_, ctx);
     prepared_op.func(framework::ExecutionContext(
         prepared_op.op, scope, *prepared_op.dev_ctx, prepared_op.ctx));
......
@@ -137,8 +137,13 @@ PYBIND11_MODULE(core, m) {
       .def("_grad_ivar",
           [](const imperative::VarBase &self) { return self.grads_; },
           py::return_value_policy::reference)
-      .def("_cpu_tensor",
-          [](const imperative::VarBase &self) { return self.CopiedTensor(); },
+      .def("_to",
+          [](const imperative::VarBase &self, const platform::CPUPlace &place,
+             bool blocking) { return self.NewVarBase(place, blocking); },
+          py::return_value_policy::take_ownership)
+      .def("_to",
+          [](const imperative::VarBase &self, const platform::CUDAPlace &place,
+             bool blocking) { return self.NewVarBase(place, blocking); },
           py::return_value_policy::take_ownership)
      .def("value", [](const imperative::VarBase &self) { return self.var_; },
           py::return_value_policy::reference)
......
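Because pybind11 tries overloads in registration order, the single Python method _to accepts either a CPUPlace or a CUDAPlace and dispatches to the matching NewVarBase call. A hedged usage sketch (assuming ivar is a bound imperative::VarBase holding a tensor, e.g. Variable._ivar inside an imperative guard):

import numpy as np
from paddle.fluid import core

# Blocking copy to CPU: NewVarBase goes through TensorCopySync and waits
# on the destination device context, so the tensor is safe to read as
# soon as _to returns.
cpu_ivar = ivar._to(core.CPUPlace(), True)
arr = np.array(cpu_ivar.value().get_tensor())

# Non-blocking copy: TensorCopy only enqueues the transfer; the result
# must not be read until the device has been synchronized.
if core.is_compiled_with_cuda():
    gpu_ivar = ivar._to(core.CUDAPlace(0), False)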
@@ -385,8 +385,8 @@ class Variable(object):
         self._ivar.stop_gradient = stop_gradient
 
     def _numpy(self):
-        tensor = self._ivar._cpu_tensor()
-        return np.array(tensor)
+        new_ivar = self._ivar._to(core.CPUPlace(), True)
+        return np.array(new_ivar.value().get_tensor())
 
     def _backward(self):
         self._ivar._run_backward()
@@ -2326,16 +2326,22 @@ def _get_var(name, program=None):
 @contextlib.contextmanager
-def _imperative_guard(tracer, place):
+def _imperative_guard(tracer):
     global _imperative_tracer_
     tmp_trace = _imperative_tracer_
     _imperative_tracer_ = tracer
 
+    yield
+
+    _imperative_tracer_ = tmp_trace
+
+
+@contextlib.contextmanager
+def _imperative_place_guard(place):
     global _current_expected_place_
     tmp_place = _current_expected_place_
     _current_expected_place_ = place
 
     yield
 
-    _imperative_tracer_ = tmp_trace
     _current_expected_place_ = tmp_place
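Splitting the old two-purpose guard lets the tracer and the expected place be scoped independently. A minimal self-contained sketch of the save/set/yield/restore pattern both context managers follow (the names below are illustrative, not the fluid internals):

import contextlib

_state = {"tracer": None, "place": None}

@contextlib.contextmanager
def _tracer_guard(tracer):
    tmp = _state["tracer"]     # save the old value
    _state["tracer"] = tracer  # install the new one
    yield                      # run the with-block
    _state["tracer"] = tmp     # restore on exit

@contextlib.contextmanager
def _place_guard(place):
    tmp = _state["place"]
    _state["place"] = place
    yield
    _state["place"] = tmp

with _tracer_guard("t"), _place_guard("cpu"):
    assert _state == {"tracer": "t", "place": "cpu"}
assert _state == {"tracer": None, "place": None}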
@@ -25,22 +25,21 @@ def enabled():
 @contextlib.contextmanager
-def guard(device=0):
+def guard(place=None):
     train = framework.Program()
     startup = framework.Program()
     tracer = core.Tracer(train.current_block().desc)
 
-    if device is None:
-        place = core.CPUPlace()
-    else:
+    if place is None:
         if core.is_compiled_with_cuda():
-            place = core.CUDAPlace(device)
+            place = core.CUDAPlace(0)
         else:
             place = core.CPUPlace()
 
     with framework.program_guard(train, startup):
         with framework.unique_name.guard():
-            with framework._imperative_guard(tracer, place):
-                yield
+            with framework._imperative_guard(tracer):
+                with framework._imperative_place_guard(place):
+                    yield
......
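With the new signature, callers pass a place object rather than a device index, and place=None auto-selects CUDAPlace(0) when Paddle is compiled with CUDA, falling back to CPUPlace otherwise. Typical usage after this change, as the updated tests below show:

import paddle.fluid as fluid

# Explicit CPU execution (replaces the old guard(device=None)):
with fluid.imperative.guard(place=fluid.CPUPlace()):
    pass  # build and run imperative code here

# Auto-selection: CUDAPlace(0) if CUDA is available, else CPUPlace:
with fluid.imperative.guard():
    pass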
@@ -135,7 +135,7 @@ class TestImperativeMnist(unittest.TestCase):
                     scope.find_var(param.name).get_tensor())
 
         dy_params = dict()
-        with fluid.imperative.guard(device=None):
+        with fluid.imperative.guard(place=fluid.CPUPlace()):
             fluid.default_startup_program().random_seed = seed
             fluid.default_main_program().random_seed = seed
......
@@ -101,7 +101,7 @@ class TestImperativeMnist(unittest.TestCase):
     def test_mnist_cpu_float32(self):
         seed = 90
 
-        with fluid.imperative.guard(device=None):
+        with fluid.imperative.guard(place=fluid.CPUPlace()):
             fluid.default_startup_program().random_seed = seed
             fluid.default_main_program().random_seed = seed
......
@@ -207,6 +207,9 @@ class TestImperativeResnet(unittest.TestCase):
     def test_resnet_gpu_float32(self):
         seed = 90
 
+        if not core.is_compiled_with_cuda():
+            return
+
         batch_size = train_parameters["batch_size"]
         batch_num = 1
         with fluid.imperative.guard():
@@ -370,7 +373,7 @@ class TestImperativeResnet(unittest.TestCase):
         batch_size = train_parameters["batch_size"]
         batch_num = 1
-        with fluid.imperative.guard(device=None):
+        with fluid.imperative.guard(place=fluid.CPUPlace()):
             fluid.default_startup_program().random_seed = seed
             fluid.default_main_program().random_seed = seed
......