提交 315b133e 编写于 作者: M minqiyang

Add single GPU support to imperative

上级 91d87ec0
cc_library(layer SRCS layer.cc DEPS proto_desc operator) cc_library(layer SRCS layer.cc DEPS proto_desc operator device_context blas)
cc_library(tracer SRCS tracer.cc DEPS proto_desc) cc_library(tracer SRCS tracer.cc DEPS proto_desc device_context)
cc_library(engine SRCS engine.cc) cc_library(engine SRCS engine.cc)
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
// limitations under the License. // limitations under the License.
#include "paddle/fluid/imperative/layer.h" #include "paddle/fluid/imperative/layer.h"
#include <deque> #include <deque>
#include <limits> #include <limits>
#include <map> #include <map>
...@@ -22,6 +23,9 @@ ...@@ -22,6 +23,9 @@
#include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/string/printf.h" #include "paddle/fluid/string/printf.h"
namespace paddle { namespace paddle {
...@@ -31,22 +35,68 @@ std::map<int, py::object> py_funcs_; ...@@ -31,22 +35,68 @@ std::map<int, py::object> py_funcs_;
using framework::Variable; using framework::Variable;
void AddTo(Variable* src, Variable* dst) { namespace detail {
framework::LoDTensor* dst_tensor = dst->GetMutable<framework::LoDTensor>();
framework::LoDTensor* src_tensor = src->GetMutable<framework::LoDTensor>(); template <typename T>
// Visitor over platform::Place that accumulates `x` into `y` (y += x) by
// calling the BLAS AXPY routine of the device context registered for the
// visited place.
//
//   numel: element count handed to AXPY.
//   x:     source buffer, read only.
//   y:     destination buffer, updated in place.
//
// CUDAPlace in a build without PADDLE_WITH_CUDA, and CUDAPinnedPlace in any
// build, are rejected with PADDLE_THROW.
class TensorAddToFunctor : public boost::static_visitor<> {
 public:
  TensorAddToFunctor(int64_t numel, const T* x, T* y)
      : numel_(numel), x_(x), y_(y) {}

  void operator()(const platform::CPUPlace& place) {
    platform::CPUDeviceContext* ctx = dynamic_cast<platform::CPUDeviceContext*>(
        platform::DeviceContextPool::Instance().Get(place));
    // Instantiate BLAS with T rather than a hard-coded float so the element
    // type always agrees with x_ / y_.
    auto blas = operators::math::GetBlas<platform::CPUDeviceContext, T>(*ctx);
    blas.AXPY(numel_, static_cast<T>(1), x_, y_);
  }

#ifdef PADDLE_WITH_CUDA
  void operator()(const platform::CUDAPlace& place) {
    platform::CUDADeviceContext* ctx =
        dynamic_cast<platform::CUDADeviceContext*>(
            platform::DeviceContextPool::Instance().Get(place));
    // Same element-type rule as the CPU overload.
    auto blas = operators::math::GetBlas<platform::CUDADeviceContext, T>(*ctx);
    blas.AXPY(numel_, static_cast<T>(1), x_, y_);
  }
#else
  void operator()(const platform::CUDAPlace& place) {
    PADDLE_THROW("Do NOT support gradient merge in place %s", place);
  }
#endif

  // there is NO blas in CUDAPinnedPlace
  void operator()(const platform::CUDAPinnedPlace& place) {
    PADDLE_THROW("Do NOT support gradient merge in place %s", place);
  }

 private:
  int64_t numel_;
  const T* x_;
  T* y_;
};
} // namespace detail
void AddGradTo(Variable* src, Variable* dst, platform::Place place) {
framework::Tensor* dst_tensor = dst->GetMutable<framework::LoDTensor>();
framework::Tensor* src_tensor = src->GetMutable<framework::LoDTensor>();
// FIXME(minqiyang): loss_grad op will pass a zero grad of label // FIXME(minqiyang): loss_grad op will pass a zero grad of label
// ugly fix for it // ugly fix for it
if (src_tensor->numel() == 0) { if (src_tensor->numel() == 0) {
return; return;
} }
PADDLE_ENFORCE(dst_tensor->numel() == src_tensor->numel(), PADDLE_ENFORCE(dst_tensor->numel() == src_tensor->numel(),
"dst_numel %lld vs. src_numel %lld", dst_tensor->numel(), "dst_numel %lld vs. src_numel %lld", dst_tensor->numel(),
src_tensor->numel()); src_tensor->numel());
float* dst_data = dst_tensor->mutable_data<float>(platform::CPUPlace());
const float* src_data = src_tensor->data<float>(); detail::TensorAddToFunctor<float> func(
for (int64_t i = 0; i < src_tensor->numel(); ++i) { src_tensor->numel(), src_tensor->data<float>(),
dst_data[i] += src_data[i]; dst_tensor->mutable_data<float>(place));
} boost::apply_visitor(func, place);
} }
class Autograd { class Autograd {
...@@ -158,7 +208,7 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() { ...@@ -158,7 +208,7 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
PADDLE_ENFORCE_NOT_NULL(op_kernel, "only support op with kernel"); PADDLE_ENFORCE_NOT_NULL(op_kernel, "only support op with kernel");
framework::Scope scope; framework::Scope scope;
platform::CPUPlace place; platform::Place place = expected_place_;
PreparedOp p = PreparedOp::Prepare(ctx, *op_kernel, place); PreparedOp p = PreparedOp::Prepare(ctx, *op_kernel, place);
p.op.RuntimeInferShape(scope, place, ctx); p.op.RuntimeInferShape(scope, place, ctx);
p.func(framework::ExecutionContext(p.op, scope, *p.dev_ctx, p.ctx)); p.func(framework::ExecutionContext(p.op, scope, *p.dev_ctx, p.ctx));
...@@ -172,7 +222,7 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() { ...@@ -172,7 +222,7 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
for (size_t i = 0; i < outputs.size(); ++i) { for (size_t i = 0; i < outputs.size(); ++i) {
framework::Variable* grad = outputs[i]; framework::Variable* grad = outputs[i];
framework::Variable* orig_grad = origin_outputs[i]; framework::Variable* orig_grad = origin_outputs[i];
AddTo(grad, orig_grad); AddGradTo(grad, orig_grad, expected_place_);
delete grad; delete grad;
} }
} }
...@@ -184,8 +234,10 @@ void VarBase::RunBackward() { ...@@ -184,8 +234,10 @@ void VarBase::RunBackward() {
VLOG(3) << "start backward"; VLOG(3) << "start backward";
auto grads_t = grads_->var_->GetMutable<framework::LoDTensor>(); auto grads_t = grads_->var_->GetMutable<framework::LoDTensor>();
float* data = grads_t->mutable_data<float>(platform::CPUPlace()); operators::math::set_constant(
std::fill(data, data + grads_t->numel(), 1.0); *(platform::DeviceContextPool::Instance().Get(
var_->GetMutable<framework::LoDTensor>()->place())),
grads_t, 1.0);
PADDLE_ENFORCE( PADDLE_ENFORCE(
grads_ == grads_ ==
......
...@@ -26,12 +26,15 @@ ...@@ -26,12 +26,15 @@
#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/var_desc.h" #include "paddle/fluid/framework/var_desc.h"
#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/imperative/type_defs.h" #include "paddle/fluid/imperative/type_defs.h"
namespace paddle { namespace paddle {
namespace imperative { namespace imperative {
class VarBase;
namespace py = ::pybind11; namespace py = ::pybind11;
class PreparedOp { class PreparedOp {
...@@ -81,6 +84,8 @@ class PreparedOp { ...@@ -81,6 +84,8 @@ class PreparedOp {
return PreparedOp(op, ctx, kernel_iter->second, dev_ctx); return PreparedOp(op, ctx, kernel_iter->second, dev_ctx);
} }
// Returns the device context pointer held in this PreparedOp's `dev_ctx`
// member.
inline platform::DeviceContext* GetDeviceContext() const { return dev_ctx; }
const framework::OperatorBase& op; const framework::OperatorBase& op;
const framework::RuntimeContext& ctx; const framework::RuntimeContext& ctx;
framework::OperatorWithKernel::OpKernelFunc func; framework::OperatorWithKernel::OpKernelFunc func;
...@@ -159,7 +164,8 @@ class OpBase { ...@@ -159,7 +164,8 @@ class OpBase {
: op_desc_(nullptr), : op_desc_(nullptr),
forward_id_(-1), forward_id_(-1),
grad_op_desc_(nullptr), grad_op_desc_(nullptr),
backward_id_(-1) {} backward_id_(-1),
expected_place_(platform::CPUPlace()) {}
virtual ~OpBase() { virtual ~OpBase() {
if (grad_op_desc_) delete grad_op_desc_; if (grad_op_desc_) delete grad_op_desc_;
...@@ -176,6 +182,8 @@ class OpBase { ...@@ -176,6 +182,8 @@ class OpBase {
framework::OpDesc* grad_op_desc_; framework::OpDesc* grad_op_desc_;
int backward_id_; int backward_id_;
platform::Place expected_place_;
VarBasePtrMap input_vars_; VarBasePtrMap input_vars_;
VarBasePtrMap output_vars_; VarBasePtrMap output_vars_;
OpBasePtrMap pre_ops_; OpBasePtrMap pre_ops_;
......
...@@ -14,6 +14,10 @@ ...@@ -14,6 +14,10 @@
#include "paddle/fluid/imperative/tracer.h" #include "paddle/fluid/imperative/tracer.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/enforce.h"
namespace paddle { namespace paddle {
namespace imperative { namespace imperative {
...@@ -31,16 +35,38 @@ void CreateGradOp(const framework::OpDesc& op_desc, ...@@ -31,16 +35,38 @@ void CreateGradOp(const framework::OpDesc& op_desc,
*grad_op_desc = grad_op_descs[0].release(); *grad_op_desc = grad_op_descs[0].release();
} }
void InitVar(framework::Variable* var, framework::Variable* grad_var) { void InitVar(framework::Variable* var, framework::Variable* grad_var,
platform::DeviceContext* dev_ctx) {
PADDLE_ENFORCE_NOT_NULL(dev_ctx,
"Could not get valid device from forward op");
auto& var_t = var->Get<framework::LoDTensor>(); auto& var_t = var->Get<framework::LoDTensor>();
float* data = grad_var->GetMutable<framework::LoDTensor>()->mutable_data<float>(
grad_var->GetMutable<framework::LoDTensor>()->mutable_data<float>( var_t.dims(), dev_ctx->GetPlace());
var_t.dims(), platform::CPUPlace()); operators::math::set_constant(
std::fill(data, data + var_t.numel(), 0.0); *dev_ctx, grad_var->GetMutable<framework::LoDTensor>(), .0f);
}
// Verifies that the tensor of every input variable lives on `place` and
// returns `place` unchanged.
//
//   place:  the place the op is expected to run on.
//   inputs: map from input name to the VarBase pointers bound to that input.
//
// Raises via PADDLE_THROW on the first input whose tensor place differs from
// `place`; the message names the expected place, the actual place and the
// input.
platform::Place GetExpectedPlace(platform::Place place, VarBasePtrMap inputs) {
  // Bind each entry by const reference: iterating by value would copy the
  // key string and the whole vector of pointers on every iteration.
  for (const auto& input : inputs) {
    for (VarBase* var : input.second) {
      platform::Place tmp_place =
          var->var_->Get<framework::LoDTensor>().place();
      if (!platform::is_same_place(tmp_place, place)) {
        PADDLE_THROW(
            "Input variable should keep in the same place: %s, but get place: "
            "%s of input %s instead",
            place, tmp_place, input.first);
      }
    }
  }
  return place;
}
void Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs, void Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
const VarBasePtrMap& outputs, framework::BlockDesc* block, const VarBasePtrMap& outputs, framework::BlockDesc* block,
const platform::Place expected_place,
const bool stop_gradient) { const bool stop_gradient) {
std::map<std::string, VarBase*> vars; std::map<std::string, VarBase*> vars;
...@@ -108,10 +134,12 @@ void Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs, ...@@ -108,10 +134,12 @@ void Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
PADDLE_ENFORCE_NOT_NULL(op_kernel, "only support op with kernel"); PADDLE_ENFORCE_NOT_NULL(op_kernel, "only support op with kernel");
framework::Scope scope; framework::Scope scope;
platform::CPUPlace place; op->expected_place_ = GetExpectedPlace(expected_place, inputs);
PreparedOp p = PreparedOp::Prepare(ctx, *op_kernel, place); PreparedOp prepared_op =
p.op.RuntimeInferShape(scope, place, ctx); PreparedOp::Prepare(ctx, *op_kernel, op->expected_place_);
p.func(framework::ExecutionContext(p.op, scope, *p.dev_ctx, p.ctx)); prepared_op.op.RuntimeInferShape(scope, op->expected_place_, ctx);
prepared_op.func(framework::ExecutionContext(
prepared_op.op, scope, *prepared_op.dev_ctx, prepared_op.ctx));
if (!stop_gradient) { if (!stop_gradient) {
framework::OpDesc* grad_op_desc; framework::OpDesc* grad_op_desc;
...@@ -134,7 +162,8 @@ void Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs, ...@@ -134,7 +162,8 @@ void Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
} else { } else {
VarBase* var = vars[var_it->second]; VarBase* var = vars[var_it->second];
if (!var->grads_->var_->IsInitialized()) { if (!var->grads_->var_->IsInitialized()) {
InitVar(var->var_, var->grads_->var_); InitVar(var->var_, var->grads_->var_,
prepared_op.GetDeviceContext());
} }
// Douts. // Douts.
grad_in_vars.push_back(var->grads_->var_); grad_in_vars.push_back(var->grads_->var_);
...@@ -147,10 +176,13 @@ void Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs, ...@@ -147,10 +176,13 @@ void Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
for (const std::string& grad_outvar : it.second) { for (const std::string& grad_outvar : it.second) {
block->FindRecursiveOrCreateVar(grad_outvar); block->FindRecursiveOrCreateVar(grad_outvar);
auto var_it = grad_to_var->find(grad_outvar); auto var_it = grad_to_var->find(grad_outvar);
PADDLE_ENFORCE(var_it != grad_to_var->end()); PADDLE_ENFORCE(var_it != grad_to_var->end(),
"Could not found the grad op output var, should this "
"operator %s's stop gradient be True",
op_desc->Type());
VarBase* var = vars[var_it->second]; VarBase* var = vars[var_it->second];
if (!var->grads_->var_->IsInitialized()) { if (!var->grads_->var_->IsInitialized()) {
InitVar(var->var_, var->grads_->var_); InitVar(var->var_, var->grads_->var_, prepared_op.GetDeviceContext());
} }
grad_out_vars.push_back(var->grads_->var_); grad_out_vars.push_back(var->grads_->var_);
} }
...@@ -193,16 +225,23 @@ std::vector<VarBase*> Tracer::PyTrace(OpBase* op, ...@@ -193,16 +225,23 @@ std::vector<VarBase*> Tracer::PyTrace(OpBase* op,
for (VarBase* out : outputs) { for (VarBase* out : outputs) {
grad_input_vars.push_back(out->var_); grad_input_vars.push_back(out->var_);
} }
platform::CPUPlace place;
for (VarBase* out : outputs) { for (VarBase* out : outputs) {
grad_input_vars.push_back(out->grads_->var_); grad_input_vars.push_back(out->grads_->var_);
if (!grad_input_vars.back()->IsInitialized()) { if (!grad_input_vars.back()->IsInitialized()) {
InitVar(out->var_, grad_input_vars.back()); // TODO(minqiyang): Add GPU support for PyLayer, only support CPU now
InitVar(out->var_, grad_input_vars.back(),
platform::DeviceContextPool::Instance().Get(place));
} }
} }
for (const VarBase* inp : inputs) { for (const VarBase* inp : inputs) {
grad_output_vars.push_back(inp->grads_->var_); grad_output_vars.push_back(inp->grads_->var_);
if (!grad_output_vars.back()->IsInitialized()) { if (!grad_output_vars.back()->IsInitialized()) {
InitVar(inp->var_, grad_output_vars.back()); // TODO(minqiyang): Add GPU support for PyLayer, only support CPU now
InitVar(inp->var_, grad_output_vars.back(),
platform::DeviceContextPool::Instance().Get(place));
} }
} }
} }
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/imperative/engine.h" #include "paddle/fluid/imperative/engine.h"
#include "paddle/fluid/imperative/layer.h" #include "paddle/fluid/imperative/layer.h"
#include "paddle/fluid/platform/place.h"
namespace paddle { namespace paddle {
namespace imperative { namespace imperative {
...@@ -34,21 +35,25 @@ void CreateGradOp(const framework::OpDesc& op_desc, ...@@ -34,21 +35,25 @@ void CreateGradOp(const framework::OpDesc& op_desc,
void InitVar(framework::Variable* var, framework::Variable* grad_var); void InitVar(framework::Variable* var, framework::Variable* grad_var);
platform::Place GetExpectedPlace(platform::Place place, VarBasePtrMap inputs);
class Tracer { class Tracer {
public: public:
explicit Tracer(framework::BlockDesc* root_block) : root_block_(root_block) {} explicit Tracer(framework::BlockDesc* root_block) : root_block_(root_block) {}
virtual ~Tracer() {} virtual ~Tracer() {}
void Trace(OpBase* op, void Trace(OpBase* op, const VarBasePtrMap& inputs,
const std::map<std::string, std::vector<VarBase*>>& inputs, const VarBasePtrMap& outputs, framework::BlockDesc* block,
const std::map<std::string, std::vector<VarBase*>>& outputs, const platform::Place expected_place,
framework::BlockDesc* block, const bool stop_gradient = false); const bool stop_gradient = false);
std::vector<VarBase*> PyTrace(OpBase* op, const std::vector<VarBase*>& inputs, std::vector<VarBase*> PyTrace(OpBase* op, const std::vector<VarBase*>& inputs,
bool stop_gradient = false); bool stop_gradient = false);
private: private:
platform::Place GetPlace(const VarBasePtrMap& inputs);
framework::BlockDesc* root_block_; framework::BlockDesc* root_block_;
}; };
......
...@@ -30,8 +30,9 @@ platform::DeviceContext* DeviceContextPool::Get(const platform::Place& place) { ...@@ -30,8 +30,9 @@ platform::DeviceContext* DeviceContextPool::Get(const platform::Place& place) {
auto it = device_contexts_.find(place); auto it = device_contexts_.find(place);
if (it == device_contexts_.end()) { if (it == device_contexts_.end()) {
PADDLE_THROW( PADDLE_THROW(
"'Place' is not supported, Please re-compile with WITH_GPU " "Place %s is not supported, Please re-compile with WITH_GPU "
"option"); "option",
place);
} }
return it->second.get().get(); return it->second.get().get();
} }
......
...@@ -15,18 +15,38 @@ limitations under the License. */ ...@@ -15,18 +15,38 @@ limitations under the License. */
#include "paddle/fluid/pybind/imperative.h" #include "paddle/fluid/pybind/imperative.h"
#include "paddle/fluid/framework/block_desc.h" #include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/imperative/tracer.h" #include "paddle/fluid/imperative/tracer.h"
#include "paddle/fluid/imperative/type_defs.h"
namespace paddle { namespace paddle {
namespace pybind { namespace pybind {
// Bind Methods // Bind Methods
void BindTracer(pybind11::module *m) { void BindTracer(pybind11::module* m) {
pybind11::class_<imperative::Tracer>(*m, "Tracer", "") pybind11::class_<imperative::Tracer>(*m, "Tracer", "")
.def("__init__", .def("__init__",
[](imperative::Tracer &self, framework::BlockDesc *root_block) { [](imperative::Tracer& self, framework::BlockDesc* root_block) {
new (&self) imperative::Tracer(root_block); new (&self) imperative::Tracer(root_block);
}) })
.def("trace", &imperative::Tracer::Trace) .def("trace",
[](imperative::Tracer& self, imperative::OpBase* op,
const imperative::VarBasePtrMap& inputs,
const imperative::VarBasePtrMap& outputs,
framework::BlockDesc* block,
const platform::CPUPlace expected_place,
const bool stop_gradient = false) {
self.Trace(op, inputs, outputs, block, expected_place,
stop_gradient);
})
.def("trace",
[](imperative::Tracer& self, imperative::OpBase* op,
const imperative::VarBasePtrMap& inputs,
const imperative::VarBasePtrMap& outputs,
framework::BlockDesc* block,
const platform::CUDAPlace expected_place,
const bool stop_gradient = false) {
self.Trace(op, inputs, outputs, block, expected_place,
stop_gradient);
})
.def("py_trace", &imperative::Tracer::PyTrace, .def("py_trace", &imperative::Tracer::PyTrace,
pybind11::return_value_policy::take_ownership); pybind11::return_value_policy::take_ownership);
} }
......
...@@ -66,6 +66,7 @@ ZERO_VAR_SUFFIX = core.kZeroVarSuffix() ...@@ -66,6 +66,7 @@ ZERO_VAR_SUFFIX = core.kZeroVarSuffix()
CONTROL_DEP_VAR_PREFIX = core.kControlDepVarName() CONTROL_DEP_VAR_PREFIX = core.kControlDepVarName()
_imperative_tracer_ = None _imperative_tracer_ = None
_current_expected_place_ = None
def _in_imperative_mode(): def _in_imperative_mode():
...@@ -76,6 +77,10 @@ def _imperative_tracer(): ...@@ -76,6 +77,10 @@ def _imperative_tracer():
return _imperative_tracer_ return _imperative_tracer_
def _current_expected_place():
    """Return the module-level ``_current_expected_place_`` value.

    The global starts out as ``None`` and is assigned by
    ``_imperative_guard`` for the duration of its ``with`` block.
    """
    return _current_expected_place_
class NameScope(object): class NameScope(object):
def __init__(self, name="", parent=None): def __init__(self, name="", parent=None):
self._children = dict() self._children = dict()
...@@ -1299,7 +1304,7 @@ class Block(object): ...@@ -1299,7 +1304,7 @@ class Block(object):
def _trace_op(self, op, stop_gradient=False): def _trace_op(self, op, stop_gradient=False):
if _in_imperative_mode(): if _in_imperative_mode():
_imperative_tracer().trace(op.iop, op.inputs, op.outputs, self.desc, _imperative_tracer().trace(op.iop, op.inputs, op.outputs, self.desc,
stop_gradient) _current_expected_place_, stop_gradient)
def _insert_op(self, index, *args, **kwargs): def _insert_op(self, index, *args, **kwargs):
""" """
...@@ -2312,9 +2317,16 @@ def _get_var(name, program=None): ...@@ -2312,9 +2317,16 @@ def _get_var(name, program=None):
@contextlib.contextmanager @contextlib.contextmanager
def _imperative_guard(tracer): def _imperative_guard(tracer, place):
global _imperative_tracer_ global _imperative_tracer_
tmp_trace = _imperative_tracer_ tmp_trace = _imperative_tracer_
_imperative_tracer_ = tracer _imperative_tracer_ = tracer
global _current_expected_place_
tmp_place = _current_expected_place_
_current_expected_place_ = place
yield yield
_imperative_tracer_ = tmp_trace _imperative_tracer_ = tmp_trace
_current_expected_place_ = tmp_place
...@@ -25,17 +25,28 @@ def enabled(): ...@@ -25,17 +25,28 @@ def enabled():
@contextlib.contextmanager @contextlib.contextmanager
def guard(): def guard(device=0):
train = framework.Program() train = framework.Program()
startup = framework.Program() startup = framework.Program()
tracer = core.Tracer(train.current_block().desc) tracer = core.Tracer(train.current_block().desc)
if device is None:
place = core.CPUPlace()
else:
if core.is_compiled_with_cuda():
place = core.CUDAPlace(device)
else:
place = core.CPUPlace()
with framework.program_guard(train, startup): with framework.program_guard(train, startup):
with framework.unique_name.guard(): with framework.unique_name.guard():
with framework._imperative_guard(tracer): with framework._imperative_guard(tracer, place):
yield yield
def to_variable(value, block=None): def to_variable(value, block=None):
assert enabled(), "to_variable could only be called in imperative mode"
if isinstance(value, np.ndarray): if isinstance(value, np.ndarray):
if not block: if not block:
block = framework.default_main_program().current_block() block = framework.default_main_program().current_block()
...@@ -47,9 +58,7 @@ def to_variable(value, block=None): ...@@ -47,9 +58,7 @@ def to_variable(value, block=None):
dtype=value.dtype) dtype=value.dtype)
var = py_var._ivar.value() var = py_var._ivar.value()
tensor = var.get_tensor() tensor = var.get_tensor()
tensor.set(value, core.CPUPlace()) tensor.set(value, framework._current_expected_place())
return py_var return py_var
elif isinstance(value, framework.Variable): elif isinstance(value, framework.Variable):
return value return value
else:
raise ValueError("Unsupported type %s" % type(value))
...@@ -252,15 +252,15 @@ class FC(layers.Layer): ...@@ -252,15 +252,15 @@ class FC(layers.Layer):
"y_num_col_dims": 1 "y_num_col_dims": 1
}) })
out = self._helper.create_variable_for_type_inference(self._dtype) pre_bias = self._helper.create_variable_for_type_inference(self._dtype)
self._helper.append_op( self._helper.append_op(
type="sum", type="sum",
inputs={"X": [tmp]}, inputs={"X": [tmp]},
outputs={"Out": out}, outputs={"Out": pre_bias},
attrs={"use_mkldnn": False}) attrs={"use_mkldnn": False})
pre_activation = self._helper.append_bias_op( pre_activation = self._helper.append_bias_op(
pre_bias, dim_start=num_flatten_dims) pre_bias, dim_start=self._num_flatten_dims)
return self._helper.append_activation(pre_activation) return self._helper.append_activation(pre_activation)
...@@ -355,11 +355,11 @@ class BatchNorm(layers.Layer): ...@@ -355,11 +355,11 @@ class BatchNorm(layers.Layer):
variance_out = self._variance variance_out = self._variance
saved_mean = self._helper.create_variable_for_type_inference( saved_mean = self._helper.create_variable_for_type_inference(
dtype=dtype, stop_gradient=True) dtype=self._dtype, stop_gradient=True)
saved_variance = self._helper.create_variable_for_type_inference( saved_variance = self._helper.create_variable_for_type_inference(
dtype=dtype, stop_gradient=True) dtype=self._dtype, stop_gradient=True)
batch_norm_out = input if self._in_place else self._helper.create_variable_for_type_inference( batch_norm_out = input if self._in_place else self._helper.create_variable_for_type_inference(
dtype) self._dtype)
self._helper.append_op( self._helper.append_op(
type="batch_norm", type="batch_norm",
......
...@@ -321,7 +321,7 @@ def append_LARS(params_grads, learning_rate, weight_decay): ...@@ -321,7 +321,7 @@ def append_LARS(params_grads, learning_rate, weight_decay):
The decayed learning rate The decayed learning rate
Examples: Examples:
.. code-block:: python .. code-block:: python
learning_rate *= local_gw_ratio * sqrt(sumsq(param)) learning_rate *= local_gw_ratio * sqrt(sumsq(param))
/ (sqrt(sumsq(gradient))+ weight_decay * sqrt(sumsq(param))) / (sqrt(sumsq(gradient))+ weight_decay * sqrt(sumsq(param)))
""" """
......
...@@ -5810,7 +5810,8 @@ def autoincreased_step_counter(counter_name=None, begin=1, step=1): ...@@ -5810,7 +5810,8 @@ def autoincreased_step_counter(counter_name=None, begin=1, step=1):
type='increment', type='increment',
inputs={'X': [counter]}, inputs={'X': [counter]},
outputs={'Out': [counter]}, outputs={'Out': [counter]},
attrs={'step': float(step)}) attrs={'step': float(step)},
stop_gradient=True)
counter.stop_gradient = True counter.stop_gradient = True
return counter return counter
......
...@@ -382,7 +382,8 @@ def fill_constant(shape, dtype, value, force_cpu=False, out=None): ...@@ -382,7 +382,8 @@ def fill_constant(shape, dtype, value, force_cpu=False, out=None):
'dtype': out.dtype, 'dtype': out.dtype,
'value': float(value), 'value': float(value),
'force_cpu': force_cpu or force_init_on_cpu() 'force_cpu': force_cpu or force_init_on_cpu()
}) },
stop_gradient=True)
out.stop_gradient = True out.stop_gradient = True
return out return out
......
...@@ -301,10 +301,10 @@ class Optimizer(object): ...@@ -301,10 +301,10 @@ class Optimizer(object):
no_grad_set (set|None): set of Variables should be ignored. no_grad_set (set|None): set of Variables should be ignored.
callbacks (list|None): list of callables to run when appending backward callbacks (list|None): list of callables to run when appending backward
operator for one parameter. operator for one parameter.
Return: Return:
list: list of (param, grad) pair, grad is the output of backward. list: list of (param, grad) pair, grad is the output of backward.
Examples: Examples:
See examples in `apply_gradients`. See examples in `apply_gradients`.
""" """
...@@ -322,10 +322,10 @@ class Optimizer(object): ...@@ -322,10 +322,10 @@ class Optimizer(object):
Args: Args:
params_grads (list): list of (param, grad) pair to do optimization. params_grads (list): list of (param, grad) pair to do optimization.
Returns: Returns:
list: A list of operators appended to the current program. list: A list of operators appended to the current program.
Examples: Examples:
.. code-block:: python .. code-block:: python
...@@ -364,7 +364,7 @@ class Optimizer(object): ...@@ -364,7 +364,7 @@ class Optimizer(object):
This method combines interface `backward()` and This method combines interface `backward()` and
`apply_gradients()` into one. `apply_gradients()` into one.
Args: Args:
loss (Variable): loss variable to run optimizations. loss (Variable): loss variable to run optimizations.
startup_program (Program): startup_program for initializing parameters startup_program (Program): startup_program for initializing parameters
...@@ -381,18 +381,19 @@ class Optimizer(object): ...@@ -381,18 +381,19 @@ class Optimizer(object):
optimize_ops = [] optimize_ops = []
if imperative_base.enabled(): if imperative_base.enabled():
if parameter_list is not None: if parameter_list is not None:
params_grads = parameter_list parameters = parameter_list
else: else:
parameters = program.global_block().all_parameters() parameters = program.global_block().all_parameters()
params_grads = []
for param in parameters: params_grads = []
# create gradient variable for param in parameters:
grad_var = Variable( # create gradient variable
block=loss.block, grad_var = Variable(
name=param._ivar._grad_name(), block=loss.block,
stop_gradient=True, name=param._ivar._grad_name(),
ivar=param._ivar._grad_ivar()) stop_gradient=True,
params_grads.append((param, grad_var)) ivar=param._ivar._grad_ivar())
params_grads.append((param, grad_var))
with program_guard(program, startup_program): with program_guard(program, startup_program):
optimize_ops = self._create_optimization_pass(params_grads) optimize_ops = self._create_optimization_pass(params_grads)
else: else:
......
...@@ -107,7 +107,6 @@ if(WITH_DISTRIBUTE) ...@@ -107,7 +107,6 @@ if(WITH_DISTRIBUTE)
endif() endif()
py_test_modules(test_parallel_executor_crf MODULES test_parallel_executor_crf SERIAL) py_test_modules(test_parallel_executor_crf MODULES test_parallel_executor_crf SERIAL)
py_test_modules(test_parallel_executor_fetch_feed MODULES test_parallel_executor_fetch_feed SERIAL) py_test_modules(test_parallel_executor_fetch_feed MODULES test_parallel_executor_fetch_feed SERIAL)
set_tests_properties(test_parallel_executor_fetch_feed PROPERTIES TIMEOUT 150)
py_test_modules(test_parallel_executor_transformer MODULES test_parallel_executor_transformer SERIAL) py_test_modules(test_parallel_executor_transformer MODULES test_parallel_executor_transformer SERIAL)
if(NOT APPLE) if(NOT APPLE)
py_test_modules(test_image_classification_resnet MODULES test_image_classification_resnet SERIAL) py_test_modules(test_image_classification_resnet MODULES test_image_classification_resnet SERIAL)
......
...@@ -82,7 +82,7 @@ class MLP(fluid.imperative.Layer): ...@@ -82,7 +82,7 @@ class MLP(fluid.imperative.Layer):
class TestImperative(unittest.TestCase): class TestImperative(unittest.TestCase):
def test_layer(self): def test_layer(self):
with fluid.imperative.guard(): with fluid.imperative.guard(device=None):
cl = core.Layer() cl = core.Layer()
cl.forward([]) cl.forward([])
l = fluid.imperative.Layer() l = fluid.imperative.Layer()
...@@ -90,7 +90,7 @@ class TestImperative(unittest.TestCase): ...@@ -90,7 +90,7 @@ class TestImperative(unittest.TestCase):
def test_pylayer_func_id(self): def test_pylayer_func_id(self):
with fluid.imperative.guard(): with fluid.imperative.guard(device=None):
class PyLayer1(fluid.imperative.PyLayer): class PyLayer1(fluid.imperative.PyLayer):
def __init__(self): def __init__(self):
...@@ -130,7 +130,7 @@ class TestImperative(unittest.TestCase): ...@@ -130,7 +130,7 @@ class TestImperative(unittest.TestCase):
def test_pylayer(self): def test_pylayer(self):
np_inp = np.ones([2, 2], np.float32) np_inp = np.ones([2, 2], np.float32)
with fluid.imperative.guard(): with fluid.imperative.guard(device=None):
my_py_layer = MyPyLayer() my_py_layer = MyPyLayer()
var_inp = fluid.imperative.base.to_variable(np_inp) var_inp = fluid.imperative.base.to_variable(np_inp)
outs = my_py_layer(var_inp) outs = my_py_layer(var_inp)
...@@ -158,7 +158,7 @@ class TestImperative(unittest.TestCase): ...@@ -158,7 +158,7 @@ class TestImperative(unittest.TestCase):
def test_layer_in_out(self): def test_layer_in_out(self):
np_inp = np.array([1.0, 2.0, -1.0], dtype=np.float32) np_inp = np.array([1.0, 2.0, -1.0], dtype=np.float32)
with fluid.imperative.guard(): with fluid.imperative.guard(device=None):
var_inp = fluid.imperative.base.to_variable(np_inp) var_inp = fluid.imperative.base.to_variable(np_inp)
l = MyLayer() l = MyLayer()
x = l(var_inp)[0] x = l(var_inp)[0]
...@@ -185,7 +185,7 @@ class TestImperative(unittest.TestCase): ...@@ -185,7 +185,7 @@ class TestImperative(unittest.TestCase):
def test_mlp(self): def test_mlp(self):
np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32) np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
with fluid.imperative.guard(): with fluid.imperative.guard(device=None):
var_inp = fluid.imperative.base.to_variable(np_inp) var_inp = fluid.imperative.base.to_variable(np_inp)
mlp = MLP() mlp = MLP()
out = mlp(var_inp) out = mlp(var_inp)
......
...@@ -101,7 +101,7 @@ class TestImperativeMnist(unittest.TestCase): ...@@ -101,7 +101,7 @@ class TestImperativeMnist(unittest.TestCase):
def test_mnist_cpu_float32(self): def test_mnist_cpu_float32(self):
seed = 90 seed = 90
with fluid.imperative.guard(): with fluid.imperative.guard(device=None):
fluid.default_startup_program().random_seed = seed fluid.default_startup_program().random_seed = seed
fluid.default_main_program().random_seed = seed fluid.default_main_program().random_seed = seed
......
...@@ -34,7 +34,10 @@ train_parameters = { ...@@ -34,7 +34,10 @@ train_parameters = {
"batch_size": 256, "batch_size": 256,
"epochs": [30, 60, 90], "epochs": [30, 60, 90],
"steps": [0.1, 0.01, 0.001, 0.0001] "steps": [0.1, 0.01, 0.001, 0.0001]
} },
"batch_size": 256,
"lr": 0.1,
"total_images": 1281164,
} }
...@@ -52,24 +55,33 @@ def optimizer_setting(params): ...@@ -52,24 +55,33 @@ def optimizer_setting(params):
base_lr = params["lr"] base_lr = params["lr"]
lr = [] lr = []
lr = [base_lr * (0.1**i) for i in range(len(bd) + 1)] lr = [base_lr * (0.1**i) for i in range(len(bd) + 1)]
optimizer = fluid.optimizer.Momentum( optimizer = fluid.optimizer.SGD(learning_rate=params["lr"])
learning_rate=fluid.layers.piecewise_decay( # optimizer = fluid.optimizer.Momentum(
boundaries=bd, values=lr), # learning_rate=params["lr"],
momentum=0.9, # learning_rate=fluid.layers.piecewise_decay(
regularization=fluid.regularizer.L2Decay(1e-4)) # boundaries=bd, values=lr),
# momentum=0.9,
# regularization=fluid.regularizer.L2Decay(1e-4))
return optimizer return optimizer
class ConvBNLayer(fluid.imperative.Layer): class ConvBNLayer(fluid.imperative.Layer):
def __init__(self, num_filters, filter_size, stride=1, groups=1, act=None): def __init__(self,
num_channels,
num_filters,
filter_size,
stride=1,
groups=1,
act=None):
super(ConvBNLayer, self).__init__() super(ConvBNLayer, self).__init__()
self._conv = Conv2D( self._conv = Conv2D(
3, num_channels=num_channels,
num_filters, num_filters=num_filters,
filter_size, filter_size=filter_size,
stride, (filter_size - 1) // 2, stride=stride,
padding=(filter_size - 1) // 2,
groups=groups, groups=groups,
act=None, act=None,
bias_attr=None) bias_attr=None)
...@@ -84,36 +96,54 @@ class ConvBNLayer(fluid.imperative.Layer): ...@@ -84,36 +96,54 @@ class ConvBNLayer(fluid.imperative.Layer):
class BottleneckBlock(fluid.imperative.Layer): class BottleneckBlock(fluid.imperative.Layer):
def __init__(self, num_filters, stride, shortcut=False): def __init__(self, num_channels, num_filters, stride, shortcut=True):
super(BottleneckBlock, self).__init__() super(BottleneckBlock, self).__init__()
self.conv0 = ConvBNLayer( self.conv0 = ConvBNLayer(
num_filters=num_filters, filter_size=1, act='relu') num_channels=num_channels,
num_filters=num_filters,
filter_size=1,
act='relu')
self.conv1 = ConvBNLayer( self.conv1 = ConvBNLayer(
num_filters=num_filters, filter_size=3, stride=stride, act='relu') num_channels=num_filters,
num_filters=num_filters,
filter_size=3,
stride=stride,
act='relu')
self.conv2 = ConvBNLayer( self.conv2 = ConvBNLayer(
num_filters=num_filters * 4, filter_size=1, act=None) num_channels=num_filters,
num_filters=num_filters * 4,
filter_size=1,
act=None)
if shortcut: if not shortcut:
self.short = ConvBNLayer( self.short = ConvBNLayer(
num_filters=num_filters * 4, filter_size=1, stride=stride) num_channels=num_channels,
num_filters=num_filters * 4,
filter_size=1,
stride=stride)
self.shortcut = shortcut self.shortcut = shortcut
self._num_channels_out = num_filters * 4
def forward(self, inputs): def forward(self, inputs):
self.conv0() y = self.conv0(inputs)
self.conv1() conv1 = self.conv1(y)
self.conv2() conv2 = self.conv2(conv1)
if self.shortcut: if self.shortcut:
self.short() short = inputs
else:
short = self.short(inputs)
return fluid.layers.elementwise_add( return fluid.layers.elementwise_add(x=short, y=conv2, act='relu')
x=self.short, y=self.conv2, act='relu')
class ResNet(fluid.imperative.Layer): class ResNet(fluid.imperative.Layer):
def __init__(self, layers=50, class_dim=1000): def __init__(self, layers=50, class_dim=1000):
super(ResNet, self).__init__()
self.layers = layers self.layers = layers
supported_layers = [50, 101, 152] supported_layers = [50, 101, 152]
assert layers in supported_layers, \ assert layers in supported_layers, \
...@@ -128,20 +158,23 @@ class ResNet(fluid.imperative.Layer): ...@@ -128,20 +158,23 @@ class ResNet(fluid.imperative.Layer):
num_filters = [64, 128, 256, 512] num_filters = [64, 128, 256, 512]
self.conv = ConvBNLayer( self.conv = ConvBNLayer(
num_filters=64, filter_size=7, stride=2, act='relu') num_channels=3, num_filters=64, filter_size=7, stride=2, act='relu')
self.pool2d_max = Pool2D( self.pool2d_max = Pool2D(
pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')
self.bottleneck_block_list = [] self.bottleneck_block_list = []
num_channels = 64
for block in range(len(depth)): for block in range(len(depth)):
shortcut = True shortcut = False
for i in range(depth[block]): for i in range(depth[block]):
bottleneck_block = BottleneckBlock( bottleneck_block = BottleneckBlock(
num_channels=num_channels,
num_filters=num_filters[block], num_filters=num_filters[block],
stride=2 if i == 0 and block != 0 else 1, stride=2 if i == 0 and block != 0 else 1,
shortcut=shortcut) shortcut=shortcut)
num_channels = bottleneck_block._num_channels_out
self.bottleneck_block_list.append(bottleneck_block) self.bottleneck_block_list.append(bottleneck_block)
shortcut = False shortcut = True
self.pool2d_avg = Pool2D( self.pool2d_avg = Pool2D(
pool_size=7, pool_type='avg', global_pooling=True) pool_size=7, pool_type='avg', global_pooling=True)
...@@ -160,12 +193,12 @@ class ResNet(fluid.imperative.Layer): ...@@ -160,12 +193,12 @@ class ResNet(fluid.imperative.Layer):
for bottleneck_block in self.bottleneck_block_list: for bottleneck_block in self.bottleneck_block_list:
y = bottleneck_block(y) y = bottleneck_block(y)
y = self.pool2d_avg(y) y = self.pool2d_avg(y)
y = self.out() y = self.out(y)
return y return y
class TestImperativeResnet(unittest.TestCase): class TestImperativeResnet(unittest.TestCase):
def test_resnet_cpu_float32(self): def test_resnet_gpu_float32(self):
seed = 90 seed = 90
with fluid.imperative.guard(): with fluid.imperative.guard():
...@@ -183,17 +216,17 @@ class TestImperativeResnet(unittest.TestCase): ...@@ -183,17 +216,17 @@ class TestImperativeResnet(unittest.TestCase):
break break
x_data = np.array( x_data = np.array(
[x[0].reshape(1, 28, 28) for x in data]).astype('float32') [x[0].reshape(3, 224, 224) for x in data]).astype('float32')
y_data = np.array([x[1] for x in data]).astype('int64').reshape( y_data = np.array([x[1] for x in data]).astype('int64').reshape(
128, 1) 256, 1)
img = to_variable(x_data) img = to_variable(x_data)
label = to_variable(y_data) label = to_variable(y_data)
label._stop_gradient = True label._stop_gradient = True
cost = resnet(img) out = resnet(img)
loss = fluid.layers.cross_entropy(input=out, label=label) loss = fluid.layers.cross_entropy(input=out, label=label)
avg_loss = fluid.layers.mean(x=cost) avg_loss = fluid.layers.mean(x=loss)
dy_out = avg_loss._numpy() dy_out = avg_loss._numpy()
if batch_id == 0: if batch_id == 0:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册