Commit c624417c authored by Qiao Longfei

change sparse mode to lazy mode

Parent 4035e4ba
@@ -111,7 +111,7 @@ class AdamOpMaker : public framework::OpProtoAndCheckerMaker {
          "Constant for numerical stability")
         .SetDefault(1.0e-8f);
     AddAttr<bool>(
-        "sparse_mode",
+        "lazy_mode",
         "(bool, default false) "
         "only update the parameter that has gradient in sparse update")
         .SetDefault(false);
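For context, the attribute being renamed keeps its original meaning: with lazy mode enabled, Adam only updates the parameter rows that actually receive a gradient in the sparse update. A minimal numpy sketch of that behaviour (illustrative only; the function name and signature are not part of this patch):

import numpy as np

# Lazy sparse Adam step: touch only the rows present in the sparse gradient;
# all other rows of the parameter and both moments are left untouched.
def lazy_sparse_adam_step(param, mom1, mom2, grad_rows, grad_values,
                          lr=0.001, beta1=0.9, beta2=0.999, eps=1e-8, t=1):
    lr_t = lr * np.sqrt(1 - beta2**t) / (1 - beta1**t)  # bias-corrected lr
    for row, g in zip(grad_rows, grad_values):
        mom1[row] = beta1 * mom1[row] + (1 - beta1) * g
        mom2[row] = beta2 * mom2[row] + (1 - beta2) * g * g
        param[row] -= lr_t * mom1[row] / (np.sqrt(mom2[row]) + eps)
    return param, mom1, mom2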
@@ -177,13 +177,13 @@ struct SparseAdamFunctor {
   const int64_t* rows_;
   int64_t row_numel_;
   int64_t row_count_;
-  bool sparse_mode_;
+  bool lazy_mode_;

   SparseAdamFunctor(T beta1, T beta2, T epsilon, const T* beta1_pow,
                     const T* beta2_pow, const T* mom1, T* mom1_out,
                     const T* mom2, T* mom2_out, const T* lr, const T* grad,
                     const T* param, T* param_out, const int64_t* rows,
-                    int64_t row_numel, int64_t row_count, bool sparse_mode)
+                    int64_t row_numel, int64_t row_count, bool lazy_mode)
       : beta1_(beta1),
         beta2_(beta2),
         epsilon_(epsilon),
@@ -200,7 +200,7 @@ struct SparseAdamFunctor {
         rows_(rows),
         row_numel_(row_numel),
         row_count_(row_count),
-        sparse_mode_(sparse_mode) {}
+        lazy_mode_(lazy_mode) {}

   inline HOSTDEVICE void adam_update(size_t i, T g) const {
     // The following code is the same as dense
@@ -245,7 +245,7 @@ class AdamOpKernel : public framework::OpKernel<T> {
     using paddle::framework::LoDTensor;
     using paddle::operators::detail::Ref;

-    bool sparse_mode = ctx.Attr<bool>("sparse_mode");
+    bool lazy_mode = ctx.Attr<bool>("lazy_mode");
     T beta1 = static_cast<T>(ctx.Attr<float>("beta1"));
     T beta2 = static_cast<T>(ctx.Attr<float>("beta2"));
     T epsilon = static_cast<T>(ctx.Attr<float>("epsilon"));
@@ -357,8 +357,8 @@ class AdamOpKernel : public framework::OpKernel<T> {
             mom2_out.template mutable_data<T>(ctx.GetPlace()),
             lr.template data<T>(), grad_data, param.template data<T>(),
             param_out.template mutable_data<T>(ctx.GetPlace()), rows, row_numel,
-            grad_merge.rows().size(), sparse_mode);
-        if (sparse_mode) {
+            grad_merge.rows().size(), lazy_mode);
+        if (lazy_mode) {
           size_t row_count = grad_merge.rows().size();
           for (size_t row_index = 0; row_index < row_count; ++row_index) {
             for (size_t offset = 0; offset < row_numel; ++offset) {
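The kernel-side branch above only decides which rows get visited. A small sketch of that row selection (hypothetical helper, not code taken from the kernel): in lazy mode only the rows of the merged sparse gradient are processed, otherwise every row of the parameter is visited and missing rows are treated as having a zero gradient.

# Row selection controlled by the lazy_mode branch (illustrative helper).
def rows_to_update(grad_rows, param_height, lazy_mode):
    if lazy_mode:
        # only rows that actually appear in the sparse gradient
        return list(grad_rows)
    # otherwise all rows, with g = 0 assumed for rows without a gradient
    return list(range(param_height))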
@@ -664,7 +664,7 @@ class AdamOptimizer(Optimizer):
                  epsilon=1e-8,
                  regularization=None,
                  name=None,
-                 sparse_mode=False):
+                 lazy_mode=False):
         assert learning_rate is not None
         assert beta1 is not None
         assert beta2 is not None
@@ -677,7 +677,7 @@ class AdamOptimizer(Optimizer):
         self._beta1 = beta1
         self._beta2 = beta2
         self._epsilon = epsilon
-        self._sparse_mode = sparse_mode
+        self._lazy_mode = lazy_mode

     def _create_accumulators(self, block, parameters):
         assert isinstance(block, framework.Block)
@@ -732,7 +732,7 @@ class AdamOptimizer(Optimizer):
                 "beta1": self._beta1,
                 "beta2": self._beta2,
                 "epsilon": self._epsilon,
-                "sparse_mode": self._sparse_mode
+                "lazy_mode": self._lazy_mode
             })

         return adam_op
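On the Python side the flag is now exposed as the `lazy_mode` constructor argument of `AdamOptimizer`. A hedged usage sketch with a made-up sparse embedding model (layer sizes and variable names are assumptions; only the `lazy_mode` argument comes from this change):

import paddle.fluid as fluid

# Hypothetical model: only the embedding rows selected by `ids` receive
# gradients, which is exactly the case lazy_mode is meant to speed up.
ids = fluid.layers.data(name='ids', shape=[1], dtype='int64')
label = fluid.layers.data(name='label', shape=[1], dtype='float32')
emb = fluid.layers.embedding(input=ids, size=[10000, 64], is_sparse=True)
pred = fluid.layers.fc(input=emb, size=1)
loss = fluid.layers.mean(fluid.layers.square(pred - label))

optimizer = fluid.optimizer.AdamOptimizer(learning_rate=0.001, lazy_mode=True)
optimizer.minimize(loss)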
@@ -195,7 +195,7 @@ def adam_step(inputs, attributes):
 def adam_step_sparse(inputs, attributes, height, rows, row_numel, np_grad,
-                     sparse_mode):
+                     lazy_mode):
     '''
     Simulate one step of the adam optimizer
     :param inputs: dict of inputs
@@ -231,7 +231,7 @@ def adam_step_sparse(inputs, attributes, height, rows, row_numel, np_grad,
 class TestSparseAdamOp(unittest.TestCase):
-    def setup(self, scope, place, sparse_mode):
+    def setup(self, scope, place, lazy_mode):
         beta1 = 0.78
         beta2 = 0.836
         epsilon = 1e-4
@@ -265,19 +265,19 @@ class TestSparseAdamOp(unittest.TestCase):
         param_out, mom1, mom2 = adam_step_sparse(self.dense_inputs, self.attrs,
                                                  height, rows, row_numel,
-                                                 np_array, sparse_mode)
+                                                 np_array, lazy_mode)
         self.outputs = {
             "ParamOut": param_out,
             "Moment1Out": mom1,
             "Moment2Out": mom2
         }

-    def check_with_place(self, place, sparse_mode):
+    def check_with_place(self, place, lazy_mode):
         scope = core.Scope()
-        self.setup(scope, place, sparse_mode)
+        self.setup(scope, place, lazy_mode)

         op_args = dict()
-        op_args['sparse_mode'] = sparse_mode
+        op_args['lazy_mode'] = lazy_mode
         for key, np_array in self.dense_inputs.items():
             var = scope.var(key).get_tensor()
             var.set(np_array, place)
@@ -313,8 +313,8 @@ class TestSparseAdamOp(unittest.TestCase):
         if core.is_compiled_with_cuda():
             places.append(core.CUDAPlace(0))
         for place in places:
-            for sparse_mode in (True, False):
-                self.check_with_place(place, sparse_mode)
+            for lazy_mode in (True, False):
+                self.check_with_place(place, lazy_mode)

 if __name__ == "__main__":