Commit 8c516a24 authored by: Qiao Longfei

remove min_row_size_to_use_multithread in adam interface test=develop

Parent 7fd15ce5
@@ -418,7 +418,7 @@ paddle.fluid.optimizer.AdagradOptimizer.__init__ ArgSpec(args=['self', 'learning
paddle.fluid.optimizer.AdagradOptimizer.apply_gradients ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None)
paddle.fluid.optimizer.AdagradOptimizer.backward ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None))
paddle.fluid.optimizer.AdagradOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.optimizer.AdamOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'beta1', 'beta2', 'epsilon', 'regularization', 'name', 'lazy_mode', 'min_row_size_to_use_multithread'], varargs=None, keywords=None, defaults=(0.001, 0.9, 0.999, 1e-08, None, None, False, 0))
paddle.fluid.optimizer.AdamOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'beta1', 'beta2', 'epsilon', 'regularization', 'name', 'lazy_mode'], varargs=None, keywords=None, defaults=(0.001, 0.9, 0.999, 1e-08, None, None, False))
paddle.fluid.optimizer.AdamOptimizer.apply_gradients ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None)
paddle.fluid.optimizer.AdamOptimizer.backward ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None))
paddle.fluid.optimizer.AdamOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
......
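As the API spec change above shows, `min_row_size_to_use_multithread` is dropped from the Python constructor while `lazy_mode` keeps its place and default. A minimal sketch of the updated call, assuming the fluid 1.x API of this branch (the hyper-parameter values are just the documented defaults, not taken from this commit):

```python
import paddle.fluid as fluid

# New signature: min_row_size_to_use_multithread is no longer accepted here;
# the multithread threshold now lives only on the C++ op attribute.
adam = fluid.optimizer.AdamOptimizer(
    learning_rate=0.001,
    beta1=0.9,
    beta2=0.999,
    epsilon=1e-8,
    lazy_mode=False)
```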
@@ -120,7 +120,7 @@ class AdamOpMaker : public framework::OpProtoAndCheckerMaker {
"min_row_size_to_use_multithread and "
"inner_op_parallelism is larger then 0, sparse update "
"will run in multithread mode")
.SetDefault(0);
.SetDefault(1000);
AddComment(R"DOC(
Adam Optimizer.
......
@@ -494,16 +494,16 @@ class AdamOpKernel : public framework::OpKernel<T> {
<< " min_row_size_to_use_multithread="
<< min_row_size_to_use_multithread;
if (FLAGS_inner_op_parallelism > 10) {
LOG(WARNING) << "FLAGS_inner_op_parallelism "
<< FLAGS_inner_op_parallelism << " is two large!";
VLOG(1) << "FLAGS_inner_op_parallelism "
<< FLAGS_inner_op_parallelism << " is two large!";
}
auto& grad_rows = grad_merge.rows();
std::unordered_map<size_t, int> row_id_to_grad_row_offset;
size_t param_row_count = param.numel() / row_numel;
if (param_row_count < 1000) {
LOG(WARNING) << "param_row_count should be larger then 1000 to use "
"multi thread, currently "
<< param_row_count;
VLOG(1) << "param_row_count should be larger then 1000 to use "
"multi thread, currently "
<< param_row_count;
}
for (size_t i = 0; i < grad_rows.size(); ++i) {
row_id_to_grad_row_offset[grad_rows[i]] = i;
......
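With this change the kernel only logs the two sanity checks at VLOG(1) instead of LOG(WARNING), and the row-count threshold comes from the op attribute (now defaulting to 1000) rather than a Python argument; the multithreaded sparse update still requires FLAGS_inner_op_parallelism to be positive. A hedged sketch of setting that flag from Python, assuming this Paddle build picks it up from the environment at import time (the value 4 is arbitrary):

```python
import os

# Assumption: FLAGS_inner_op_parallelism is read from the environment when
# paddle.fluid is imported, so it must be set before the import; otherwise
# pass the flag on the command line instead.
os.environ['FLAGS_inner_op_parallelism'] = '4'

import paddle.fluid as fluid  # noqa: E402  (import deliberately after the flag)
```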
......@@ -734,8 +734,6 @@ class AdamOptimizer(Optimizer):
may be very slow. The lazy mode only updates the elements that have gradients in the current
mini-batch, so it will be much faster. But this mode has different semantics from the
original Adam algorithm and may lead to different results.
min_row_size_to_use_multithread: if adam uses sparse update and the param rows are very large,
you can use FLAGS_inner_op_parallelism and this flag to enable multi-thread optimization.
Examples:
.. code-block:: python
@@ -756,8 +754,7 @@ class AdamOptimizer(Optimizer):
epsilon=1e-8,
regularization=None,
name=None,
lazy_mode=False,
min_row_size_to_use_multithread=0):
lazy_mode=False):
assert learning_rate is not None
assert beta1 is not None
assert beta2 is not None
@@ -771,7 +768,6 @@ class AdamOptimizer(Optimizer):
self._beta2 = beta2
self._epsilon = epsilon
self._lazy_mode = lazy_mode
self._min_row_size_to_use_multithread = min_row_size_to_use_multithread
def _create_accumulators(self, block, parameters):
assert isinstance(block, framework.Block)
@@ -826,9 +822,7 @@ class AdamOptimizer(Optimizer):
"beta1": self._beta1,
"beta2": self._beta2,
"epsilon": self._epsilon,
"lazy_mode": self._lazy_mode,
"min_row_size_to_use_multithread":
self._min_row_size_to_use_multithread
"lazy_mode": self._lazy_mode
},
stop_gradient=True)
......
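The docstring retained above explains that lazy mode only updates the rows that actually receive gradients in the current mini-batch, which mainly matters for sparse lookups. A hedged usage sketch, with an illustrative embedding whose shape and names are assumptions rather than part of this commit:

```python
import paddle.fluid as fluid

# Sparse embedding lookups produce selected-rows gradients, the case lazy mode targets.
word = fluid.layers.data(name='word', shape=[1], dtype='int64')
emb = fluid.layers.embedding(input=word, size=[10000, 32], is_sparse=True)
loss = fluid.layers.mean(emb)

adam = fluid.optimizer.AdamOptimizer(learning_rate=0.001, lazy_mode=True)
adam.minimize(loss)
```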