Commit 44b30055 authored by: Qiao Longfei

change min_row_size_to_use_multithread to a parameter of adam

test=develop
Parent 87b4eb1d
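In user-facing terms, the row-size threshold moves from a process-wide gflag to an argument of the Adam optimizer, while the thread count is still supplied through the FLAGS_inner_op_parallelism flag. A minimal usage sketch (illustrative only; just the names AdamOptimizer, min_row_size_to_use_multithread, and FLAGS_inner_op_parallelism come from this change, the surrounding setup is assumed):

```python
import os

# inner_op_parallelism is still a global flag; it has to be in the environment
# before paddle.fluid bootstraps and reads its FLAGS_* variables.
os.environ["FLAGS_inner_op_parallelism"] = "4"

import paddle.fluid as fluid

# The row-size threshold is now an AdamOptimizer argument (and an attribute of
# the adam op) instead of the removed FLAGS_min_row_size_to_use_multithread.
optimizer = fluid.optimizer.AdamOptimizer(
    learning_rate=0.001,
    min_row_size_to_use_multithread=1000)  # illustrative threshold value
```

Sparse updates for a parameter whose row count exceeds this threshold can then run across the configured threads, as implemented in the AdamOpKernel change below.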
......@@ -30,7 +30,6 @@ DEFINE_bool(check_nan_inf, false,
"Checking whether operator produce NAN/INF or not. It will be "
"extremely slow so please use this flag wisely.");
DEFINE_int32(inner_op_parallelism, 0, "number of threads for inner op");
- DEFINE_int32(min_row_size_to_use_multithread, 0, "");
namespace paddle {
namespace framework {
......
......@@ -35,7 +35,6 @@ limitations under the License. */
#include "paddle/fluid/platform/variant.h"
DECLARE_int32(inner_op_parallelism);
- DECLARE_int32(min_row_size_to_use_multithread);
namespace paddle {
namespace framework {
......
......@@ -114,6 +114,13 @@ class AdamOpMaker : public framework::OpProtoAndCheckerMaker {
"(bool, default false) "
"only update the parameter that has gradient in sparse update")
.SetDefault(false);
+ AddAttr<int64_t>("min_row_size_to_use_multithread",
+ "(int64_t, default 0) "
+ "when not zero, if param row size is larger than "
+ "min_row_size_to_use_multithread and "
+ "inner_op_parallelism is larger than 0, sparse update "
+ "will run in multithread mode")
+ .SetDefault(0);
AddComment(R"DOC(
Adam Optimizer.
......
......@@ -354,6 +354,8 @@ class AdamOpKernel : public framework::OpKernel<T> {
using paddle::framework::LoDTensor;
using paddle::operators::detail::Ref;
+ int64_t min_row_size_to_use_multithread =
+ ctx.Attr<int64_t>("min_row_size_to_use_multithread");
bool lazy_mode = ctx.Attr<bool>("lazy_mode");
T beta1 = static_cast<T>(ctx.Attr<float>("beta1"));
T beta2 = static_cast<T>(ctx.Attr<float>("beta2"));
......@@ -478,12 +480,12 @@ class AdamOpKernel : public framework::OpKernel<T> {
}
}
} else if (FLAGS_inner_op_parallelism > 1 &&
- FLAGS_min_row_size_to_use_multithread > 0 &&
- param.dims()[0] > FLAGS_min_row_size_to_use_multithread) {
+ min_row_size_to_use_multithread > 0 &&
+ param.dims()[0] > min_row_size_to_use_multithread) {
VLOG(3) << "use multi thread, inner_op_parallelism="
<< FLAGS_inner_op_parallelism
<< " min_row_size_to_use_multithread="
- << FLAGS_min_row_size_to_use_multithread;
+ << min_row_size_to_use_multithread;
if (FLAGS_inner_op_parallelism > 10) {
LOG(WARNING) << "FLAGS_inner_op_parallelism "
<< FLAGS_inner_op_parallelism << " is too large!";
......
......@@ -129,8 +129,7 @@ def __bootstrap__():
'eager_delete_tensor_gb', 'fast_eager_deletion_mode',
'allocator_strategy', 'reader_queue_speed_test_mode',
'print_sub_graph_dir', 'pe_profile_fname', 'warpctc_dir',
- 'inner_op_parallelism', 'min_row_size_to_use_multithread',
- 'enable_parallel_graph'
+ 'inner_op_parallelism', 'enable_parallel_graph'
]
if 'Darwin' not in sysstr:
read_env_flags.append('use_pinned_memory')
......
......@@ -674,6 +674,8 @@ class AdamOptimizer(Optimizer):
may be very slow. The lazy mode only updates the elements that have gradients in the current
mini-batch, so it will be much faster. But this mode has different semantics from the
original Adam algorithm and may lead to different results.
+ min_row_size_to_use_multithread: if adam uses sparse update and the param has a very large number of rows,
+ you can use FLAGS_inner_op_parallelism together with this attribute to enable multi-threaded updates.
Examples:
.. code-block:: python
......@@ -694,7 +696,8 @@ class AdamOptimizer(Optimizer):
epsilon=1e-8,
regularization=None,
name=None,
- lazy_mode=False):
+ lazy_mode=False,
+ min_row_size_to_use_multithread=0):
assert learning_rate is not None
assert beta1 is not None
assert beta2 is not None
......@@ -708,6 +711,7 @@ class AdamOptimizer(Optimizer):
self._beta2 = beta2
self._epsilon = epsilon
self._lazy_mode = lazy_mode
+ self._min_row_size_to_use_multithread = min_row_size_to_use_multithread
def _create_accumulators(self, block, parameters):
assert isinstance(block, framework.Block)
......@@ -762,7 +766,9 @@ class AdamOptimizer(Optimizer):
"beta1": self._beta1,
"beta2": self._beta2,
"epsilon": self._epsilon,
"lazy_mode": self._lazy_mode
"lazy_mode": self._lazy_mode,
"min_row_size_to_use_multithread":
self._min_row_size_to_use_multithread
},
stop_gradient=True)
......
......@@ -87,7 +87,7 @@ list(REMOVE_ITEM TEST_OPS test_nearest_interp_op)
foreach(TEST_OP ${TEST_OPS})
py_test_modules(${TEST_OP} MODULES ${TEST_OP})
endforeach(TEST_OP)
- py_test_modules(test_adam_op_multi_thread MODULES test_adam_op ENVS FLAGS_inner_op_parallelism=4 FLAGS_min_row_size_to_use_multithread=2)
+ py_test_modules(test_adam_op_multi_thread MODULES test_adam_op ENVS FLAGS_inner_op_parallelism=4)
py_test_modules(test_warpctc_op MODULES test_warpctc_op ENVS FLAGS_warpctc_dir=${WARPCTC_LIB_DIR} SERIAL)
py_test_modules(test_bilinear_interp_op MODULES test_bilinear_interp_op SERIAL)
py_test_modules(test_nearest_interp_op MODULES test_nearest_interp_op SERIAL)
......
......@@ -261,7 +261,12 @@ class TestSparseAdamOp(unittest.TestCase):
"LearningRate": np.full((1), 2.0).astype("float32")
}
self.init_output = np.full((height, row_numel), 0.0).astype("float32")
- self.attrs = {'epsilon': epsilon, 'beta1': beta1, 'beta2': beta2}
+ self.attrs = {
+ 'epsilon': epsilon,
+ 'beta1': beta1,
+ 'beta2': beta2,
+ 'min_row_size_to_use_multithread': 2
+ }
grad_selected_rows = scope.var('Grad').get_selected_rows()
grad_selected_rows.set_height(height)
......
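For completeness, a rough local equivalent of the test_adam_op_multi_thread target above, assuming it is launched from the directory that contains test_adam_op.py: only the parallelism flag is exported now, and the row-size threshold comes from the op attrs set inside the test.

```python
import os

# Must be set before paddle.fluid is imported by the test module.
os.environ["FLAGS_inner_op_parallelism"] = "4"

import unittest
import test_adam_op  # assumes the unittests directory is on sys.path

if __name__ == "__main__":
    unittest.main(module=test_adam_op)
```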