From 44b300556dcdf26aa159bc31107355e8b3853d86 Mon Sep 17 00:00:00 2001 From: Qiao Longfei Date: Mon, 7 Jan 2019 17:34:52 +0800 Subject: [PATCH] change min_row_size_to_use_multithread to parameter of adam test=develop --- paddle/fluid/framework/operator.cc | 1 - paddle/fluid/framework/operator.h | 1 - paddle/fluid/operators/optimizers/adam_op.cc | 7 +++++++ paddle/fluid/operators/optimizers/adam_op.h | 8 +++++--- python/paddle/fluid/__init__.py | 3 +-- python/paddle/fluid/optimizer.py | 10 ++++++++-- python/paddle/fluid/tests/unittests/CMakeLists.txt | 2 +- python/paddle/fluid/tests/unittests/test_adam_op.py | 7 ++++++- 8 files changed, 28 insertions(+), 11 deletions(-) diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index 9cb2b5ee7..afece8e3d 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -30,7 +30,6 @@ DEFINE_bool(check_nan_inf, false, "Checking whether operator produce NAN/INF or not. It will be " "extremely slow so please use this flag wisely."); DEFINE_int32(inner_op_parallelism, 0, "number of threads for inner op"); -DEFINE_int32(min_row_size_to_use_multithread, 0, ""); namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/operator.h b/paddle/fluid/framework/operator.h index 2962dff12..dd672c479 100644 --- a/paddle/fluid/framework/operator.h +++ b/paddle/fluid/framework/operator.h @@ -35,7 +35,6 @@ limitations under the License. 
*/ #include "paddle/fluid/platform/variant.h" DECLARE_int32(inner_op_parallelism); -DECLARE_int32(min_row_size_to_use_multithread); namespace paddle { namespace framework { diff --git a/paddle/fluid/operators/optimizers/adam_op.cc b/paddle/fluid/operators/optimizers/adam_op.cc index e9c395a93..955f9f455 100644 --- a/paddle/fluid/operators/optimizers/adam_op.cc +++ b/paddle/fluid/operators/optimizers/adam_op.cc @@ -114,6 +114,13 @@ class AdamOpMaker : public framework::OpProtoAndCheckerMaker { "(bool, default false) " "only update the parameter that has gradient in sparse update") .SetDefault(false); + AddAttr<int64_t>("min_row_size_to_use_multithread", + "(int64_t, default 0) " + "when not zero, if param row size is larger than " + "min_row_size_to_use_multithread and " + "inner_op_parallelism is larger than 0, sparse update " + "will run in multithread mode") + .SetDefault(0); AddComment(R"DOC( Adam Optimizer. diff --git a/paddle/fluid/operators/optimizers/adam_op.h b/paddle/fluid/operators/optimizers/adam_op.h index 9cd790687..2c16a02f6 100644 --- a/paddle/fluid/operators/optimizers/adam_op.h +++ b/paddle/fluid/operators/optimizers/adam_op.h @@ -354,6 +354,8 @@ class AdamOpKernel : public framework::OpKernel<T> { using paddle::framework::LoDTensor; using paddle::operators::detail::Ref; + int64_t min_row_size_to_use_multithread = + ctx.Attr<int64_t>("min_row_size_to_use_multithread"); bool lazy_mode = ctx.Attr<bool>("lazy_mode"); T beta1 = static_cast<T>(ctx.Attr<float>("beta1")); T beta2 = static_cast<T>(ctx.Attr<float>("beta2")); @@ -478,12 +480,12 @@ class AdamOpKernel : public framework::OpKernel<T> { } } } else if (FLAGS_inner_op_parallelism > 1 && - FLAGS_min_row_size_to_use_multithread > 0 && - param.dims()[0] > FLAGS_min_row_size_to_use_multithread) { + min_row_size_to_use_multithread > 0 && + param.dims()[0] > min_row_size_to_use_multithread) { VLOG(3) << "use multi thread, inner_op_parallelism=" << FLAGS_inner_op_parallelism << " min_row_size_to_use_multithread=" - <<
FLAGS_min_row_size_to_use_multithread; + << min_row_size_to_use_multithread; if (FLAGS_inner_op_parallelism > 10) { LOG(WARNING) << "FLAGS_inner_op_parallelism " << FLAGS_inner_op_parallelism << " is two large!"; diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py index b577dfc3e..812694d99 100644 --- a/python/paddle/fluid/__init__.py +++ b/python/paddle/fluid/__init__.py @@ -129,8 +129,7 @@ def __bootstrap__(): 'eager_delete_tensor_gb', 'fast_eager_deletion_mode', 'allocator_strategy', 'reader_queue_speed_test_mode', 'print_sub_graph_dir', 'pe_profile_fname', 'warpctc_dir', - 'inner_op_parallelism', 'min_row_size_to_use_multithread', - 'enable_parallel_graph' + 'inner_op_parallelism', 'enable_parallel_graph' ] if 'Darwin' not in sysstr: read_env_flags.append('use_pinned_memory') diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index 779cb5f96..64d7fd082 100644 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -674,6 +674,8 @@ class AdamOptimizer(Optimizer): may be very slow. The lazy mode only update the element that has gradient is the current mini-batch, so it will be much more faster. But this mode has different semantics with the original Adam algorithm and may lead to different result. + min_row_size_to_use_multithread: if Adam uses sparse update and the param has a very large number of rows, + you can use FLAGS_inner_op_parallelism together with this parameter to enable multi-threaded optimization. Examples: .. 
code-block:: python @@ -694,7 +696,8 @@ class AdamOptimizer(Optimizer): epsilon=1e-8, regularization=None, name=None, - lazy_mode=False): + lazy_mode=False, + min_row_size_to_use_multithread=0): assert learning_rate is not None assert beta1 is not None assert beta2 is not None @@ -708,6 +711,7 @@ class AdamOptimizer(Optimizer): self._beta2 = beta2 self._epsilon = epsilon self._lazy_mode = lazy_mode + self._min_row_size_to_use_multithread = min_row_size_to_use_multithread def _create_accumulators(self, block, parameters): assert isinstance(block, framework.Block) @@ -762,7 +766,9 @@ class AdamOptimizer(Optimizer): "beta1": self._beta1, "beta2": self._beta2, "epsilon": self._epsilon, - "lazy_mode": self._lazy_mode + "lazy_mode": self._lazy_mode, + "min_row_size_to_use_multithread": + self._min_row_size_to_use_multithread }, stop_gradient=True) diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index ac092e19b..4f7111df4 100644 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -87,7 +87,7 @@ list(REMOVE_ITEM TEST_OPS test_nearest_interp_op) foreach(TEST_OP ${TEST_OPS}) py_test_modules(${TEST_OP} MODULES ${TEST_OP}) endforeach(TEST_OP) -py_test_modules(test_adam_op_multi_thread MODULES test_adam_op ENVS FLAGS_inner_op_parallelism=4 FLAGS_min_row_size_to_use_multithread=2) +py_test_modules(test_adam_op_multi_thread MODULES test_adam_op ENVS FLAGS_inner_op_parallelism=4) py_test_modules(test_warpctc_op MODULES test_warpctc_op ENVS FLAGS_warpctc_dir=${WARPCTC_LIB_DIR} SERIAL) py_test_modules(test_bilinear_interp_op MODULES test_bilinear_interp_op SERIAL) py_test_modules(test_nearest_interp_op MODULES test_nearest_interp_op SERIAL) diff --git a/python/paddle/fluid/tests/unittests/test_adam_op.py b/python/paddle/fluid/tests/unittests/test_adam_op.py index 463a0655a..2f4fc5772 100644 --- a/python/paddle/fluid/tests/unittests/test_adam_op.py +++ 
b/python/paddle/fluid/tests/unittests/test_adam_op.py @@ -261,7 +261,12 @@ class TestSparseAdamOp(unittest.TestCase): "LearningRate": np.full((1), 2.0).astype("float32") } self.init_output = np.full((height, row_numel), 0.0).astype("float32") - self.attrs = {'epsilon': epsilon, 'beta1': beta1, 'beta2': beta2} + self.attrs = { + 'epsilon': epsilon, + 'beta1': beta1, + 'beta2': beta2, + 'min_row_size_to_use_multithread': 2 + } grad_selected_rows = scope.var('Grad').get_selected_rows() grad_selected_rows.set_height(height) -- GitLab