From 6d1e03a2164b0dba7297d978e5b89c2cd8676e0f Mon Sep 17 00:00:00 2001
From: zyfncg
Date: Wed, 13 Apr 2022 15:06:27 +0800
Subject: [PATCH] Add yaml and unittest for SGD (#41485)

* add sgd yaml

* change python api

* open eager mode in sgd

* fix bug
---
 paddle/fluid/operators/optimizers/sgd_op.cc   |   2 +-
 paddle/phi/api/lib/api_custom_impl.cc         | 170 ++++++++++++++++++
 paddle/phi/api/lib/api_custom_impl.h          |   8 +
 paddle/phi/api/lib/api_gen_utils.cc           |  28 ++-
 paddle/phi/api/lib/api_gen_utils.h            |   8 +-
 paddle/phi/infermeta/multiary.cc              |   2 +-
 paddle/phi/infermeta/multiary.h               |   2 +-
 python/paddle/fluid/optimizer.py              |   7 +-
 .../fluid/tests/unittests/test_sgd_op.py      |   6 +
 python/paddle/optimizer/sgd.py                |   7 +-
 python/paddle/utils/code_gen/api.yaml         |   6 +
 11 files changed, 234 insertions(+), 12 deletions(-)

diff --git a/paddle/fluid/operators/optimizers/sgd_op.cc b/paddle/fluid/operators/optimizers/sgd_op.cc
index f51d776d71..a2af131cb5 100644
--- a/paddle/fluid/operators/optimizers/sgd_op.cc
+++ b/paddle/fluid/operators/optimizers/sgd_op.cc
@@ -126,7 +126,7 @@ $$param\_out = param - learning\_rate * grad$$
 
 namespace ops = paddle::operators;
 DECLARE_INFER_SHAPE_FUNCTOR(sgd, SGDInferShapeFunctor,
-                            PD_INFER_META(phi::SGDInferMeta));
+                            PD_INFER_META(phi::SgdInferMeta));
 REGISTER_OPERATOR(
     sgd, ops::SGDOp, ops::SGDOpMaker,
     paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
diff --git a/paddle/phi/api/lib/api_custom_impl.cc b/paddle/phi/api/lib/api_custom_impl.cc
index d7f148fff8..2b80094a39 100644
--- a/paddle/phi/api/lib/api_custom_impl.cc
+++ b/paddle/phi/api/lib/api_custom_impl.cc
@@ -656,6 +656,176 @@ std::tuple<Tensor, Tensor, Tensor> momentum_impl(
   return api_output;
 }
 
+std::tuple<Tensor, Tensor> sgd_impl(
+    const Tensor& param,
+    const Tensor& learning_rate,
+    const Tensor& grad,
+    paddle::optional<const Tensor&> master_param,
+    bool multi_precision) {
+  DataType kernel_data_type = ParseDataType(param);
+  auto kernel_key_set = ParseKernelKeyByInputArgs(param, learning_rate, grad);
+  auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey();
+  VLOG(6) << "sgd API kernel key: [" << kernel_key.backend() << ", "
+          << kernel_key.layout() << ", " << kernel_data_type << "]";
+
+  const auto& param_tensor = param.impl();
+  std::string kernel_name = "sgd";
+  if (phi::DenseTensor::classof(param_tensor.get())) {
+    if (!phi::DenseTensor::classof(grad.impl().get())) {
+      kernel_name = "sgd_dense_param_sparse_grad";
+    }
+  } else {
+    kernel_name = "sgd_sparse_param_sparse_grad";
+  }
+  const auto& kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
+      kernel_name,
+      {kernel_key.backend(), kernel_key.layout(), kernel_data_type});
+  VLOG(6) << kernel_name << " API kernel: " << kernel;
+
+  auto* dev_ctx = GetDeviceContextByBackend(kernel_key.backend());
+
+  auto in_learning_rate =
+      PrepareData(learning_rate, kernel.InputAt(1), {false, true, true, true});
+
+  std::tuple<Tensor, Tensor> out;
+  std::get<0>(out) = param;
+  if (master_param) {
+    std::get<1>(out) = *master_param;
+  }
+  phi::MetaTensor meta_out_0(std::get<0>(out).impl().get());
+  phi::MetaTensor meta_out_1(master_param ? std::get<1>(out).impl().get()
+                                          : nullptr);
+
+  if (phi::DenseTensor::classof(param_tensor.get())) {
+    auto in_param = PrepareData(param, kernel.InputAt(0), {});
+    auto in_master_param = PrepareData(master_param, kernel.InputAt(3), {});
+
+    paddle::optional<const phi::DenseTensor&> in_master_param_opt =
+        master_param
+            ? paddle::make_optional<const phi::DenseTensor&>(*in_master_param)
+            : paddle::none;
+    auto master_param_meta = MakeMetaTensor(in_master_param_opt);
+    paddle::optional<const phi::MetaTensor&> master_param_meta_opt =
+        master_param
+            ? paddle::make_optional<const phi::MetaTensor&>(*master_param_meta)
+            : paddle::none;
+
+    phi::DenseTensor* kernel_out_0 =
+        SetKernelOutput(kernel_key.backend(), &std::get<0>(out));
+    phi::DenseTensor* kernel_out_1 =
+        master_param
+            ? static_cast<phi::DenseTensor*>(std::get<1>(out).impl().get())
+            : nullptr;
+
+    if (phi::DenseTensor::classof(grad.impl().get())) {
+      auto in_grad = PrepareData(grad, kernel.InputAt(2), {});
+      SgdInferMeta(MakeMetaTensor(*in_param),
+                   MakeMetaTensor(*in_learning_rate),
+                   MakeMetaTensor(*in_grad),
+                   master_param_meta_opt,
+                   multi_precision,
+                   &meta_out_0,
+                   &meta_out_1);
+
+      using kernel_signature =
+          void (*)(const platform::DeviceContext&,
+                   const phi::DenseTensor&,
+                   const phi::DenseTensor&,
+                   const phi::DenseTensor&,
+                   paddle::optional<const phi::DenseTensor&>,
+                   bool,
+                   phi::DenseTensor*,
+                   phi::DenseTensor*);
+
+      auto* kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();
+      (*kernel_fn)(*dev_ctx,
+                   *in_param,
+                   *in_learning_rate,
+                   *in_grad,
+                   in_master_param_opt,
+                   multi_precision,
+                   kernel_out_0,
+                   kernel_out_1);
+    } else {
+      auto in_grad = TensorToSelectedRows(grad);
+      SgdInferMeta(MakeMetaTensor(*in_param),
+                   MakeMetaTensor(*in_learning_rate),
+                   MakeMetaTensor(*in_grad),
+                   master_param_meta_opt,
+                   multi_precision,
+                   &meta_out_0,
+                   &meta_out_1);
+
+      using kernel_signature =
+          void (*)(const platform::DeviceContext&,
+                   const phi::DenseTensor&,
+                   const phi::DenseTensor&,
+                   const phi::SelectedRows&,
+                   paddle::optional<const phi::DenseTensor&>,
+                   bool,
+                   phi::DenseTensor*,
+                   phi::DenseTensor*);
+      auto* kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();
+      (*kernel_fn)(*dev_ctx,
+                   *in_param,
+                   *in_learning_rate,
+                   *in_grad,
+                   in_master_param_opt,
+                   multi_precision,
+                   kernel_out_0,
+                   kernel_out_1);
+    }
+  } else {
+    auto in_param = TensorToSelectedRows(param);
+    auto in_grad = TensorToSelectedRows(grad);
+    auto in_master_param = TensorToSelectedRows(master_param);
+    auto in_master_param_opt =
+        master_param
+            ? paddle::make_optional<const phi::SelectedRows&>(*in_master_param)
+            : paddle::none;
+    auto master_param_meta = MakeMetaTensor(in_master_param_opt);
+    paddle::optional<const phi::MetaTensor&> master_param_meta_opt =
+        master_param
+            ? paddle::make_optional<const phi::MetaTensor&>(*master_param_meta)
+            : paddle::none;
+
+    phi::SelectedRows* kernel_out_0 =
+        SetSelectedRowsKernelOutput(kernel_key.backend(), &std::get<0>(out));
+    phi::SelectedRows* kernel_out_1 =
+        master_param
+            ? static_cast<phi::SelectedRows*>(std::get<1>(out).impl().get())
+            : nullptr;
+
+    SgdInferMeta(MakeMetaTensor(*in_param),
+                 MakeMetaTensor(*in_learning_rate),
+                 MakeMetaTensor(*in_grad),
+                 master_param_meta_opt,
+                 multi_precision,
+                 &meta_out_0,
+                 &meta_out_1);
+
+    using kernel_signature =
+        void (*)(const platform::DeviceContext&,
+                 const phi::SelectedRows&,
+                 const phi::DenseTensor&,
+                 const phi::SelectedRows&,
+                 paddle::optional<const phi::SelectedRows&>,
+                 bool,
+                 phi::SelectedRows*,
+                 phi::SelectedRows*);
+    auto* kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();
+    (*kernel_fn)(*dev_ctx,
+                 *in_param,
+                 *in_learning_rate,
+                 *in_grad,
+                 in_master_param_opt,
+                 multi_precision,
+                 kernel_out_0,
+                 kernel_out_1);
+  }
+  return out;
+}
+
 ////////////////// Backward(grad) api impls //////////////////////
 
 // TODO(chenweihang): the original sum grad op can support higher-level
diff --git a/paddle/phi/api/lib/api_custom_impl.h b/paddle/phi/api/lib/api_custom_impl.h
index 5d46ed6918..4ddc3e5f4e 100644
--- a/paddle/phi/api/lib/api_custom_impl.h
+++ b/paddle/phi/api/lib/api_custom_impl.h
@@ -14,6 +14,7 @@ limitations under the License. */
 
 #pragma once
 
+#include <tuple>
 #include "paddle/phi/api/include/tensor.h"
 
@@ -107,6 +108,13 @@ std::tuple<Tensor, Tensor, Tensor> momentum_impl(
     bool multi_precision,
     float rescale_grad);
 
+std::tuple<Tensor, Tensor> sgd_impl(
+    const Tensor& param,
+    const Tensor& learning_rate,
+    const Tensor& grad,
+    paddle::optional<const Tensor&> master_param,
+    bool multi_precision);
+
 ////////////////// Backward(grad) api impls //////////////////////
 
 std::vector<Tensor> add_n_grad_impl(const std::vector<Tensor>& x,
diff --git a/paddle/phi/api/lib/api_gen_utils.cc b/paddle/phi/api/lib/api_gen_utils.cc
index f9db152956..e0c910ba3d 100644
--- a/paddle/phi/api/lib/api_gen_utils.cc
+++ b/paddle/phi/api/lib/api_gen_utils.cc
@@ -20,13 +20,13 @@ namespace experimental {
 
 /* ------------------ for input ----------------------- */
 
 std::shared_ptr<phi::DenseTensor> TensorToDenseTensor(const Tensor& tensor) {
-  return std::dynamic_pointer_cast<phi::DenseTensor>(tensor.impl());
+  return std::static_pointer_cast<phi::DenseTensor>(tensor.impl());
 }
 
 std::shared_ptr<phi::DenseTensor> TensorToDenseTensor(
-    const paddle::optional<Tensor>& tensor) {
+    const paddle::optional<const Tensor&>& tensor) {
   if (tensor) {
-    return std::dynamic_pointer_cast<phi::DenseTensor>(tensor->impl());
+    return std::static_pointer_cast<phi::DenseTensor>(tensor->impl());
   }
   return nullptr;
 }
@@ -45,13 +45,13 @@ std::unique_ptr<std::vector<phi::DenseTensor>> TensorToDenseTensor(
 }
 
 std::shared_ptr<phi::SelectedRows> TensorToSelectedRows(const Tensor& tensor) {
-  return std::dynamic_pointer_cast<phi::SelectedRows>(tensor.impl());
+  return std::static_pointer_cast<phi::SelectedRows>(tensor.impl());
 }
 
 std::shared_ptr<phi::SelectedRows> TensorToSelectedRows(
-    const paddle::optional<Tensor>& tensor) {
+    const paddle::optional<const Tensor&>& tensor) {
   if (tensor) {
-    return std::dynamic_pointer_cast<phi::SelectedRows>(tensor->impl());
+    return std::static_pointer_cast<phi::SelectedRows>(tensor->impl());
   }
   return nullptr;
 }
@@ -66,6 +66,14 @@ phi::MetaTensor MakeMetaTensor(const phi::DenseTensor& tensor) {
   return phi::MetaTensor(tensor);
 }
 
+paddle::optional<phi::MetaTensor> MakeMetaTensor(
+    const paddle::optional<const phi::DenseTensor&>& tensor) {
+  if (tensor) {
+    return {phi::MetaTensor(*tensor)};
+  }
+  return {paddle::none};
+}
+
 std::vector<phi::MetaTensor> MakeMetaTensor(
     const std::vector<phi::DenseTensor>& tensors) {
   std::vector<phi::MetaTensor> meta_tensors;
@@ -90,6 +98,14 @@ phi::MetaTensor MakeMetaTensor(const phi::SelectedRows& tensor) {
   return phi::MetaTensor(tensor);
 }
 
+paddle::optional<phi::MetaTensor> MakeMetaTensor(
+    const paddle::optional<const phi::SelectedRows&>& tensor) {
+  if (tensor) {
+    return {phi::MetaTensor(*tensor)};
+  }
+  return {paddle::none};
+}
+
 phi::MetaTensor MakeMetaTensor(const phi::StringTensor& tensor) {
   return phi::MetaTensor(tensor);
 }
diff --git a/paddle/phi/api/lib/api_gen_utils.h b/paddle/phi/api/lib/api_gen_utils.h
index 035dfc5204..47b80bb3fc 100644
--- a/paddle/phi/api/lib/api_gen_utils.h
+++ b/paddle/phi/api/lib/api_gen_utils.h
@@ -42,7 +42,7 @@ std::unique_ptr<std::vector<phi::DenseTensor>> TensorToDenseTensor(
 std::shared_ptr<phi::SelectedRows> TensorToSelectedRows(const Tensor& tensor);
 
 std::shared_ptr<phi::SelectedRows> TensorToSelectedRows(
-    const paddle::optional<Tensor>& tensor);
+    const paddle::optional<const Tensor&>& tensor);
 
 std::shared_ptr<phi::StringTensor> TensorToStringTensor(const Tensor& tensor);
 
@@ -50,6 +50,9 @@ std::shared_ptr<phi::StringTensor> TensorToStringTensor(const Tensor& tensor);
 
 phi::MetaTensor MakeMetaTensor(const phi::DenseTensor& tensor);
 
+paddle::optional<phi::MetaTensor> MakeMetaTensor(
+    const paddle::optional<const phi::DenseTensor&>& tensor);
+
 std::vector<phi::MetaTensor> MakeMetaTensor(
     const std::vector<phi::DenseTensor>& tensors);
 
@@ -58,6 +61,9 @@ std::vector<phi::MetaTensor> MakeMetaTensor(
 
 phi::MetaTensor MakeMetaTensor(const phi::SelectedRows& tensor);
 
+paddle::optional<phi::MetaTensor> MakeMetaTensor(
+    const paddle::optional<const phi::SelectedRows&>& tensor);
+
 phi::MetaTensor MakeMetaTensor(const phi::StringTensor& tensor);
 
 /* ------------------ for output ----------------------- */
diff --git a/paddle/phi/infermeta/multiary.cc b/paddle/phi/infermeta/multiary.cc
index f2acfe5a99..5fecd3740e 100644
--- a/paddle/phi/infermeta/multiary.cc
+++ b/paddle/phi/infermeta/multiary.cc
@@ -1887,7 +1887,7 @@ void RnnInferMeta(const MetaTensor& x,
   }
 }
 
-void SGDInferMeta(const MetaTensor& param,
+void SgdInferMeta(const MetaTensor& param,
                   const MetaTensor& learning_rate,
                   const MetaTensor& grad,
                   paddle::optional<const MetaTensor&> master_param,
diff --git a/paddle/phi/infermeta/multiary.h b/paddle/phi/infermeta/multiary.h
index c037641d08..9137b574ac 100644
--- a/paddle/phi/infermeta/multiary.h
+++ b/paddle/phi/infermeta/multiary.h
@@ -292,7 +292,7 @@ void RnnInferMeta(const MetaTensor& x,
                   std::vector<MetaTensor*> state,
                   MetaTensor* reserve);
 
-void SGDInferMeta(const MetaTensor& param,
+void SgdInferMeta(const MetaTensor& param,
                   const MetaTensor& learning_rate,
                   const MetaTensor& grad,
                   paddle::optional<const MetaTensor&> master_param,
diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py
index 8242d8e339..95db9d39c1 100755
--- a/python/paddle/fluid/optimizer.py
+++ b/python/paddle/fluid/optimizer.py
@@ -44,6 +44,7 @@ from .wrapped_decorator import signature_safe_contextmanager
 from .. import compat as cpt
 import warnings
 from paddle import _C_ops
+from ..fluid.framework import _in_legacy_dygraph, in_dygraph_mode
 
 __all__ = [
     'SGD', 'Momentum', 'Adagrad', 'Adam', 'Adamax', 'Dpsgd', 'DecayedAdagrad',
@@ -1370,7 +1371,11 @@ class SGDOptimizer(Optimizer):
                         if find_master else None)
 
         lr = self._create_param_lr(param_and_grad)
-        if framework._non_static_mode():
+        if in_dygraph_mode():
+            _C_ops.final_state_sgd(param_and_grad[0], lr, param_and_grad[1],
+                                   master_weight, find_master)
+            return None
+        if _in_legacy_dygraph():
             _C_ops.sgd(param_and_grad[0], lr, param_and_grad[1],
                        master_weight, param_and_grad[0], master_weight)
             return None
diff --git a/python/paddle/fluid/tests/unittests/test_sgd_op.py b/python/paddle/fluid/tests/unittests/test_sgd_op.py
index 817150a21f..ad03fa3000 100644
--- a/python/paddle/fluid/tests/unittests/test_sgd_op.py
+++ b/python/paddle/fluid/tests/unittests/test_sgd_op.py
@@ -21,6 +21,7 @@ import paddle.fluid.core as core
 from paddle.fluid.op import Operator
 from op_test import OpTest
 import paddle
+from paddle.fluid.framework import _test_eager_guard
 
 paddle.enable_static()
 
@@ -291,6 +292,11 @@ class TestSGDV2(unittest.TestCase):
         adam.step()
         adam.clear_gradients()
 
+    def test_eager(self):
+        with _test_eager_guard():
+            self.test_sgd_dygraph()
+            self.test_sgd_group_dygraph()
+
 
 class TestSGDMultiPrecision2_0(unittest.TestCase):
     def dygraph_sgd_mp(self, mp):
diff --git a/python/paddle/optimizer/sgd.py b/python/paddle/optimizer/sgd.py
index fdee57bb12..46dd0b73a5 100644
--- a/python/paddle/optimizer/sgd.py
+++ b/python/paddle/optimizer/sgd.py
@@ -22,6 +22,7 @@ import warnings
 from ..fluid.layer_helper import LayerHelper
 from ..fluid import unique_name
 from ..fluid import layers
+from ..fluid.framework import _in_legacy_dygraph, in_dygraph_mode
 
 __all__ = []
 
@@ -144,7 +145,11 @@ class SGD(Optimizer):
                         if find_master else None)
 
         lr = self._create_param_lr(param_and_grad)
-        if framework._non_static_mode():
+        if in_dygraph_mode():
+            _C_ops.final_state_sgd(param_and_grad[0], lr, param_and_grad[1],
+                                   master_weight, find_master)
+            return None
+        if _in_legacy_dygraph():
             _C_ops.sgd(param_and_grad[0], lr, param_and_grad[1],
                        master_weight, param_and_grad[0], master_weight)
             return None
diff --git a/python/paddle/utils/code_gen/api.yaml b/python/paddle/utils/code_gen/api.yaml
index 329882317e..b4abe5b303 100644
--- a/python/paddle/utils/code_gen/api.yaml
+++ b/python/paddle/utils/code_gen/api.yaml
@@ -1794,6 +1794,12 @@
     func : selu
   backward : selu_grad
 
+- api : sgd
+  args : (Tensor param, Tensor learning_rate, Tensor grad, Tensor master_param, bool multi_precision)
+  output : Tensor(param_out), Tensor(master_param_out)
+  invoke : sgd_impl(param, learning_rate, grad, master_param, multi_precision)
+  optional : master_param
+
 - api : shape
   args : (Tensor input)
   output : Tensor
--
GitLab
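
Reviewer note: the snippet below is a minimal sketch, not part of the patch, showing how the new eager-mode path can be exercised end to end. It follows the pattern of test_sgd_dygraph in test_sgd_op.py; the layer sizes, input values, and learning rate are arbitrary. Under _test_eager_guard(), in_dygraph_mode() is true, so SGD._append_optimize_op dispatches to _C_ops.final_state_sgd, which goes through the new `sgd` yaml entry into sgd_impl.

import numpy as np
import paddle
from paddle.fluid.framework import _test_eager_guard

with _test_eager_guard():
    # Build a small net and run one SGD step through the final-state API.
    paddle.disable_static()
    linear = paddle.nn.Linear(10, 10)
    inp = paddle.to_tensor(
        np.random.uniform(-0.1, 0.1, [10, 10]).astype('float32'))
    loss = paddle.mean(linear(inp))
    sgd = paddle.optimizer.SGD(learning_rate=0.001,
                               parameters=linear.parameters())
    loss.backward()
    sgd.step()        # routes to _C_ops.final_state_sgd -> sgd_impl
    sgd.clear_grad()

Because master_param is marked optional in the yaml entry, the same call path covers both the plain float32 case above and the multi-precision case where a float32 master copy accompanies a float16 parameter.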