Unverified commit 6d1e03a2, authored by zyfncg, committed by GitHub

Add yaml and unittest for SGD (#41485)

* add sgd yaml

* change python api

* open eager mode in sgd

* fix bug
Parent: b0b75169
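A minimal sketch of how the new eager-mode SGD path is exercised from Python (the toy model, shapes, and learning rate are made up for illustration; the pattern mirrors the dygraph tests that the new test_eager case re-runs under the eager guard):

import paddle
from paddle.fluid.framework import _test_eager_guard

# Under the eager guard, SGD.step() takes the in_dygraph_mode() branch added
# in this commit and calls _C_ops.final_state_sgd instead of the legacy op.
with _test_eager_guard():
    linear = paddle.nn.Linear(10, 10)
    inp = paddle.rand([4, 10], dtype="float32")
    loss = linear(inp).mean()
    sgd = paddle.optimizer.SGD(learning_rate=0.1,
                               parameters=linear.parameters())
    loss.backward()
    sgd.step()
    sgd.clear_grad()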

@@ -126,7 +126,7 @@ $$param\_out = param - learning\_rate * grad$$
namespace ops = paddle::operators;
DECLARE_INFER_SHAPE_FUNCTOR(sgd, SGDInferShapeFunctor,
- PD_INFER_META(phi::SGDInferMeta));
+ PD_INFER_META(phi::SgdInferMeta));
REGISTER_OPERATOR(
sgd, ops::SGDOp, ops::SGDOpMaker,
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
...

@@ -656,6 +656,176 @@ std::tuple<Tensor, Tensor, Tensor> momentum_impl(
return api_output;
}

std::tuple<Tensor, Tensor> sgd_impl(
const Tensor& param,
const Tensor& learning_rate,
const Tensor& grad,
paddle::optional<const Tensor&> master_param,
bool multi_precision) {
DataType kernel_data_type = ParseDataType(param);
auto kernel_key_set = ParseKernelKeyByInputArgs(param, learning_rate, grad);
auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey();
VLOG(6) << "sgd API kernel key: [" << kernel_key.backend() << ", "
<< kernel_key.layout() << ", " << kernel_data_type << "]";
const auto& param_tensor = param.impl();
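// Choose the kernel variant from the storage types of param and grad:
// dense param + dense grad -> "sgd"; dense param + SelectedRows grad ->
// "sgd_dense_param_sparse_grad"; SelectedRows param -> "sgd_sparse_param_sparse_grad".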
std::string kernel_name = "sgd";
if (phi::DenseTensor::classof(param_tensor.get())) {
if (!phi::DenseTensor::classof(grad.impl().get())) {
kernel_name = "sgd_dense_param_sparse_grad";
}
} else {
kernel_name = "sgd_sparse_param_sparse_grad";
}
const auto& kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
kernel_name,
{kernel_key.backend(), kernel_key.layout(), kernel_data_type});
VLOG(6) << kernel_name << " API kernel: " << kernel;
auto* dev_ctx = GetDeviceContextByBackend(kernel_key.backend());
auto in_learning_rate =
PrepareData(learning_rate, kernel.InputAt(1), {false, true, true, true});
std::tuple<Tensor, Tensor> out;
std::get<0>(out) = param;
if (master_param) {
std::get<1>(out) = *master_param;
}
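// The outputs alias the inputs: both tuple slots share the underlying tensor
// impls of param / master_param, so the selected kernel updates them in place.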
phi::MetaTensor meta_out_0(std::get<0>(out).impl().get());
phi::MetaTensor meta_out_1(master_param ? std::get<1>(out).impl().get()
: nullptr);
if (phi::DenseTensor::classof(param_tensor.get())) {
auto in_param = PrepareData(param, kernel.InputAt(0), {});
auto in_master_param = PrepareData(master_param, kernel.InputAt(3), {});
paddle::optional<const phi::DenseTensor&> in_master_param_opt =
master_param
? paddle::make_optional<const phi::DenseTensor&>(*in_master_param)
: paddle::none;
auto master_param_meta = MakeMetaTensor(in_master_param_opt);
paddle::optional<const phi::MetaTensor&> master_param_meta_opt =
master_param
? paddle::make_optional<const phi::MetaTensor&>(*master_param_meta)
: paddle::none;
phi::DenseTensor* kernel_out_0 =
SetKernelOutput(kernel_key.backend(), &std::get<0>(out));
phi::DenseTensor* kernel_out_1 =
master_param
? static_cast<phi::DenseTensor*>(std::get<1>(out).impl().get())
: nullptr;
if (phi::DenseTensor::classof(grad.impl().get())) {
auto in_grad = PrepareData(grad, kernel.InputAt(2), {});
SgdInferMeta(MakeMetaTensor(*in_param),
MakeMetaTensor(*in_learning_rate),
MakeMetaTensor(*in_grad),
master_param_meta_opt,
multi_precision,
&meta_out_0,
&meta_out_1);
using kernel_signature =
void (*)(const platform::DeviceContext&,
const phi::DenseTensor&,
const phi::DenseTensor&,
const phi::DenseTensor&,
paddle::optional<const phi::DenseTensor&>,
bool,
phi::DenseTensor*,
phi::DenseTensor*);
auto* kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();
(*kernel_fn)(*dev_ctx,
*in_param,
*in_learning_rate,
*in_grad,
in_master_param_opt,
multi_precision,
kernel_out_0,
kernel_out_1);
} else {
auto in_grad = TensorToSelectedRows(grad);
SgdInferMeta(MakeMetaTensor(*in_param),
MakeMetaTensor(*in_learning_rate),
MakeMetaTensor(*in_grad),
master_param_meta_opt,
multi_precision,
&meta_out_0,
&meta_out_1);
using kernel_signature =
void (*)(const platform::DeviceContext&,
const phi::DenseTensor&,
const phi::DenseTensor&,
const phi::SelectedRows&,
paddle::optional<const phi::DenseTensor&>,
bool,
phi::DenseTensor*,
phi::DenseTensor*);
auto* kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();
(*kernel_fn)(*dev_ctx,
*in_param,
*in_learning_rate,
*in_grad,
in_master_param_opt,
multi_precision,
kernel_out_0,
kernel_out_1);
}
} else {
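// SelectedRows param: param, grad and the optional master_param are all
// handled as SelectedRows and dispatched to the sparse-param kernel.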
auto in_param = TensorToSelectedRows(param);
auto in_grad = TensorToSelectedRows(grad);
auto in_master_param = TensorToSelectedRows(master_param);
auto in_master_param_opt =
master_param
? paddle::make_optional<const phi::SelectedRows&>(*in_master_param)
: paddle::none;
auto master_param_meta = MakeMetaTensor(in_master_param_opt);
paddle::optional<const phi::MetaTensor&> master_param_meta_opt =
master_param
? paddle::make_optional<const phi::MetaTensor&>(*master_param_meta)
: paddle::none;
phi::SelectedRows* kernel_out_0 =
SetSelectedRowsKernelOutput(kernel_key.backend(), &std::get<0>(out));
phi::SelectedRows* kernel_out_1 =
master_param
? static_cast<phi::SelectedRows*>(std::get<1>(out).impl().get())
: nullptr;
SgdInferMeta(MakeMetaTensor(*in_param),
MakeMetaTensor(*in_learning_rate),
MakeMetaTensor(*in_grad),
master_param_meta_opt,
multi_precision,
&meta_out_0,
&meta_out_1);
using kernel_signature =
void (*)(const platform::DeviceContext&,
const phi::SelectedRows&,
const phi::DenseTensor&,
const phi::SelectedRows&,
paddle::optional<const phi::SelectedRows&>,
bool,
phi::SelectedRows*,
phi::SelectedRows*);
auto* kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();
(*kernel_fn)(*dev_ctx,
*in_param,
*in_learning_rate,
*in_grad,
in_master_param_opt,
multi_precision,
kernel_out_0,
kernel_out_1);
}
return out;
}

////////////////// Backward(grad) api impls //////////////////////
// TODO(chenweihang): the original sum grad op can support higher-level
...

@@ -14,6 +14,7 @@ limitations under the License. */
#pragma once
#include <tuple>
#include <vector>
#include "paddle/phi/api/include/tensor.h"

@@ -107,6 +108,13 @@ std::tuple<Tensor, Tensor, Tensor> momentum_impl(
bool multi_precision,
float rescale_grad);

std::tuple<Tensor, Tensor> sgd_impl(
const Tensor& param,
const Tensor& learning_rate,
const Tensor& grad,
paddle::optional<const Tensor&> master_param,
bool multi_precision);

////////////////// Backward(grad) api impls //////////////////////
std::vector<Tensor> add_n_grad_impl(const std::vector<Tensor>& x,
...

@@ -20,13 +20,13 @@ namespace experimental {
/* ------------------ for input ----------------------- */
std::shared_ptr<phi::DenseTensor> TensorToDenseTensor(const Tensor& tensor) {
- return std::dynamic_pointer_cast<phi::DenseTensor>(tensor.impl());
+ return std::static_pointer_cast<phi::DenseTensor>(tensor.impl());
}

std::shared_ptr<phi::DenseTensor> TensorToDenseTensor(
- const paddle::optional<Tensor>& tensor) {
+ const paddle::optional<const Tensor&>& tensor) {
if (tensor) {
- return std::dynamic_pointer_cast<phi::DenseTensor>(tensor->impl());
+ return std::static_pointer_cast<phi::DenseTensor>(tensor->impl());
}
return nullptr;
}

@@ -45,13 +45,13 @@ std::unique_ptr<std::vector<phi::DenseTensor>> TensorToDenseTensor(
}

std::shared_ptr<phi::SelectedRows> TensorToSelectedRows(const Tensor& tensor) {
- return std::dynamic_pointer_cast<phi::SelectedRows>(tensor.impl());
+ return std::static_pointer_cast<phi::SelectedRows>(tensor.impl());
}

std::shared_ptr<phi::SelectedRows> TensorToSelectedRows(
- const paddle::optional<Tensor>& tensor) {
+ const paddle::optional<const Tensor&>& tensor) {
if (tensor) {
- return std::dynamic_pointer_cast<phi::SelectedRows>(tensor->impl());
+ return std::static_pointer_cast<phi::SelectedRows>(tensor->impl());
}
return nullptr;
}

@@ -66,6 +66,14 @@ phi::MetaTensor MakeMetaTensor(const phi::DenseTensor& tensor) {
return phi::MetaTensor(tensor);
}

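// Optional-input overload: wraps an optional DenseTensor into an optional
// MetaTensor so that InferMeta functions can accept absent inputs.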
paddle::optional<phi::MetaTensor> MakeMetaTensor(
const paddle::optional<const phi::DenseTensor&>& tensor) {
if (tensor) {
return {phi::MetaTensor(*tensor)};
}
return {paddle::none};
}

std::vector<phi::MetaTensor> MakeMetaTensor(
const std::vector<const phi::DenseTensor*>& tensors) {
std::vector<phi::MetaTensor> meta_tensors;

@@ -90,6 +98,14 @@ phi::MetaTensor MakeMetaTensor(const phi::SelectedRows& tensor) {
return phi::MetaTensor(tensor);
}

paddle::optional<phi::MetaTensor> MakeMetaTensor(
const paddle::optional<const phi::SelectedRows&>& tensor) {
if (tensor) {
return {phi::MetaTensor(*tensor)};
}
return {paddle::none};
}

phi::MetaTensor MakeMetaTensor(const phi::StringTensor& tensor) {
return phi::MetaTensor(tensor);
}
...

@@ -42,7 +42,7 @@ std::unique_ptr<std::vector<phi::DenseTensor>> TensorToDenseTensor(
std::shared_ptr<phi::SelectedRows> TensorToSelectedRows(const Tensor& tensor);

std::shared_ptr<phi::SelectedRows> TensorToSelectedRows(
- const paddle::optional<Tensor>& tensor);
+ const paddle::optional<const Tensor&>& tensor);

std::shared_ptr<phi::StringTensor> TensorToStringTensor(const Tensor& tensor);

@@ -50,6 +50,9 @@ std::shared_ptr<phi::StringTensor> TensorToStringTensor(const Tensor& tensor);
phi::MetaTensor MakeMetaTensor(const phi::DenseTensor& tensor);

paddle::optional<phi::MetaTensor> MakeMetaTensor(
const paddle::optional<const phi::DenseTensor&>& tensor);

std::vector<phi::MetaTensor> MakeMetaTensor(
const std::vector<const phi::DenseTensor*>& tensors);

@@ -58,6 +61,9 @@ std::vector<phi::MetaTensor> MakeMetaTensor(
phi::MetaTensor MakeMetaTensor(const phi::SelectedRows& tensor);

paddle::optional<phi::MetaTensor> MakeMetaTensor(
const paddle::optional<const phi::SelectedRows&>& tensor);

phi::MetaTensor MakeMetaTensor(const phi::StringTensor& tensor);

/* ------------------ for output ----------------------- */
...

@@ -1887,7 +1887,7 @@ void RnnInferMeta(const MetaTensor& x,
}
}

- void SGDInferMeta(const MetaTensor& param,
+ void SgdInferMeta(const MetaTensor& param,
const MetaTensor& learning_rate,
const MetaTensor& grad,
paddle::optional<const MetaTensor&> master_param,
...

@@ -292,7 +292,7 @@ void RnnInferMeta(const MetaTensor& x,
std::vector<MetaTensor*> state,
MetaTensor* reserve);

- void SGDInferMeta(const MetaTensor& param,
+ void SgdInferMeta(const MetaTensor& param,
const MetaTensor& learning_rate,
const MetaTensor& grad,
paddle::optional<const MetaTensor&> master_param,
...

@@ -44,6 +44,7 @@ from .wrapped_decorator import signature_safe_contextmanager
from .. import compat as cpt
import warnings
from paddle import _C_ops
from ..fluid.framework import _in_legacy_dygraph, in_dygraph_mode

__all__ = [
'SGD', 'Momentum', 'Adagrad', 'Adam', 'Adamax', 'Dpsgd', 'DecayedAdagrad',

@@ -1370,7 +1371,11 @@ class SGDOptimizer(Optimizer):
if find_master else None)
lr = self._create_param_lr(param_and_grad)
- if framework._non_static_mode():
+ if in_dygraph_mode():
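# Eager mode: route through the new final-state op generated from the
# sgd yaml entry (args follow the yaml signature: param, learning_rate,
# grad, master_param, multi_precision).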
_C_ops.final_state_sgd(param_and_grad[0], lr, param_and_grad[1],
master_weight, find_master)
return None
if _in_legacy_dygraph():
_C_ops.sgd(param_and_grad[0], lr, param_and_grad[1], master_weight,
param_and_grad[0], master_weight)
return None
...

@@ -21,6 +21,7 @@ import paddle.fluid.core as core
from paddle.fluid.op import Operator
from op_test import OpTest
import paddle
from paddle.fluid.framework import _test_eager_guard
paddle.enable_static()

@@ -291,6 +292,11 @@ class TestSGDV2(unittest.TestCase):
adam.step()
adam.clear_gradients()

def test_eager(self):
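# Re-run the dygraph tests under the eager guard so that the new
# final_state_sgd code path is also covered.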
with _test_eager_guard():
self.test_sgd_dygraph()
self.test_sgd_group_dygraph()

class TestSGDMultiPrecision2_0(unittest.TestCase):
def dygraph_sgd_mp(self, mp):
...

@@ -22,6 +22,7 @@ import warnings
from ..fluid.layer_helper import LayerHelper
from ..fluid import unique_name
from ..fluid import layers
from ..fluid.framework import _in_legacy_dygraph, in_dygraph_mode

__all__ = []

@@ -144,7 +145,11 @@ class SGD(Optimizer):
if find_master else None)
lr = self._create_param_lr(param_and_grad)
- if framework._non_static_mode():
+ if in_dygraph_mode():
_C_ops.final_state_sgd(param_and_grad[0], lr, param_and_grad[1],
master_weight, find_master)
return None
if _in_legacy_dygraph():
_C_ops.sgd(param_and_grad[0], lr, param_and_grad[1], master_weight,
param_and_grad[0], master_weight)
return None
...

@@ -1794,6 +1794,12 @@
func : selu
backward : selu_grad

- api : sgd
args : (Tensor param, Tensor learning_rate, Tensor grad, Tensor master_param, bool multi_precision)
output : Tensor(param_out), Tensor(master_param_out)
invoke : sgd_impl(param, learning_rate, grad, master_param, multi_precision)
optional : master_param
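A rough illustration of what the entry above wires up: under eager mode the generated op can be invoked directly through _C_ops.final_state_sgd with arguments in the order given by args. This is only a sketch with made-up toy values; it passes None for the optional master_param and False for multi_precision, the same way the updated optimizers call it in the fp32 case.

import paddle
from paddle import _C_ops
from paddle.fluid.framework import _test_eager_guard

with _test_eager_guard():
    param = paddle.ones([2, 2], dtype="float32")
    grad = paddle.full([2, 2], 0.5, dtype="float32")
    lr = paddle.to_tensor([0.1], dtype="float32")
    # sgd_impl aliases its outputs to its inputs, so param is updated in
    # place: param <- param - lr * grad (each entry becomes 0.95 here).
    _C_ops.final_state_sgd(param, lr, grad, None, False)
    print(param.numpy())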

- api : shape
args : (Tensor input)
output : Tensor
...