Unverified commit 12311ddc, authored by Weilong Wu, committed by GitHub

[Eager] Fix final state adam in selected rows case (#42219)

* [Eager] Support final_state_adam when argument grad (position 1) is selected_rows

* Remove needless code

* Add adam_dense_param_sparse_grad kernel
Parent: 9dadf7df
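For context, the failure this commit fixes is the case exercised by the updated test further below: a sparse paddle.nn.Embedding optimized with paddle.optimizer.Adam, whose weight gradient arrives as a SelectedRows rather than a DenseTensor. A minimal repro sketch distilled from that test (the shapes and learning rate here are illustrative, not part of the commit):

import numpy as np
import paddle

paddle.disable_static(paddle.CPUPlace())

# sparse=True makes the embedding emit a SelectedRows gradient for its
# weight, which is exactly the grad type adam_impl must now dispatch on.
x = paddle.to_tensor(
    np.arange(3, 6).reshape((3, 1)).astype(np.int64), stop_gradient=False)
embedding = paddle.nn.Embedding(10, 3, sparse=True)
adam = paddle.optimizer.Adam(
    parameters=[embedding.weight], learning_rate=0.01)

out = embedding(x)
out.backward()
adam.step()  # previously failed in eager (final-state) mode

Before this change, adam.step() in eager mode tried to prepare the sparse gradient as a dense tensor and failed; the diff below routes it to a dedicated kernel instead.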
@@ -69,7 +69,12 @@ std::tuple<Tensor, Tensor, Tensor, Tensor, Tensor, Tensor> adam_impl(
       kernel_data_type = kernel_key.dtype();
     }
   }
   std::string kernel_name = "adam";
+  if (!phi::DenseTensor::classof(grad.impl().get())) {
+    kernel_name = "adam_dense_param_sparse_grad";
+  }
   const auto& kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
       kernel_name, {kernel_backend, kernel_layout, kernel_data_type});
   VLOG(6) << kernel_name << " API kernel key: [" << kernel_backend << ", "
@@ -77,9 +82,7 @@ std::tuple<Tensor, Tensor, Tensor, Tensor, Tensor, Tensor> adam_impl(
   VLOG(6) << kernel_name << " API kernel: " << kernel;
   auto* dev_ctx = GetDeviceContextByBackend(kernel_backend);
   auto input_param = PrepareData(param, kernel.InputAt(0), {});
-  auto input_grad = PrepareData(grad, kernel.InputAt(1), {});
   auto input_lr = PrepareData(learning_rate, kernel.InputAt(2), {});
   auto input_moment1 = PrepareData(moment1, kernel.InputAt(3), {});
   auto input_moment2 = PrepareData(moment2, kernel.InputAt(4), {});
@@ -140,78 +143,155 @@ std::tuple<Tensor, Tensor, Tensor, Tensor, Tensor, Tensor> adam_impl(
   phi::MetaTensor meta_out_4(kernel_out_4);
   phi::MetaTensor meta_out_5(kernel_out_5);
 
-  phi::AdamInferMeta(MakeMetaTensor(*input_param),
-                     MakeMetaTensor(*input_grad),
-                     MakeMetaTensor(*input_lr),
-                     MakeMetaTensor(*input_moment1),
-                     MakeMetaTensor(*input_moment2),
-                     MakeMetaTensor(*input_beta1_pow),
-                     MakeMetaTensor(*input_beta2_pow),
-                     input_meta_ref_master_param,
-                     input_meta_ref_skip_update,
-                     beta1,
-                     beta2,
-                     epsilon,
-                     lazy_mode,
-                     min_row_size_to_use_multithread,
-                     multi_precision,
-                     use_global_beta_pow,
-                     &meta_out_0,
-                     &meta_out_1,
-                     &meta_out_2,
-                     &meta_out_3,
-                     &meta_out_4,
-                     &meta_out_5);
-
-  using kernel_signature = void (*)(const platform::DeviceContext&,
-                                    const phi::DenseTensor&,
-                                    const phi::DenseTensor&,
-                                    const phi::DenseTensor&,
-                                    const phi::DenseTensor&,
-                                    const phi::DenseTensor&,
-                                    const phi::DenseTensor&,
-                                    const phi::DenseTensor&,
-                                    paddle::optional<const phi::DenseTensor&>,
-                                    paddle::optional<const phi::DenseTensor&>,
-                                    const Scalar&,
-                                    const Scalar&,
-                                    const Scalar&,
-                                    bool,
-                                    int64_t,
-                                    bool,
-                                    bool,
-                                    phi::DenseTensor*,
-                                    phi::DenseTensor*,
-                                    phi::DenseTensor*,
-                                    phi::DenseTensor*,
-                                    phi::DenseTensor*,
-                                    phi::DenseTensor*);
-  auto* kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();
-
-  (*kernel_fn)(*dev_ctx,
-               *input_param,
-               *input_grad,
-               *input_lr,
-               *input_moment1,
-               *input_moment2,
-               *input_beta1_pow,
-               *input_beta2_pow,
-               input_master_param,
-               input_skip_update,
-               beta1,
-               beta2,
-               epsilon,
-               lazy_mode,
-               min_row_size_to_use_multithread,
-               multi_precision,
-               use_global_beta_pow,
-               kernel_out_0,
-               kernel_out_1,
-               kernel_out_2,
-               kernel_out_3,
-               kernel_out_4,
-               kernel_out_5);
+  if (phi::DenseTensor::classof(grad.impl().get())) {
+    auto input_grad = PrepareData(grad, kernel.InputAt(1), {});
+
+    phi::AdamInferMeta(MakeMetaTensor(*input_param),
+                       MakeMetaTensor(*input_grad),
+                       MakeMetaTensor(*input_lr),
+                       MakeMetaTensor(*input_moment1),
+                       MakeMetaTensor(*input_moment2),
+                       MakeMetaTensor(*input_beta1_pow),
+                       MakeMetaTensor(*input_beta2_pow),
+                       input_meta_ref_master_param,
+                       input_meta_ref_skip_update,
+                       beta1,
+                       beta2,
+                       epsilon,
+                       lazy_mode,
+                       min_row_size_to_use_multithread,
+                       multi_precision,
+                       use_global_beta_pow,
+                       &meta_out_0,
+                       &meta_out_1,
+                       &meta_out_2,
+                       &meta_out_3,
+                       &meta_out_4,
+                       &meta_out_5);
+
+    using kernel_signature = void (*)(const platform::DeviceContext&,
+                                      const phi::DenseTensor&,
+                                      const phi::DenseTensor&,
+                                      const phi::DenseTensor&,
+                                      const phi::DenseTensor&,
+                                      const phi::DenseTensor&,
+                                      const phi::DenseTensor&,
+                                      const phi::DenseTensor&,
+                                      paddle::optional<const phi::DenseTensor&>,
+                                      paddle::optional<const phi::DenseTensor&>,
+                                      const Scalar&,
+                                      const Scalar&,
+                                      const Scalar&,
+                                      bool,
+                                      int64_t,
+                                      bool,
+                                      bool,
+                                      phi::DenseTensor*,
+                                      phi::DenseTensor*,
+                                      phi::DenseTensor*,
+                                      phi::DenseTensor*,
+                                      phi::DenseTensor*,
+                                      phi::DenseTensor*);
+    auto* kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();
+
+    (*kernel_fn)(*dev_ctx,
+                 *input_param,
+                 *input_grad,
+                 *input_lr,
+                 *input_moment1,
+                 *input_moment2,
+                 *input_beta1_pow,
+                 *input_beta2_pow,
+                 input_master_param,
+                 input_skip_update,
+                 beta1,
+                 beta2,
+                 epsilon,
+                 lazy_mode,
+                 min_row_size_to_use_multithread,
+                 multi_precision,
+                 use_global_beta_pow,
+                 kernel_out_0,
+                 kernel_out_1,
+                 kernel_out_2,
+                 kernel_out_3,
+                 kernel_out_4,
+                 kernel_out_5);
+  } else {
+    auto input_grad = TensorToSelectedRows(grad);
+    phi::AdamInferMeta(MakeMetaTensor(*input_param),
+                       MakeMetaTensor(*input_grad),
+                       MakeMetaTensor(*input_lr),
+                       MakeMetaTensor(*input_moment1),
+                       MakeMetaTensor(*input_moment2),
+                       MakeMetaTensor(*input_beta1_pow),
+                       MakeMetaTensor(*input_beta2_pow),
+                       input_meta_ref_master_param,
+                       input_meta_ref_skip_update,
+                       beta1,
+                       beta2,
+                       epsilon,
+                       lazy_mode,
+                       min_row_size_to_use_multithread,
+                       multi_precision,
+                       use_global_beta_pow,
+                       &meta_out_0,
+                       &meta_out_1,
+                       &meta_out_2,
+                       &meta_out_3,
+                       &meta_out_4,
+                       &meta_out_5);
+    using kernel_signature = void (*)(const platform::DeviceContext&,
+                                      const phi::DenseTensor&,
+                                      const phi::SelectedRows&,
+                                      const phi::DenseTensor&,
+                                      const phi::DenseTensor&,
+                                      const phi::DenseTensor&,
+                                      const phi::DenseTensor&,
+                                      const phi::DenseTensor&,
+                                      paddle::optional<const phi::DenseTensor&>,
+                                      paddle::optional<const phi::DenseTensor&>,
+                                      const Scalar&,
+                                      const Scalar&,
+                                      const Scalar&,
+                                      bool,
+                                      int64_t,
+                                      bool,
+                                      bool,
+                                      phi::DenseTensor*,
+                                      phi::DenseTensor*,
+                                      phi::DenseTensor*,
+                                      phi::DenseTensor*,
+                                      phi::DenseTensor*,
+                                      phi::DenseTensor*);
+    auto* kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();
+    (*kernel_fn)(*dev_ctx,
+                 *input_param,
+                 *input_grad,
+                 *input_lr,
+                 *input_moment1,
+                 *input_moment2,
+                 *input_beta1_pow,
+                 *input_beta2_pow,
+                 input_master_param,
+                 input_skip_update,
+                 beta1,
+                 beta2,
+                 epsilon,
+                 lazy_mode,
+                 min_row_size_to_use_multithread,
+                 multi_precision,
+                 use_global_beta_pow,
+                 kernel_out_0,
+                 kernel_out_1,
+                 kernel_out_2,
+                 kernel_out_3,
+                 kernel_out_4,
+                 kernel_out_5);
+  }
 
   return api_output;
 }
...
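In short, adam_impl now branches on the runtime type of grad: a phi::DenseTensor keeps the original "adam" kernel path, while anything else is converted with TensorToSelectedRows and routed to the adam_dense_param_sparse_grad kernel, whose signature takes const phi::SelectedRows& as the second input. A toy Python sketch of just that dispatch decision (illustrative only; the boolean parameter stands in for the phi::DenseTensor::classof check):

def select_adam_kernel(grad_is_dense_tensor: bool) -> str:
    # Mirrors the branch added at the top of adam_impl.
    if not grad_is_dense_tensor:
        return "adam_dense_param_sparse_grad"
    return "adam"

assert select_adam_kernel(True) == "adam"
assert select_adam_kernel(False) == "adam_dense_param_sparse_grad"

The test diff below exercises the sparse branch end to end through a sparse embedding.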
@@ -19,14 +19,13 @@ import unittest
 import paddle
 import paddle.nn as nn
 import numpy as np
-from paddle.fluid.framework import _enable_legacy_dygraph
-_enable_legacy_dygraph()
+from paddle.fluid.framework import _test_eager_guard
 
 paddle.disable_static()
 
 
 class EmbeddingDygraph(unittest.TestCase):
-    def test_1(self):
+    def func_1(self):
         x_data = np.arange(3, 6).reshape((3, 1)).astype(np.int64)
         paddle.disable_static(paddle.CPUPlace())
         x = paddle.to_tensor(x_data, stop_gradient=False)
@@ -44,7 +43,12 @@ class EmbeddingDygraph(unittest.TestCase):
         out.backward()
         adam.step()
 
-    def test_2(self):
+    def test_1(self):
+        with _test_eager_guard():
+            self.func_1()
+        self.func_1()
+
+    def func_2(self):
         x_data = np.arange(3, 6).reshape((3, 1)).astype(np.int64)
         y_data = np.arange(6, 12).reshape((3, 2)).astype(np.float32)
         paddle.disable_static(paddle.CPUPlace())
@@ -60,6 +64,11 @@ class EmbeddingDygraph(unittest.TestCase):
         with self.assertRaises(ValueError):
             embedding = paddle.nn.Embedding(10, -3, sparse=True)
 
+    def test_2(self):
+        with _test_eager_guard():
+            self.func_2()
+        self.func_2()
+
 
 if __name__ == '__main__':
     unittest.main()
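The test refactor follows the convention used throughout the eager migration: the module-level _enable_legacy_dygraph() switch is removed, each case body becomes a func_* method, and each test_* entry runs the body twice, once inside _test_eager_guard() (eager, i.e. final-state mode) and once outside (legacy dygraph). A minimal sketch of the pattern, assuming the same _test_eager_guard import as the test above:

import unittest

from paddle.fluid.framework import _test_eager_guard


class EagerAndLegacyCase(unittest.TestCase):
    def func_case(self):
        pass  # the real assertions live here

    def test_case(self):
        with _test_eager_guard():
            self.func_case()  # eager (final-state) mode
        self.func_case()      # legacy dygraph mode

Running the body under both guards keeps coverage of the legacy code path while validating the new final-state adam dispatch.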