Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
8cbf79a3
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
8cbf79a3
编写于
4月 13, 2022
作者:
C
chentianyu03
提交者:
GitHub
4月 13, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[Yaml]Add adam yaml (#41561)
* add adam yaml * add adam final_state api * add adam_impl
上级
a4d4c116
变更
6
隐藏空白更改
内联
并排
Showing
6 changed files
with
241 additions
and
1 deletion
+241
-1
paddle/phi/api/lib/api_custom_impl.cc
paddle/phi/api/lib/api_custom_impl.cc
+181
-0
paddle/phi/api/lib/api_custom_impl.h
paddle/phi/api/lib/api_custom_impl.h
+18
-0
python/paddle/fluid/tests/unittests/test_adam_op.py
python/paddle/fluid/tests/unittests/test_adam_op.py
+13
-0
python/paddle/fluid/tests/unittests/test_optimizer.py
python/paddle/fluid/tests/unittests/test_optimizer.py
+6
-0
python/paddle/optimizer/adam.py
python/paddle/optimizer/adam.py
+17
-1
python/paddle/utils/code_gen/api.yaml
python/paddle/utils/code_gen/api.yaml
+6
-0
未找到文件。
paddle/phi/api/lib/api_custom_impl.cc
浏览文件 @
8cbf79a3
...
...
@@ -33,6 +33,187 @@ limitations under the License. */
namespace
paddle
{
namespace
experimental
{
// Hand-written forward-API dispatch for the "adam" phi kernel (this op needs
// custom glue because of its two optional inputs and in-place outputs).
// Resolves the kernel key from the inputs, prepares every dense-tensor
// argument, runs AdamInferMeta on the output metadata, then invokes the
// kernel through its variadic signature.
//
// NOTE(review): the returned tuple `api_output` is default-constructed and
// never populated — the kernel writes its results in place through
// kernel_out_0..5, which alias the prepared input tensors. The Python caller
// appears to discard the return value; confirm before relying on it.
std::tuple<Tensor, Tensor, Tensor, Tensor, Tensor, Tensor> adam_impl(
    const Tensor& param,
    const Tensor& grad,
    const Tensor& learning_rate,
    const Tensor& moment1,
    const Tensor& moment2,
    const Tensor& beta1_pow,
    const Tensor& beta2_pow,
    paddle::optional<const Tensor&> master_param,
    paddle::optional<const Tensor&> skip_update,
    const Scalar& beta1,
    const Scalar& beta2,
    const Scalar& epsilon,
    bool lazy_mode,
    int64_t min_row_size_to_use_multithread,
    bool multi_precision,
    bool use_global_beta_pow) {
  // Kernel key (backend / layout / dtype) is fully inferred from the inputs;
  // all three start UNDEFINED, so the deduction branch below always runs.
  Backend kernel_backend = Backend::UNDEFINED;
  DataLayout kernel_layout = DataLayout::UNDEFINED;
  DataType kernel_data_type = DataType::UNDEFINED;
  if (kernel_backend == Backend::UNDEFINED ||
      kernel_layout == DataLayout::UNDEFINED ||
      kernel_data_type == DataType::UNDEFINED) {
    // Only `param` participates in kernel-key deduction here.
    auto kernel_key_set = ParseKernelKeyByInputArgs(param);
    auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey();
    if (kernel_backend == Backend::UNDEFINED) {
      kernel_backend = kernel_key.backend();
    }
    if (kernel_layout == DataLayout::UNDEFINED) {
      kernel_layout = kernel_key.layout();
    }
    if (kernel_data_type == DataType::UNDEFINED) {
      kernel_data_type = kernel_key.dtype();
    }
  }
  std::string kernel_name = "adam";
  // Throws if no kernel matches the resolved (backend, layout, dtype) key.
  const auto& kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
      kernel_name, {kernel_backend, kernel_layout, kernel_data_type});
  VLOG(6) << kernel_name << " API kernel key: [" << kernel_backend << ", "
          << kernel_layout << ", " << kernel_data_type << "]";
  VLOG(6) << kernel_name << " API kernel: " << kernel;

  auto* dev_ctx = GetDeviceContextByBackend(kernel_backend);

  // Prepare each dense input; the index given to InputAt must match the
  // kernel's registered argument order exactly.
  auto input_param = PrepareData(param, kernel.InputAt(0), {});
  auto input_grad = PrepareData(grad, kernel.InputAt(1), {});
  auto input_lr = PrepareData(learning_rate, kernel.InputAt(2), {});
  auto input_moment1 = PrepareData(moment1, kernel.InputAt(3), {});
  auto input_moment2 = PrepareData(moment2, kernel.InputAt(4), {});
  auto input_beta1_pow = PrepareData(beta1_pow, kernel.InputAt(5), {});
  auto input_beta2_pow = PrepareData(beta2_pow, kernel.InputAt(6), {});
  // Optional inputs start empty and are only filled in when the prepared
  // pointer is non-null (see the two `if` blocks below).
  paddle::optional<const phi::DenseTensor&> input_master_param(paddle::none);
  auto input_master_param_ptr =
      PrepareData(master_param, kernel.InputAt(7), {});
  paddle::optional<const phi::DenseTensor&> input_skip_update(paddle::none);
  auto input_skip_update_ptr = PrepareData(skip_update, kernel.InputAt(8), {});

  std::tuple<Tensor, Tensor, Tensor, Tensor, Tensor, Tensor> api_output;
  // In-place update: each output aliases the corresponding prepared input.
  auto kernel_out_0 = input_param.get();
  auto kernel_out_1 = input_moment1.get();
  auto kernel_out_2 = input_moment2.get();
  auto kernel_out_3 = input_beta1_pow.get();
  auto kernel_out_4 = input_beta2_pow.get();
  phi::DenseTensor* kernel_out_5 = nullptr;
  if (input_master_param_ptr) {
    input_master_param =
        paddle::make_optional<const phi::DenseTensor&>(*input_master_param_ptr);
    // NOTE(review): the temporary optional here only serves to extract a raw
    // pointer to the prepared master_param tensor.
    kernel_out_5 =
        paddle::make_optional<phi::DenseTensor&>(*input_master_param_ptr)
            .get_ptr();
  }
  if (input_skip_update_ptr) {
    input_skip_update =
        paddle::make_optional<const phi::DenseTensor&>(*input_skip_update_ptr);
  }
  // Build MetaTensor views of the optional inputs for InferMeta; `dt`/`dt1`
  // back the MetaTensors and are only meaningful when the input is present.
  paddle::optional<const phi::MetaTensor&> input_meta_ref_master_param(
      paddle::none);
  phi::DenseTensor dt;
  phi::MetaTensor input_meta_tmp_master_param(dt);
  if (input_master_param_ptr) {
    input_meta_tmp_master_param.set_dtype(input_master_param_ptr->dtype());
    input_meta_tmp_master_param.set_dims(input_master_param_ptr->dims());
    input_meta_tmp_master_param.set_layout(input_master_param_ptr->layout());
    input_meta_ref_master_param = input_meta_tmp_master_param;
  }
  paddle::optional<const phi::MetaTensor&> input_meta_ref_skip_update(
      paddle::none);
  phi::DenseTensor dt1;
  phi::MetaTensor input_meta_tmp_skip_update(dt1);
  if (input_skip_update_ptr) {
    input_meta_tmp_skip_update.set_dtype(input_skip_update_ptr->dtype());
    input_meta_tmp_skip_update.set_dims(input_skip_update_ptr->dims());
    input_meta_tmp_skip_update.set_layout(input_skip_update_ptr->layout());
    input_meta_ref_skip_update = input_meta_tmp_skip_update;
  }
  // Infer output metadata (written through the meta_out_* views) before
  // launching the kernel.
  phi::MetaTensor meta_out_0(kernel_out_0);
  phi::MetaTensor meta_out_1(kernel_out_1);
  phi::MetaTensor meta_out_2(kernel_out_2);
  phi::MetaTensor meta_out_3(kernel_out_3);
  phi::MetaTensor meta_out_4(kernel_out_4);
  phi::MetaTensor meta_out_5(kernel_out_5);

  phi::AdamInferMeta(MakeMetaTensor(*input_param),
                     MakeMetaTensor(*input_grad),
                     MakeMetaTensor(*input_lr),
                     MakeMetaTensor(*input_moment1),
                     MakeMetaTensor(*input_moment2),
                     MakeMetaTensor(*input_beta1_pow),
                     MakeMetaTensor(*input_beta2_pow),
                     input_meta_ref_master_param,
                     input_meta_ref_skip_update,
                     beta1,
                     beta2,
                     epsilon,
                     lazy_mode,
                     min_row_size_to_use_multithread,
                     multi_precision,
                     use_global_beta_pow,
                     &meta_out_0,
                     &meta_out_1,
                     &meta_out_2,
                     &meta_out_3,
                     &meta_out_4,
                     &meta_out_5);

  // Positional order of this signature must mirror the kernel registration.
  using kernel_signature = void (*)(const platform::DeviceContext&,
                                    const phi::DenseTensor&,
                                    const phi::DenseTensor&,
                                    const phi::DenseTensor&,
                                    const phi::DenseTensor&,
                                    const phi::DenseTensor&,
                                    const phi::DenseTensor&,
                                    const phi::DenseTensor&,
                                    paddle::optional<const phi::DenseTensor&>,
                                    paddle::optional<const phi::DenseTensor&>,
                                    const Scalar&,
                                    const Scalar&,
                                    const Scalar&,
                                    bool,
                                    int64_t,
                                    bool,
                                    bool,
                                    phi::DenseTensor*,
                                    phi::DenseTensor*,
                                    phi::DenseTensor*,
                                    phi::DenseTensor*,
                                    phi::DenseTensor*,
                                    phi::DenseTensor*);
  auto* kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();
  (*kernel_fn)(*dev_ctx,
               *input_param,
               *input_grad,
               *input_lr,
               *input_moment1,
               *input_moment2,
               *input_beta1_pow,
               *input_beta2_pow,
               input_master_param,
               input_skip_update,
               beta1,
               beta2,
               epsilon,
               lazy_mode,
               min_row_size_to_use_multithread,
               multi_precision,
               use_global_beta_pow,
               kernel_out_0,
               kernel_out_1,
               kernel_out_2,
               kernel_out_3,
               kernel_out_4,
               kernel_out_5);

  return api_output;
}
////////////////// Forward api impls //////////////////////
Tensor
conv2d_impl
(
const
Tensor
&
input
,
...
...
paddle/phi/api/lib/api_custom_impl.h
浏览文件 @
8cbf79a3
...
...
@@ -30,6 +30,24 @@ namespace experimental {
////////////////// Forward api impls //////////////////////
// Hand-written forward API for the Adam optimizer update (implemented in
// api_custom_impl.cc). Inputs: param/grad/learning_rate, the two moment
// accumulators, and the beta1/beta2 power accumulators; master_param and
// skip_update are optional. Returns a six-Tensor tuple matching the yaml
// outputs (param_out, moment1_out, moment2_out, beta1_pow_out, beta2_pow_out,
// master_param_outs).
// NOTE(review): the .cc implementation updates tensors in place and returns a
// default-constructed tuple — confirm callers do not consume the return value.
std::tuple<Tensor, Tensor, Tensor, Tensor, Tensor, Tensor> adam_impl(
    const Tensor& param,
    const Tensor& grad,
    const Tensor& learning_rate,
    const Tensor& moment1,
    const Tensor& moment2,
    const Tensor& beta1_pow,
    const Tensor& beta2_pow,
    paddle::optional<const Tensor&> master_param,
    paddle::optional<const Tensor&> skip_update,
    const Scalar& beta1,
    const Scalar& beta2,
    const Scalar& epsilon,
    bool lazy_mode,
    int64_t min_row_size_to_use_multithread,
    bool multi_precision,
    bool use_global_beta_pow);
std
::
tuple
<
Tensor
,
Tensor
,
Tensor
,
Tensor
,
Tensor
,
Tensor
>
batch_norm_impl
(
const
Tensor
&
x
,
const
Tensor
&
scale
,
...
...
python/paddle/fluid/tests/unittests/test_adam_op.py
浏览文件 @
8cbf79a3
...
...
@@ -21,6 +21,7 @@ from paddle.fluid import core
from
paddle.fluid.op
import
Operator
import
paddle.fluid
as
fluid
import
paddle
from
paddle.fluid.framework
import
_test_eager_guard
class
TestAdamOp1
(
OpTest
):
...
...
@@ -189,6 +190,10 @@ class TestAdamOpMultipleSteps(OpTest):
self
.
inputs
[
'Grad'
]
=
np
.
random
.
uniform
(
-
1
,
1
,
(
102
,
105
)).
astype
(
"float32"
)
def
test_api_eager_dygraph
(
self
):
with
_test_eager_guard
():
self
.
test_check_output
()
def
adam_step
(
inputs
,
attributes
):
'''
...
...
@@ -732,6 +737,14 @@ class TestAdamOpV2(unittest.TestCase):
adam
.
step
()
paddle
.
enable_static
()
def
test_api_eager_dygraph
(
self
):
with
_test_eager_guard
():
self
.
test_adam_op_dygraph
()
self
.
test_adam_op_with_state_dict
()
self
.
test_adam_with_grad_clip
()
self
.
test_adam_op_with_set_lr
()
self
.
test_adam_op_with_sparse_input_and_weight_decay
()
class
TestAdamOptimizer
(
unittest
.
TestCase
):
def
_test
(
self
,
...
...
python/paddle/fluid/tests/unittests/test_optimizer.py
浏览文件 @
8cbf79a3
...
...
@@ -24,6 +24,7 @@ import paddle.compat as cpt
import
numpy
as
np
from
paddle.fluid.backward
import
append_backward
from
paddle.fluid.framework
import
Program
,
program_guard
,
convert_np_dtype_to_dtype_
from
paddle.fluid.framework
import
_test_eager_guard
import
paddle
from
paddle.io
import
Dataset
import
numpy
...
...
@@ -1114,6 +1115,11 @@ class TestOptimizerDtype(unittest.TestCase):
def
test_float32
(
self
):
self
.
check_with_dtype
(
'float32'
)
def
test_api_eager_dygraph
(
self
):
with
_test_eager_guard
():
self
.
test_float64
()
self
.
test_float32
()
class
TestMasterWeightSaveForFP16
(
unittest
.
TestCase
):
'''
...
...
python/paddle/optimizer/adam.py
浏览文件 @
8cbf79a3
...
...
@@ -336,7 +336,23 @@ class Adam(Optimizer):
lr
=
self
.
_create_param_lr
(
param_and_grad
)
# create the adam optimize op
if
framework
.
_non_static_mode
():
if
framework
.
in_dygraph_mode
():
found_inf
=
self
.
_get_auxiliary_var
(
'found_inf'
)
_beta1
=
self
.
_beta1
if
not
isinstance
(
self
.
_beta1
,
Variable
)
else
self
.
_beta1
.
numpy
().
item
(
0
)
_beta2
=
self
.
_beta2
if
not
isinstance
(
self
.
_beta2
,
Variable
)
else
self
.
_beta2
.
numpy
().
item
(
0
)
_
,
_
,
_
,
_
,
_
,
_
=
_C_ops
.
final_state_adam
(
param_and_grad
[
0
],
param_and_grad
[
1
],
lr
,
moment1
,
moment2
,
beta1_pow_acc
,
beta2_pow_acc
,
master_weight
,
found_inf
,
_beta1
,
_beta2
,
self
.
_epsilon
,
self
.
_lazy_mode
,
1000
,
find_master
,
False
)
return
None
if
framework
.
_in_legacy_dygraph
():
_beta1
=
self
.
_beta1
if
not
isinstance
(
self
.
_beta1
,
Variable
)
else
self
.
_beta1
.
numpy
().
item
(
0
)
...
...
python/paddle/utils/code_gen/api.yaml
浏览文件 @
8cbf79a3
...
...
@@ -45,6 +45,12 @@
kernel
:
func
:
adadelta
# Adam optimizer update; dispatches to the hand-written adam_impl rather than
# a generated kernel call. master_param and skip_update may be omitted.
- api : adam
  args : (Tensor param, Tensor grad, Tensor learning_rate, Tensor moment1, Tensor moment2, Tensor beta1_pow, Tensor beta2_pow, Tensor master_param, Tensor skip_update, Scalar beta1, Scalar beta2, Scalar epsilon, bool lazy_mode, int64_t min_row_size_to_use_multithread, bool multi_precision, bool use_global_beta_pow)
  output : Tensor(param_out), Tensor(moment1_out), Tensor(moment2_out), Tensor(beta1_pow_out), Tensor(beta2_pow_out), Tensor(master_param_outs)
  optional : master_param, skip_update
  invoke : adam_impl(param, grad, learning_rate, moment1, moment2, beta1_pow, beta2_pow, master_param, skip_update, beta1, beta2, epsilon, lazy_mode, min_row_size_to_use_multithread, multi_precision, use_global_beta_pow)
-
api
:
adamax
args
:
(Tensor param, Tensor grad, Tensor learning_rate, Tensor moment, Tensor inf_norm, Tensor beta1_pow, float beta1, float beta2, float epsilon)
output
:
Tensor(param_out), Tensor(avg_squared_grad_out), Tensor(avg_squared_update_out)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录